Skip to content

Commit 10c033c

Browse files
committed
Add external rcache validation to memory domains
1 parent 2a29ef5 commit 10c033c

File tree

4 files changed

+110
-6
lines changed

4 files changed

+110
-6
lines changed

src/uct/cuda/gdr_copy/gdr_copy_md.c

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,55 @@ static void uct_gdr_copy_rcache_mem_dereg_cb(void *context, ucs_rcache_t *rcache
340340
(void)uct_gdr_copy_mem_dereg_internal(&md->super, &region->memh);
341341
}
342342

343+
static ucs_status_t
344+
uct_gdr_copy_rcache_mem_reg_ext_validate_cb(void *context, ucs_rcache_t *rcache,
345+
void *arg, ucs_rcache_region_t *rregion,
346+
uint16_t rcache_mem_reg_flags)
347+
{
348+
ucs_status_t status;
349+
uct_gdr_copy_rcache_region_t *region;
350+
351+
status = uct_gdr_copy_rcache_mem_reg_cb(context, rcache, arg, rregion,
352+
rcache_mem_reg_flags);
353+
if (status != UCS_OK) {
354+
return status;
355+
}
356+
357+
region = ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t);
358+
return uct_mem_attr_query((void*)region->super.super.start,
359+
region->super.super.end - region->super.super.start,
360+
&region->mem_attr);
361+
}
362+
363+
static void
364+
uct_gdr_copy_rcache_mem_dereg_ext_validate_cb(void *context, ucs_rcache_t *rcache,
365+
ucs_rcache_region_t *rregion)
366+
{
367+
uct_gdr_copy_rcache_region_t *region;
368+
region = ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t);
369+
uct_mem_attr_destroy(region->mem_attr);
370+
uct_gdr_copy_rcache_mem_dereg_cb(context, rcache, rregion);
371+
}
372+
373+
static int
374+
uct_gdr_copy_rcache_mem_ext_validate_cb(void *context, ucs_rcache_t *rcache,
375+
ucs_rcache_region_t *rregion)
376+
{
377+
int ret;
378+
ucs_status_t status;
379+
uct_mem_attr_h mem_attr;
380+
uct_gdr_copy_rcache_region_t *region;
381+
382+
region = ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t);
383+
status = uct_mem_attr_query((void*)region->super.super.start,
384+
region->super.super.end - region->super.super.start,
385+
&mem_attr);
386+
387+
ret = status != UCS_OK ? 0 : !uct_mem_attr_cmp(mem_attr, region->mem_attr);
388+
uct_mem_attr_destroy(mem_attr);
389+
return ret;
390+
}
391+
343392
static void uct_gdr_copy_rcache_dump_region_cb(void *context, ucs_rcache_t *rcache,
344393
ucs_rcache_region_t *rregion, char *buf,
345394
size_t max)
@@ -352,9 +401,12 @@ static void uct_gdr_copy_rcache_dump_region_cb(void *context, ucs_rcache_t *rcac
352401
}
353402

354403
/*
 * rcache operations table (ucs_rcache_ops_t) for gdr_copy regions. The
 * *_ext_validate entries point at the callbacks that additionally record,
 * compare and release the region's mem_attr.
 */
static ucs_rcache_ops_t uct_gdr_copy_rcache_ops = {
355-
.mem_reg = uct_gdr_copy_rcache_mem_reg_cb,
356-
.mem_dereg = uct_gdr_copy_rcache_mem_dereg_cb,
357-
.dump_region = uct_gdr_copy_rcache_dump_region_cb
404+
.mem_reg = uct_gdr_copy_rcache_mem_reg_cb,
405+
.mem_dereg = uct_gdr_copy_rcache_mem_dereg_cb,
406+
.mem_reg_ext_validate = uct_gdr_copy_rcache_mem_reg_ext_validate_cb,
407+
.mem_dereg_ext_validate = uct_gdr_copy_rcache_mem_dereg_ext_validate_cb,
408+
.mem_ext_validate = uct_gdr_copy_rcache_mem_ext_validate_cb,
409+
.dump_region = uct_gdr_copy_rcache_dump_region_cb
358410
};
359411

360412
static ucs_status_t

src/uct/cuda/gdr_copy/gdr_copy_md.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ typedef struct uct_gdr_copy_key {
6666
/**
 * gdr_copy region stored in the registration cache.
 */
typedef struct uct_gdr_copy_rcache_region {
6767
ucs_rcache_region_t super;
6868
uct_gdr_copy_mem_t memh; /**< mr exposed to the user as the memh */
69+
uct_mem_attr_h mem_attr; /**< memory attributes queried for the region's
                              address range at registration; compared with
                              a fresh query when the region is validated */
6970
} uct_gdr_copy_rcache_region_t;
7071

7172
#endif

src/uct/ib/base/ib_md.c

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,6 +1076,53 @@ static void uct_ib_rcache_mem_dereg_cb(void *context, ucs_rcache_t *rcache,
10761076
(void)uct_ib_memh_dereg(md, &region->memh);
10771077
}
10781078

1079+
static ucs_status_t
1080+
uct_ib_rcache_mem_reg_ext_validate_cb(void *context, ucs_rcache_t *rcache,
1081+
void *arg, ucs_rcache_region_t *rregion,
1082+
uint16_t rcache_mem_reg_flags)
1083+
{
1084+
ucs_status_t status;
1085+
uct_ib_rcache_region_t *region;
1086+
1087+
status = uct_ib_rcache_mem_reg_cb(context, rcache, arg, rregion,
1088+
rcache_mem_reg_flags);
1089+
if (status != UCS_OK) {
1090+
return status;
1091+
}
1092+
1093+
region = ucs_derived_of(rregion, uct_ib_rcache_region_t);
1094+
return uct_mem_attr_query((void*)region->super.super.start,
1095+
region->super.super.end - region->super.super.start,
1096+
&region->mem_attr);
1097+
}
1098+
1099+
static void
1100+
uct_ib_rcache_mem_dereg_ext_validate_cb(void *context, ucs_rcache_t *rcache,
1101+
ucs_rcache_region_t *rregion)
1102+
{
1103+
uct_ib_rcache_region_t *region = ucs_derived_of(rregion, uct_ib_rcache_region_t);
1104+
uct_mem_attr_destroy(region->mem_attr);
1105+
uct_ib_rcache_mem_dereg_cb(context, rcache, rregion);
1106+
}
1107+
1108+
static int uct_ib_rcache_mem_ext_validate_cb(void *context, ucs_rcache_t *rcache,
1109+
ucs_rcache_region_t *rregion)
1110+
{
1111+
int ret;
1112+
ucs_status_t status;
1113+
uct_mem_attr_h mem_attr;
1114+
uct_ib_rcache_region_t *region;
1115+
1116+
region = ucs_derived_of(rregion, uct_ib_rcache_region_t);
1117+
status = uct_mem_attr_query((void*)region->super.super.start,
1118+
region->super.super.end - region->super.super.start,
1119+
&mem_attr);
1120+
1121+
ret = status != UCS_OK ? 0 : !uct_mem_attr_cmp(mem_attr, region->mem_attr);
1122+
uct_mem_attr_destroy(mem_attr);
1123+
return ret;
1124+
}
1125+
10791126
static void uct_ib_rcache_dump_region_cb(void *context, ucs_rcache_t *rcache,
10801127
ucs_rcache_region_t *rregion, char *buf,
10811128
size_t max)
@@ -1091,9 +1138,12 @@ static void uct_ib_rcache_dump_region_cb(void *context, ucs_rcache_t *rcache,
10911138
}
10921139

10931140
/*
 * rcache operations table (ucs_rcache_ops_t) for IB regions. The
 * *_ext_validate entries point at the callbacks that additionally record,
 * compare and release the region's mem_attr.
 */
static ucs_rcache_ops_t uct_ib_rcache_ops = {
1094-
.mem_reg = uct_ib_rcache_mem_reg_cb,
1095-
.mem_dereg = uct_ib_rcache_mem_dereg_cb,
1096-
.dump_region = uct_ib_rcache_dump_region_cb
1141+
.mem_reg = uct_ib_rcache_mem_reg_cb,
1142+
.mem_dereg = uct_ib_rcache_mem_dereg_cb,
1143+
.mem_reg_ext_validate = uct_ib_rcache_mem_reg_ext_validate_cb,
1144+
.mem_dereg_ext_validate = uct_ib_rcache_mem_dereg_ext_validate_cb,
1145+
.mem_ext_validate = uct_ib_rcache_mem_ext_validate_cb,
1146+
.dump_region = uct_ib_rcache_dump_region_cb
10971147
};
10981148

10991149
static ucs_status_t uct_ib_md_odp_query(uct_md_h uct_md, uct_md_attr_t *md_attr)

src/uct/ib/base/ib_md.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ typedef struct uct_ib_md_ops {
339339
/**
 * IB region stored in the registration cache.
 */
typedef struct uct_ib_rcache_region {
340340
ucs_rcache_region_t super;
341341
uct_ib_mem_t memh; /**< mr exposed to the user as the memh */
342+
uct_mem_attr_h mem_attr; /**< memory attributes queried for the region's
                              address range at registration; compared with
                              a fresh query when the region is validated */
342343
} uct_ib_rcache_region_t;
343344

344345

0 commit comments

Comments
 (0)