Skip to content

Commit

Permalink
add support for mxm 2.0
Browse files Browse the repository at this point in the history
This commit was SVN r27661.
  • Loading branch information
mike-dubman committed Dec 9, 2012
1 parent 4db2c69 commit a454341
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 4 deletions.
63 changes: 59 additions & 4 deletions ompi/mca/mtl/mxm/mtl_mxm.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,22 +84,45 @@ static uint32_t ompi_mtl_mxm_get_job_id(void)

int ompi_mtl_mxm_progress(void);

#if MXM_API < MXM_VERSION(2, 0)
/*
 * Query the local endpoint (ompi_mtl_mxm.ep) for its address on the given
 * PTL and store the result in ep_info->ptl_addr[ptlid].
 *
 * Returns OMPI_SUCCESS when mxm_ep_address() reports MXM_OK. Any other
 * status is reported through the "unable to extract endpoint address" help
 * message and mapped to OMPI_ERROR.
 *
 * NOTE(review): the address-argument line and the help-argument line each
 * appear twice below; this looks like old/new diff residue - confirm against
 * the repository before compiling.
 */
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
{
size_t addrlen;
mxm_error_t err;

/* Pass the capacity of the destination slot; presumably updated by
 * mxm_ep_address() with the actual length - TODO confirm. */
addrlen = sizeof(ep_info->ptl_addr[ptlid]);
err = mxm_ep_address(ompi_mtl_mxm.ep, ptlid,
(struct sockaddr *) &ep_info->ptl_addr[ptlid], &addrlen);
(struct sockaddr *) &ep_info->ptl_addr[ptlid], &addrlen);
if (MXM_OK != err) {
orte_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
true, (int)ptlid, mxm_error_string(err));
true, (int)ptlid, mxm_error_string(err));
return OMPI_ERROR;
}

return OMPI_SUCCESS;
}
#else
/*
 * Query the local endpoint (ompi_mtl_mxm.ep) for its address in the given
 * domain and store it in ep_info->dest_addr[domain].
 *
 * On MXM_OK the domain's bit is set in ep_info->domain_bitmap.
 * MXM_ERR_UNREACHABLE is not treated as a failure: the bit is simply left
 * untouched and OMPI_SUCCESS is returned. Any other status is reported via
 * the help system and mapped to OMPI_ERROR.
 */
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info,
                                       mxm_domain_id_t domain)
{
    size_t slot_len = sizeof(ep_info->dest_addr[domain]);
    struct sockaddr *slot = (struct sockaddr *) &ep_info->dest_addr[domain];
    mxm_error_t status = mxm_ep_address(ompi_mtl_mxm.ep, domain, slot, &slot_len);

    /* Anything other than "ok" or "unreachable" is a hard failure. */
    if (MXM_OK != status && MXM_ERR_UNREACHABLE != status) {
        orte_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
                       true, (int)domain, mxm_error_string(status));
        return OMPI_ERROR;
    }

    /* Only mark the domain as available when an address was obtained. */
    if (MXM_OK == status) {
        ep_info->domain_bitmap |= MXM_BIT(domain);
    }

    return OMPI_SUCCESS;
}
#endif

/* Larger of a and b. Each argument appears twice in the expansion, so do not
 * pass expressions with side effects. */
#define max(a,b) ((a)>(b)?(a):(b))

Expand Down Expand Up @@ -159,6 +182,17 @@ static mxm_error_t ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_
return err;
}

#if MXM_API >= MXM_VERSION(2,0)
/*
 * Fill conn_req->addr[domain] from a peer's connection info.
 *
 * If the peer's domain_bitmap has the bit for this domain set, the slot
 * points at ep_info->dest_addr[domain]; otherwise it is set to NULL.
 */
static void ompi_mtl_mxm_set_conn_req(mxm_conn_req_t *conn_req, ompi_mtl_mxm_ep_conn_info_t *ep_info,
                                      mxm_domain_id_t domain)
{
    int has_addr = (0 != (ep_info->domain_bitmap & MXM_BIT(domain)));

    conn_req->addr[domain] = has_addr
        ? (struct sockaddr *)&(ep_info->dest_addr[domain])
        : NULL;
}
#endif

int ompi_mtl_mxm_module_init(void)
{
Expand All @@ -168,7 +202,7 @@ int ompi_mtl_mxm_module_init(void)
uint32_t jobid;
uint64_t mxlr;
ompi_proc_t *mp, **procs;
unsigned ptl_bitmap;
unsigned ptl_bitmap;
size_t totps, proc;
int lr, nlps;

Expand Down Expand Up @@ -210,8 +244,10 @@ int ompi_mtl_mxm_module_init(void)
/* Setup the endpoint options and local addresses to bind to. */
#if MXM_API < MXM_VERSION(1,5)
ptl_bitmap = ompi_mtl_mxm.mxm_opts.ptl_bitmap;
#else
#elif MXM_API < MXM_VERSION(2,0)
ptl_bitmap = ompi_mtl_mxm.mxm_opts->ptl_bitmap;
#else
ptl_bitmap = 0;
#endif

/* Open MXM endpoint */
Expand All @@ -227,6 +263,7 @@ int ompi_mtl_mxm_module_init(void)
/*
* Get address for each PTL on this endpoint, and share it with other ranks.
*/
#if MXM_API < MXM_VERSION(2,0)
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) &&
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
return OMPI_ERROR;
Expand All @@ -239,6 +276,18 @@ int ompi_mtl_mxm_module_init(void)
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) {
return OMPI_ERROR;
}
#else
ep_info.domain_bitmap = 0;
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SELF)) {
return OMPI_ERROR;
}
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SHM)) {
return OMPI_ERROR;
}
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_IB)) {
return OMPI_ERROR;
}
#endif

/*
* Send information using modex (in some cases there is a limit on data size, for example with ess/pmi)
Expand Down Expand Up @@ -350,9 +399,15 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
free(modex_name);
}

#if MXM_API < MXM_VERSION(2,0)
conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
conn_reqs[i].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
#else
ompi_mtl_mxm_set_conn_req(&conn_reqs[i], &ep_info[i], MXM_DOMAIN_SELF);
ompi_mtl_mxm_set_conn_req(&conn_reqs[i], &ep_info[i], MXM_DOMAIN_SHM);
ompi_mtl_mxm_set_conn_req(&conn_reqs[i], &ep_info[i], MXM_DOMAIN_IB);
#endif
}

/* Connect to remote peers */
Expand Down
5 changes: 5 additions & 0 deletions ompi/mca/mtl/mxm/mtl_mxm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ typedef struct mca_mtl_mxm_module_t {


/*
 * Per-endpoint address record used to build connection requests to peers.
 * The set of fields depends on the MXM API version selected at compile time.
 */
typedef struct ompi_mtl_mxm_ep_conn_info_t {
#if MXM_API < MXM_VERSION(2,0)
/* One address slot per PTL, indexed by PTL id. */
struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
#else
/* Bit MXM_BIT(d) is set when dest_addr[d] holds an address for domain d. */
unsigned domain_bitmap;
/* One address slot per domain, indexed by domain id. */
struct sockaddr_storage dest_addr[MXM_DOMAIN_LAST];
#endif
} ompi_mtl_mxm_ep_conn_info_t;

extern mca_mtl_mxm_module_t ompi_mtl_mxm;
Expand Down

0 comments on commit a454341

Please sign in to comment.