Skip to content

Commit

Permalink
Revert part of open-mpi/ompi@c1bbbb5 to restore the usock component, …
Browse files Browse the repository at this point in the history
…thus fixing show_help aggregation.

Fixes open-mpi#1467

Restore debugger attach operations

Fixes open-mpi#1225

(cherry picked from commit open-mpi/ompi@c146c49)

Fix the debugger attach - previous commit had fixed one instance of a check prior to sending the release message, but there was a second code path that included a similar check that was missed. Thanks to John DelSignore for spotting it!

(cherry picked from commit open-mpi/ompi@4a62377)

Very minor typo

(cherry picked from commit open-mpi/ompi@6e6bbfd)
  • Loading branch information
Ralph Castain committed Mar 23, 2016
1 parent 00bd6b8 commit 6e802cf
Show file tree
Hide file tree
Showing 21 changed files with 3,665 additions and 65 deletions.
2 changes: 1 addition & 1 deletion ompi/mca/rte/orte/rte_orte_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ void ompi_rte_wait_for_debugger(void)
debugger = 1;
}

if (!debugger) {
if (!debugger && NULL == getenv("ORTE_TEST_DEBUGGER_ATTACH")) {
/* if not, just return */
return;
}
Expand Down
75 changes: 72 additions & 3 deletions orte/mca/ess/base/ess_base_std_app.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,13 @@
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_progress_threads.h"

#include "orte/mca/rml/base/base.h"
#include "orte/mca/routed/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/dfs/base/base.h"
#include "orte/mca/grpcomm/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/odls/odls_types.h"
#include "orte/mca/filem/base/base.h"
#include "orte/mca/errmgr/base/base.h"
Expand Down Expand Up @@ -173,14 +177,73 @@ int orte_ess_base_app_setup(bool db_restrict_local)
}
OBJ_DESTRUCT(&kv);
}

/* Setup the communication infrastructure */
/*
* OOB Layer
*/
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_oob_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_oob_base_select";
goto error;
}
/* Runtime Messaging Layer */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_rml_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rml_base_select";
goto error;
}
/* setup the errmgr */
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_errmgr_base_select";
goto error;
}

/* Routed system */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_routed_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_routed_base_select";
goto error;
}
/*
* Group communications
*/
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_grpcomm_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_grpcomm_base_select";
goto error;
}
/* enable communication via the rml */
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
ORTE_ERROR_LOG(ret);
error = "orte_rml.enable_comm";
goto error;
}
/* setup the routed info */
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
ORTE_ERROR_LOG(ret);
error = "orte_routed.init_routes";
goto error;
}
/* open the distributed file system */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
Expand Down Expand Up @@ -216,7 +279,13 @@ int orte_ess_base_app_finalize(void)
(void) mca_base_framework_close(&orte_filem_base_framework);
(void) mca_base_framework_close(&orte_errmgr_base_framework);

/* now can close the rml and its friendly group comm */
(void) mca_base_framework_close(&orte_grpcomm_base_framework);
(void) mca_base_framework_close(&orte_dfs_base_framework);
(void) mca_base_framework_close(&orte_routed_base_framework);

(void) mca_base_framework_close(&orte_rml_base_framework);
(void) mca_base_framework_close(&orte_oob_base_framework);
(void) mca_base_framework_close(&orte_state_base_framework);

orte_session_dir_finalize(ORTE_PROC_MY_NAME);
Expand Down Expand Up @@ -270,7 +339,7 @@ void orte_ess_base_app_abort(int status, bool report)
* the message if routing is enabled as this indicates we
* have someone to send to
*/
if (report && orte_create_session_dirs) {
if (report && orte_routing_is_enabled && orte_create_session_dirs) {
myfile = opal_os_path(false, orte_process_info.proc_session_dir, "aborted", NULL);
fd = open(myfile, O_CREAT, S_IRUSR);
close(fd);
Expand Down
13 changes: 13 additions & 0 deletions orte/mca/ess/pmi/ess_pmi_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
#include "opal/mca/pmix/base/base.h"

#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "orte/util/name_fns.h"
Expand Down Expand Up @@ -83,6 +85,7 @@ static int rte_init(void)
char *envar, *ev1, *ev2;
uint64_t unique_key[2];
char *string_key;
char *rmluri;
opal_value_t *kv;
char *val;
int u32, *u32ptr;
Expand Down Expand Up @@ -358,6 +361,16 @@ static int rte_init(void)

/*** PUSH DATA FOR OTHERS TO FIND ***/

/* push our RML URI in case others need to talk directly to us */
rmluri = orte_rml.get_contact_info();
/* push it out for others to use */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
error = "pmix put uri";
goto error;
}
free(rmluri);

/* push our hostname so others can find us, if they need to */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
if (ORTE_SUCCESS != ret) {
Expand Down
56 changes: 56 additions & 0 deletions orte/mca/oob/usock/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012-2013 Los Alamos National Security, LLC.
# All rights reserved
# Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# Headers and C files that make up the usock OOB component. The same
# list is used for both the DSO and the static library targets below.
sources = \
oob_usock_component.h \
oob_usock.h \
oob_usock_component.c \
oob_usock_connection.h \
oob_usock_sendrecv.h \
oob_usock_hdr.h \
oob_usock_peer.h \
oob_usock_ping.h \
oob_usock.c \
oob_usock_connection.c \
oob_usock_sendrecv.c

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

# The MCA_BUILD_orte_oob_usock_DSO conditional picks which of the two
# library names is non-empty; the other variable is left empty.
if MCA_BUILD_orte_oob_usock_DSO
component_noinst =
component_install = mca_oob_usock.la
else
component_noinst = libmca_oob_usock.la
component_install =
endif

# DSO case: the component library is installed under $(ortelibdir).
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_oob_usock_la_SOURCES = $(sources)
mca_oob_usock_la_LDFLAGS = -module -avoid-version

# Static case: the library is built here but not installed (noinst).
noinst_LTLIBRARIES = $(component_noinst)
libmca_oob_usock_la_SOURCES = $(sources)
libmca_oob_usock_la_LDFLAGS = -module -avoid-version

42 changes: 42 additions & 0 deletions orte/mca/oob/usock/configure.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# MCA_orte_oob_usock_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Registers orte/mca/oob/usock/Makefile as a configure output file and
# probes for struct sockaddr_un. Runs action-if-found when the type is
# available and action-if-not-found otherwise.
AC_DEFUN([MCA_orte_oob_usock_CONFIG],[
AC_CONFIG_FILES([orte/mca/oob/usock/Makefile])

# check for sockaddr_un (a good sign we have Unix domain sockets)
AC_CHECK_TYPES([struct sockaddr_un],
[oob_usock_happy="yes"],
[oob_usock_happy="no"],
[AC_INCLUDES_DEFAULT
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_SYS_UN_H
#include <sys/un.h>
#endif])

AS_IF([test "$oob_usock_happy" = "yes"], [$1], [$2])
])dnl
70 changes: 70 additions & 0 deletions orte/mca/oob/usock/help-oob-usock.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# -*- text -*-
#
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2006 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
[static-and-dynamic]
Both static and dynamic port ranges were specified for the
out-of-band (OOB) communication subsystem:

Static ports: %s
Dynamic ports: %s

Only one can be specified. Please choose either static or
dynamic ports and try again.
#
[include-exclude]
Both TCP interface include and exclude lists were specified:

Include: %s
Exclude: %s

Only one of these can be given.
#
[not-parseable]
The specified network is not parseable. Since we cannot determine
your desired intent, we cannot establish a TCP socket for out-of-band
communications and will therefore abort. Please correct the network
specification and retry.
#
[no-included-found]
None of the networks specified to be included for out-of-band communications
could be found:

Value given: %s

Please revise the specification and try again.
#
[excluded-all]
The specified list of networks to be excluded for out-of-band communications
resulted in no networks being available:

Value given: %s

Please revise the specification and try again.
#
[no-interfaces-avail]
No network interfaces were found for out-of-band communications. We require
at least one available network for TCP-based messaging.
#
[invalid if_inexclude]
WARNING: An invalid value was given for oob_tcp_if_%s. This
value will be ignored.

Local host: %s
Value: %s
Message: %s
#
Loading

0 comments on commit 6e802cf

Please sign in to comment.