Skip to content
This repository has been archived by the owner on Nov 15, 2021. It is now read-only.

Commit

Permalink
Merge with ray master
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanpantic committed Jun 18, 2019
2 parents d81c126 + 2bf92e0 commit 2e0eec9
Show file tree
Hide file tree
Showing 150 changed files with 4,846 additions and 2,076 deletions.
2 changes: 2 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
build --compilation_mode=opt
build --action_env=PATH
build --action_env=PYTHON_BIN_PATH
# This workaround is needed due to https://github.com/bazelbuild/bazel/issues/4341
build --per_file_copt="external/com_github_grpc_grpc/.*@-DGRPC_BAZEL_BUILD"
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
language: generic

dist: xenial


services:
- docker
Expand Down
43 changes: 41 additions & 2 deletions BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,12 +1,37 @@
# Bazel build
# C/C++ documentation: https://docs.bazel.build/versions/master/be/c-cpp.html

load("@com_github_grpc_grpc//bazel:grpc_build_system.bzl", "grpc_proto_library")
load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_cc_library")
load("@//bazel:ray.bzl", "flatbuffer_py_library")
load("@//bazel:cython_library.bzl", "pyx_library")

COPTS = ["-DRAY_USE_GLOG"]

# Node manager gRPC lib.
# Built from node_manager.proto with the grpc_proto_library macro that is
# loaded from gRPC's grpc_build_system.bzl at the top of this file. It is
# listed as a dependency of :node_manager_rpc_lib.
grpc_proto_library(
name = "node_manager_grpc_lib",
srcs = ["src/ray/protobuf/node_manager.proto"],
)

# Node manager server and client.
# Compiles every .cc/.h file directly under src/ray/rpc/ (the glob patterns do
# not descend into subdirectories) together with the library produced by
# :node_manager_grpc_lib.
cc_library(
name = "node_manager_rpc_lib",
srcs = glob([
"src/ray/rpc/*.cc",
]),
hdrs = glob([
"src/ray/rpc/*.h",
]),
# COPTS is the file-level flag list defined above (currently -DRAY_USE_GLOG).
copts = COPTS,
deps = [
":node_manager_grpc_lib",
":ray_common",
"@boost//:asio",
"@com_github_grpc_grpc//:grpc++",
],
)

cc_binary(
name = "raylet",
srcs = ["src/ray/raylet/main.cc"],
Expand Down Expand Up @@ -89,6 +114,7 @@ cc_library(
":gcs",
":gcs_fbs",
":node_manager_fbs",
":node_manager_rpc_lib",
":object_manager",
":ray_common",
":ray_util",
Expand All @@ -111,13 +137,18 @@ cc_library(
srcs = glob(
[
"src/ray/core_worker/*.cc",
"src/ray/core_worker/store_provider/*.cc",
"src/ray/core_worker/transport/*.cc",
],
exclude = [
"src/ray/core_worker/*_test.cc",
"src/ray/core_worker/mock_worker.cc",
],
),
hdrs = glob([
"src/ray/core_worker/*.h",
"src/ray/core_worker/store_provider/*.h",
"src/ray/core_worker/transport/*.h",
]),
copts = COPTS,
deps = [
Expand All @@ -127,7 +158,15 @@ cc_library(
],
)

# This test is run by src/ray/test/run_core_worker_tests.sh
# Standalone binary built from mock_worker.cc. That source file is explicitly
# excluded from the core_worker_lib glob, so it is compiled only here and
# linked against :core_worker_lib.
# NOTE(review): presumably launched by the core worker tests -- confirm
# against src/ray/test/run_core_worker_tests.sh.
cc_binary(
name = "mock_worker",
srcs = ["src/ray/core_worker/mock_worker.cc"],
copts = COPTS,
deps = [
":core_worker_lib",
],
)

cc_binary(
name = "core_worker_test",
srcs = ["src/ray/core_worker/core_worker_test.cc"],
Expand Down Expand Up @@ -535,7 +574,7 @@ flatbuffer_py_library(
"ErrorTableData.py",
"ErrorType.py",
"FunctionTableData.py",
"GcsTableEntry.py",
"GcsEntry.py",
"HeartbeatBatchTableData.py",
"HeartbeatTableData.py",
"Language.py",
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
.. image:: https://readthedocs.org/projects/ray/badge/?version=latest
:target: http://ray.readthedocs.io/en/latest/?badge=latest

.. image:: https://img.shields.io/badge/pypi-0.7.0-blue.svg
.. image:: https://img.shields.io/badge/pypi-0.7.1-blue.svg
:target: https://pypi.org/project/ray/

|
Expand Down
4 changes: 4 additions & 0 deletions bazel/ray_deps_build_all.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@ load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps")
load("@com_github_jupp0r_prometheus_cpp//:repositories.bzl", "prometheus_cpp_repositories")
load("@com_github_ray_project_ray//bazel:python_configure.bzl", "python_configure")
load("@com_github_checkstyle_java//:repo.bzl", "checkstyle_deps")
load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps")


# Calls, in order, the dependency-setup macros loaded at the top of this file.
# Takes no arguments and returns nothing; it only invokes those macros.
# NOTE(review): each macro presumably needs its defining repository (e.g.
# com_github_grpc_grpc for grpc_deps) to be declared before this file is
# loaded -- confirm against the WORKSPACE file.
def ray_deps_build_all():
gen_java_deps()
checkstyle_deps()
boost_deps()
prometheus_cpp_repositories()
# Registers the Python configuration under the name "local_config_python".
python_configure(name = "local_config_python")
# grpc_deps comes from @com_github_grpc_grpc//bazel:grpc_deps.bzl.
grpc_deps()

8 changes: 8 additions & 0 deletions bazel/ray_deps_setup.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,11 @@ def ray_deps_setup():
# `https://github.com/jupp0r/prometheus-cpp/pull/225` getting merged.
urls = ["https://github.com/jovany-wang/prometheus-cpp/archive/master.zip"],
)

# gRPC, pinned to a single upstream commit. The commit hash appears in both
# the archive URL and strip_prefix and must be changed in both places when
# the pin is updated.
# NOTE(review): no sha256 attribute is set, so the downloaded archive is not
# integrity-checked -- consider adding one.
http_archive(
name = "com_github_grpc_grpc",
urls = [
"https://github.com/grpc/grpc/archive/7741e806a213cba63c96234f16d712a8aa101a49.tar.gz",
],
strip_prefix = "grpc-7741e806a213cba63c96234f16d712a8aa101a49",
)
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pushd "$ROOT_DIR"

python -m pip install pytest-benchmark

pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev0-cp27-cp27mu-manylinux1_x86_64.whl
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev1-cp27-cp27mu-manylinux1_x86_64.whl
python -m pytest --benchmark-autosave --benchmark-min-rounds=10 --benchmark-columns="min, max, mean" $ROOT_DIR/../../../python/ray/tests/perf_integration_tests/test_perf_integration.py

pushd $ROOT_DIR/../../../python
Expand Down
10 changes: 10 additions & 0 deletions ci/jenkins_tests/run_rllib_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,16 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output python /ray/python/ray/rllib/examples/rollout_worker_custom_workflow.py

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output python /ray/python/ray/rllib/examples/eager_execution.py --iters=2

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output /ray/python/ray/rllib/train.py \
--env CartPole-v0 \
--run PPO \
--stop '{"training_iteration": 1}' \
--config '{"use_eager": true, "simple_optimizer": true}'

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_tf_policy.py --iters=2

Expand Down
6 changes: 3 additions & 3 deletions ci/stress_tests/application_cluster_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ provider:
# Availability zone(s), comma-separated, that nodes may be launched in.
# Nodes are currently spread between zones by a round-robin approach,
# however this implementation detail should not be relied upon.
availability_zone: us-west-2a,us-west-2b
availability_zone: us-west-2b

# How Ray will authenticate with newly launched nodes.
auth:
Expand Down Expand Up @@ -90,8 +90,8 @@ file_mounts: {
# List of shell commands to run to set up nodes.
setup_commands:
- echo 'export PATH="$HOME/anaconda3/envs/tensorflow_<<<PYTHON_VERSION>>>/bin:$PATH"' >> ~/.bashrc
- ray || wget https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev0-<<<WHEEL_STR>>>-manylinux1_x86_64.whl
- rllib || pip install -U ray-0.8.0.dev0-<<<WHEEL_STR>>>-manylinux1_x86_64.whl[rllib]
- ray || wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/<<<RAY_VERSION>>>/<<<RAY_COMMIT>>>/ray-<<<RAY_VERSION>>>-<<<WHEEL_STR>>>-manylinux1_x86_64.whl
- rllib || pip install -U ray-<<<RAY_VERSION>>>-<<<WHEEL_STR>>>-manylinux1_x86_64.whl[rllib]
- pip install tensorflow-gpu==1.12.0
- echo "sudo halt" | at now + 60 minutes
# Consider uncommenting these if you also want to run apt-get commands during setup
Expand Down
88 changes: 56 additions & 32 deletions ci/stress_tests/run_application_stress_tests.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
#!/usr/bin/env bash

# This script should be run as follows:
# ./run_application_stress_tests.sh <ray-version> <ray-commit>
# For example, <ray-version> might be 0.7.1
# and <ray-commit> might be bc3b6efdb6933d410563ee70f690855c05f25483. The commit
# should be the latest commit on the branch "releases/<ray-version>".

# This script runs all of the application tests.
# Currently includes an IMPALA stress test and an SGD stress test,
# each run on both Python 2.7 and 3.6.
Expand All @@ -10,26 +17,39 @@

# This script will exit with code 1 if the test did not run successfully.

# Show explicitly which commands are currently running. This should only be AFTER
# the private key is placed.
set -x

ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
RESULT_FILE=$ROOT_DIR/"results-$(date '+%Y-%m-%d_%H-%M-%S').log"

echo "Logging to" $RESULT_FILE
echo -e $RAY_AWS_SSH_KEY > /root/.ssh/ray-autoscaler_us-west-2.pem && chmod 400 /root/.ssh/ray-autoscaler_us-west-2.pem || true
touch "$RESULT_FILE"
echo "Logging to" "$RESULT_FILE"

if [[ -z "$1" ]]; then
echo "ERROR: The first argument must be the Ray version string."
exit 1
else
RAY_VERSION=$1
fi

# Show explicitly which commands are currently running. This should only be AFTER
# the private key is placed.
set -x
if [[ -z "$2" ]]; then
echo "ERROR: The second argument must be the commit hash to test."
exit 1
else
RAY_COMMIT=$2
fi

touch $RESULT_FILE
echo "Testing ray==$RAY_VERSION at commit $RAY_COMMIT."
echo "The wheels used will live under https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_COMMIT/"

# This function identifies the right string for the Ray wheel.
_find_wheel_str(){
local python_version=$1
# echo "PYTHON_VERSION", $python_version
local wheel_str=""
if [ $python_version == "p27" ]; then
if [ "$python_version" == "p27" ]; then
wheel_str="cp27-cp27mu"
else
wheel_str="cp36-cp36m"
Expand All @@ -41,7 +61,7 @@ _find_wheel_str(){
# Actual test runtime is roughly 10 minutes.
test_impala(){
local PYTHON_VERSION=$1
local WHEEL_STR=$(_find_wheel_str $PYTHON_VERSION)
local WHEEL_STR=$(_find_wheel_str "$PYTHON_VERSION")

pushd "$ROOT_DIR"
local TEST_NAME="rllib_impala_$PYTHON_VERSION"
Expand All @@ -50,32 +70,34 @@ test_impala(){

cat application_cluster_template.yaml |
sed -e "
s/<<<RAY_VERSION>>>/$RAY_VERSION/g;
s/<<<RAY_COMMIT>>>/$RAY_COMMIT/;
s/<<<CLUSTER_NAME>>>/$TEST_NAME/;
s/<<<HEAD_TYPE>>>/g3.16xlarge/;
s/<<<HEAD_TYPE>>>/p3.16xlarge/;
s/<<<WORKER_TYPE>>>/m5.24xlarge/;
s/<<<MIN_WORKERS>>>/5/;
s/<<<MAX_WORKERS>>>/5/;
s/<<<PYTHON_VERSION>>>/$PYTHON_VERSION/;
s/<<<WHEEL_STR>>>/$WHEEL_STR/;" > $CLUSTER
s/<<<WHEEL_STR>>>/$WHEEL_STR/;" > "$CLUSTER"

echo "Try running IMPALA stress test."
{
RLLIB_DIR=../../python/ray/rllib/
ray --logging-level=DEBUG up -y $CLUSTER &&
ray rsync_up $CLUSTER $RLLIB_DIR/tuned_examples/ tuned_examples/ &&
ray --logging-level=DEBUG up -y "$CLUSTER" &&
ray rsync_up "$CLUSTER" $RLLIB_DIR/tuned_examples/ tuned_examples/ &&
sleep 1 &&
ray --logging-level=DEBUG exec $CLUSTER "rllib || true" &&
ray --logging-level=DEBUG exec $CLUSTER "
ray --logging-level=DEBUG exec "$CLUSTER" "rllib || true" &&
ray --logging-level=DEBUG exec "$CLUSTER" "
rllib train -f tuned_examples/atari-impala-large.yaml --redis-address='localhost:6379' --queue-trials" &&
echo "PASS: IMPALA Test for" $PYTHON_VERSION >> $RESULT_FILE
} || echo "FAIL: IMPALA Test for" $PYTHON_VERSION >> $RESULT_FILE
echo "PASS: IMPALA Test for" "$PYTHON_VERSION" >> "$RESULT_FILE"
} || echo "FAIL: IMPALA Test for" "$PYTHON_VERSION" >> "$RESULT_FILE"

# Tear down cluster.
if [ "$DEBUG_MODE" = "" ]; then
ray down -y $CLUSTER
rm $CLUSTER
ray down -y "$CLUSTER"
rm "$CLUSTER"
else
echo "Not tearing down cluster" $CLUSTER
echo "Not tearing down cluster" "$CLUSTER"
fi
popd
}
Expand All @@ -93,32 +115,34 @@ test_sgd(){

cat application_cluster_template.yaml |
sed -e "
s/<<<RAY_VERSION>>>/$RAY_VERSION/g;
s/<<<RAY_COMMIT>>>/$RAY_COMMIT/;
s/<<<CLUSTER_NAME>>>/$TEST_NAME/;
s/<<<HEAD_TYPE>>>/g3.16xlarge/;
s/<<<WORKER_TYPE>>>/g3.16xlarge/;
s/<<<HEAD_TYPE>>>/p3.16xlarge/;
s/<<<WORKER_TYPE>>>/p3.16xlarge/;
s/<<<MIN_WORKERS>>>/3/;
s/<<<MAX_WORKERS>>>/3/;
s/<<<PYTHON_VERSION>>>/$PYTHON_VERSION/;
s/<<<WHEEL_STR>>>/$WHEEL_STR/;" > $CLUSTER
s/<<<WHEEL_STR>>>/$WHEEL_STR/;" > "$CLUSTER"

echo "Try running SGD stress test."
{
SGD_DIR=$ROOT_DIR/../../python/ray/experimental/sgd/
ray --logging-level=DEBUG up -y $CLUSTER &&
ray --logging-level=DEBUG up -y "$CLUSTER" &&
# TODO: fix submit so that args work
ray rsync_up $CLUSTER $SGD_DIR/mnist_example.py mnist_example.py &&
ray rsync_up "$CLUSTER" "$SGD_DIR/mnist_example.py" mnist_example.py &&
sleep 1 &&
ray --logging-level=DEBUG exec $CLUSTER "
ray --logging-level=DEBUG exec "$CLUSTER" "
python mnist_example.py --redis-address=localhost:6379 --num-iters=2000 --num-workers=8 --devices-per-worker=2 --gpu" &&
echo "PASS: SGD Test for" $PYTHON_VERSION >> $RESULT_FILE
} || echo "FAIL: SGD Test for" $PYTHON_VERSION >> $RESULT_FILE
echo "PASS: SGD Test for" "$PYTHON_VERSION" >> "$RESULT_FILE"
} || echo "FAIL: SGD Test for" "$PYTHON_VERSION" >> "$RESULT_FILE"

# Tear down cluster.
if [ "$DEBUG_MODE" = "" ]; then
ray down -y $CLUSTER
rm $CLUSTER
ray down -y "$CLUSTER"
rm "$CLUSTER"
else
echo "Not tearing down cluster" $CLUSTER
echo "Not tearing down cluster" "$CLUSTER"
fi
popd
}
Expand All @@ -130,6 +154,6 @@ do
test_sgd $PYTHON_VERSION
done

cat $RESULT_FILE
cat $RESULT_FILE | grep FAIL > test.log
cat "$RESULT_FILE"
cat "$RESULT_FILE" | grep FAIL > test.log
[ ! -s test.log ] || exit 1
Loading

0 comments on commit 2e0eec9

Please sign in to comment.