diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh
index 14e6404d3de08..9ea15e7902f49 100755
--- a/ci/travis_script_manylinux.sh
+++ b/ci/travis_script_manylinux.sh
@@ -24,3 +24,21 @@ pushd python/manylinux1
 git clone ../../ arrow
 docker build -t arrow-base-x86_64 -f Dockerfile-x86_64 .
 docker run --shm-size=2g --rm -e PYARROW_PARALLEL=3 -v $PWD:/io arrow-base-x86_64 /io/build_arrow.sh
+
+# Testing for https://issues.apache.org/jira/browse/ARROW-2657
+# These tests cannot be run inside of the docker container, since TensorFlow
+# does not run on manylinux1
+
+source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
+
+source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
+
+PYTHON_VERSION=3.6
+CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION
+
+conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION
+source activate $CONDA_ENV_DIR
+
+pip install -q tensorflow
+pip install "dist/`ls dist/ | grep cp36`"
+python -c "import pyarrow; import tensorflow"
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 20254c2a84d98..dc045e6eab53f 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -44,6 +44,13 @@ def parse_version(root):
         __version__ = None
 
 
+import pyarrow.compat as compat
+
+
+# Workaround for https://issues.apache.org/jira/browse/ARROW-2657
+compat.import_tensorflow_extension()
+
+
 from pyarrow.lib import cpu_count, set_cpu_count
 from pyarrow.lib import (null, bool_, int8, int16, int32, int64,
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index 1b19ca0e4029b..1fcaf4c59c477 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -160,6 +160,58 @@ def encode_file_path(path):
     # will convert utf8 to utf16
     return encoded_path
 
+
+def import_tensorflow_extension():
+    """
+    Load the TensorFlow extension if it exists.
+
+    This is used to load the TensorFlow extension before
+    pyarrow.lib. If we don't do this there are symbol clashes
+    between TensorFlow's use of threading and our global
+    thread pool, see also
+    https://issues.apache.org/jira/browse/ARROW-2657 and
+    https://github.com/apache/arrow/pull/2096.
+
+    This is best effort: nothing happens if TensorFlow is not installed,
+    and a shared library that fails to load is skipped.
+    """
+    import os
+    import site
+    tensorflow_loaded = False
+
+    # Try to load the tensorflow extension directly
+    # This is a performance optimization, tensorflow will always be
+    # loaded via the "import tensorflow" statement below if this
+    # doesn't succeed.
+    try:
+        site_paths = site.getsitepackages() + [site.getusersitepackages()]
+    except AttributeError:
+        # Workaround for https://github.com/pypa/virtualenv/issues/228,
+        # this happens in some configurations of virtualenv
+        site_paths = [os.path.dirname(site.__file__) + '/site-packages']
+    for site_path in site_paths:
+        ext = os.path.join(site_path, "tensorflow",
+                           "libtensorflow_framework.so")
+        if os.path.exists(ext):
+            import ctypes
+            try:
+                ctypes.CDLL(ext)
+            except OSError:
+                # The library exists but cannot be loaded (for example
+                # because it is corrupt); keep searching and otherwise
+                # fall back to "import tensorflow" below
+                continue
+            tensorflow_loaded = True
+            break
+
+    # If the above failed, try to load tensorflow the normal way
+    # (this is more expensive)
+    if not tensorflow_loaded:
+        try:
+            import tensorflow
+        except ImportError:
+            pass
+
 
 integer_types = six.integer_types + (np.integer,)