From 4a43f0d748b860888e40c76bf10a8c320d15725d Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Thu, 3 Sep 2015 08:42:14 +0800 Subject: [PATCH 01/15] merging the code, not compiled --- dmlc-core | 2 +- include/mxnet/io.h | 16 ++++++++ src/common/utils.h | 5 +++ src/io/inst_vector.h | 92 ++++++++++++++++++++------------------------ src/io/io.cc | 9 ++++- src/io/iter_mnist.cc | 4 +- 6 files changed, 73 insertions(+), 55 deletions(-) diff --git a/dmlc-core b/dmlc-core index db6ec995f148..7d3c78428819 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit db6ec995f148e1922da40fc53d23ed4fb583056f +Subproject commit 7d3c78428819dc84c4da8ae1f302ba6c6a235a5d diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 47a59eec54fe..5a8267befc1c 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -109,5 +109,21 @@ struct DataIteratorReg } \ DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ .set_body(__create__ ## DataIteratorType ## __) +/*! + * \brief Macro to register chained Iterators + * + * \code + * // example of registering a imagerec iterator + * MXNET_REGISTER_IO_CHAINED_ITERATOR(ImageRec, ImageRecordIter, BatchIter) + * .describe("batched image record data iterator"); + * + * \endcode + */ +#define MXNET_REGISTER_IO_CHAINED_ITER(name, ChainedDataIterType, HoldingDataIterType) \ + static ::mxnet::IIterator* __create__ ## ChainedDataIteratorType ## __() { \ + return new HoldingDataIteratorType(new ChainedDataIterType); \ + } \ + DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ + .set_body(__create__ ## ChainedDataIteratorType ## __) } // namespace mxnet #endif // MXNET_IO_H_ diff --git a/src/common/utils.h b/src/common/utils.h index cf1fd2f1bb36..f7a2dcce0470 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -22,6 +22,11 @@ namespace common { */ typedef std::mt19937 RANDOM_ENGINE; +// Get a double float, prnd is the pointer to a Random Engine +#define NextDouble(prnd) std::generate_canonical(*prnd) + +#define NextUInt32(range, prnd) static_cast(\ + floor(std::generate_canonical(*prnd) * range)) /*! * \brief Helper functions. */ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h index 1ae734631680..9490ceab94c1 100644 --- a/src/io/inst_vector.h +++ b/src/io/inst_vector.h @@ -1,17 +1,19 @@ /*! - * Copyright (c) 2015 by Contributors - * \inst_vector.h + * \file inst_vector.h * \brief holder of a sequence of DataInst in CPU * that are not necessarily of same shape */ -#ifndef MXNET_IO_INST_VECTOR_H_ -#define MXNET_IO_INST_VECTOR_H_ + +#ifndef MXNET_INST_VECTOR_H_ +#define MXNET_INST_VECTOR_H_ + +#include "./data.h" +#include #include #include -#include -#include -#include "./data.h" + namespace mxnet { +namespace io { /*! * \brief tensor vector that can store sequence of tensor * in a memory compact way, tensors do not have to be of same shape @@ -28,7 +30,7 @@ class TensorVector { CHECK(i + 1 < offset_.size()); CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); return mshadow::Tensor - (reinterpret_cast(BeginPtr(content_)) + offset_[i], shape_[i]); + ((DType*)BeginPtr(content_) + offset_[i], shape_[i]); } inline mshadow::Tensor Back() const { return (*this)[Size() - 1]; @@ -49,7 +51,6 @@ class TensorVector { content_.clear(); shape_.clear(); } - private: // offset of the data content std::vector offset_; @@ -59,59 +60,48 @@ class TensorVector { std::vector > shape_; }; -/*! - * \brief tblob vector that can store sequence of tblob - * in a memory compact way, tblobs do not have to be of same shape - */ -template -class TBlobVector { - public: - TBlobVector(void) { - this->Clear(); - } - // get i-th tblob - inline TBlob operator[](size_t i) const; - // get the last tblob - inline TBlob Back(); - // return the size of the vector - inline size_t Size(void) const; - // push a tensor of certain shape - // return the reference of the pushed tensor - inline void Push(TShape shape_); - inline void Clear(void); - private: - // offset of the data content - std::vector offset_; - // data content - std::vector content_; - // shape of data - std::vector shape_; -}; - /*! * \brief instance vector that can holds * non-uniform shape data instance in a shape efficient way */ class InstVector { - public: + public: inline size_t Size(void) const { return index_.size(); } // instance - inline DataInst operator[](size_t i) const; + inline DataInst operator[](size_t i) const { + DataInst inst; + inst.index = index_[i]; + inst.data = data_[i]; + inst.label = label_[i]; + return inst; + } // get back of instance vector - inline DataInst Back() const; - // clear the container - inline void Clear(void); - // push the newly coming instance - inline void Push(unsigned index, TBlob data_); - - private: + inline DataInst Back() const { + return (*this)[Size() - 1]; + } + inline void Clear(void) { + index_.clear(); + data_.Clear(); + label_.Clear(); + } + inline void Push(unsigned index, + mshadow::Shape<3> dshape, + mshadow::Shape<1> lshape) { + index_.push_back(index); + data_.Push(dshape); + label_.Push(lshape); + } + + private: /*! \brief index of the data */ std::vector index_; + // label + TensorVector<3, real_t> data_; // data - std::vector > data_; - // extra data - std::vector extra_data_; + TensorVector<1, real_t> label_; }; -#endif // MXNET_IO_INST_VECTOR_H_ +} // namespace io +} // namespace mxnet +#endif // MXNET_TENSOR_VECTOR_H_ diff --git a/src/io/io.cc b/src/io/io.cc index bd5b78dda643..9095f4089c92 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -4,7 +4,14 @@ #include #include +#include +#include <> +#include +// Registers namespace dmlc { DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); -} // namespace dmlc +// Register parameters in header files +DMLC_REGISTER_PARAMETER(BatchParam); +DMLC_REGISTER_PARAMETER(ImageAugmenterParam); +} // namespace dmlc \ No newline at end of file diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 93195061b278..77ac3a479f75 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -31,7 +31,7 @@ struct MNISTParam : public dmlc::Parameter { bool flat; /*! \brief random seed */ int seed; - // declare parameters in header file + // declare parameters DMLC_DECLARE_PARAMETER(MNISTParam) { DMLC_DECLARE_FIELD(image).set_default("./train-images-idx3-ubyte") .describe("Mnist image path."); @@ -155,7 +155,7 @@ class MNISTIter: public IIterator { delete stdlabel; } inline void Shuffle(void) { - std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param_.seed)); + std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); std::vector tmplabel(labels_.size()); mshadow::TensorContainer tmpimg(img_.shape_); for (size_t i = 0; i < inst_.size(); ++i) { From 2251812d5dd693a608453c3b8faa2a2962568c28 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Thu, 3 Sep 2015 08:42:50 +0800 Subject: [PATCH 02/15] add image rec and associate files in --- src/io/image_augmenter.h | 262 ++++++++++++++++++++++++ src/io/image_recordio.h | 75 +++++++ src/io/iter_batch.h | 162 +++++++++++++++ src/io/iter_image_recordio.cc | 369 ++++++++++++++++++++++++++++++++++ src/utils/decoder.h | 128 ++++++++++++ src/utils/io.h | 175 ++++++++++++++++ src/utils/thread_buffer.h | 205 +++++++++++++++++++ 7 files changed, 1376 insertions(+) create mode 100644 src/io/image_augmenter.h create mode 100644 src/io/image_recordio.h create mode 100644 src/io/iter_batch.h create mode 100644 src/io/iter_image_recordio.cc create mode 100644 src/utils/decoder.h create mode 100644 src/utils/io.h create mode 100644 src/utils/thread_buffer.h diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h new file mode 100644 index 000000000000..d33464c4a889 --- /dev/null +++ b/src/io/image_augmenter.h @@ -0,0 +1,262 @@ +/*! + * \file image_augmenter_opencv.hpp + * \brief threaded version of page iterator + * \author Naiyan Wang, Tianqi Chen + */ +#ifndef MXNET_IO_IMAGE_AUGMENTER_H_ +#define MXNET_IO_IMAGE_AUGMENTER_H_ + +#include +#include "../common/utils.h" + +namespace mxnet { +namespace io { +/*! \brief image augmentation parameters*/ +struct ImageAugmentParam : public dmlc::Parameter { + /*! \brief whether we do random cropping */ + bool rand_crop_; + /*! \brief whether we do nonrandom croping */ + int crop_y_start_; + /*! \brief whether we do nonrandom croping */ + int crop_x_start_; + /*! \brief Indicate the max ratation angle for augmentation, we will random rotate */ + /*! \brief [-max_rotate_angle, max_rotate_angle] */ + int max_rotate_angle_; + /*! \brief max aspect ratio */ + float max_aspect_ratio_; + /*! \brief random shear the image [-max_shear_ratio, max_shear_ratio] */ + float max_shear_ratio_; + /*! \brief max crop size */ + int max_crop_size_; + /*! \brief min crop size */ + int min_crop_size_; + /*! \brief max scale ratio */ + float max_random_scale_; + /*! \brief min scale_ratio */ + float min_random_scale_; + /*! \brief min image size */ + float min_img_size_; + /*! \brief max image size */ + float max_img_size_; + /*! \brief whether to mirror the image */ + bool mirror_; + /*! \brief rotate angle */ + int rotate_; + /*! \brief filled color while padding */ + int fill_value_; + // declare parameters + // TODO: didn't understand the range for some params + DMLC_DECLARE_PARAMETER(ImageAugmentParam) { + DMLC_DECLARE_FIELD(rand_crop_).set_default(true) + .describe("Whether we de random cropping"); + DMLC_DECLARE_FIELD(crop_y_start_).set_default(-1) + .describe("Where to nonrandom crop on y"); + DMLC_DECLARE_FIELD(crop_x_start_).set_default(-1) + .describe("Where to nonrandom crop on x"); + DMLC_DECLARE_FIELD(max_rotate_angle_).set_default(0.0f) + .describe("Rotate can be [-max_rotate_angle, max_rotate_angle]"); + DMLC_DECLARE_FIELD(max_aspect_ratio_).set_default(0.0f) + .describe("Max aspect ratio"); + DMLC_DECLARE_FIELD(max_shear_ratio_).set_default(0.0f) + .describe("Shear rotate can be made between [-max_shear_ratio_, max_shear_ratio_]"); + DMLC_DECLARE_FIELD(max_crop_size_).set_default(-1) + .describe("Maximum crop size"); + DMLC_DECLARE_FIELD(min_crop_size_).set_default(-1) + .describe("Minimum crop size"); + DMLC_DECLARE_FIELD(max_random_scale_).set_default(1.0f) + .describe("Maxmum scale ratio"); + DMLC_DECLARE_FIELD(min_random_scale_).set_default(1.0f) + .describe("Minimum scale ratio"); + DMLC_DECLARE_FIELD(max_img_size_).set_default(1e10f) + .describe("Maxmum image size"); + DMLC_DECLARE_FIELD(min_img_size_).set_default(0.0f) + .describe("Minimum image size"); + DMLC_DECLARE_FIELD(mirror_).set_default(false) + .describe("Whether to mirror the image"); + DMLC_DECLARE_FIELD(rotate_).set_default(-1.0f) + .describe("Rotate angle"); + DMLC_DECLARE_FIELD(fill_value_).set_default(255) + .describe("Filled value while padding"); +}; + +/*! \brief helper class to do image augmentation */ +class ImageAugmenter { + public: + // contructor + ImageAugmenter(void) + : tmpres(false), + rotateM(2, 3, CV_32F) { + } + virtual ~ImageAugmenter() { + } + // TODO: Hack the shape and rotate list, didn't use param + virtual void Init(const std::vector >& kwargs) { + std::vector > kwargs_left; + kwargs_left = param_.InitAllowUnknown(kwargs); + for (size_t i = 0; i < kwargs_left.size(); i++) { + if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { + CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[0], &shape_[1], &shape_[2]) == 3) + << "input_shape must be three consecutive integers without space example: 1,1,200 "; + } + if (!strcmp(kwargs_left[i].first.c_str(), "rotate_list")) { + char* val = kwargs_left[i].second.c_str(); + const char *end = val + strlen(val); + char buf[128]; + while (val < end) { + sscanf(val, "%[^,]", buf); + val += strlen(buf) + 1; + rotate_list_.push_back(atoi(buf)); + } + } + } + } + /*! + * \brief augment src image, store result into dst + * this function is not thread safe, and will only be called by one thread + * however, it will tries to re-use memory space as much as possible + * \param src the source image + * \param source of random number + * \param dst the pointer to the place where we want to store the result + */ + virtual cv::Mat Process(const cv::Mat &src, + common::RANDOM_ENGINE *prnd) { + // shear + float s = common::NextDouble(prnd) * param_.max_shear_ratio_ * 2 - param_.max_shear_ratio_; + // rotate + int angle = common::NextUInt32(param_.max_rotate_angle_ * 2, prnd) - param_.max_rotate_angle_; + if (param_.rotate_ > 0) angle = param_.rotate_; + if (rotate_list_.size() > 0) { + angle = rotate_list_[NextUInt32(rotate_list_.size() - 1, prnd)]; + } + float a = cos(angle / 180.0 * M_PI); + float b = sin(angle / 180.0 * M_PI); + // scale + float scale = NextDouble(prnd) * (param_.max_random_scale_ - param_.min_random_scale_) + param_.min_random_scale_; + // aspect ratio + float ratio = NextDouble(prnd) * param_.max_aspect_ratio_ * 2 - param_.max_aspect_ratio_ + 1; + float hs = 2 * scale / (1 + ratio); + float ws = ratio * hs; + // new width and height + float new_width = std::max(param_.min_img_size_, std::min(param_.max_img_size_, scale * src.cols)); + float new_height = std::max(param_.min_img_size_, std::min(param_.max_img_size_, scale * src.rows)); + //printf("%f %f %f %f %f %f %f %f %f\n", s, a, b, scale, ratio, hs, ws, new_width, new_height); + cv::Mat M(2, 3, CV_32F); + M.at(0, 0) = hs * a - s * b * ws; + M.at(1, 0) = -b * ws; + M.at(0, 1) = hs * b + s * a * ws; + M.at(1, 1) = a * ws; + float ori_center_width = M.at(0, 0) * src.cols + M.at(0, 1) * src.rows; + float ori_center_height = M.at(1, 0) * src.cols + M.at(1, 1) * src.rows; + M.at(0, 2) = (new_width - ori_center_width) / 2; + M.at(1, 2) = (new_height - ori_center_height) / 2; + cv::warpAffine(src, temp, M, cv::Size(new_width, new_height), + cv::INTER_LINEAR, + cv::BORDER_CONSTANT, + cv::Scalar(param_.fill_value_, param_.fill_value_, param_.fill_value_)); + cv::Mat res = temp; + if (param_.max_crop_size_ != -1 || param_.min_crop_size_ != -1){ + CHECK(res.cols >= param_.max_crop_size_ && res.rows >= param_.max_crop_size_&& param_.max_crop_size_ >= param_.min_crop_size_) + << "input image size smaller than max_crop_size"; + mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size_- param_.min_crop_size_+1, prnd)+ param_.min_crop_size_; + mshadow::index_t y = res.rows - rand_crop_size; + mshadow::index_t x = res.cols - rand_crop_size; + if (rand_crop_ != 0) { + y = NextUInt32(y + 1, prnd); + x = NextUInt32(x + 1, prnd); + } + else { + y /= 2; x /= 2; + } + cv::Rect roi(x, y, rand_crop_size, rand_crop_size); + cv::resize(res(roi), res, cv::Size(shape_[1], shape_[2])); + } + else{ + utils::Check(static_cast(res.cols) >= shape_[1] && static_cast(res.rows) >= shape_[2], + "input image size smaller than input shape"); + mshadow::index_t y = res.rows - shape_[2]; + mshadow::index_t x = res.cols - shape_[1]; + if (param_.rand_crop_ != 0) { + y = NextUInt32(y + 1, prnd); + x = NextUInt32(x + 1, prnd); + } + else { + y /= 2; x /= 2; + } + cv::Rect roi(x, y, shape_[1], shape_[2]); + res = res(roi); + } + return res; + } + /*! + * \brief augment src image, store result into dst + * this function is not thread safe, and will only be called by one thread + * however, it will tries to re-use memory space as much as possible + * \param src the source image + * \param source of random number + * \param dst the pointer to the place where we want to store the result + */ + virtual mshadow::Tensor Process(mshadow::Tensor data, + common::RANDOM_ENGINE *prnd) { + if (!NeedProcess()) return data; + cv::Mat res(data.size(1), data.size(2), CV_8UC3); + for (index_t i = 0; i < data.size(1); ++i) { + for (index_t j = 0; j < data.size(2); ++j) { + res.at(i, j)[0] = data[2][i][j]; + res.at(i, j)[1] = data[1][i][j]; + res.at(i, j)[2] = data[0][i][j]; + } + } + res = this->Process(res, prnd); + tmpres.Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < tmpres.size(1); ++i) { + for (index_t j = 0; j < tmpres.size(2); ++j) { + cv::Vec3b bgr = res.at(i, j); + tmpres[0][i][j] = bgr[2]; + tmpres[1][i][j] = bgr[1]; + tmpres[2][i][j] = bgr[0]; + } + } + return tmpres; + } + + virtual void Process(unsigned char *dptr, size_t sz, + mshadow::TensorContainer *p_data, + common::RANDOM_ENGINE *prnd) { + cv::Mat buf(1, sz, CV_8U, dptr); + cv::Mat res = cv::imdecode(buf, 1); + res = this->Process(res, prnd); + p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < p_data->size(1); ++i) { + for (index_t j = 0; j < p_data->size(2); ++j) { + cv::Vec3b bgr = res.at(i, j); + (*p_data)[0][i][j] = bgr[2]; + (*p_data)[1][i][j] = bgr[1]; + (*p_data)[2][i][j] = bgr[0]; + } + } + res.release(); + } + + private: + // whether skip processing + inline bool NeedProcess(void) const { + if (max_rotate_angle_ > 0 || max_shear_ratio_ > 0.0f + || rotate_ > 0 || rotate_list_.size() > 0) return true; + if (min_crop_size_ > 0 && max_crop_size_ > 0) return true; + return false; + } + // temp input space + mshadow::TensorContainer tmpres; + // temporal space + cv::Mat temp0, temp, temp2; + // rotation param + cv::Mat rotateM; + // parameters + /*! \brief input shape */ + mshadow::Shape<4> shape_; + /*! \brief list of possible rotate angle */ + std::vector rotate_list_; +}; +} // namespace io +} // namespace cxxnet +#endif diff --git a/src/io/image_recordio.h b/src/io/image_recordio.h new file mode 100644 index 000000000000..4aea8aabcb47 --- /dev/null +++ b/src/io/image_recordio.h @@ -0,0 +1,75 @@ +/*! + * \file image_recordio.h + * \brief image recordio struct + */ +#ifndef MXNET_IO_IMAGE_RECORDIO_H_ +#define MXNET_IO_IMAGE_RECORDIO_H_ + +#include +#include + +namespace mxnet { +namespace io { +/*! \brief image recordio struct */ +struct ImageRecordIO { + /*! \brief header in image recordio */ + struct Header { + /*! + * \brief flag of the header, + * used for future extension purposes + */ + uint32_t flag; + /*! + * \brief label field that returns label of images + * when image list was not presented, + * + * NOTE: user do not need to repack recordio just to + * change label field, just supply a list file that + * maps image id to new labels + */ + float label; + /*! + * \brief unique image index + * image_id[1] is always set to 0, + * reserved for future purposes for 128bit id + * image_id[0] is used to store image id + */ + uint64_t image_id[2]; + }; + /*! \brief header of image recordio */ + Header header; + /*! \brief pointer to data content */ + uint8_t *content; + /*! \brief size of the content */ + size_t content_size; + /*! \brief constructor */ + ImageRecordIO(void) + : content(NULL), content_size(0) { + memset(&header, 0, sizeof(header)); + } + /*! \brief get image id from record */ + inline uint64_t image_index(void) const { + return header.image_id[0]; + } + /*! + * \brief load header from a record content + * \param buf the head of record + * \param size the size of the entire record + */ + inline void Load(void *buf, size_t size) { + CHECK(size >= sizeof(header)); + std::memcpy(&header, buf, sizeof(header)); + content = reinterpret_cast(buf) + sizeof(header); + content_size = size - sizeof(header); + } + /*! + * \brief save the record header + */ + inline void SaveHeader(std::string *blob) const { + blob->resize(sizeof(header)); + std::memcpy(dmlc::BeginPtr(*blob), &header, sizeof(header)); + } +}; +} // namespace io +} // namespace mxnet +#endif // MXNET_IO_IMAGE_RECORDIO_H_ diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h new file mode 100644 index 000000000000..a0e4ab7e7ba5 --- /dev/null +++ b/src/io/iter_batch.h @@ -0,0 +1,162 @@ +/*! + * \file iter_batch_proc-inl.hpp + * \brief definition of preprocessing iterators that takes an iterator and do some preprocessing + * \author Tianqi Chen + */ +#ifndef MXNET_IO_ITER_BATCH_H_ +#define MXNET_IO_ITER_BATCH_H_ + +#include +#include +#include +#include + +namespace mxnet { +namespace io { +// Batch parameters +struct BatchParam : public dmlc::Parameter { + /*! \brief label width */ + index_t batch_size_; + /*! \brief label width */ + index_t label_width_; + /*! \brief use round roubin to handle overflow batch */ + bool round_batch_; + /*! \brief skip read */ + bool test_skipread_; + /*! \brief silent */ + bool silent_; + // declare parameters + DMLC_DECLARE_PARAMETER(BatchParam) { + DMLC_DECLARE_FIELD(batch_size_).set_default(1) + .describe("Batch size."); + DMLC_DECLARE_FIELD(label_width_).set_default(1) + .describe("Label width."); + DMLC_DECLARE_FIELD(round_batch_).set_default(false) + .describe("Use round robin to handle overflow batch."); + DMLC_DECLARE_FIELD(test_skipread_).set_default(false) + .describe("Skip read for testing."); + DMLC_DECLARE_FIELD(silent_).set_default(false) + .describe("Whether to print batch information.") + } +}; + +/*! \brief create a batch iterator from single instance iterator */ +class BatchAdaptIter: public IIterator { +public: + BatchAdaptIter(IIterator *base): base_(base) { + num_overflow_ = 0; + } + virtual ~BatchAdaptIter(void) { + delete base_; + out_.FreeSpaceDense(); + } + virtual void Init(const std::vector >& kwargs) { + std::vector > kwargs_left; + // init batch param, it could have similar param with + kwargs_left = param_.InitAllowUnknown(kwargs); + for (size_t i = 0; i < kwargs_left.size(); i++) { + if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { + CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[1], &shape_[2], &shape_[3]) == 3) + << "input_shape must be three consecutive integers without space example: 1,1,200 ") + } + } + // init base iterator + base_->Init(kwargs); + mshadow::Shape<4> tshape = shape_; + tshape[0] = param_.batch_size_; + AllocSpaceDense(false); + } + virtual void BeforeFirst(void) { + if (param_.round_batch_ == 0 || num_overflow_ == 0) { + // otherise, we already called before first + base_->BeforeFirst(); + } else { + num_overflow_ = 0; + } + head_ = 1; + } + virtual bool Next(void) { + out_.num_batch_padd = 0; + + // skip read if in head version + if (param_.test_skipread_ != 0 && head_ == 0) return true; + else this->head_ = 0; + + // if overflow from previous round, directly return false, until before first is called + if (num_overflow_ != 0) return false; + index_t top = 0; + + while (base_->Next()) { + const DataInst& d = base_->Value(); + mshadow::Copy(label[top], d.data[1].get()); + out_.inst_index[top] = d.index; + mshadow::Copy(data[top], d.data[0].get()); + + if (++ top >= param_.batch_size_) { + out.data[0] = TBlob(data); + out.data[1] = TBlob(label); + return true; + } + } + if (top != 0) { + if (param_.round_batch_ != 0) { + num_overflow_ = 0; + base_->BeforeFirst(); + for (; top < param_.batch_size_; ++top, ++num_overflow_) { + CHECK(base_->Next()) << "number of input must be bigger than batch size"; + const DataInst& d = base_->Value(); + mshadow::Copy(label[top], d.data[1].get()); + out_.inst_index[top] = d.index; + mshadow::Copy(data[top], d.data[0].get()); + } + out_.num_batch_padd = num_overflow_; + } else { + out_.num_batch_padd = batch_size_ - top; + } + out.data[0] = TBlob(data); + out.data[1] = TBlob(label); + return true; + } + return false; + } + virtual const DataBatch &Value(void) const { + CHECK(head_ == 0) << "must call Next to get value"; + return out_; + } +private: + /*! \brief base iterator */ + IIterator *base_; + /*! \brief input shape */ + mshadow::Shape<4> shape_; + /*! \brief output data */ + DataBatch out_; + /*! \brief on first */ + int head_; + /*! \brief number of overflow instances that readed in round_batch mode */ + int num_overflow_; + /*! \brief label information of the data*/ + mshadow::Tensor label; + /*! \brief content of dense data, if this DataBatch is dense */ + mshadow::Tensor data; + // Functions that allocate and free tensor space + inline void AllocSpaceDense(bool pad = false) { + data = mshadow::NewTensor(shape_, 0.0f, pad); + mshadow::Shape<2> lshape = mshadow::Shape2(batch_size, label_width); + label = mshadow::NewTensor(lshape, 0.0f, pad); + out_.inst_index = new unsigned[batch_size]; + out_.batch_size = batch_size; + out_.data.resize(2); + } + /*! \brief auxiliary function to free space, if needed, dense only */ + inline void FreeSpaceDense(void) { + if (label.dptr_ != NULL) { + delete [] inst_index; + mshadow::FreeSpace(&label); + mshadow::FreeSpace(&data); + label.dptr_ = NULL; + } + } +}; // class BatchAdaptIter +} // namespace io +} // namespace cxxnet +#endif // MXNET_IO_ITER_BATCH_H_ \ No newline at end of file diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc new file mode 100644 index 000000000000..2ab1aa8958cb --- /dev/null +++ b/src/io/iter_image_recordio.cc @@ -0,0 +1,369 @@ +/*! + * \file iter_image_recordio-inl.hpp + * \brief recordio data +iterator + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "./inst_vector.h" +#include "./image_recordio.h" +#include "./image_augmenter.h" +#include "../utils/decoder.h" +namespace mxnet { +namespace io { +/*! \brief data structure to hold labels for images */ +class ImageLabelMap { + public: + /*! + * \brief initialize the label list into memory + * \param path_imglist path to the image list + * \param label_width predefined label_width + */ + explicit ImageLabelMap(const char *path_imglist, + mshadow::index_t label_width, + bool silent) { + label_width_ = label_width; + image_index_.clear(); + label_.clear(); + idx2label_.clear(); + dmlc::InputSplit *fi = dmlc::InputSplit::Create + (path_imglist, 0, 1, "text"); + dmlc::InputSplit::Blob rec; + while (fi->NextRecord(&rec)) { + // quick manual parsing + char *p = reinterpret_cast(rec.dptr); + char *end = p + rec.size; + // skip space + while (isspace(*p) && p != end) ++p; + image_index_.push_back(static_cast(atol(p))); + for (size_t i = 0; i < label_width_; ++i) { + // skip till space + while (!isspace(*p) && p != end) ++p; + // skip space + while (isspace(*p) && p != end) ++p; + CHECK(p != end) << "Bad ImageList format"; + label_.push_back(static_cast(atof(p))); + } + } + delete fi; + // be careful not to resize label_ afterwards + idx2label_.reserve(image_index_.size()); + for (size_t i = 0; i < image_index_.size(); ++i) { + idx2label_[image_index_[i]] = BeginPtr(label_) + i * label_width_; + } + if (!silent) { + LOG(INFO) << "Loaded ImageList from " << path_imglist << ' ' + << image_index_.size() << " Image records"; + } + } + /*! \brief find a label for corresponding index */ + inline mshadow::Tensor Find(size_t imid) const { + std::unordered_map::const_iterator it + = idx2label_.find(imid); + CHECK(it != idx2label_.end()) << "fail to find imagelabel for id " << imid; + return mshadow::Tensor(it->second, mshadow::Shape1(label_width_)); + } + + private: + // label with_ + mshadow::index_t label_width_; + // image index of each record + std::vector image_index_; + // real label content + std::vector label_; + // map index to label + std::unordered_map idx2label_; +}; + +// Define image record parser parameters +struct ImageRecParserParam : public dmlc::Parameter { + /*! \brief path to image list */ + std::string path_imglist_; + /*! \brief path to image recordio */ + std::string path_imgrec_; + /*! \brief number of threads */ + int nthread_; + /*! \brief whether to remain silent */ + bool silent_; + /*! \brief number of distributed worker */ + int dist_num_worker_, dist_worker_rank_; + /*! \brief label-width */ + int label_width_; + // declare parameters + DMLC_DECLARE_PARAMETER(ImageRecParserParam) { + DMLC_DECLARE_FIELD(path_imglist_).set_default("") + .describe("Path to image list."); + DMLC_DECLARE_FIELD(path_imagrec_).set_default("./data/imgrec.rec") + .describe("Path to image record file."); + DMLC_DECLARE_FIELD(nthread_).set_lower_bound(1).set_default(4) + .describe("Number of thread to do parsing."); + DMLC_DECLARE_FIELD(label_width_).set_lower_bound(1).set_default(1) + .describe("How many labels for an image."); + DMLC_DECLARE_FIELD(silent_).set_default(false) + .describe("Whether to output parser information."); + DMLC_DECLARE_FIELD(dist_num_worker_).set_lower_bound(1).set_default(1) + .describe("Dist worker number."); + DMLC_DECLARE_FIELD(dist_worker_rank_).set_default(0) + .describe("Dist worker rank."); + } +}; + +// parser to parse image recordio +class ImageRecordIOParser { + public: + ImageRecordIOParser(void) + : source_(NULL), + label_map_(NULL) { + } + ~ImageRecordIOParser(void) { + // can be NULL + delete label_map_; + delete source_; + for (size_t i = 0; i < augmenters_.size(); ++i) { + delete augmenters_[i]; + } + for (size_t i = 0; i < prnds_.size(); ++i) { + delete prnds_[i]; + } + } + // initialize the parser + inline void Init(const std::vector >& kwargs); + + // set record to the head + inline void BeforeFirst(void) { + return source_->BeforeFirst(); + } + // parse next set of records, return an array of + // instance vector to the user + inline bool ParseNext(std::vector *out); + private: + // magic nyumber to see prng + static const int kRandMagic = 111; + /*! \brief parameters */ + ImageRecParserParam param_; + /*! \brief augmenters */ + std::vector augmenters_; + /*! \brief random samplers */ + std::vector prnds_; + /*! \brief data source */ + dmlc::InputSplit *source_; + /*! \brief label information, if any */ + ImageLabelMap *label_map_; +}; + +inline void ImageRecordIOParser::Init(const std::vector >& kwargs) { + // initialize parameter + std::vector > kwargs_left; + // init image rec param + kwargs_left = param_.InitAllowUnknown(kwargs); + int maxthread, threadget; + #pragma omp parallel + { + maxthread = std::max(omp_get_num_procs() / 2 - 1, 1); + } + param_.nthread_ = std::min(maxthread, param_.nthread_); + #pragma omp parallel num_threads(param_.nthread_) + { + threadget = omp_get_num_threads(); + } + param_.nthread_ = threadget; + // setup decoders + for (int i = 0; i < threadget; ++i) { + augmenters_.push_back(new ImageAugmenter()); + augmenters_[i].init(kwargs_left); + prnds_.push_back(new common::RANDOM_ENGINE((i + 1) * kRandMagic)); + } + + // handling for hadoop + // TODO, hack + const char *ps_rank = getenv("PS_RANK"); + if (ps_rank != NULL) { + param_.dist_worker_rank = atoi(ps_rank); + } + + if (param_.path_imglist_.length() != 0) { + label_map_ = new ImageLabelMap(param_.path_imglist_.c_str(), + param_.label_width_, silent_ != 0); + } else { + param_.label_width_ = 1; + } + CHECK(path_imgrec_.length() != 0) + << "ImageRecordIOIterator: must specify image_rec"; +#if MSHADOW_DIST_PS + // TODO move to a better place + param_.dist_num_worker_ = ::ps::RankSize(); + param_.dist_worker_rank_ = ::ps::MyRank(); + LOG(INFO) << "rank " << param_.dist_worker_rank_ + << " in " << param_.dist_num_worker_; +#endif + source_ = dmlc::InputSplit::Create + (param_.path_imgrec_.c_str(), param_.dist_worker_rank_, + param_.dist_num_worker_, "recordio"); + // use 64 MB chunk when possible + source_->HintChunkSize(8 << 20UL); +} + +inline bool ImageRecordIOParser:: +ParseNext(std::vector *out_vec) { + CHECK(source_ != NULL); + dmlc::InputSplit::Blob chunk; + if (!source_->NextChunk(&chunk)) return false; + out_vec->resize(param_.nthread_); + #pragma omp parallel num_threads(param_.nthread_) + { + CHECK(omp_get_num_threads() == param_.nthread_); + int tid = omp_get_thread_num(); + dmlc::RecordIOChunkReader reader(chunk, tid, parser_.nthread_); + mxnet::ImageRecordIO rec; + dmlc::InputSplit::Blob blob; + // image data + InstVector &out = (*out_vec)[tid]; + out.Clear(); + while (reader.NextRecord(&blob)) { + // result holder + cv::Mat res; + rec.Load(blob.dptr, blob.size); + cv::Mat buf(1, rec.content_size, CV_8U, rec.content); + res = cv::imdecode(buf, 1); + res = augmenters_[tid]->Process(res, prnds_[tid]); + out.Push(static_cast(rec.image_index()), + mshadow::Shape3(3, res.rows, res.cols), + mshadow::Shape1(param_.label_width_)); + DataInst inst = out.Back(); + for (int i = 0; i < res.rows; ++i) { + for (int j = 0; j < res.cols; ++j) { + cv::Vec3b bgr = res.at(i, j); + inst.data[0][i][j] = bgr[2]; + inst.data[1][i][j] = bgr[1]; + inst.data[2][i][j] = bgr[0]; + } + } + if (label_map_ != NULL) { + mshadow::Copy(inst.label, label_map_->Find(rec.image_index())); + } else { + inst.label[0] = rec.header.label; + } + res.release(); + } + } + return true; +} + +// Define image record parameters +struct ImageRecordParam: public dmlc::Parameter { + /*! \brief whether to do shuffle */ + bool shuffle; + /*! \brief random seed */ + int seed; + // declare parameters + DMLC_DECLARE_PARAMETER(ImageRecordParam) { + DMLC_DECLARE_FIELD(shuffle).set_default(true) + .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(seed).set_default(0) + .describe("Random Seed."); + } +}; + +// iterator on image recordio +class ImageRecordIter : public IIterator { + public: + ImageRecordIter() + : data_(NULL) { + } + virtual ~ImageRecordIter(void) { + iter_.Destroy(); + // data can be NULL + delete data_; + } + virtual void Init(const std::vector >& kwargs) { + std::vector > kwargs_left; + // init image rec param + kwargs_left = param_.InitAllowUnknown(kwargs); + // use the left kwarg to init parser + parser_.Init(kwargs_left); + // init thread iter + iter_.set_max_capacity(4); + iter_.Init([this](std::vector **dptr) { + if (*dptr == NULL) { + *dptr = new std::vector(); + } + return parser_.ParseNext(*dptr); + }, + [this]() { parser_.BeforeFirst(); }); + inst_ptr_ = 0; + } + virtual void BeforeFirst(void) { + iter_.BeforeFirst(); + inst_order_.clear(); + inst_ptr_ = 0; + } + virtual bool Next(void) { + while (true) { + if (inst_ptr_ < inst_order_.size()) { + std::pair p = inst_order_[inst_ptr_]; + out_ = (*data_)[p.first][p.second]; + ++inst_ptr_; + return true; + } else { + if (data_ != NULL) iter_.Recycle(&data_); + if (!iter_.Next(&data_)) return false; + inst_order_.clear(); + for (unsigned i = 0; i < data_->size(); ++i) { + const InstVector &tmp = (*data_)[i]; + for (unsigned j = 0; j < tmp.Size(); ++j) { + inst_order_.push_back(std::make_pair(i, j)); + } + } + // shuffle instance order if needed + if (shuffle_ != 0) { + std::shuffle(inst_order_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); + } + inst_ptr_ = 0; + } + } + return false; + } + virtual const DataInst &Value(void) const { + return out_; + } + + private: + // random magic + static const int kRandMagic = 111; + // output instance + DataInst out_; + // whether shuffle data + int shuffle_; + // data ptr + size_t inst_ptr_; + // internal instance order + std::vector > inst_order_; + // data + std::vector *data_; + // internal parser + ImageRecordIOParser parser_; + // backend thread + dmlc::ThreadedIter > iter_; + // parameters + ImageRecParserParam param_; +}; +DMLC_REGISTER_PARAMETER(ImageRecParserParam); +DMLC_REGISTER_PARAMETER(ImageRecordParam); +MXNET_REGISTER_IO_ITER(MNISTIter, MNISTIter) +MXNET_REGISTER_IO_CHAINED_ITER(ImageRecordIter, ImageRecordIter, BatchAdaptIter) + .describe("Create iterator for dataset packed in recordio.") + .add_arguments(ImageRecordParam::__FIELDS__()) + .add_arguments(ImageRecParserParam::__FIELDS__()) + .add_arguments(BatchParam::__FIELDS__()) + .add_arguments(ImageAugmenterParam::__FIELDS__()); +} // namespace io +} // namespace mxnet +#endif // ITER_IMAGE_RECORDIO_INL_HPP_ diff --git a/src/utils/decoder.h b/src/utils/decoder.h new file mode 100644 index 000000000000..17203392cc60 --- /dev/null +++ b/src/utils/decoder.h @@ -0,0 +1,128 @@ +#ifndef MXNET_UTILS_DECODER_H_ +#define MXNET_UTILS_DECODER_H_ + +#include +#if MXNET_USE_OPENCV_DECODER == 0 + #include + #include + #include +#endif +#include +#include +#if MXNET_USE_OPENCV + #include +#endif + +namespace cxxnet { +namespace utils { + +#if MXNET_USE_OPENCV_DECODER == 0 +struct JpegDecoder { +public: + JpegDecoder(void) { + cinfo.err = jpeg_std_error(&jerr.base); + jerr.base.error_exit = jerror_exit; + jerr.base.output_message = joutput_message; + jpeg_create_decompress(&cinfo); + } + // destructor + ~JpegDecoder(void) { + jpeg_destroy_decompress(&cinfo); + } + + inline void Decode(unsigned char *ptr, size_t sz, + mshadow::TensorContainer *p_data) { + if(setjmp(jerr.jmp)) { + jpeg_destroy_decompress(&cinfo); + dmlc::Error("Libjpeg fail to decode"); + } + this->jpeg_mem_src(&cinfo, ptr, sz); + CHECK(jpeg_read_header(&cinfo, TRUE) == JPEG_HEADER_OK) << "libjpeg: failed to decode"; + CHECK(jpeg_start_decompress(&cinfo) == true) << "libjpeg: failed to decode"; + p_data->Resize(mshadow::Shape3(cinfo.output_height, cinfo.output_width, cinfo.output_components)); + JSAMPROW jptr = &((*p_data)[0][0][0]); + while (cinfo.output_scanline < cinfo.output_height) { + CHECK(jpeg_read_scanlines(&cinfo, &jptr, 1) == true) << "libjpeg: failed to decode"; + jptr += cinfo.output_width * cinfo.output_components; + } + CHECK(jpeg_finish_decompress(&cinfo) == true) << "libjpeg: failed to decode"); + } +private: + struct jerror_mgr { + jpeg_error_mgr base; + jmp_buf jmp; + }; + + METHODDEF(void) jerror_exit(j_common_ptr jinfo) { + jerror_mgr* err = (jerror_mgr*)jinfo->err; + longjmp(err->jmp, 1); + } + + METHODDEF(void) joutput_message(j_common_ptr) {} + + static boolean mem_fill_input_buffer_ (j_decompress_ptr cinfo) { + dmlc::Error("JpegDecoder: bad jpeg image"); + return true; + } + + static void mem_skip_input_data_ (j_decompress_ptr cinfo, long num_bytes_) { + jpeg_source_mgr *src = cinfo->src; + size_t num_bytes = static_cast(num_bytes_); + if (num_bytes > 0) { + src->next_input_byte += num_bytes; + CHECK(src->bytes_in_buffer >= num_bytes) << "fail to decode"; + src->bytes_in_buffer -= num_bytes; + } else { + dmlc::Error("JpegDecoder: bad jpeg image"); + + } + } + + static void mem_term_source_ (j_decompress_ptr cinfo) {} + static void mem_init_source_ (j_decompress_ptr cinfo) {} + static boolean jpeg_resync_to_restart_(j_decompress_ptr cinfo, int desired) { + dmlc::Error("JpegDecoder: bad jpeg image"); + return true; + } + void jpeg_mem_src (j_decompress_ptr cinfo, void* buffer, long nbytes) { + src.init_source = mem_init_source_; + src.fill_input_buffer = mem_fill_input_buffer_; + src.skip_input_data = mem_skip_input_data_; + src.resync_to_restart = jpeg_resync_to_restart_; + src.term_source = mem_term_source_; + src.bytes_in_buffer = nbytes; + src.next_input_byte = static_cast(buffer); + cinfo->src = &src; + } + +private: + jpeg_decompress_struct cinfo; + jpeg_source_mgr src; + jerror_mgr jerr; +}; +#endif + +#if MXNET_USE_OPENCV +struct OpenCVDecoder { + void Decode(unsigned char *ptr, size_t sz, mshadow::TensorContainer *p_data) { + cv::Mat buf(1, sz, CV_8U, ptr); + cv::Mat res = cv::imdecode(buf, 1); + CHECK(res.data != NULL) << "decoding fail"; + p_data->Resize(mshadow::Shape3(res.rows, res.cols, 3)); + for (int y = 0; y < res.rows; ++y) { + for (int x = 0; x < res.cols; ++x) { + cv::Vec3b bgr = res.at(y, x); + // store in RGB order + (*p_data)[y][x][2] = bgr[0]; + (*p_data)[y][x][1] = bgr[1]; + (*p_data)[y][x][0] = bgr[2]; + } + } + res.release(); + } +}; +#endif +} // namespace utils +} // namespace mxnet + +#endif // DECODER_H diff --git a/src/utils/io.h b/src/utils/io.h new file mode 100644 index 000000000000..3781ce98b012 --- /dev/null +++ b/src/utils/io.h @@ -0,0 +1,175 @@ +#ifndef CXXNET_UTILS_IO_H_ +#define CXXNET_UTILS_IO_H_ +/*! + * \file io.h + * \brief definition of abstract stream interface for IO + * \author Bing Xu Tianqi Chen + */ +#include "./utils.h" +#include +#include +#include +#include + +namespace cxxnet { +namespace utils { +typedef dmlc::Stream IStream; +typedef dmlc::SeekStream ISeekStream; + +/*! \brief a in memory buffer that can be read and write as stream interface */ +struct MemoryBufferStream : public ISeekStream { + public: + MemoryBufferStream(std::string *p_buffer) + : p_buffer_(p_buffer) { + curr_ptr_ = 0; + } + virtual ~MemoryBufferStream(void) {} + virtual size_t Read(void *ptr, size_t size) { + CHECK(curr_ptr_ <= p_buffer_->length()) + << " read can not have position excceed buffer length"; + size_t nread = std::min(p_buffer_->length() - curr_ptr_, size); + if (nread != 0) memcpy(ptr, &(*p_buffer_)[0] + curr_ptr_, nread); + curr_ptr_ += nread; + return nread; + } + virtual void Write(const void *ptr, size_t size) { + if (size == 0) return; + if (curr_ptr_ + size > p_buffer_->length()) { + p_buffer_->resize(curr_ptr_+size); + } + memcpy(&(*p_buffer_)[0] + curr_ptr_, ptr, size); + curr_ptr_ += size; + } + virtual void Seek(size_t pos) { + curr_ptr_ = static_cast(pos); + } + virtual size_t Tell(void) { + return curr_ptr_; + } + + private: + /*! \brief in memory buffer */ + std::string *p_buffer_; + /*! \brief current pointer */ + size_t curr_ptr_; +}; // class MemoryBufferStream + +/*! \brief implementation of file i/o stream */ +class StdFile: public ISeekStream { + public: + /*! \brief constructor */ + StdFile(const char *fname, const char *mode) { + Open(fname, mode); + } + StdFile() {} + virtual ~StdFile(void) { + this->Close(); + } + virtual void Open(const char *fname, const char *mode) { + fp_ = utils::FopenCheck(fname, mode); + fseek(fp_, 0L, SEEK_END); + sz_ = ftell(fp_); + fseek(fp_, 0L, SEEK_SET); + } + virtual size_t Read(void *ptr, size_t size) { + return fread(ptr, size, 1, fp_); + } + virtual void Write(const void *ptr, size_t size) { + fwrite(ptr, size, 1, fp_); + } + virtual void Seek(size_t pos) { + fseek(fp_, pos, SEEK_SET); + } + virtual size_t Tell(void) { + return static_cast(ftell(fp_)); + } + inline void Close(void) { + if (fp_ != NULL){ + fclose(fp_); fp_ = NULL; + } + } + inline size_t Size() { + return sz_; + } + private: + FILE *fp_; + size_t sz_; +}; // class StdFile + +/*! \brief Basic page class */ +class BinaryPage { + public: + /*! \brief page size 64 MB */ + static const size_t kPageSize = 64 << 18; + public: + /*! \brief memory data object */ + struct Obj{ + /*! \brief pointer to the data*/ + void *dptr; + /*! \brief size */ + size_t sz; + Obj(void * dptr, size_t sz) : dptr(dptr), sz(sz){} + }; + public: + /*! \brief constructor of page */ + BinaryPage(void) { + data_ = new int[kPageSize]; + utils::Check(data_ != NULL, "fail to allocate page, out of space"); + this->Clear(); + }; + ~BinaryPage() { + if (data_) delete [] data_; + } + /*! + * \brief load one page form instream + * \return true if loading is successful + */ + inline bool Load(utils::IStream &fi) { + return fi.Read(&data_[0], sizeof(int)*kPageSize) !=0; + } + /*! \brief save one page into outstream */ + inline void Save(utils::IStream &fo) { + fo.Write(&data_[0], sizeof(int)*kPageSize); + } + /*! \return number of elements */ + inline int Size(void){ + return data_[0]; + } + /*! \brief Push one binary object into page + * \param fname file name of obj need to be pushed into + * \return false or true to push into + */ + inline bool Push(const Obj &dat) { + if(this->FreeBytes() < dat.sz + sizeof(int)) return false; + data_[ Size() + 2 ] = data_[ Size() + 1 ] + dat.sz; + memcpy(this->offset(data_[ Size() + 2 ]), dat.dptr, dat.sz); + ++ data_[0]; + return true; + } + /*! \brief Clear the page */ + inline void Clear(void) { + memset(&data_[0], 0, sizeof(int) * kPageSize); + } + /*! + * \brief Get one binary object from page + * \param r r th obj in the page + */ + inline Obj operator[](int r) { + CHECK(r < Size()); + return Obj(this->offset(data_[ r + 2 ]), data_[ r + 2 ] - data_[ r + 1 ]); + } + private: + /*! \return number of elements */ + inline size_t FreeBytes(void) { + return (kPageSize - (Size() + 2)) * sizeof(int) - data_[ Size() + 1 ]; + } + inline void* offset(int pos) { + return (char*)(&data_[0]) + (kPageSize*sizeof(int) - pos); + } + private: + //int data_[ kPageSize ]; + int *data_; +}; // class BinaryPage +} // namespace utils +} // namespace cxxnet +#endif diff --git a/src/utils/thread_buffer.h b/src/utils/thread_buffer.h new file mode 100644 index 000000000000..7df1ae17aa56 --- /dev/null +++ b/src/utils/thread_buffer.h @@ -0,0 +1,205 @@ +#ifndef CXXNET_UTILS_THREAD_BUFFER_H_ +#define CXXNET_UTILS_THREAD_BUFFER_H_ +/*! + * \file thread_buffer.h + * \brief multi-thread buffer, iterator, can be used to create parallel pipeline + * \author Tianqi Chen + */ +#include +#include +#include +#include "./utils.h" +#include "./thread.h" +namespace cxxnet { +namespace utils { +/*! + * \brief buffered loading iterator that uses multithread + * this template method will assume the following paramters + * \tparam Elem elememt type to be buffered + * \tparam ElemFactory factory type to implement in order to use thread buffer + */ +template +class ThreadBuffer { + public: + /*!\brief constructor */ + ThreadBuffer(void) { + this->init_end = false; + this->buf_size = 30; + } + ~ThreadBuffer(void) { + if(init_end) this->Destroy(); + } + /*!\brief set parameter, will also pass the parameter to factory */ + inline void SetParam(const char *name, const char *val) { + if (!strcmp( name, "buffer_size")) buf_size = atoi(val); + factory.SetParam(name, val); + } + /*! + * \brief initalize the buffered iterator + * \param param a initialize parameter that will pass to factory, ignore it if not necessary + * \return false if the initlization can't be done, e.g. buffer file hasn't been created + */ + inline bool Init(void) { + if (!factory.Init()) return false; + bufA.reserve(buf_size); + bufB.reserve(buf_size); + for (int i = 0; i < buf_size; ++i) { + bufA.push_back(factory.Create()); + bufB.push_back(factory.Create()); + } + this->init_end = true; + this->StartLoader(); + return true; + } + /*!\brief place the iterator before first value */ + inline void BeforeFirst(void) { + // wait till last loader end + loading_end.Wait(); + // critcal zone + current_buf = 1; + factory.BeforeFirst(); + // reset terminate limit + endA = endB = buf_size; + // wake up loader for first part + loading_need.Post(); + // wait til first part is loaded + loading_end.Wait(); + // set current buf to right value + current_buf = 0; + // wake loader for next part + data_loaded = false; + loading_need.Post(); + // set buffer value + buf_index = 0; + } + /*! \brief destroy the buffer iterator, will deallocate the buffer */ + inline void Destroy(void) { + // wait until the signal is consumed + this->destroy_signal = true; + loading_need.Post(); + loader_thread.Join(); + loading_need.Destroy(); + loading_end.Destroy(); + for (size_t i = 0; i < bufA.size(); ++i) { + factory.FreeSpace(bufA[i]); + } + for (size_t i = 0; i < bufB.size(); ++i) { + factory.FreeSpace(bufB[i]); + } + bufA.clear(); bufB.clear(); + factory.Destroy(); + this->init_end = false; + } + /*! + * \brief get the next element needed in buffer + * \param elem element to store into + * \return whether reaches end of data + */ + inline bool Next(Elem &elem) { + // end of buffer try to switch + if (buf_index == buf_size) { + this->SwitchBuffer(); + buf_index = 0; + } + if (buf_index >= (current_buf ? endA : endB)) { + return false; + } + std::vector &buf = current_buf ? bufA : bufB; + elem = buf[buf_index]; + ++buf_index; + return true; + } + /*! + * \brief get the factory object + */ + inline ElemFactory &get_factory(void) { + return factory; + } + inline const ElemFactory &get_factory(void) const{ + return factory; + } + // size of buffer + int buf_size; + private: + // factory object used to load configures + ElemFactory factory; + // index in current buffer + int buf_index; + // indicate which one is current buffer + int current_buf; + // max limit of visit, also marks termination + int endA, endB; + // double buffer, one is accessed by loader + // the other is accessed by consumer + // buffer of the data + std::vector bufA, bufB; + // initialization end + bool init_end; + // singal whether the data is loaded + bool data_loaded; + // signal to kill the thread + bool destroy_signal; + // thread object + Thread loader_thread; + // signal of the buffer + Semaphore loading_end, loading_need; + /*! + * \brief slave thread + * this implementation is like producer-consumer style + */ + inline void RunLoader(void) { + while(!destroy_signal) { + // sleep until loading is needed + loading_need.Wait(); + std::vector &buf = current_buf ? bufB : bufA; + int i; + for (i = 0; i < buf_size ; ++i) { + if (!factory.LoadNext(buf[i])) { + int &end = current_buf ? endB : endA; + end = i; // marks the termination + break; + } + } + // signal that loading is done + data_loaded = true; + loading_end.Post(); + } + } + /*!\brief entry point of loader thread */ + inline static CXXNET_THREAD_PREFIX LoaderEntry(void *pthread) { + static_cast< ThreadBuffer* >(pthread)->RunLoader(); + ThreadExit(NULL); + return NULL; + } + /*!\brief start loader thread */ + inline void StartLoader(void) { + destroy_signal = false; + // set param + current_buf = 1; + loading_need.Init(1); + loading_end .Init(0); + // reset terminate limit + endA = endB = buf_size; + loader_thread.Start(LoaderEntry, this); + // wait until first part of data is loaded + loading_end.Wait(); + // set current buf to right value + current_buf = 0; + // wake loader for next part + data_loaded = false; + loading_need.Post(); + buf_index = 0; + } + /*!\brief switch double buffer */ + inline void SwitchBuffer(void) { + loading_end.Wait(); + // loader shall be sleep now, critcal zone! + current_buf = !current_buf; + // wake up loader + data_loaded = false; + loading_need.Post(); + } +}; +} // namespace utils +} // namespace cxxnet +#endif From 38931a69c8b47477de35c549a992b99d79eab866 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Fri, 4 Sep 2015 01:15:31 +0800 Subject: [PATCH 03/15] pass compilation, not tested --- Makefile | 23 +++- include/mxnet/io.h | 2 +- make/config.mk | 4 +- src/common/utils.h | 5 +- src/io/image_augmenter.h | 21 ++-- src/io/inst_vector.h | 11 +- src/io/io.cc | 14 ++- src/io/iter_batch.h | 36 +++--- src/io/iter_image_recordio.cc | 38 ++++--- src/utils/decoder.h | 2 +- src/utils/io.h | 175 ----------------------------- src/utils/thread_buffer.h | 205 ---------------------------------- tests/python/test_io.py | 15 +++ 13 files changed, 106 insertions(+), 445 deletions(-) delete mode 100644 src/utils/io.h delete mode 100644 src/utils/thread_buffer.h diff --git a/Makefile b/Makefile index bdebed0b5ae6..2d980ad74785 100644 --- a/Makefile +++ b/Makefile @@ -16,10 +16,15 @@ ifndef RABIT RABIT = rabit endif +ifneq ($(USE_OPENMP_ITER), 1) + export NO_OPENMP = 1 +endif + # use customized config file include $(config) include mshadow/make/mshadow.mk include $(DMLC_CORE)/make/dmlc.mk +unexport NO_OPENMP # all tge possible warning tread WARNFLAGS= -Wall @@ -42,10 +47,21 @@ endif # setup opencv ifeq ($(USE_OPENCV),1) - CFLAGS+= -DCXXNET_USE_OPENCV=1 + CFLAGS+= -DMXNET_USE_OPENCV=1 LDFLAGS+= `pkg-config --libs opencv` else - CFLAGS+= -DCXXNET_USE_OPENCV=0 + CFLAGS+= -DMXNET_USE_OPENCV=0 +endif + +# setup opencv +ifeq ($(USE_OPENCV_DECODER),1) + CFLAGS+= -DMXNET_USE_OPENCV_DECODER=1 +else + CFLAGS+= -DMXNET_USE_OPENCV_DECODER=0 +endif + +ifeq ($(USE_OPENMP_ITER), 1) + CFLAGS += -fopenmp endif ifeq ($(USE_CUDNN), 1) @@ -64,7 +80,7 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o io.o iter_mnist.o +OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o io.o iter_mnist.o iter_image_recordio.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -107,6 +123,7 @@ reshape_cpu.o: src/operator/reshape.cc reshape_gpu.o: src/operator/reshape.cu io.o: src/io/io.cc iter_mnist.o: src/io/iter_mnist.cc +iter_image_recordio.o: src/io/iter_image_recordio.cc lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 5a8267befc1c..7bb86f4eece3 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -121,7 +121,7 @@ struct DataIteratorReg */ #define MXNET_REGISTER_IO_CHAINED_ITER(name, ChainedDataIterType, HoldingDataIterType) \ static ::mxnet::IIterator* __create__ ## ChainedDataIteratorType ## __() { \ - return new HoldingDataIteratorType(new ChainedDataIterType); \ + return new HoldingDataIterType(new ChainedDataIterType); \ } \ DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ .set_body(__create__ ## ChainedDataIteratorType ## __) diff --git a/make/config.mk b/make/config.mk index cd04b146180c..3e93e240e493 100644 --- a/make/config.mk +++ b/make/config.mk @@ -27,8 +27,8 @@ USE_CUDA_PATH = NONE # whether use opencv during compilation # you can disable it, however, you will not able to use # imbin iterator -USE_OPENCV = 0 -USE_OPENCV_DECODER = 0 +USE_OPENCV = 1 +USE_OPENCV_DECODER = 1 # whether use CUDNN R3 library USE_CUDNN = 0 # add the path to CUDNN libary to link and compile flag diff --git a/src/common/utils.h b/src/common/utils.h index f7a2dcce0470..b5edb78bd6f9 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -21,12 +21,11 @@ namespace common { * \brief Random Engine */ typedef std::mt19937 RANDOM_ENGINE; - // Get a double float, prnd is the pointer to a Random Engine #define NextDouble(prnd) std::generate_canonical(*prnd) +// Get a random int in [0, range) +#define NextUInt32(range, prnd) static_cast(floor(std::generate_canonical(*prnd) * range)) -#define NextUInt32(range, prnd) static_cast(\ - floor(std::generate_canonical(*prnd) * range)) /*! * \brief Helper functions. */ diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h index d33464c4a889..3ca373d768b0 100644 --- a/src/io/image_augmenter.h +++ b/src/io/image_augmenter.h @@ -19,7 +19,6 @@ struct ImageAugmentParam : public dmlc::Parameter { int crop_y_start_; /*! \brief whether we do nonrandom croping */ int crop_x_start_; - /*! \brief Indicate the max ratation angle for augmentation, we will random rotate */ /*! \brief [-max_rotate_angle, max_rotate_angle] */ int max_rotate_angle_; /*! \brief max aspect ratio */ @@ -77,6 +76,7 @@ struct ImageAugmentParam : public dmlc::Parameter { .describe("Rotate angle"); DMLC_DECLARE_FIELD(fill_value_).set_default(255) .describe("Filled value while padding"); + } }; /*! \brief helper class to do image augmentation */ @@ -99,7 +99,7 @@ class ImageAugmenter { << "input_shape must be three consecutive integers without space example: 1,1,200 "; } if (!strcmp(kwargs_left[i].first.c_str(), "rotate_list")) { - char* val = kwargs_left[i].second.c_str(); + const char* val = kwargs_left[i].second.c_str(); const char *end = val + strlen(val); char buf[128]; while (val < end) { @@ -121,9 +121,9 @@ class ImageAugmenter { virtual cv::Mat Process(const cv::Mat &src, common::RANDOM_ENGINE *prnd) { // shear - float s = common::NextDouble(prnd) * param_.max_shear_ratio_ * 2 - param_.max_shear_ratio_; + float s = NextDouble(prnd) * param_.max_shear_ratio_ * 2 - param_.max_shear_ratio_; // rotate - int angle = common::NextUInt32(param_.max_rotate_angle_ * 2, prnd) - param_.max_rotate_angle_; + int angle = NextUInt32(param_.max_rotate_angle_ * 2, prnd) - param_.max_rotate_angle_; if (param_.rotate_ > 0) angle = param_.rotate_; if (rotate_list_.size() > 0) { angle = rotate_list_[NextUInt32(rotate_list_.size() - 1, prnd)]; @@ -160,7 +160,7 @@ class ImageAugmenter { mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size_- param_.min_crop_size_+1, prnd)+ param_.min_crop_size_; mshadow::index_t y = res.rows - rand_crop_size; mshadow::index_t x = res.cols - rand_crop_size; - if (rand_crop_ != 0) { + if (param_.rand_crop_ != 0) { y = NextUInt32(y + 1, prnd); x = NextUInt32(x + 1, prnd); } @@ -171,8 +171,8 @@ class ImageAugmenter { cv::resize(res(roi), res, cv::Size(shape_[1], shape_[2])); } else{ - utils::Check(static_cast(res.cols) >= shape_[1] && static_cast(res.rows) >= shape_[2], - "input image size smaller than input shape"); + CHECK(static_cast(res.cols) >= shape_[1] && static_cast(res.rows) >= shape_[2]) + << "input image size smaller than input shape"; mshadow::index_t y = res.rows - shape_[2]; mshadow::index_t x = res.cols - shape_[1]; if (param_.rand_crop_ != 0) { @@ -240,9 +240,9 @@ class ImageAugmenter { private: // whether skip processing inline bool NeedProcess(void) const { - if (max_rotate_angle_ > 0 || max_shear_ratio_ > 0.0f - || rotate_ > 0 || rotate_list_.size() > 0) return true; - if (min_crop_size_ > 0 && max_crop_size_ > 0) return true; + if (param_.max_rotate_angle_ > 0 || param_.max_shear_ratio_ > 0.0f + || param_.rotate_ > 0 || rotate_list_.size() > 0) return true; + if (param_.min_crop_size_ > 0 && param_.max_crop_size_ > 0) return true; return false; } // temp input space @@ -252,6 +252,7 @@ class ImageAugmenter { // rotation param cv::Mat rotateM; // parameters + ImageAugmentParam param_; /*! \brief input shape */ mshadow::Shape<4> shape_; /*! \brief list of possible rotate angle */ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h index 9490ceab94c1..4ced7dd64c63 100644 --- a/src/io/inst_vector.h +++ b/src/io/inst_vector.h @@ -7,10 +7,11 @@ #ifndef MXNET_INST_VECTOR_H_ #define MXNET_INST_VECTOR_H_ -#include "./data.h" -#include +#include +#include #include #include +#include namespace mxnet { namespace io { @@ -30,7 +31,7 @@ class TensorVector { CHECK(i + 1 < offset_.size()); CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); return mshadow::Tensor - ((DType*)BeginPtr(content_) + offset_[i], shape_[i]); + ((DType*)dmlc::BeginPtr(content_) + offset_[i], shape_[i]); } inline mshadow::Tensor Back() const { return (*this)[Size() - 1]; @@ -73,8 +74,8 @@ class InstVector { inline DataInst operator[](size_t i) const { DataInst inst; inst.index = index_[i]; - inst.data = data_[i]; - inst.label = label_[i]; + inst.data.push_back(TBlob(data_[i])); + inst.data.push_back(TBlob(label_[i])); return inst; } // get back of instance vector diff --git a/src/io/io.cc b/src/io/io.cc index 9095f4089c92..b2dbc9f8c2c5 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -4,14 +4,18 @@ #include #include -#include -#include <> -#include +#include "./image_augmenter.h" +#include "./iter_batch.h" // Registers namespace dmlc { DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); +} // namespace dmlc + +namespace mxnet { +namespace io { // Register parameters in header files DMLC_REGISTER_PARAMETER(BatchParam); -DMLC_REGISTER_PARAMETER(ImageAugmenterParam); -} // namespace dmlc \ No newline at end of file +DMLC_REGISTER_PARAMETER(ImageAugmentParam); +} // namespace mxnet +} // namespace io diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h index a0e4ab7e7ba5..f258bc2d6afd 100644 --- a/src/io/iter_batch.h +++ b/src/io/iter_batch.h @@ -36,7 +36,7 @@ struct BatchParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(test_skipread_).set_default(false) .describe("Skip read for testing."); DMLC_DECLARE_FIELD(silent_).set_default(false) - .describe("Whether to print batch information.") + .describe("Whether to print batch information."); } }; @@ -48,7 +48,7 @@ class BatchAdaptIter: public IIterator { } virtual ~BatchAdaptIter(void) { delete base_; - out_.FreeSpaceDense(); + FreeSpaceDense(); } virtual void Init(const std::vector >& kwargs) { std::vector > kwargs_left; @@ -57,7 +57,7 @@ class BatchAdaptIter: public IIterator { for (size_t i = 0; i < kwargs_left.size(); i++) { if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[1], &shape_[2], &shape_[3]) == 3) - << "input_shape must be three consecutive integers without space example: 1,1,200 ") + << "input_shape must be three consecutive integers without space example: 1,1,200 "; } } // init base iterator @@ -88,13 +88,13 @@ class BatchAdaptIter: public IIterator { while (base_->Next()) { const DataInst& d = base_->Value(); - mshadow::Copy(label[top], d.data[1].get()); + mshadow::Copy(label[top], d.data[1].get()); out_.inst_index[top] = d.index; - mshadow::Copy(data[top], d.data[0].get()); + mshadow::Copy(data[top], d.data[0].get()); if (++ top >= param_.batch_size_) { - out.data[0] = TBlob(data); - out.data[1] = TBlob(label); + out_.data[0] = TBlob(data); + out_.data[1] = TBlob(label); return true; } } @@ -105,16 +105,16 @@ class BatchAdaptIter: public IIterator { for (; top < param_.batch_size_; ++top, ++num_overflow_) { CHECK(base_->Next()) << "number of input must be bigger than batch size"; const DataInst& d = base_->Value(); - mshadow::Copy(label[top], d.data[1].get()); + mshadow::Copy(label[top], d.data[1].get()); out_.inst_index[top] = d.index; - mshadow::Copy(data[top], d.data[0].get()); + mshadow::Copy(data[top], d.data[0].get()); } out_.num_batch_padd = num_overflow_; } else { - out_.num_batch_padd = batch_size_ - top; + out_.num_batch_padd = param_.batch_size_ - top; } - out.data[0] = TBlob(data); - out.data[1] = TBlob(label); + out_.data[0] = TBlob(data); + out_.data[1] = TBlob(label); return true; } return false; @@ -124,6 +124,8 @@ class BatchAdaptIter: public IIterator { return out_; } private: + /*! \brief batch parameters */ + BatchParam param_; /*! \brief base iterator */ IIterator *base_; /*! \brief input shape */ @@ -141,16 +143,16 @@ class BatchAdaptIter: public IIterator { // Functions that allocate and free tensor space inline void AllocSpaceDense(bool pad = false) { data = mshadow::NewTensor(shape_, 0.0f, pad); - mshadow::Shape<2> lshape = mshadow::Shape2(batch_size, label_width); + mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size_, param_.label_width_); label = mshadow::NewTensor(lshape, 0.0f, pad); - out_.inst_index = new unsigned[batch_size]; - out_.batch_size = batch_size; + out_.inst_index = new unsigned[param_.batch_size_]; + out_.batch_size = param_.batch_size_; out_.data.resize(2); } /*! \brief auxiliary function to free space, if needed, dense only */ inline void FreeSpaceDense(void) { if (label.dptr_ != NULL) { - delete [] inst_index; + delete [] out_.inst_index; mshadow::FreeSpace(&label); mshadow::FreeSpace(&data); label.dptr_ = NULL; @@ -159,4 +161,4 @@ class BatchAdaptIter: public IIterator { }; // class BatchAdaptIter } // namespace io } // namespace cxxnet -#endif // MXNET_IO_ITER_BATCH_H_ \ No newline at end of file +#endif // MXNET_IO_ITER_BATCH_H_ diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc index 2ab1aa8958cb..9977ddd2290c 100644 --- a/src/io/iter_image_recordio.cc +++ b/src/io/iter_image_recordio.cc @@ -16,6 +16,7 @@ iterator #include "./inst_vector.h" #include "./image_recordio.h" #include "./image_augmenter.h" +#include "./iter_batch.h" #include "../utils/decoder.h" namespace mxnet { namespace io { @@ -57,7 +58,7 @@ class ImageLabelMap { // be careful not to resize label_ afterwards idx2label_.reserve(image_index_.size()); for (size_t i = 0; i < image_index_.size(); ++i) { - idx2label_[image_index_[i]] = BeginPtr(label_) + i * label_width_; + idx2label_[image_index_[i]] = dmlc::BeginPtr(label_) + i * label_width_; } if (!silent) { LOG(INFO) << "Loaded ImageList from " << path_imglist << ' ' @@ -101,7 +102,7 @@ struct ImageRecParserParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(ImageRecParserParam) { DMLC_DECLARE_FIELD(path_imglist_).set_default("") .describe("Path to image list."); - DMLC_DECLARE_FIELD(path_imagrec_).set_default("./data/imgrec.rec") + DMLC_DECLARE_FIELD(path_imgrec_).set_default("./data/imgrec.rec") .describe("Path to image record file."); DMLC_DECLARE_FIELD(nthread_).set_lower_bound(1).set_default(4) .describe("Number of thread to do parsing."); @@ -178,7 +179,7 @@ inline void ImageRecordIOParser::Init(const std::vectorInit(kwargs_left); prnds_.push_back(new common::RANDOM_ENGINE((i + 1) * kRandMagic)); } @@ -186,16 +187,16 @@ inline void ImageRecordIOParser::Init(const std::vector *out_vec) { { CHECK(omp_get_num_threads() == param_.nthread_); int tid = omp_get_thread_num(); - dmlc::RecordIOChunkReader reader(chunk, tid, parser_.nthread_); - mxnet::ImageRecordIO rec; + dmlc::RecordIOChunkReader reader(chunk, tid, param_.nthread_); + ImageRecordIO rec; dmlc::InputSplit::Blob blob; // image data InstVector &out = (*out_vec)[tid]; @@ -238,18 +239,21 @@ ParseNext(std::vector *out_vec) { mshadow::Shape3(3, res.rows, res.cols), mshadow::Shape1(param_.label_width_)); DataInst inst = out.Back(); + // turn datainst into tensor + mshadow::Tensor data = inst.data[0].get(); + mshadow::Tensor label = inst.data[1].get(); for (int i = 0; i < res.rows; ++i) { for (int j = 0; j < res.cols; ++j) { cv::Vec3b bgr = res.at(i, j); - inst.data[0][i][j] = bgr[2]; - inst.data[1][i][j] = bgr[1]; - inst.data[2][i][j] = bgr[0]; + data[0][i][j] = bgr[2]; + data[1][i][j] = bgr[1]; + data[2][i][j] = bgr[0]; } } if (label_map_ != NULL) { - mshadow::Copy(inst.label, label_map_->Find(rec.image_index())); + mshadow::Copy(label, label_map_->Find(rec.image_index())); } else { - inst.label[0] = rec.header.label; + label[0] = rec.header.label; } res.release(); } @@ -324,7 +328,7 @@ class ImageRecordIter : public IIterator { } // shuffle instance order if needed if (shuffle_ != 0) { - std::shuffle(inst_order_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); + std::shuffle(inst_order_.begin(), inst_order_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); } inst_ptr_ = 0; } @@ -353,17 +357,15 @@ class ImageRecordIter : public IIterator { // backend thread dmlc::ThreadedIter > iter_; // parameters - ImageRecParserParam param_; + ImageRecordParam param_; }; DMLC_REGISTER_PARAMETER(ImageRecParserParam); DMLC_REGISTER_PARAMETER(ImageRecordParam); -MXNET_REGISTER_IO_ITER(MNISTIter, MNISTIter) MXNET_REGISTER_IO_CHAINED_ITER(ImageRecordIter, ImageRecordIter, BatchAdaptIter) .describe("Create iterator for dataset packed in recordio.") .add_arguments(ImageRecordParam::__FIELDS__()) .add_arguments(ImageRecParserParam::__FIELDS__()) .add_arguments(BatchParam::__FIELDS__()) - .add_arguments(ImageAugmenterParam::__FIELDS__()); + .add_arguments(ImageAugmentParam::__FIELDS__()); } // namespace io } // namespace mxnet -#endif // ITER_IMAGE_RECORDIO_INL_HPP_ diff --git a/src/utils/decoder.h b/src/utils/decoder.h index 17203392cc60..52db01edee23 100644 --- a/src/utils/decoder.h +++ b/src/utils/decoder.h @@ -13,7 +13,7 @@ #include #endif -namespace cxxnet { +namespace mxnet { namespace utils { #if MXNET_USE_OPENCV_DECODER == 0 diff --git a/src/utils/io.h b/src/utils/io.h deleted file mode 100644 index 3781ce98b012..000000000000 --- a/src/utils/io.h +++ /dev/null @@ -1,175 +0,0 @@ -#ifndef CXXNET_UTILS_IO_H_ -#define CXXNET_UTILS_IO_H_ -/*! - * \file io.h - * \brief definition of abstract stream interface for IO - * \author Bing Xu Tianqi Chen - */ -#include "./utils.h" -#include -#include -#include -#include - -namespace cxxnet { -namespace utils { -typedef dmlc::Stream IStream; -typedef dmlc::SeekStream ISeekStream; - -/*! \brief a in memory buffer that can be read and write as stream interface */ -struct MemoryBufferStream : public ISeekStream { - public: - MemoryBufferStream(std::string *p_buffer) - : p_buffer_(p_buffer) { - curr_ptr_ = 0; - } - virtual ~MemoryBufferStream(void) {} - virtual size_t Read(void *ptr, size_t size) { - CHECK(curr_ptr_ <= p_buffer_->length()) - << " read can not have position excceed buffer length"; - size_t nread = std::min(p_buffer_->length() - curr_ptr_, size); - if (nread != 0) memcpy(ptr, &(*p_buffer_)[0] + curr_ptr_, nread); - curr_ptr_ += nread; - return nread; - } - virtual void Write(const void *ptr, size_t size) { - if (size == 0) return; - if (curr_ptr_ + size > p_buffer_->length()) { - p_buffer_->resize(curr_ptr_+size); - } - memcpy(&(*p_buffer_)[0] + curr_ptr_, ptr, size); - curr_ptr_ += size; - } - virtual void Seek(size_t pos) { - curr_ptr_ = static_cast(pos); - } - virtual size_t Tell(void) { - return curr_ptr_; - } - - private: - /*! \brief in memory buffer */ - std::string *p_buffer_; - /*! \brief current pointer */ - size_t curr_ptr_; -}; // class MemoryBufferStream - -/*! \brief implementation of file i/o stream */ -class StdFile: public ISeekStream { - public: - /*! \brief constructor */ - StdFile(const char *fname, const char *mode) { - Open(fname, mode); - } - StdFile() {} - virtual ~StdFile(void) { - this->Close(); - } - virtual void Open(const char *fname, const char *mode) { - fp_ = utils::FopenCheck(fname, mode); - fseek(fp_, 0L, SEEK_END); - sz_ = ftell(fp_); - fseek(fp_, 0L, SEEK_SET); - } - virtual size_t Read(void *ptr, size_t size) { - return fread(ptr, size, 1, fp_); - } - virtual void Write(const void *ptr, size_t size) { - fwrite(ptr, size, 1, fp_); - } - virtual void Seek(size_t pos) { - fseek(fp_, pos, SEEK_SET); - } - virtual size_t Tell(void) { - return static_cast(ftell(fp_)); - } - inline void Close(void) { - if (fp_ != NULL){ - fclose(fp_); fp_ = NULL; - } - } - inline size_t Size() { - return sz_; - } - private: - FILE *fp_; - size_t sz_; -}; // class StdFile - -/*! \brief Basic page class */ -class BinaryPage { - public: - /*! \brief page size 64 MB */ - static const size_t kPageSize = 64 << 18; - public: - /*! \brief memory data object */ - struct Obj{ - /*! \brief pointer to the data*/ - void *dptr; - /*! \brief size */ - size_t sz; - Obj(void * dptr, size_t sz) : dptr(dptr), sz(sz){} - }; - public: - /*! \brief constructor of page */ - BinaryPage(void) { - data_ = new int[kPageSize]; - utils::Check(data_ != NULL, "fail to allocate page, out of space"); - this->Clear(); - }; - ~BinaryPage() { - if (data_) delete [] data_; - } - /*! - * \brief load one page form instream - * \return true if loading is successful - */ - inline bool Load(utils::IStream &fi) { - return fi.Read(&data_[0], sizeof(int)*kPageSize) !=0; - } - /*! \brief save one page into outstream */ - inline void Save(utils::IStream &fo) { - fo.Write(&data_[0], sizeof(int)*kPageSize); - } - /*! \return number of elements */ - inline int Size(void){ - return data_[0]; - } - /*! \brief Push one binary object into page - * \param fname file name of obj need to be pushed into - * \return false or true to push into - */ - inline bool Push(const Obj &dat) { - if(this->FreeBytes() < dat.sz + sizeof(int)) return false; - data_[ Size() + 2 ] = data_[ Size() + 1 ] + dat.sz; - memcpy(this->offset(data_[ Size() + 2 ]), dat.dptr, dat.sz); - ++ data_[0]; - return true; - } - /*! \brief Clear the page */ - inline void Clear(void) { - memset(&data_[0], 0, sizeof(int) * kPageSize); - } - /*! - * \brief Get one binary object from page - * \param r r th obj in the page - */ - inline Obj operator[](int r) { - CHECK(r < Size()); - return Obj(this->offset(data_[ r + 2 ]), data_[ r + 2 ] - data_[ r + 1 ]); - } - private: - /*! \return number of elements */ - inline size_t FreeBytes(void) { - return (kPageSize - (Size() + 2)) * sizeof(int) - data_[ Size() + 1 ]; - } - inline void* offset(int pos) { - return (char*)(&data_[0]) + (kPageSize*sizeof(int) - pos); - } - private: - //int data_[ kPageSize ]; - int *data_; -}; // class BinaryPage -} // namespace utils -} // namespace cxxnet -#endif diff --git a/src/utils/thread_buffer.h b/src/utils/thread_buffer.h deleted file mode 100644 index 7df1ae17aa56..000000000000 --- a/src/utils/thread_buffer.h +++ /dev/null @@ -1,205 +0,0 @@ -#ifndef CXXNET_UTILS_THREAD_BUFFER_H_ -#define CXXNET_UTILS_THREAD_BUFFER_H_ -/*! - * \file thread_buffer.h - * \brief multi-thread buffer, iterator, can be used to create parallel pipeline - * \author Tianqi Chen - */ -#include -#include -#include -#include "./utils.h" -#include "./thread.h" -namespace cxxnet { -namespace utils { -/*! - * \brief buffered loading iterator that uses multithread - * this template method will assume the following paramters - * \tparam Elem elememt type to be buffered - * \tparam ElemFactory factory type to implement in order to use thread buffer - */ -template -class ThreadBuffer { - public: - /*!\brief constructor */ - ThreadBuffer(void) { - this->init_end = false; - this->buf_size = 30; - } - ~ThreadBuffer(void) { - if(init_end) this->Destroy(); - } - /*!\brief set parameter, will also pass the parameter to factory */ - inline void SetParam(const char *name, const char *val) { - if (!strcmp( name, "buffer_size")) buf_size = atoi(val); - factory.SetParam(name, val); - } - /*! - * \brief initalize the buffered iterator - * \param param a initialize parameter that will pass to factory, ignore it if not necessary - * \return false if the initlization can't be done, e.g. buffer file hasn't been created - */ - inline bool Init(void) { - if (!factory.Init()) return false; - bufA.reserve(buf_size); - bufB.reserve(buf_size); - for (int i = 0; i < buf_size; ++i) { - bufA.push_back(factory.Create()); - bufB.push_back(factory.Create()); - } - this->init_end = true; - this->StartLoader(); - return true; - } - /*!\brief place the iterator before first value */ - inline void BeforeFirst(void) { - // wait till last loader end - loading_end.Wait(); - // critcal zone - current_buf = 1; - factory.BeforeFirst(); - // reset terminate limit - endA = endB = buf_size; - // wake up loader for first part - loading_need.Post(); - // wait til first part is loaded - loading_end.Wait(); - // set current buf to right value - current_buf = 0; - // wake loader for next part - data_loaded = false; - loading_need.Post(); - // set buffer value - buf_index = 0; - } - /*! \brief destroy the buffer iterator, will deallocate the buffer */ - inline void Destroy(void) { - // wait until the signal is consumed - this->destroy_signal = true; - loading_need.Post(); - loader_thread.Join(); - loading_need.Destroy(); - loading_end.Destroy(); - for (size_t i = 0; i < bufA.size(); ++i) { - factory.FreeSpace(bufA[i]); - } - for (size_t i = 0; i < bufB.size(); ++i) { - factory.FreeSpace(bufB[i]); - } - bufA.clear(); bufB.clear(); - factory.Destroy(); - this->init_end = false; - } - /*! - * \brief get the next element needed in buffer - * \param elem element to store into - * \return whether reaches end of data - */ - inline bool Next(Elem &elem) { - // end of buffer try to switch - if (buf_index == buf_size) { - this->SwitchBuffer(); - buf_index = 0; - } - if (buf_index >= (current_buf ? endA : endB)) { - return false; - } - std::vector &buf = current_buf ? bufA : bufB; - elem = buf[buf_index]; - ++buf_index; - return true; - } - /*! - * \brief get the factory object - */ - inline ElemFactory &get_factory(void) { - return factory; - } - inline const ElemFactory &get_factory(void) const{ - return factory; - } - // size of buffer - int buf_size; - private: - // factory object used to load configures - ElemFactory factory; - // index in current buffer - int buf_index; - // indicate which one is current buffer - int current_buf; - // max limit of visit, also marks termination - int endA, endB; - // double buffer, one is accessed by loader - // the other is accessed by consumer - // buffer of the data - std::vector bufA, bufB; - // initialization end - bool init_end; - // singal whether the data is loaded - bool data_loaded; - // signal to kill the thread - bool destroy_signal; - // thread object - Thread loader_thread; - // signal of the buffer - Semaphore loading_end, loading_need; - /*! - * \brief slave thread - * this implementation is like producer-consumer style - */ - inline void RunLoader(void) { - while(!destroy_signal) { - // sleep until loading is needed - loading_need.Wait(); - std::vector &buf = current_buf ? bufB : bufA; - int i; - for (i = 0; i < buf_size ; ++i) { - if (!factory.LoadNext(buf[i])) { - int &end = current_buf ? endB : endA; - end = i; // marks the termination - break; - } - } - // signal that loading is done - data_loaded = true; - loading_end.Post(); - } - } - /*!\brief entry point of loader thread */ - inline static CXXNET_THREAD_PREFIX LoaderEntry(void *pthread) { - static_cast< ThreadBuffer* >(pthread)->RunLoader(); - ThreadExit(NULL); - return NULL; - } - /*!\brief start loader thread */ - inline void StartLoader(void) { - destroy_signal = false; - // set param - current_buf = 1; - loading_need.Init(1); - loading_end .Init(0); - // reset terminate limit - endA = endB = buf_size; - loader_thread.Start(LoaderEntry, this); - // wait until first part of data is loaded - loading_end.Wait(); - // set current buf to right value - current_buf = 0; - // wake loader for next part - data_loaded = false; - loading_need.Post(); - buf_index = 0; - } - /*!\brief switch double buffer */ - inline void SwitchBuffer(void) { - loading_end.Wait(); - // loader shall be sleep now, critcal zone! - current_buf = !current_buf; - // wake up loader - data_loaded = false; - loading_need.Post(); - } -}; -} // namespace utils -} // namespace cxxnet -#endif diff --git a/tests/python/test_io.py b/tests/python/test_io.py index dfeb3f67c293..991a4813033e 100644 --- a/tests/python/test_io.py +++ b/tests/python/test_io.py @@ -39,3 +39,18 @@ def test_MNISTIter_reset(): label_1 = train_dataiter.getlabel().numpy.flatten() assert(sum(label_0 - label_1) == 0) +def test_ImageRecIter(): + dataiter = mx.io.ImageRecordIter(path_imgrec="data/val_cxxnet.rec", + image_mean="data/val_cxxnet_mean.bin", + rand_crop=True, + rand_mirror=True, + input_shape="3,224,224", + batch_size=128) + + + + + + + + From c1603d77168d62cccd760c974ae8bd251808b08a Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 6 Sep 2015 07:47:04 +0800 Subject: [PATCH 04/15] merge augmenter, modify param attribute --- src/io/image_augmenter.h | 279 +++++++++++++++++++++++++--------- src/io/iter_batch.h | 55 ++++--- src/io/iter_image_recordio.cc | 76 ++++----- 3 files changed, 275 insertions(+), 135 deletions(-) diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h index 3ca373d768b0..a81e5297d5b3 100644 --- a/src/io/image_augmenter.h +++ b/src/io/image_augmenter.h @@ -1,7 +1,7 @@ /*! * \file image_augmenter_opencv.hpp * \brief threaded version of page iterator - * \author Naiyan Wang, Tianqi Chen + * \author Naiyan Wang, Tianqi Chen, Tianjun Xiao */ #ifndef MXNET_IO_IMAGE_AUGMENTER_H_ #define MXNET_IO_IMAGE_AUGMENTER_H_ @@ -14,68 +14,102 @@ namespace io { /*! \brief image augmentation parameters*/ struct ImageAugmentParam : public dmlc::Parameter { /*! \brief whether we do random cropping */ - bool rand_crop_; + bool rand_crop; /*! \brief whether we do nonrandom croping */ - int crop_y_start_; + int crop_y_start; /*! \brief whether we do nonrandom croping */ - int crop_x_start_; + int crop_x_start; /*! \brief [-max_rotate_angle, max_rotate_angle] */ - int max_rotate_angle_; + int max_rotate_angle; /*! \brief max aspect ratio */ - float max_aspect_ratio_; + float max_aspect_ratio; /*! \brief random shear the image [-max_shear_ratio, max_shear_ratio] */ - float max_shear_ratio_; + float max_shear_ratio; /*! \brief max crop size */ - int max_crop_size_; + int max_crop_size; /*! \brief min crop size */ - int min_crop_size_; + int min_crop_size; /*! \brief max scale ratio */ - float max_random_scale_; + float max_random_scale; /*! \brief min scale_ratio */ - float min_random_scale_; + float min_random_scale; /*! \brief min image size */ - float min_img_size_; + float min_img_size; /*! \brief max image size */ - float max_img_size_; - /*! \brief whether to mirror the image */ - bool mirror_; + float max_img_size; /*! \brief rotate angle */ - int rotate_; + int rotate; /*! \brief filled color while padding */ - int fill_value_; + int fill_value; + /*! \brief whether to mirror the image */ + bool mirror; + /*! \brief whether to perform rand mirror the image */ + bool rand_mirror; + /*! \brief mean file string*/ + std::string mean_img; + /*! \brief mean value for r channel */ + float mean_r; + /*! \brief mean value for g channel */ + float mean_g; + /*! \brief mean value for b channel */ + float mean_b; + /*! \brief shape of the image data*/ + TShape input_shape; + /*! \brief maximum ratio of contrast variation */ + float max_random_contrast_; + /*! \brief maximum value of illumination variation */ + float max_random_illumination_; // declare parameters // TODO: didn't understand the range for some params DMLC_DECLARE_PARAMETER(ImageAugmentParam) { DMLC_DECLARE_FIELD(rand_crop_).set_default(true) .describe("Whether we de random cropping"); - DMLC_DECLARE_FIELD(crop_y_start_).set_default(-1) + DMLC_DECLARE_FIELD(crop_y_start).set_default(-1) .describe("Where to nonrandom crop on y"); - DMLC_DECLARE_FIELD(crop_x_start_).set_default(-1) + DMLC_DECLARE_FIELD(crop_x_start).set_default(-1) .describe("Where to nonrandom crop on x"); - DMLC_DECLARE_FIELD(max_rotate_angle_).set_default(0.0f) + DMLC_DECLARE_FIELD(max_rotate_angle).set_default(0.0f) .describe("Rotate can be [-max_rotate_angle, max_rotate_angle]"); - DMLC_DECLARE_FIELD(max_aspect_ratio_).set_default(0.0f) + DMLC_DECLARE_FIELD(max_aspect_ratio).set_default(0.0f) .describe("Max aspect ratio"); - DMLC_DECLARE_FIELD(max_shear_ratio_).set_default(0.0f) + DMLC_DECLARE_FIELD(max_shear_ratio).set_default(0.0f) .describe("Shear rotate can be made between [-max_shear_ratio_, max_shear_ratio_]"); - DMLC_DECLARE_FIELD(max_crop_size_).set_default(-1) + DMLC_DECLARE_FIELD(max_crop_size).set_default(-1) .describe("Maximum crop size"); - DMLC_DECLARE_FIELD(min_crop_size_).set_default(-1) + DMLC_DECLARE_FIELD(min_crop_size).set_default(-1) .describe("Minimum crop size"); - DMLC_DECLARE_FIELD(max_random_scale_).set_default(1.0f) + DMLC_DECLARE_FIELD(max_random_scale).set_default(1.0f) .describe("Maxmum scale ratio"); - DMLC_DECLARE_FIELD(min_random_scale_).set_default(1.0f) + DMLC_DECLARE_FIELD(min_random_scale).set_default(1.0f) .describe("Minimum scale ratio"); - DMLC_DECLARE_FIELD(max_img_size_).set_default(1e10f) + DMLC_DECLARE_FIELD(max_img_size).set_default(1e10f) .describe("Maxmum image size"); - DMLC_DECLARE_FIELD(min_img_size_).set_default(0.0f) + DMLC_DECLARE_FIELD(min_img_size).set_default(0.0f) .describe("Minimum image size"); - DMLC_DECLARE_FIELD(mirror_).set_default(false) - .describe("Whether to mirror the image"); - DMLC_DECLARE_FIELD(rotate_).set_default(-1.0f) + DMLC_DECLARE_FIELD(rotate).set_default(-1.0f) .describe("Rotate angle"); - DMLC_DECLARE_FIELD(fill_value_).set_default(255) + DMLC_DECLARE_FIELD(fill_value).set_default(255) .describe("Filled value while padding"); + DMLC_DECLARE_FIELD(mirror).set_default(false) + .describe("Whether to mirror the image"); + DMLC_DECLARE_FIELD(rand_mirror).set_default(false) + .describe("Whether to mirror the image randomly"); + DMLC_DECLARE_FIELD(mean_img).set_default("") + .describe("Mean Image to be subtracted"); + DMLC_DECLARE_FIELD(mean_r).set_default(0.0f) + .describe("Mean value on R channel"); + DMLC_DECLARE_FIELD(mean_g).set_default(0.0f) + .describe("Mean value on G channel"); + DMLC_DECLARE_FIELD(mean_b).set_default(0.0f) + .describe("Mean value on B channel"); + float input_shape_default = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + .set_expect_ndim(3).enforce_nonzero() + .describe("Input shape of the neural net"); + DMLC_DECLARE_FIELD(max_random_contrast).set_default(0.0f) + .describe("Maximum ratio of contrast variation"); + DMLC_DECLARE_FIELD(max_random_illumination).set_default(0.0f) + .describe("Maximum value of illumination variation"); } }; @@ -84,8 +118,8 @@ class ImageAugmenter { public: // contructor ImageAugmenter(void) - : tmpres(false), - rotateM(2, 3, CV_32F) { + : tmpres_(false), + rotateM_(2, 3, CV_32F) { } virtual ~ImageAugmenter() { } @@ -94,10 +128,6 @@ class ImageAugmenter { std::vector > kwargs_left; kwargs_left = param_.InitAllowUnknown(kwargs); for (size_t i = 0; i < kwargs_left.size(); i++) { - if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { - CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[0], &shape_[1], &shape_[2]) == 3) - << "input_shape must be three consecutive integers without space example: 1,1,200 "; - } if (!strcmp(kwargs_left[i].first.c_str(), "rotate_list")) { const char* val = kwargs_left[i].second.c_str(); const char *end = val + strlen(val); @@ -109,6 +139,19 @@ class ImageAugmenter { } } } + if (param_.mean_img.length() != 0) { + dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); + if (fi == NULL) { + this->CreateMeanImg(); + } else { + if (param_.silent == 0) { + printf("loading mean image from %s\n", param_.mean_img.c_str()); + } + meanimg_.LoadBinary(*fi); + delete fi; + meanfile_ready_ = true; + } + } } /*! * \brief augment src image, store result into dst @@ -118,27 +161,27 @@ class ImageAugmenter { * \param source of random number * \param dst the pointer to the place where we want to store the result */ - virtual cv::Mat Process(const cv::Mat &src, + virtual cv::Mat OpencvProcess(const cv::Mat &src, common::RANDOM_ENGINE *prnd) { // shear - float s = NextDouble(prnd) * param_.max_shear_ratio_ * 2 - param_.max_shear_ratio_; + float s = NextDouble(prnd) * param_.max_shear_ratio * 2 - param_.max_shear_ratio; // rotate - int angle = NextUInt32(param_.max_rotate_angle_ * 2, prnd) - param_.max_rotate_angle_; - if (param_.rotate_ > 0) angle = param_.rotate_; + int angle = NextUInt32(param_.max_rotate_angle * 2, prnd) - param_.max_rotate_angle; + if (param_.rotate > 0) angle = param_.rotate; if (rotate_list_.size() > 0) { angle = rotate_list_[NextUInt32(rotate_list_.size() - 1, prnd)]; } float a = cos(angle / 180.0 * M_PI); float b = sin(angle / 180.0 * M_PI); // scale - float scale = NextDouble(prnd) * (param_.max_random_scale_ - param_.min_random_scale_) + param_.min_random_scale_; + float scale = NextDouble(prnd) * (param_.max_random_scale - param_.min_random_scale) + param_.min_random_scale; // aspect ratio - float ratio = NextDouble(prnd) * param_.max_aspect_ratio_ * 2 - param_.max_aspect_ratio_ + 1; + float ratio = NextDouble(prnd) * param_.max_aspect_ratio * 2 - param_.max_aspect_ratio + 1; float hs = 2 * scale / (1 + ratio); float ws = ratio * hs; // new width and height - float new_width = std::max(param_.min_img_size_, std::min(param_.max_img_size_, scale * src.cols)); - float new_height = std::max(param_.min_img_size_, std::min(param_.max_img_size_, scale * src.rows)); + float new_width = std::max(param_.min_img_size, std::min(param_.max_img_size, scale * src.cols)); + float new_height = std::max(param_.min_img_size, std::min(param_.max_img_size, scale * src.rows)); //printf("%f %f %f %f %f %f %f %f %f\n", s, a, b, scale, ratio, hs, ws, new_width, new_height); cv::Mat M(2, 3, CV_32F); M.at(0, 0) = hs * a - s * b * ws; @@ -152,15 +195,16 @@ class ImageAugmenter { cv::warpAffine(src, temp, M, cv::Size(new_width, new_height), cv::INTER_LINEAR, cv::BORDER_CONSTANT, - cv::Scalar(param_.fill_value_, param_.fill_value_, param_.fill_value_)); + cv::Scalar(param_.fill_value, param_.fill_value, param_.fill_value)); cv::Mat res = temp; - if (param_.max_crop_size_ != -1 || param_.min_crop_size_ != -1){ - CHECK(res.cols >= param_.max_crop_size_ && res.rows >= param_.max_crop_size_&& param_.max_crop_size_ >= param_.min_crop_size_) + // crop + if (param_.max_crop_size != -1 || param_.min_crop_size != -1){ + CHECK(res.cols >= param_.max_crop_size && res.rows >= param_.max_crop_size && param_.max_crop_size >= param_.min_crop_size) << "input image size smaller than max_crop_size"; - mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size_- param_.min_crop_size_+1, prnd)+ param_.min_crop_size_; + mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size- param_.min_crop_size+1, prnd)+ param_.min_crop_size; mshadow::index_t y = res.rows - rand_crop_size; mshadow::index_t x = res.cols - rand_crop_size; - if (param_.rand_crop_ != 0) { + if (param_.rand_crop != 0) { y = NextUInt32(y + 1, prnd); x = NextUInt32(x + 1, prnd); } @@ -168,13 +212,13 @@ class ImageAugmenter { y /= 2; x /= 2; } cv::Rect roi(x, y, rand_crop_size, rand_crop_size); - cv::resize(res(roi), res, cv::Size(shape_[1], shape_[2])); + cv::resize(res(roi), res, cv::Size(param_.input_shape[1], param_.input_shape[2])); } else{ - CHECK(static_cast(res.cols) >= shape_[1] && static_cast(res.rows) >= shape_[2]) + CHECK(static_cast(res.cols) >= param_.input_shape[1] && static_cast(res.rows) >= param_.input_shape[2]) << "input image size smaller than input shape"; - mshadow::index_t y = res.rows - shape_[2]; - mshadow::index_t x = res.cols - shape_[1]; + mshadow::index_t y = res.rows - param_.input_shape[2]; + mshadow::index_t x = res.cols - param_.input_shape[1]; if (param_.rand_crop_ != 0) { y = NextUInt32(y + 1, prnd); x = NextUInt32(x + 1, prnd); @@ -182,7 +226,7 @@ class ImageAugmenter { else { y /= 2; x /= 2; } - cv::Rect roi(x, y, shape_[1], shape_[2]); + cv::Rect roi(x, y, param_.input_shape[1], param_.input_shape[2]); res = res(roi); } return res; @@ -195,9 +239,9 @@ class ImageAugmenter { * \param source of random number * \param dst the pointer to the place where we want to store the result */ - virtual mshadow::Tensor Process(mshadow::Tensor data, + virtual mshadow::Tensor OpencvProcess(mshadow::Tensor data, common::RANDOM_ENGINE *prnd) { - if (!NeedProcess()) return data; + if (!NeedOpencvProcess()) return data; cv::Mat res(data.size(1), data.size(2), CV_8UC3); for (index_t i = 0; i < data.size(1); ++i) { for (index_t j = 0; j < data.size(2); ++j) { @@ -206,7 +250,7 @@ class ImageAugmenter { res.at(i, j)[2] = data[0][i][j]; } } - res = this->Process(res, prnd); + res = this->OpencvProcess(res, prnd); tmpres.Resize(mshadow::Shape3(3, res.rows, res.cols)); for (index_t i = 0; i < tmpres.size(1); ++i) { for (index_t j = 0; j < tmpres.size(2); ++j) { @@ -219,12 +263,12 @@ class ImageAugmenter { return tmpres; } - virtual void Process(unsigned char *dptr, size_t sz, + virtual void OpencvProcess(unsigned char *dptr, size_t sz, mshadow::TensorContainer *p_data, common::RANDOM_ENGINE *prnd) { cv::Mat buf(1, sz, CV_8U, dptr); cv::Mat res = cv::imdecode(buf, 1); - res = this->Process(res, prnd); + res = this->OpencvProcess(res, prnd); p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); for (index_t i = 0; i < p_data->size(1); ++i) { for (index_t j = 0; j < p_data->size(2); ++j) { @@ -237,20 +281,117 @@ class ImageAugmenter { res.release(); } + void TensorProcess(mshadow::TensorContainer *p_data, + common::RANDOM_ENGINE *prnd) { + img_.Resize(mshadow::Shape3((*p_data).shape_[0], param_.input_shape[1], param_.input_shape[2])); + if (param_.input_shape[1] == 1) { + img_ = (*p_data) * param_.scale; + } else { + CHECK(p_data->size(1) >= param_.input_shape[1] && p_data->size(2) >= param_.input_shape[2]) + << "Data size must be bigger than the input size to net."; + mshadow::index_t yy = p_data->size(1) - param_.input_shape[1]; + mshadow::index_t xx = p_data->size(2) - param_.input_shape[2]; + if (param_.rand_crop != 0 && (yy != 0 || xx != 0)) { + yy = NextUInt32(yy + 1, prnd); + xx = NextUInt32(xx + 1, prnd); + } else { + yy /= 2; xx /= 2; + } + if (p_data->size(1) != param_.input_shape[1] && param_.crop_y_start != -1) { + yy = param_.crop_y_start; + } + if (p_data->size(2) != param_.input_shape[2] && param_.crop_x_start != -1) { + xx = param_.crop_x_start; + } + float contrast = NextDouble(prnd) * param_.max_random_contrast * 2 - param_.max_random_contrast + 1; + float illumination = NextDouble(prnd) * param_.max_random_illumination * 2 - param_.max_random_illumination; + if (param_.mean_r > 0.0f || param_.mean_g > 0.0f || param_.mean_b > 0.0f) { + // substract mean value + (*p_data)[0] -= param_.mean_b; (*p_data)[1] -= param_.mean_g; (*p_data)[2] -= param_.mean_r; + if ((param_.rand_mirror != 0 && NextDouble(rnd) < 0.5f) || param_.mirror == 1) { + img_ = mirror(crop((*p_data) * contrast + illumination, img_[0].shape_, yy, xx)) * param_.scale; + } else { + img_ = crop((*p_data) * contrast + illumination, img_[0].shape_, yy, xx) * param_.scale ; + } + } else if (!meanfile_ready_ || param_.mean_img.length() == 0) { + // do not substract anything + if (param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) { + img_ = mirror(crop((*p_data), img_[0].shape_, yy, xx)) * param_.scale; + } else { + img_ = crop((*p_data), img_[0].shape_, yy, xx) * param_.scale ; + } + } else { + // substract mean image + if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { + if (p_data->shape_ == meanimg_.shape_) { + img_ = mirror(crop(((*p_data) - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx)) * param_.scale; + } else { + img_ = (mirror(crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * param_.scale; + } + } else { + if (p_data->shape_ == meanimg_.shape_){ + img_ = crop(((*p_data) - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx) * param_.scale; + } else { + img_ = ((crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * param_.scale; + } + } + } + } + out_.data = img_; + } + + inline void CreateMeanImg(void) { + if (silent_ == 0) { + printf("cannot find %s: create mean image, this will take some time...\n", name_meanimg_.c_str()); + } + time_t start = time(NULL); + unsigned long elapsed = 0; + size_t imcnt = 1; + + CHECK(this->Next_()) << "input iterator failed."; + meanimg_.Resize(mshadow::Shape3(shape_[0], shape_[1], shape_[2])); + mshadow::Copy(meanimg_, img_); + while (this->Next()) { + meanimg_ += img_; imcnt += 1; + elapsed = (long)(time(NULL) - start); + if (imcnt % 1000 == 0 && silent_ == 0) { + printf("\r \r"); + printf("[%8lu] images processed, %ld sec elapsed", imcnt, elapsed); + fflush(stdout); + } + } + meanimg_ *= (1.0f / imcnt); + + dmlc::Stream *fo = dmlc::Stream::Create(name_meanimg_.c_str(), "w"); + meanimg_.SaveBinary(*fo); + delete fo; + if (silent_ == 0) { + printf("save mean image to %s..\n", name_meanimg_.c_str()); + } + meanfile_ready_ = true; + } + + private: - // whether skip processing - inline bool NeedProcess(void) const { - if (param_.max_rotate_angle_ > 0 || param_.max_shear_ratio_ > 0.0f - || param_.rotate_ > 0 || rotate_list_.size() > 0) return true; - if (param_.min_crop_size_ > 0 && param_.max_crop_size_ > 0) return true; + // whether skip opencv processing + inline bool NeedOpencvProcess(void) const { + if (param_.max_rotate_angle > 0 || param_.max_shear_ratio > 0.0f + || param_.rotate > 0 || rotate_list_.size() > 0) return true; + if (param_.min_crop_size > 0 && param_.max_crop_size > 0) return true; return false; } // temp input space - mshadow::TensorContainer tmpres; + mshadow::TensorContainer tmpres_; + // mean image + mshadow::TensorContainer meanimg_; + /*! \brief temp space */ + mshadow::TensorContainer img_; // temporal space - cv::Mat temp0, temp, temp2; + cv::Mat temp_; // rotation param - cv::Mat rotateM; + cv::Mat rotateM_; + // whether the mean file is ready + bool menafile_ready_; // parameters ImageAugmentParam param_; /*! \brief input shape */ diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h index f258bc2d6afd..4d95b92cce1e 100644 --- a/src/io/iter_batch.h +++ b/src/io/iter_batch.h @@ -16,26 +16,33 @@ namespace io { // Batch parameters struct BatchParam : public dmlc::Parameter { /*! \brief label width */ - index_t batch_size_; + index_t batch_size; + /*! \brief input shape */ + // TODO: haven't modify all shape_ + TShape input_shape; /*! \brief label width */ - index_t label_width_; + index_t label_width; /*! \brief use round roubin to handle overflow batch */ - bool round_batch_; + bool round_batch; /*! \brief skip read */ - bool test_skipread_; + bool test_skipread; /*! \brief silent */ - bool silent_; + bool silent; // declare parameters DMLC_DECLARE_PARAMETER(BatchParam) { - DMLC_DECLARE_FIELD(batch_size_).set_default(1) + DMLC_DECLARE_FIELD(batch_size) .describe("Batch size."); - DMLC_DECLARE_FIELD(label_width_).set_default(1) + float input_shape_default = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + .set_expect_ndim(3).enforce_nonzero() + .describe("Input shape of the neural net"); + DMLC_DECLARE_FIELD(label_width).set_default(1) .describe("Label width."); - DMLC_DECLARE_FIELD(round_batch_).set_default(false) + DMLC_DECLARE_FIELD(round_batch).set_default(false) .describe("Use round robin to handle overflow batch."); - DMLC_DECLARE_FIELD(test_skipread_).set_default(false) + DMLC_DECLARE_FIELD(test_skipread).set_default(false) .describe("Skip read for testing."); - DMLC_DECLARE_FIELD(silent_).set_default(false) + DMLC_DECLARE_FIELD(silent).set_default(false) .describe("Whether to print batch information."); } }; @@ -54,20 +61,14 @@ class BatchAdaptIter: public IIterator { std::vector > kwargs_left; // init batch param, it could have similar param with kwargs_left = param_.InitAllowUnknown(kwargs); - for (size_t i = 0; i < kwargs_left.size(); i++) { - if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { - CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[1], &shape_[2], &shape_[3]) == 3) - << "input_shape must be three consecutive integers without space example: 1,1,200 "; - } - } // init base iterator base_->Init(kwargs); mshadow::Shape<4> tshape = shape_; - tshape[0] = param_.batch_size_; + tshape[0] = param_.batch_size; AllocSpaceDense(false); } virtual void BeforeFirst(void) { - if (param_.round_batch_ == 0 || num_overflow_ == 0) { + if (param_.round_batch == 0 || num_overflow_ == 0) { // otherise, we already called before first base_->BeforeFirst(); } else { @@ -79,7 +80,7 @@ class BatchAdaptIter: public IIterator { out_.num_batch_padd = 0; // skip read if in head version - if (param_.test_skipread_ != 0 && head_ == 0) return true; + if (param_.test_skipread != 0 && head_ == 0) return true; else this->head_ = 0; // if overflow from previous round, directly return false, until before first is called @@ -92,17 +93,17 @@ class BatchAdaptIter: public IIterator { out_.inst_index[top] = d.index; mshadow::Copy(data[top], d.data[0].get()); - if (++ top >= param_.batch_size_) { + if (++ top >= param_.batch_size) { out_.data[0] = TBlob(data); out_.data[1] = TBlob(label); return true; } } if (top != 0) { - if (param_.round_batch_ != 0) { + if (param_.round_batch != 0) { num_overflow_ = 0; base_->BeforeFirst(); - for (; top < param_.batch_size_; ++top, ++num_overflow_) { + for (; top < param_.batch_size; ++top, ++num_overflow_) { CHECK(base_->Next()) << "number of input must be bigger than batch size"; const DataInst& d = base_->Value(); mshadow::Copy(label[top], d.data[1].get()); @@ -111,7 +112,7 @@ class BatchAdaptIter: public IIterator { } out_.num_batch_padd = num_overflow_; } else { - out_.num_batch_padd = param_.batch_size_ - top; + out_.num_batch_padd = param_.batch_size - top; } out_.data[0] = TBlob(data); out_.data[1] = TBlob(label); @@ -128,8 +129,6 @@ class BatchAdaptIter: public IIterator { BatchParam param_; /*! \brief base iterator */ IIterator *base_; - /*! \brief input shape */ - mshadow::Shape<4> shape_; /*! \brief output data */ DataBatch out_; /*! \brief on first */ @@ -143,10 +142,10 @@ class BatchAdaptIter: public IIterator { // Functions that allocate and free tensor space inline void AllocSpaceDense(bool pad = false) { data = mshadow::NewTensor(shape_, 0.0f, pad); - mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size_, param_.label_width_); + mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size, param_.label_width); label = mshadow::NewTensor(lshape, 0.0f, pad); - out_.inst_index = new unsigned[param_.batch_size_]; - out_.batch_size = param_.batch_size_; + out_.inst_index = new unsigned[param_.batch_size]; + out_.batch_size = param_.batch_size; out_.data.resize(2); } /*! \brief auxiliary function to free space, if needed, dense only */ diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc index 9977ddd2290c..1589fd5ad6c7 100644 --- a/src/io/iter_image_recordio.cc +++ b/src/io/iter_image_recordio.cc @@ -31,7 +31,7 @@ class ImageLabelMap { explicit ImageLabelMap(const char *path_imglist, mshadow::index_t label_width, bool silent) { - label_width_ = label_width; + label_width = label_width; image_index_.clear(); label_.clear(); idx2label_.clear(); @@ -45,7 +45,7 @@ class ImageLabelMap { // skip space while (isspace(*p) && p != end) ++p; image_index_.push_back(static_cast(atol(p))); - for (size_t i = 0; i < label_width_; ++i) { + for (size_t i = 0; i < label_width; ++i) { // skip till space while (!isspace(*p) && p != end) ++p; // skip space @@ -58,7 +58,7 @@ class ImageLabelMap { // be careful not to resize label_ afterwards idx2label_.reserve(image_index_.size()); for (size_t i = 0; i < image_index_.size(); ++i) { - idx2label_[image_index_[i]] = dmlc::BeginPtr(label_) + i * label_width_; + idx2label_[image_index_[i]] = dmlc::BeginPtr(label_) + i * label_width; } if (!silent) { LOG(INFO) << "Loaded ImageList from " << path_imglist << ' ' @@ -70,12 +70,12 @@ class ImageLabelMap { std::unordered_map::const_iterator it = idx2label_.find(imid); CHECK(it != idx2label_.end()) << "fail to find imagelabel for id " << imid; - return mshadow::Tensor(it->second, mshadow::Shape1(label_width_)); + return mshadow::Tensor(it->second, mshadow::Shape1(label_width)); } private: // label with_ - mshadow::index_t label_width_; + mshadow::index_t label_width; // image index of each record std::vector image_index_; // real label content @@ -87,32 +87,32 @@ class ImageLabelMap { // Define image record parser parameters struct ImageRecParserParam : public dmlc::Parameter { /*! \brief path to image list */ - std::string path_imglist_; + std::string path_imglist; /*! \brief path to image recordio */ - std::string path_imgrec_; + std::string path_imgrec; /*! \brief number of threads */ - int nthread_; + int nthread; /*! \brief whether to remain silent */ - bool silent_; + bool silent; /*! \brief number of distributed worker */ - int dist_num_worker_, dist_worker_rank_; + int dist_num_worker, dist_worker_rank; /*! \brief label-width */ - int label_width_; + int label_width; // declare parameters DMLC_DECLARE_PARAMETER(ImageRecParserParam) { - DMLC_DECLARE_FIELD(path_imglist_).set_default("") + DMLC_DECLARE_FIELD(path_imglist).set_default("") .describe("Path to image list."); - DMLC_DECLARE_FIELD(path_imgrec_).set_default("./data/imgrec.rec") + DMLC_DECLARE_FIELD(path_imgrec).set_default("./data/imgrec.rec") .describe("Path to image record file."); - DMLC_DECLARE_FIELD(nthread_).set_lower_bound(1).set_default(4) + DMLC_DECLARE_FIELD(nthread).set_lower_bound(1).set_default(4) .describe("Number of thread to do parsing."); - DMLC_DECLARE_FIELD(label_width_).set_lower_bound(1).set_default(1) + DMLC_DECLARE_FIELD(label_width).set_lower_bound(1).set_default(1) .describe("How many labels for an image."); - DMLC_DECLARE_FIELD(silent_).set_default(false) + DMLC_DECLARE_FIELD(silent).set_default(false) .describe("Whether to output parser information."); - DMLC_DECLARE_FIELD(dist_num_worker_).set_lower_bound(1).set_default(1) + DMLC_DECLARE_FIELD(dist_num_worker).set_lower_bound(1).set_default(1) .describe("Dist worker number."); - DMLC_DECLARE_FIELD(dist_worker_rank_).set_default(0) + DMLC_DECLARE_FIELD(dist_worker_rank).set_default(0) .describe("Dist worker rank."); } }; @@ -170,12 +170,12 @@ inline void ImageRecordIOParser::Init(const std::vectorHintChunkSize(8 << 20UL); } @@ -217,12 +217,12 @@ ParseNext(std::vector *out_vec) { CHECK(source_ != NULL); dmlc::InputSplit::Blob chunk; if (!source_->NextChunk(&chunk)) return false; - out_vec->resize(param_.nthread_); - #pragma omp parallel num_threads(param_.nthread_) + out_vec->resize(param_.nthread); + #pragma omp parallel num_threads(param_.nthread) { - CHECK(omp_get_num_threads() == param_.nthread_); + CHECK(omp_get_num_threads() == param_.nthread); int tid = omp_get_thread_num(); - dmlc::RecordIOChunkReader reader(chunk, tid, param_.nthread_); + dmlc::RecordIOChunkReader reader(chunk, tid, param_.nthread); ImageRecordIO rec; dmlc::InputSplit::Blob blob; // image data @@ -237,7 +237,7 @@ ParseNext(std::vector *out_vec) { res = augmenters_[tid]->Process(res, prnds_[tid]); out.Push(static_cast(rec.image_index()), mshadow::Shape3(3, res.rows, res.cols), - mshadow::Shape1(param_.label_width_)); + mshadow::Shape1(param_.label_width)); DataInst inst = out.Back(); // turn datainst into tensor mshadow::Tensor data = inst.data[0].get(); From d6ceed317fd1bd82737c8b0361f344910c385d83 Mon Sep 17 00:00:00 2001 From: tianjun Date: Sun, 6 Sep 2015 10:05:36 +0800 Subject: [PATCH 05/15] call augprocess in base iter --- src/io/image_augmenter.h | 40 +++++++++++++++++------------------ src/io/iter_batch.h | 4 ++-- src/io/iter_image_recordio.cc | 24 +++++++-------------- 3 files changed, 30 insertions(+), 38 deletions(-) diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h index a81e5297d5b3..38efcc58e61a 100644 --- a/src/io/image_augmenter.h +++ b/src/io/image_augmenter.h @@ -263,24 +263,6 @@ class ImageAugmenter { return tmpres; } - virtual void OpencvProcess(unsigned char *dptr, size_t sz, - mshadow::TensorContainer *p_data, - common::RANDOM_ENGINE *prnd) { - cv::Mat buf(1, sz, CV_8U, dptr); - cv::Mat res = cv::imdecode(buf, 1); - res = this->OpencvProcess(res, prnd); - p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); - for (index_t i = 0; i < p_data->size(1); ++i) { - for (index_t j = 0; j < p_data->size(2); ++j) { - cv::Vec3b bgr = res.at(i, j); - (*p_data)[0][i][j] = bgr[2]; - (*p_data)[1][i][j] = bgr[1]; - (*p_data)[2][i][j] = bgr[0]; - } - } - res.release(); - } - void TensorProcess(mshadow::TensorContainer *p_data, common::RANDOM_ENGINE *prnd) { img_.Resize(mshadow::Shape3((*p_data).shape_[0], param_.input_shape[1], param_.input_shape[2])); @@ -337,7 +319,7 @@ class ImageAugmenter { } } } - out_.data = img_; + (*p_data) = img_; } inline void CreateMeanImg(void) { @@ -371,7 +353,25 @@ class ImageAugmenter { meanfile_ready_ = true; } - + virtual void Process(unsigned char *dptr, size_t sz, + mshadow::TensorContainer *p_data, + common::RANDOM_ENGINE *prnd) { + cv::Mat buf(1, sz, CV_8U, dptr); + cv::Mat res = cv::imdecode(buf, 1); + res = this->OpencvProcess(res, prnd); + p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < p_data->size(1); ++i) { + for (index_t j = 0; j < p_data->size(2); ++j) { + cv::Vec3b bgr = res.at(i, j); + (*p_data)[0][i][j] = bgr[2]; + (*p_data)[1][i][j] = bgr[1]; + (*p_data)[2][i][j] = bgr[0]; + } + } + res.release(); + this->TensorProcess(p_data, prnd); + } + private: // whether skip opencv processing inline bool NeedOpencvProcess(void) const { diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h index 4d95b92cce1e..7fe8f4440513 100644 --- a/src/io/iter_batch.h +++ b/src/io/iter_batch.h @@ -63,7 +63,7 @@ class BatchAdaptIter: public IIterator { kwargs_left = param_.InitAllowUnknown(kwargs); // init base iterator base_->Init(kwargs); - mshadow::Shape<4> tshape = shape_; + mshadow::Shape<4> tshape = param_.input_shape; tshape[0] = param_.batch_size; AllocSpaceDense(false); } @@ -141,7 +141,7 @@ class BatchAdaptIter: public IIterator { mshadow::Tensor data; // Functions that allocate and free tensor space inline void AllocSpaceDense(bool pad = false) { - data = mshadow::NewTensor(shape_, 0.0f, pad); + data = mshadow::NewTensor(param_.input_shape, 0.0f, pad); mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size, param_.label_width); label = mshadow::NewTensor(lshape, 0.0f, pad); out_.inst_index = new unsigned[param_.batch_size]; diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc index 1589fd5ad6c7..0c44a2346e4a 100644 --- a/src/io/iter_image_recordio.cc +++ b/src/io/iter_image_recordio.cc @@ -98,6 +98,8 @@ struct ImageRecParserParam : public dmlc::Parameter { int dist_num_worker, dist_worker_rank; /*! \brief label-width */ int label_width; + /*! \brief input shape */ + TShape input_shape; // declare parameters DMLC_DECLARE_PARAMETER(ImageRecParserParam) { DMLC_DECLARE_FIELD(path_imglist).set_default("") @@ -114,6 +116,10 @@ struct ImageRecParserParam : public dmlc::Parameter { .describe("Dist worker number."); DMLC_DECLARE_FIELD(dist_worker_rank).set_default(0) .describe("Dist worker rank."); + float input_shape_default = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + .set_expect_ndim(3).enforce_nonzero() + .describe("Input shape of the neural net"); } }; @@ -229,33 +235,19 @@ ParseNext(std::vector *out_vec) { InstVector &out = (*out_vec)[tid]; out.Clear(); while (reader.NextRecord(&blob)) { - // result holder - cv::Mat res; - rec.Load(blob.dptr, blob.size); - cv::Mat buf(1, rec.content_size, CV_8U, rec.content); - res = cv::imdecode(buf, 1); - res = augmenters_[tid]->Process(res, prnds_[tid]); out.Push(static_cast(rec.image_index()), - mshadow::Shape3(3, res.rows, res.cols), + mshadow::Shape3(param_.input_shape[0], param_.input_shape[0], param_.input_shape[0]), mshadow::Shape1(param_.label_width)); DataInst inst = out.Back(); // turn datainst into tensor mshadow::Tensor data = inst.data[0].get(); mshadow::Tensor label = inst.data[1].get(); - for (int i = 0; i < res.rows; ++i) { - for (int j = 0; j < res.cols; ++j) { - cv::Vec3b bgr = res.at(i, j); - data[0][i][j] = bgr[2]; - data[1][i][j] = bgr[1]; - data[2][i][j] = bgr[0]; - } - } + augmenters_[tid]->Process(rec.content, rec.content_size, &data, prnd); if (label_map_ != NULL) { mshadow::Copy(label, label_map_->Find(rec.image_index())); } else { label[0] = rec.header.label; } - res.release(); } } return true; From 06637a3995af6a9fdcce360cdb39c8d67ac1020b Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Tue, 8 Sep 2015 01:13:59 +0800 Subject: [PATCH 06/15] recio works --- mshadow | 2 +- src/common/utils.h | 5 +- src/io/image_augmenter.h | 192 ++++++++++++++++++---------------- src/io/image_recordio.h | 8 +- src/io/inst_vector.h | 16 +-- src/io/io.cc | 2 +- src/io/iter_batch.h | 51 +++++---- src/io/iter_image_recordio.cc | 99 ++++++++++++++---- src/utils/decoder.h | 128 ----------------------- tests/python/test_io.py | 70 +++++++++---- 10 files changed, 274 insertions(+), 299 deletions(-) delete mode 100644 src/utils/decoder.h diff --git a/mshadow b/mshadow index 4449f22c6854..3053f8cdfea0 160000 --- a/mshadow +++ b/mshadow @@ -1 +1 @@ -Subproject commit 4449f22c68543435e5b4f3239de944c03fc0ea46 +Subproject commit 3053f8cdfea0274739282ced015ad458090760e8 diff --git a/src/common/utils.h b/src/common/utils.h index b5edb78bd6f9..29cb9f0e2f2a 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -22,9 +22,10 @@ namespace common { */ typedef std::mt19937 RANDOM_ENGINE; // Get a double float, prnd is the pointer to a Random Engine -#define NextDouble(prnd) std::generate_canonical(*prnd) +#define NextDouble(prnd) std::generate_canonical(*prnd) // Get a random int in [0, range) -#define NextUInt32(range, prnd) static_cast(floor(std::generate_canonical(*prnd) * range)) +#define NextUInt32(range, prnd) static_cast \ +(floor(std::generate_canonical(*prnd) * range)) /*! * \brief Helper functions. diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h index 38efcc58e61a..a4b77f5a41df 100644 --- a/src/io/image_augmenter.h +++ b/src/io/image_augmenter.h @@ -1,4 +1,5 @@ /*! + * Copyright (c) 2015 by Contributors * \file image_augmenter_opencv.hpp * \brief threaded version of page iterator * \author Naiyan Wang, Tianqi Chen, Tianjun Xiao @@ -7,6 +8,10 @@ #define MXNET_IO_IMAGE_AUGMENTER_H_ #include +#include +#include +#include +#include #include "../common/utils.h" namespace mxnet { @@ -41,6 +46,7 @@ struct ImageAugmentParam : public dmlc::Parameter { int rotate; /*! \brief filled color while padding */ int fill_value; + // The following are params for tensor process /*! \brief whether to mirror the image */ bool mirror; /*! \brief whether to perform rand mirror the image */ @@ -55,14 +61,17 @@ struct ImageAugmentParam : public dmlc::Parameter { float mean_b; /*! \brief shape of the image data*/ TShape input_shape; + /*! \brief scale on color space */ + float scale; /*! \brief maximum ratio of contrast variation */ - float max_random_contrast_; + float max_random_contrast; /*! \brief maximum value of illumination variation */ - float max_random_illumination_; + float max_random_illumination; + /*! \brief whether to print augment info */ + bool silent; // declare parameters - // TODO: didn't understand the range for some params DMLC_DECLARE_PARAMETER(ImageAugmentParam) { - DMLC_DECLARE_FIELD(rand_crop_).set_default(true) + DMLC_DECLARE_FIELD(rand_crop).set_default(true) .describe("Whether we de random cropping"); DMLC_DECLARE_FIELD(crop_y_start).set_default(-1) .describe("Where to nonrandom crop on y"); @@ -81,7 +90,7 @@ struct ImageAugmentParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(max_random_scale).set_default(1.0f) .describe("Maxmum scale ratio"); DMLC_DECLARE_FIELD(min_random_scale).set_default(1.0f) - .describe("Minimum scale ratio"); + .describe("Minimum scale ratio"); DMLC_DECLARE_FIELD(max_img_size).set_default(1e10f) .describe("Maxmum image size"); DMLC_DECLARE_FIELD(min_img_size).set_default(0.0f) @@ -99,13 +108,16 @@ struct ImageAugmentParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(mean_r).set_default(0.0f) .describe("Mean value on R channel"); DMLC_DECLARE_FIELD(mean_g).set_default(0.0f) - .describe("Mean value on G channel"); + .describe("Mean value on G channel"); DMLC_DECLARE_FIELD(mean_b).set_default(0.0f) .describe("Mean value on B channel"); - float input_shape_default = {3, 224, 224}; - DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + index_t input_shape_default[] = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape) + .set_default(TShape(input_shape_default, input_shape_default + 3)) .set_expect_ndim(3).enforce_nonzero() .describe("Input shape of the neural net"); + DMLC_DECLARE_FIELD(scale).set_default(1.0f) + .describe("Scale in color space"); DMLC_DECLARE_FIELD(max_random_contrast).set_default(0.0f) .describe("Maximum ratio of contrast variation"); DMLC_DECLARE_FIELD(max_random_illumination).set_default(0.0f) @@ -123,7 +135,6 @@ class ImageAugmenter { } virtual ~ImageAugmenter() { } - // TODO: Hack the shape and rotate list, didn't use param virtual void Init(const std::vector >& kwargs) { std::vector > kwargs_left; kwargs_left = param_.InitAllowUnknown(kwargs); @@ -142,7 +153,7 @@ class ImageAugmenter { if (param_.mean_img.length() != 0) { dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); if (fi == NULL) { - this->CreateMeanImg(); + meanfile_ready_ = false; } else { if (param_.silent == 0) { printf("loading mean image from %s\n", param_.mean_img.c_str()); @@ -174,15 +185,18 @@ class ImageAugmenter { float a = cos(angle / 180.0 * M_PI); float b = sin(angle / 180.0 * M_PI); // scale - float scale = NextDouble(prnd) * (param_.max_random_scale - param_.min_random_scale) + param_.min_random_scale; + float scale = NextDouble(prnd) * \ + (param_.max_random_scale - param_.min_random_scale) + param_.min_random_scale; // aspect ratio - float ratio = NextDouble(prnd) * param_.max_aspect_ratio * 2 - param_.max_aspect_ratio + 1; + float ratio = NextDouble(prnd) * \ + param_.max_aspect_ratio * 2 - param_.max_aspect_ratio + 1; float hs = 2 * scale / (1 + ratio); float ws = ratio * hs; // new width and height - float new_width = std::max(param_.min_img_size, std::min(param_.max_img_size, scale * src.cols)); - float new_height = std::max(param_.min_img_size, std::min(param_.max_img_size, scale * src.rows)); - //printf("%f %f %f %f %f %f %f %f %f\n", s, a, b, scale, ratio, hs, ws, new_width, new_height); + float new_width = std::max(param_.min_img_size, \ + std::min(param_.max_img_size, scale * src.cols)); + float new_height = std::max(param_.min_img_size, \ + std::min(param_.max_img_size, scale * src.rows)); cv::Mat M(2, 3, CV_32F); M.at(0, 0) = hs * a - s * b * ws; M.at(1, 0) = -b * ws; @@ -192,42 +206,42 @@ class ImageAugmenter { float ori_center_height = M.at(1, 0) * src.cols + M.at(1, 1) * src.rows; M.at(0, 2) = (new_width - ori_center_width) / 2; M.at(1, 2) = (new_height - ori_center_height) / 2; - cv::warpAffine(src, temp, M, cv::Size(new_width, new_height), + cv::warpAffine(src, temp_, M, cv::Size(new_width, new_height), cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar(param_.fill_value, param_.fill_value, param_.fill_value)); - cv::Mat res = temp; + cv::Mat res = temp_; // crop - if (param_.max_crop_size != -1 || param_.min_crop_size != -1){ - CHECK(res.cols >= param_.max_crop_size && res.rows >= param_.max_crop_size && param_.max_crop_size >= param_.min_crop_size) + if (param_.max_crop_size != -1 || param_.min_crop_size != -1) { + CHECK(res.cols >= param_.max_crop_size && res.rows >= \ + param_.max_crop_size && param_.max_crop_size >= param_.min_crop_size) << "input image size smaller than max_crop_size"; - mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size- param_.min_crop_size+1, prnd)+ param_.min_crop_size; + mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size \ + - param_.min_crop_size+1, prnd)+ param_.min_crop_size; mshadow::index_t y = res.rows - rand_crop_size; mshadow::index_t x = res.cols - rand_crop_size; if (param_.rand_crop != 0) { y = NextUInt32(y + 1, prnd); x = NextUInt32(x + 1, prnd); - } - else { + } else { y /= 2; x /= 2; } cv::Rect roi(x, y, rand_crop_size, rand_crop_size); cv::resize(res(roi), res, cv::Size(param_.input_shape[1], param_.input_shape[2])); - } - else{ - CHECK(static_cast(res.cols) >= param_.input_shape[1] && static_cast(res.rows) >= param_.input_shape[2]) - << "input image size smaller than input shape"; - mshadow::index_t y = res.rows - param_.input_shape[2]; - mshadow::index_t x = res.cols - param_.input_shape[1]; - if (param_.rand_crop_ != 0) { - y = NextUInt32(y + 1, prnd); - x = NextUInt32(x + 1, prnd); - } - else { - y /= 2; x /= 2; - } - cv::Rect roi(x, y, param_.input_shape[1], param_.input_shape[2]); - res = res(roi); + } else { + CHECK(static_cast(res.cols) >= param_.input_shape[1] \ + && static_cast(res.rows) >= param_.input_shape[2]) + << "input image size smaller than input shape"; + mshadow::index_t y = res.rows - param_.input_shape[2]; + mshadow::index_t x = res.cols - param_.input_shape[1]; + if (param_.rand_crop != 0) { + y = NextUInt32(y + 1, prnd); + x = NextUInt32(x + 1, prnd); + } else { + y /= 2; x /= 2; + } + cv::Rect roi(x, y, param_.input_shape[1], param_.input_shape[2]); + res = res(roi); } return res; } @@ -251,20 +265,32 @@ class ImageAugmenter { } } res = this->OpencvProcess(res, prnd); - tmpres.Resize(mshadow::Shape3(3, res.rows, res.cols)); - for (index_t i = 0; i < tmpres.size(1); ++i) { - for (index_t j = 0; j < tmpres.size(2); ++j) { + tmpres_.Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < tmpres_.size(1); ++i) { + for (index_t j = 0; j < tmpres_.size(2); ++j) { cv::Vec3b bgr = res.at(i, j); - tmpres[0][i][j] = bgr[2]; - tmpres[1][i][j] = bgr[1]; - tmpres[2][i][j] = bgr[0]; + tmpres_[0][i][j] = bgr[2]; + tmpres_[1][i][j] = bgr[1]; + tmpres_[2][i][j] = bgr[0]; } } - return tmpres; + return tmpres_; } void TensorProcess(mshadow::TensorContainer *p_data, common::RANDOM_ENGINE *prnd) { + // Check Newly Created mean image + if (meanfile_ready_ == false && param_.mean_img.length() != 0) { + dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); + if (fi != NULL) { + if (param_.silent == 0) { + printf("loading mean image from %s\n", param_.mean_img.c_str()); + } + meanimg_.LoadBinary(*fi); + delete fi; + meanfile_ready_ = true; + } + } img_.Resize(mshadow::Shape3((*p_data).shape_[0], param_.input_shape[1], param_.input_shape[2])); if (param_.input_shape[1] == 1) { img_ = (*p_data) * param_.scale; @@ -285,72 +311,51 @@ class ImageAugmenter { if (p_data->size(2) != param_.input_shape[2] && param_.crop_x_start != -1) { xx = param_.crop_x_start; } - float contrast = NextDouble(prnd) * param_.max_random_contrast * 2 - param_.max_random_contrast + 1; - float illumination = NextDouble(prnd) * param_.max_random_illumination * 2 - param_.max_random_illumination; + float contrast = NextDouble(prnd) * param_.max_random_contrast \ + * 2 - param_.max_random_contrast + 1; + float illumination = NextDouble(prnd) * param_.max_random_illumination \ + * 2 - param_.max_random_illumination; if (param_.mean_r > 0.0f || param_.mean_g > 0.0f || param_.mean_b > 0.0f) { // substract mean value - (*p_data)[0] -= param_.mean_b; (*p_data)[1] -= param_.mean_g; (*p_data)[2] -= param_.mean_r; - if ((param_.rand_mirror != 0 && NextDouble(rnd) < 0.5f) || param_.mirror == 1) { - img_ = mirror(crop((*p_data) * contrast + illumination, img_[0].shape_, yy, xx)) * param_.scale; + (*p_data)[0] -= param_.mean_b; + (*p_data)[1] -= param_.mean_g; + (*p_data)[2] -= param_.mean_r; + if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { + img_ = mirror(crop((*p_data) * contrast + illumination, \ + img_[0].shape_, yy, xx)) * param_.scale; } else { - img_ = crop((*p_data) * contrast + illumination, img_[0].shape_, yy, xx) * param_.scale ; + img_ = crop((*p_data) * contrast + illumination, \ + img_[0].shape_, yy, xx) * param_.scale; } } else if (!meanfile_ready_ || param_.mean_img.length() == 0) { // do not substract anything - if (param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) { + if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { img_ = mirror(crop((*p_data), img_[0].shape_, yy, xx)) * param_.scale; } else { - img_ = crop((*p_data), img_[0].shape_, yy, xx) * param_.scale ; + img_ = crop((*p_data), img_[0].shape_, yy, xx) * param_.scale; } } else { // substract mean image if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { if (p_data->shape_ == meanimg_.shape_) { - img_ = mirror(crop(((*p_data) - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx)) * param_.scale; + img_ = mirror(crop(((*p_data) - meanimg_) * contrast \ + + illumination, img_[0].shape_, yy, xx)) * param_.scale; } else { - img_ = (mirror(crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * param_.scale; + img_ = (mirror(crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) \ + * contrast + illumination) * param_.scale; } } else { - if (p_data->shape_ == meanimg_.shape_){ - img_ = crop(((*p_data) - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx) * param_.scale; + if (p_data->shape_ == meanimg_.shape_) { + img_ = crop(((*p_data) - meanimg_) * contrast + illumination, \ + img_[0].shape_, yy, xx) * param_.scale; } else { - img_ = ((crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * param_.scale; + img_ = ((crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * \ + contrast + illumination) * param_.scale; } } } } (*p_data) = img_; - } - - inline void CreateMeanImg(void) { - if (silent_ == 0) { - printf("cannot find %s: create mean image, this will take some time...\n", name_meanimg_.c_str()); - } - time_t start = time(NULL); - unsigned long elapsed = 0; - size_t imcnt = 1; - - CHECK(this->Next_()) << "input iterator failed."; - meanimg_.Resize(mshadow::Shape3(shape_[0], shape_[1], shape_[2])); - mshadow::Copy(meanimg_, img_); - while (this->Next()) { - meanimg_ += img_; imcnt += 1; - elapsed = (long)(time(NULL) - start); - if (imcnt % 1000 == 0 && silent_ == 0) { - printf("\r \r"); - printf("[%8lu] images processed, %ld sec elapsed", imcnt, elapsed); - fflush(stdout); - } - } - meanimg_ *= (1.0f / imcnt); - - dmlc::Stream *fo = dmlc::Stream::Create(name_meanimg_.c_str(), "w"); - meanimg_.SaveBinary(*fo); - delete fo; - if (silent_ == 0) { - printf("save mean image to %s..\n", name_meanimg_.c_str()); - } - meanfile_ready_ = true; } virtual void Process(unsigned char *dptr, size_t sz, @@ -358,7 +363,8 @@ class ImageAugmenter { common::RANDOM_ENGINE *prnd) { cv::Mat buf(1, sz, CV_8U, dptr); cv::Mat res = cv::imdecode(buf, 1); - res = this->OpencvProcess(res, prnd); + if (NeedOpencvProcess()) + res = this->OpencvProcess(res, prnd); p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); for (index_t i = 0; i < p_data->size(1); ++i) { for (index_t j = 0; j < p_data->size(2); ++j) { @@ -371,7 +377,7 @@ class ImageAugmenter { res.release(); this->TensorProcess(p_data, prnd); } - + private: // whether skip opencv processing inline bool NeedOpencvProcess(void) const { @@ -391,7 +397,7 @@ class ImageAugmenter { // rotation param cv::Mat rotateM_; // whether the mean file is ready - bool menafile_ready_; + bool meanfile_ready_; // parameters ImageAugmentParam param_; /*! \brief input shape */ @@ -400,5 +406,5 @@ class ImageAugmenter { std::vector rotate_list_; }; } // namespace io -} // namespace cxxnet -#endif +} // namespace mxnet +#endif // MXNET_IO_IMAGE_AUGMENTER_H_ diff --git a/src/io/image_recordio.h b/src/io/image_recordio.h index 4aea8aabcb47..3b4fa0302435 100644 --- a/src/io/image_recordio.h +++ b/src/io/image_recordio.h @@ -1,4 +1,5 @@ /*! + * Copyright (c) 2015 by Contributors * \file image_recordio.h * \brief image recordio struct */ @@ -7,6 +8,7 @@ #include #include +#include namespace mxnet { namespace io { @@ -67,9 +69,9 @@ struct ImageRecordIO { */ inline void SaveHeader(std::string *blob) const { blob->resize(sizeof(header)); - std::memcpy(dmlc::BeginPtr(*blob), &header, sizeof(header)); - } -}; + std::memcpy(dmlc::BeginPtr(*blob), &header, sizeof(header)); + } +}; } // namespace io } // namespace mxnet #endif // MXNET_IO_IMAGE_RECORDIO_H_ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h index 4ced7dd64c63..ed560fc2b5da 100644 --- a/src/io/inst_vector.h +++ b/src/io/inst_vector.h @@ -1,11 +1,12 @@ /*! + * Copyright (c) 2015 by Contributors * \file inst_vector.h * \brief holder of a sequence of DataInst in CPU * that are not necessarily of same shape */ -#ifndef MXNET_INST_VECTOR_H_ -#define MXNET_INST_VECTOR_H_ +#ifndef MXNET_IO_INST_VECTOR_H_ +#define MXNET_IO_INST_VECTOR_H_ #include #include @@ -31,7 +32,7 @@ class TensorVector { CHECK(i + 1 < offset_.size()); CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); return mshadow::Tensor - ((DType*)dmlc::BeginPtr(content_) + offset_[i], shape_[i]); + ((DType*)dmlc::BeginPtr(content_) + offset_[i], shape_[i]); // NOLINT(*) } inline mshadow::Tensor Back() const { return (*this)[Size() - 1]; @@ -52,6 +53,7 @@ class TensorVector { content_.clear(); shape_.clear(); } + private: // offset of the data content std::vector offset_; @@ -66,7 +68,7 @@ class TensorVector { * non-uniform shape data instance in a shape efficient way */ class InstVector { - public: + public: inline size_t Size(void) const { return index_.size(); } @@ -94,8 +96,8 @@ class InstVector { data_.Push(dshape); label_.Push(lshape); } - - private: + + private: /*! \brief index of the data */ std::vector index_; // label @@ -105,4 +107,4 @@ class InstVector { }; } // namespace io } // namespace mxnet -#endif // MXNET_TENSOR_VECTOR_H_ +#endif // MXNET_IO_INST_VECTOR_H_ diff --git a/src/io/io.cc b/src/io/io.cc index b2dbc9f8c2c5..8bfb5dbdd570 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -17,5 +17,5 @@ namespace io { // Register parameters in header files DMLC_REGISTER_PARAMETER(BatchParam); DMLC_REGISTER_PARAMETER(ImageAugmentParam); -} // namespace mxnet } // namespace io +} // namespace mxnet diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h index 7fe8f4440513..b45dfd3328e1 100644 --- a/src/io/iter_batch.h +++ b/src/io/iter_batch.h @@ -1,7 +1,8 @@ /*! + * Copyright (c) 2015 by Contributors * \file iter_batch_proc-inl.hpp * \brief definition of preprocessing iterators that takes an iterator and do some preprocessing - * \author Tianqi Chen + * \author Tianqi Chen, Tianjun Xiao */ #ifndef MXNET_IO_ITER_BATCH_H_ #define MXNET_IO_ITER_BATCH_H_ @@ -10,6 +11,9 @@ #include #include #include +#include +#include +#include namespace mxnet { namespace io { @@ -18,7 +22,6 @@ struct BatchParam : public dmlc::Parameter { /*! \brief label width */ index_t batch_size; /*! \brief input shape */ - // TODO: haven't modify all shape_ TShape input_shape; /*! \brief label width */ index_t label_width; @@ -32,13 +35,14 @@ struct BatchParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(BatchParam) { DMLC_DECLARE_FIELD(batch_size) .describe("Batch size."); - float input_shape_default = {3, 224, 224}; - DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + index_t input_shape_default[] = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape) + .set_default(TShape(input_shape_default, input_shape_default + 3)) .set_expect_ndim(3).enforce_nonzero() - .describe("Input shape of the neural net"); + .describe("Input shape of the neural net"); DMLC_DECLARE_FIELD(label_width).set_default(1) .describe("Label width."); - DMLC_DECLARE_FIELD(round_batch).set_default(false) + DMLC_DECLARE_FIELD(round_batch).set_default(true) .describe("Use round robin to handle overflow batch."); DMLC_DECLARE_FIELD(test_skipread).set_default(false) .describe("Skip read for testing."); @@ -46,25 +50,25 @@ struct BatchParam : public dmlc::Parameter { .describe("Whether to print batch information."); } }; - + /*! \brief create a batch iterator from single instance iterator */ class BatchAdaptIter: public IIterator { -public: - BatchAdaptIter(IIterator *base): base_(base) { - num_overflow_ = 0; - } + public: + explicit BatchAdaptIter(IIterator *base): base_(base), num_overflow_(0) {} virtual ~BatchAdaptIter(void) { delete base_; FreeSpaceDense(); } virtual void Init(const std::vector >& kwargs) { std::vector > kwargs_left; - // init batch param, it could have similar param with + // init batch param, it could have similar param with kwargs_left = param_.InitAllowUnknown(kwargs); // init base iterator base_->Init(kwargs); - mshadow::Shape<4> tshape = param_.input_shape; - tshape[0] = param_.batch_size; + data_shape_[1] = param_.input_shape[0]; + data_shape_[2] = param_.input_shape[1]; + data_shape_[3] = param_.input_shape[2]; + data_shape_[0] = param_.batch_size; AllocSpaceDense(false); } virtual void BeforeFirst(void) { @@ -80,8 +84,10 @@ class BatchAdaptIter: public IIterator { out_.num_batch_padd = 0; // skip read if in head version - if (param_.test_skipread != 0 && head_ == 0) return true; - else this->head_ = 0; + if (param_.test_skipread != 0 && head_ == 0) + return true; + else + this->head_ = 0; // if overflow from previous round, directly return false, until before first is called if (num_overflow_ != 0) return false; @@ -124,7 +130,8 @@ class BatchAdaptIter: public IIterator { CHECK(head_ == 0) << "must call Next to get value"; return out_; } -private: + + private: /*! \brief batch parameters */ BatchParam param_; /*! \brief base iterator */ @@ -139,9 +146,11 @@ class BatchAdaptIter: public IIterator { mshadow::Tensor label; /*! \brief content of dense data, if this DataBatch is dense */ mshadow::Tensor data; + /*! \brief data shape */ + mshadow::Shape<4> data_shape_; // Functions that allocate and free tensor space - inline void AllocSpaceDense(bool pad = false) { - data = mshadow::NewTensor(param_.input_shape, 0.0f, pad); + inline void AllocSpaceDense(bool pad = false) { + data = mshadow::NewTensor(data_shape_, 0.0f, pad); mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size, param_.label_width); label = mshadow::NewTensor(lshape, 0.0f, pad); out_.inst_index = new unsigned[param_.batch_size]; @@ -157,7 +166,7 @@ class BatchAdaptIter: public IIterator { label.dptr_ = NULL; } } -}; // class BatchAdaptIter +}; // class BatchAdaptIter } // namespace io -} // namespace cxxnet +} // namespace mxnet #endif // MXNET_IO_ITER_BATCH_H_ diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc index 0c44a2346e4a..701c28deb4c9 100644 --- a/src/io/iter_image_recordio.cc +++ b/src/io/iter_image_recordio.cc @@ -1,9 +1,9 @@ /*! + * Copyright (c) 2015 by Contributors * \file iter_image_recordio-inl.hpp * \brief recordio data iterator */ -#include #include #include #include @@ -13,11 +13,11 @@ iterator #include #include #include +#include #include "./inst_vector.h" #include "./image_recordio.h" #include "./image_augmenter.h" #include "./iter_batch.h" -#include "../utils/decoder.h" namespace mxnet { namespace io { /*! \brief data structure to hold labels for images */ @@ -31,7 +31,7 @@ class ImageLabelMap { explicit ImageLabelMap(const char *path_imglist, mshadow::index_t label_width, bool silent) { - label_width = label_width; + this->label_width = label_width; image_index_.clear(); label_.clear(); idx2label_.clear(); @@ -116,10 +116,11 @@ struct ImageRecParserParam : public dmlc::Parameter { .describe("Dist worker number."); DMLC_DECLARE_FIELD(dist_worker_rank).set_default(0) .describe("Dist worker rank."); - float input_shape_default = {3, 224, 224}; - DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + index_t input_shape_default[] = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape) + .set_default(TShape(input_shape_default, input_shape_default + 3)) .set_expect_ndim(3).enforce_nonzero() - .describe("Input shape of the neural net"); + .describe("Input shape of the neural net"); } }; @@ -143,7 +144,7 @@ class ImageRecordIOParser { } // initialize the parser inline void Init(const std::vector >& kwargs); - + // set record to the head inline void BeforeFirst(void) { return source_->BeforeFirst(); @@ -151,11 +152,12 @@ class ImageRecordIOParser { // parse next set of records, return an array of // instance vector to the user inline bool ParseNext(std::vector *out); + private: // magic nyumber to see prng static const int kRandMagic = 111; /*! \brief parameters */ - ImageRecParserParam param_; + ImageRecParserParam param_; /*! \brief augmenters */ std::vector augmenters_; /*! \brief random samplers */ @@ -164,9 +166,12 @@ class ImageRecordIOParser { dmlc::InputSplit *source_; /*! \brief label information, if any */ ImageLabelMap *label_map_; + /*! \brief temp space */ + mshadow::TensorContainer img_; }; -inline void ImageRecordIOParser::Init(const std::vector >& kwargs) { +inline void ImageRecordIOParser::Init( + const std::vector >& kwargs) { // initialize parameter std::vector > kwargs_left; // init image rec param @@ -185,12 +190,11 @@ inline void ImageRecordIOParser::Init(const std::vectorInit(kwargs_left); + augmenters_[i]->Init(kwargs); prnds_.push_back(new common::RANDOM_ENGINE((i + 1) * kRandMagic)); } - + // handling for hadoop - // TODO, hack const char *ps_rank = getenv("PS_RANK"); if (ps_rank != NULL) { param_.dist_worker_rank = atoi(ps_rank); @@ -205,7 +209,6 @@ inline void ImageRecordIOParser::Init(const std::vector *out_vec) { InstVector &out = (*out_vec)[tid]; out.Clear(); while (reader.NextRecord(&blob)) { + rec.Load(blob.dptr, blob.size); out.Push(static_cast(rec.image_index()), - mshadow::Shape3(param_.input_shape[0], param_.input_shape[0], param_.input_shape[0]), + mshadow::Shape3(param_.input_shape[0], param_.input_shape[1], param_.input_shape[2]), mshadow::Shape1(param_.label_width)); DataInst inst = out.Back(); // turn datainst into tensor - mshadow::Tensor data = inst.data[0].get(); - mshadow::Tensor label = inst.data[1].get(); - augmenters_[tid]->Process(rec.content, rec.content_size, &data, prnd); + mshadow::Tensor data = inst.data[0].get(); + mshadow::Tensor label = inst.data[1].get(); + augmenters_[tid]->Process(rec.content, rec.content_size, &img_, prnds_[tid]); + mshadow::Copy(data, img_); if (label_map_ != NULL) { mshadow::Copy(label, label_map_->Find(rec.image_index())); } else { @@ -259,12 +264,20 @@ struct ImageRecordParam: public dmlc::Parameter { bool shuffle; /*! \brief random seed */ int seed; + /*! \brief mean file string*/ + std::string mean_img; + /*! \brief whether to remain silent */ + bool silent; // declare parameters DMLC_DECLARE_PARAMETER(ImageRecordParam) { DMLC_DECLARE_FIELD(shuffle).set_default(true) .describe("Whether to shuffle data."); DMLC_DECLARE_FIELD(seed).set_default(0) .describe("Random Seed."); + DMLC_DECLARE_FIELD(mean_img).set_default("./data/mean.bin") + .describe("Path to image mean file."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to output information."); } }; @@ -283,8 +296,8 @@ class ImageRecordIter : public IIterator { std::vector > kwargs_left; // init image rec param kwargs_left = param_.InitAllowUnknown(kwargs); - // use the left kwarg to init parser - parser_.Init(kwargs_left); + // use the kwarg to init parser + parser_.Init(kwargs); // init thread iter iter_.set_max_capacity(4); iter_.Init([this](std::vector **dptr) { @@ -294,6 +307,15 @@ class ImageRecordIter : public IIterator { return parser_.ParseNext(*dptr); }, [this]() { parser_.BeforeFirst(); }); + // Check Meanfile + if (param_.mean_img.length() != 0) { + dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); + if (fi == NULL) { + this->CreateMeanImg(); + } else { + delete fi; + } + } inst_ptr_ = 0; } virtual void BeforeFirst(void) { @@ -320,7 +342,8 @@ class ImageRecordIter : public IIterator { } // shuffle instance order if needed if (shuffle_ != 0) { - std::shuffle(inst_order_.begin(), inst_order_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); + std::shuffle(inst_order_.begin(), inst_order_.end(), \ + common::RANDOM_ENGINE(kRandMagic + param_.seed)); } inst_ptr_ = 0; } @@ -332,6 +355,40 @@ class ImageRecordIter : public IIterator { } private: + inline void CreateMeanImg(void) { + if (param_.silent == 0) { + printf("cannot find %s: create mean image, this will take some time...\n", + param_.mean_img.c_str()); + } + time_t start = time(NULL); + uint64_t elapsed = 0; + size_t imcnt = 1; + this->BeforeFirst(); + CHECK(this->Next()) << "input iterator failed."; + // Get the first data + mshadow::Tensor img_tensor = out_.data[0].get(); + meanimg_.Resize(img_tensor.shape_); + mshadow::Copy(meanimg_, img_tensor); + while (this->Next()) { + mshadow::Tensor img_tensor = out_.data[0].get(); + meanimg_ += img_tensor; imcnt += 1; + elapsed = (uint64_t)(time(NULL) - start); + if (imcnt % 1000 == 0 && param_.silent == 0) { + printf("\r \r"); + printf("[%8lu] images processed, %ld sec elapsed", imcnt, elapsed); + fflush(stdout); + } + } + meanimg_ *= (1.0f / imcnt); + + dmlc::Stream *fo = dmlc::Stream::Create(param_.mean_img.c_str(), "w"); + meanimg_.SaveBinary(*fo); + delete fo; + if (param_.silent == 0) { + printf("save mean image to %s..\n", param_.mean_img.c_str()); + } + } + // random magic static const int kRandMagic = 111; // output instance @@ -350,6 +407,8 @@ class ImageRecordIter : public IIterator { dmlc::ThreadedIter > iter_; // parameters ImageRecordParam param_; + // mean image + mshadow::TensorContainer meanimg_; }; DMLC_REGISTER_PARAMETER(ImageRecParserParam); DMLC_REGISTER_PARAMETER(ImageRecordParam); diff --git a/src/utils/decoder.h b/src/utils/decoder.h deleted file mode 100644 index 52db01edee23..000000000000 --- a/src/utils/decoder.h +++ /dev/null @@ -1,128 +0,0 @@ -#ifndef MXNET_UTILS_DECODER_H_ -#define MXNET_UTILS_DECODER_H_ - -#include -#if MXNET_USE_OPENCV_DECODER == 0 - #include - #include - #include -#endif -#include -#include -#if MXNET_USE_OPENCV - #include -#endif - -namespace mxnet { -namespace utils { - -#if MXNET_USE_OPENCV_DECODER == 0 -struct JpegDecoder { -public: - JpegDecoder(void) { - cinfo.err = jpeg_std_error(&jerr.base); - jerr.base.error_exit = jerror_exit; - jerr.base.output_message = joutput_message; - jpeg_create_decompress(&cinfo); - } - // destructor - ~JpegDecoder(void) { - jpeg_destroy_decompress(&cinfo); - } - - inline void Decode(unsigned char *ptr, size_t sz, - mshadow::TensorContainer *p_data) { - if(setjmp(jerr.jmp)) { - jpeg_destroy_decompress(&cinfo); - dmlc::Error("Libjpeg fail to decode"); - } - this->jpeg_mem_src(&cinfo, ptr, sz); - CHECK(jpeg_read_header(&cinfo, TRUE) == JPEG_HEADER_OK) << "libjpeg: failed to decode"; - CHECK(jpeg_start_decompress(&cinfo) == true) << "libjpeg: failed to decode"; - p_data->Resize(mshadow::Shape3(cinfo.output_height, cinfo.output_width, cinfo.output_components)); - JSAMPROW jptr = &((*p_data)[0][0][0]); - while (cinfo.output_scanline < cinfo.output_height) { - CHECK(jpeg_read_scanlines(&cinfo, &jptr, 1) == true) << "libjpeg: failed to decode"; - jptr += cinfo.output_width * cinfo.output_components; - } - CHECK(jpeg_finish_decompress(&cinfo) == true) << "libjpeg: failed to decode"); - } -private: - struct jerror_mgr { - jpeg_error_mgr base; - jmp_buf jmp; - }; - - METHODDEF(void) jerror_exit(j_common_ptr jinfo) { - jerror_mgr* err = (jerror_mgr*)jinfo->err; - longjmp(err->jmp, 1); - } - - METHODDEF(void) joutput_message(j_common_ptr) {} - - static boolean mem_fill_input_buffer_ (j_decompress_ptr cinfo) { - dmlc::Error("JpegDecoder: bad jpeg image"); - return true; - } - - static void mem_skip_input_data_ (j_decompress_ptr cinfo, long num_bytes_) { - jpeg_source_mgr *src = cinfo->src; - size_t num_bytes = static_cast(num_bytes_); - if (num_bytes > 0) { - src->next_input_byte += num_bytes; - CHECK(src->bytes_in_buffer >= num_bytes) << "fail to decode"; - src->bytes_in_buffer -= num_bytes; - } else { - dmlc::Error("JpegDecoder: bad jpeg image"); - - } - } - - static void mem_term_source_ (j_decompress_ptr cinfo) {} - static void mem_init_source_ (j_decompress_ptr cinfo) {} - static boolean jpeg_resync_to_restart_(j_decompress_ptr cinfo, int desired) { - dmlc::Error("JpegDecoder: bad jpeg image"); - return true; - } - void jpeg_mem_src (j_decompress_ptr cinfo, void* buffer, long nbytes) { - src.init_source = mem_init_source_; - src.fill_input_buffer = mem_fill_input_buffer_; - src.skip_input_data = mem_skip_input_data_; - src.resync_to_restart = jpeg_resync_to_restart_; - src.term_source = mem_term_source_; - src.bytes_in_buffer = nbytes; - src.next_input_byte = static_cast(buffer); - cinfo->src = &src; - } - -private: - jpeg_decompress_struct cinfo; - jpeg_source_mgr src; - jerror_mgr jerr; -}; -#endif - -#if MXNET_USE_OPENCV -struct OpenCVDecoder { - void Decode(unsigned char *ptr, size_t sz, mshadow::TensorContainer *p_data) { - cv::Mat buf(1, sz, CV_8U, ptr); - cv::Mat res = cv::imdecode(buf, 1); - CHECK(res.data != NULL) << "decoding fail"; - p_data->Resize(mshadow::Shape3(res.rows, res.cols, 3)); - for (int y = 0; y < res.rows; ++y) { - for (int x = 0; x < res.cols; ++x) { - cv::Vec3b bgr = res.at(y, x); - // store in RGB order - (*p_data)[y][x][2] = bgr[0]; - (*p_data)[y][x][1] = bgr[1]; - (*p_data)[y][x][0] = bgr[2]; - } - } - res.release(); - } -}; -#endif -} // namespace utils -} // namespace mxnet - -#endif // DECODER_H diff --git a/tests/python/test_io.py b/tests/python/test_io.py index 991a4813033e..8706b062e5d7 100644 --- a/tests/python/test_io.py +++ b/tests/python/test_io.py @@ -5,28 +5,29 @@ import pickle as pickle import sys import get_data +from PIL import Image -# prepare data -get_data.GetMNIST_ubyte() -batch_size = 100 -train_dataiter = mx.io.MNISTIter( - image="data/train-images-idx3-ubyte", - label="data/train-labels-idx1-ubyte", - batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) -val_dataiter = mx.io.MNISTIter( - image="data/t10k-images-idx3-ubyte", - label="data/t10k-labels-idx1-ubyte", - batch_size=batch_size, shuffle=0, flat=1, silent=0) +def test_MNISTIter(): + # prepare data + get_data.GetMNIST_ubyte() -def test_MNISTIter_loop(): + batch_size = 100 + train_dataiter = mx.io.MNISTIter( + image="data/train-images-idx3-ubyte", + label="data/train-labels-idx1-ubyte", + batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) + val_dataiter = mx.io.MNISTIter( + image="data/t10k-images-idx3-ubyte", + label="data/t10k-labels-idx1-ubyte", + batch_size=batch_size, shuffle=0, flat=1, silent=0) + # test_loop nbatch = 60000 / batch_size batch_count = 0 for data, label in train_dataiter: batch_count += 1 assert(nbatch == batch_count) - -def test_MNISTIter_reset(): + # test_reset train_dataiter.reset() train_dataiter.iter_next() label_0 = train_dataiter.getlabel().numpy.flatten() @@ -40,17 +41,40 @@ def test_MNISTIter_reset(): assert(sum(label_0 - label_1) == 0) def test_ImageRecIter(): - dataiter = mx.io.ImageRecordIter(path_imgrec="data/val_cxxnet.rec", - image_mean="data/val_cxxnet_mean.bin", + dataiter = mx.io.ImageRecordIter( + #path_imglist="data/smallset/val_cxxnet5000.txt", + path_imgrec="data/val_cxxnet.rec", + #mean_img="data/smallset/image_net_mean.bin", rand_crop=True, - rand_mirror=True, - input_shape="3,224,224", - batch_size=128) - - - - + mirror=True, + input_shape=(3,227,227), + batch_size=100, + nthread=1, + seed=10) + # Test label read + labelcount = [0 for i in range(1000)] + batchcount = 0 + for data, label in dataiter: + npdata = data.numpy + print npdata[0,:,:,:] + imgdata = np.zeros([227, 227, 3], dtype=np.uint8) + imgdata[:,:,0] = npdata[10,2,:,:] + imgdata[:,:,1] = npdata[10,1,:,:] + imgdata[:,:,2] = npdata[10,0,:,:] + img = Image.fromarray(imgdata) + imgpath = "data/smallset/test_3.jpg" + img.save(imgpath, format='JPEG') + exit(0) + print batchcount + sys.stdout.flush() + batchcount += 1 + nplabel = label.numpy + for i in range(nplabel.shape[0]): + labelcount[int(nplabel[i])] += 1 + # Test image +if __name__ == '__main__': + test_ImageRecIter() From ccfba8944eaeb2be18e51d861547c45a3e53f4c9 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Thu, 3 Sep 2015 08:42:14 +0800 Subject: [PATCH 07/15] merging the code, not compiled --- include/mxnet/io.h | 16 ++++++++ src/common/utils.h | 5 +++ src/io/inst_vector.h | 92 ++++++++++++++++++++------------------------ src/io/io.cc | 9 ++++- src/io/iter_mnist.cc | 4 +- 5 files changed, 72 insertions(+), 54 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 47a59eec54fe..5a8267befc1c 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -109,5 +109,21 @@ struct DataIteratorReg } \ DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ .set_body(__create__ ## DataIteratorType ## __) +/*! + * \brief Macro to register chained Iterators + * + * \code + * // example of registering a imagerec iterator + * MXNET_REGISTER_IO_CHAINED_ITERATOR(ImageRec, ImageRecordIter, BatchIter) + * .describe("batched image record data iterator"); + * + * \endcode + */ +#define MXNET_REGISTER_IO_CHAINED_ITER(name, ChainedDataIterType, HoldingDataIterType) \ + static ::mxnet::IIterator* __create__ ## ChainedDataIteratorType ## __() { \ + return new HoldingDataIteratorType(new ChainedDataIterType); \ + } \ + DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ + .set_body(__create__ ## ChainedDataIteratorType ## __) } // namespace mxnet #endif // MXNET_IO_H_ diff --git a/src/common/utils.h b/src/common/utils.h index cf1fd2f1bb36..f7a2dcce0470 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -22,6 +22,11 @@ namespace common { */ typedef std::mt19937 RANDOM_ENGINE; +// Get a double float, prnd is the pointer to a Random Engine +#define NextDouble(prnd) std::generate_canonical(*prnd) + +#define NextUInt32(range, prnd) static_cast(\ + floor(std::generate_canonical(*prnd) * range)) /*! * \brief Helper functions. */ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h index 1ae734631680..9490ceab94c1 100644 --- a/src/io/inst_vector.h +++ b/src/io/inst_vector.h @@ -1,17 +1,19 @@ /*! - * Copyright (c) 2015 by Contributors - * \inst_vector.h + * \file inst_vector.h * \brief holder of a sequence of DataInst in CPU * that are not necessarily of same shape */ -#ifndef MXNET_IO_INST_VECTOR_H_ -#define MXNET_IO_INST_VECTOR_H_ + +#ifndef MXNET_INST_VECTOR_H_ +#define MXNET_INST_VECTOR_H_ + +#include "./data.h" +#include #include #include -#include -#include -#include "./data.h" + namespace mxnet { +namespace io { /*! * \brief tensor vector that can store sequence of tensor * in a memory compact way, tensors do not have to be of same shape @@ -28,7 +30,7 @@ class TensorVector { CHECK(i + 1 < offset_.size()); CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); return mshadow::Tensor - (reinterpret_cast(BeginPtr(content_)) + offset_[i], shape_[i]); + ((DType*)BeginPtr(content_) + offset_[i], shape_[i]); } inline mshadow::Tensor Back() const { return (*this)[Size() - 1]; @@ -49,7 +51,6 @@ class TensorVector { content_.clear(); shape_.clear(); } - private: // offset of the data content std::vector offset_; @@ -59,59 +60,48 @@ class TensorVector { std::vector > shape_; }; -/*! - * \brief tblob vector that can store sequence of tblob - * in a memory compact way, tblobs do not have to be of same shape - */ -template -class TBlobVector { - public: - TBlobVector(void) { - this->Clear(); - } - // get i-th tblob - inline TBlob operator[](size_t i) const; - // get the last tblob - inline TBlob Back(); - // return the size of the vector - inline size_t Size(void) const; - // push a tensor of certain shape - // return the reference of the pushed tensor - inline void Push(TShape shape_); - inline void Clear(void); - private: - // offset of the data content - std::vector offset_; - // data content - std::vector content_; - // shape of data - std::vector shape_; -}; - /*! * \brief instance vector that can holds * non-uniform shape data instance in a shape efficient way */ class InstVector { - public: + public: inline size_t Size(void) const { return index_.size(); } // instance - inline DataInst operator[](size_t i) const; + inline DataInst operator[](size_t i) const { + DataInst inst; + inst.index = index_[i]; + inst.data = data_[i]; + inst.label = label_[i]; + return inst; + } // get back of instance vector - inline DataInst Back() const; - // clear the container - inline void Clear(void); - // push the newly coming instance - inline void Push(unsigned index, TBlob data_); - - private: + inline DataInst Back() const { + return (*this)[Size() - 1]; + } + inline void Clear(void) { + index_.clear(); + data_.Clear(); + label_.Clear(); + } + inline void Push(unsigned index, + mshadow::Shape<3> dshape, + mshadow::Shape<1> lshape) { + index_.push_back(index); + data_.Push(dshape); + label_.Push(lshape); + } + + private: /*! \brief index of the data */ std::vector index_; + // label + TensorVector<3, real_t> data_; // data - std::vector > data_; - // extra data - std::vector extra_data_; + TensorVector<1, real_t> label_; }; -#endif // MXNET_IO_INST_VECTOR_H_ +} // namespace io +} // namespace mxnet +#endif // MXNET_TENSOR_VECTOR_H_ diff --git a/src/io/io.cc b/src/io/io.cc index bd5b78dda643..9095f4089c92 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -4,7 +4,14 @@ #include #include +#include +#include <> +#include +// Registers namespace dmlc { DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); -} // namespace dmlc +// Register parameters in header files +DMLC_REGISTER_PARAMETER(BatchParam); +DMLC_REGISTER_PARAMETER(ImageAugmenterParam); +} // namespace dmlc \ No newline at end of file diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 93195061b278..77ac3a479f75 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -31,7 +31,7 @@ struct MNISTParam : public dmlc::Parameter { bool flat; /*! \brief random seed */ int seed; - // declare parameters in header file + // declare parameters DMLC_DECLARE_PARAMETER(MNISTParam) { DMLC_DECLARE_FIELD(image).set_default("./train-images-idx3-ubyte") .describe("Mnist image path."); @@ -155,7 +155,7 @@ class MNISTIter: public IIterator { delete stdlabel; } inline void Shuffle(void) { - std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param_.seed)); + std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); std::vector tmplabel(labels_.size()); mshadow::TensorContainer tmpimg(img_.shape_); for (size_t i = 0; i < inst_.size(); ++i) { From a54f6c9f4bc44fdc3910ccfd97ee4352df077913 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Thu, 3 Sep 2015 08:42:50 +0800 Subject: [PATCH 08/15] add image rec and associate files in --- src/io/image_augmenter.h | 262 ++++++++++++++++++++++++ src/io/image_recordio.h | 75 +++++++ src/io/iter_batch.h | 162 +++++++++++++++ src/io/iter_image_recordio.cc | 369 ++++++++++++++++++++++++++++++++++ src/utils/decoder.h | 128 ++++++++++++ src/utils/io.h | 175 ++++++++++++++++ src/utils/thread_buffer.h | 205 +++++++++++++++++++ 7 files changed, 1376 insertions(+) create mode 100644 src/io/image_augmenter.h create mode 100644 src/io/image_recordio.h create mode 100644 src/io/iter_batch.h create mode 100644 src/io/iter_image_recordio.cc create mode 100644 src/utils/decoder.h create mode 100644 src/utils/io.h create mode 100644 src/utils/thread_buffer.h diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h new file mode 100644 index 000000000000..d33464c4a889 --- /dev/null +++ b/src/io/image_augmenter.h @@ -0,0 +1,262 @@ +/*! + * \file image_augmenter_opencv.hpp + * \brief threaded version of page iterator + * \author Naiyan Wang, Tianqi Chen + */ +#ifndef MXNET_IO_IMAGE_AUGMENTER_H_ +#define MXNET_IO_IMAGE_AUGMENTER_H_ + +#include +#include "../common/utils.h" + +namespace mxnet { +namespace io { +/*! \brief image augmentation parameters*/ +struct ImageAugmentParam : public dmlc::Parameter { + /*! \brief whether we do random cropping */ + bool rand_crop_; + /*! \brief whether we do nonrandom croping */ + int crop_y_start_; + /*! \brief whether we do nonrandom croping */ + int crop_x_start_; + /*! \brief Indicate the max ratation angle for augmentation, we will random rotate */ + /*! \brief [-max_rotate_angle, max_rotate_angle] */ + int max_rotate_angle_; + /*! \brief max aspect ratio */ + float max_aspect_ratio_; + /*! \brief random shear the image [-max_shear_ratio, max_shear_ratio] */ + float max_shear_ratio_; + /*! \brief max crop size */ + int max_crop_size_; + /*! \brief min crop size */ + int min_crop_size_; + /*! \brief max scale ratio */ + float max_random_scale_; + /*! \brief min scale_ratio */ + float min_random_scale_; + /*! \brief min image size */ + float min_img_size_; + /*! \brief max image size */ + float max_img_size_; + /*! \brief whether to mirror the image */ + bool mirror_; + /*! \brief rotate angle */ + int rotate_; + /*! \brief filled color while padding */ + int fill_value_; + // declare parameters + // TODO: didn't understand the range for some params + DMLC_DECLARE_PARAMETER(ImageAugmentParam) { + DMLC_DECLARE_FIELD(rand_crop_).set_default(true) + .describe("Whether we de random cropping"); + DMLC_DECLARE_FIELD(crop_y_start_).set_default(-1) + .describe("Where to nonrandom crop on y"); + DMLC_DECLARE_FIELD(crop_x_start_).set_default(-1) + .describe("Where to nonrandom crop on x"); + DMLC_DECLARE_FIELD(max_rotate_angle_).set_default(0.0f) + .describe("Rotate can be [-max_rotate_angle, max_rotate_angle]"); + DMLC_DECLARE_FIELD(max_aspect_ratio_).set_default(0.0f) + .describe("Max aspect ratio"); + DMLC_DECLARE_FIELD(max_shear_ratio_).set_default(0.0f) + .describe("Shear rotate can be made between [-max_shear_ratio_, max_shear_ratio_]"); + DMLC_DECLARE_FIELD(max_crop_size_).set_default(-1) + .describe("Maximum crop size"); + DMLC_DECLARE_FIELD(min_crop_size_).set_default(-1) + .describe("Minimum crop size"); + DMLC_DECLARE_FIELD(max_random_scale_).set_default(1.0f) + .describe("Maxmum scale ratio"); + DMLC_DECLARE_FIELD(min_random_scale_).set_default(1.0f) + .describe("Minimum scale ratio"); + DMLC_DECLARE_FIELD(max_img_size_).set_default(1e10f) + .describe("Maxmum image size"); + DMLC_DECLARE_FIELD(min_img_size_).set_default(0.0f) + .describe("Minimum image size"); + DMLC_DECLARE_FIELD(mirror_).set_default(false) + .describe("Whether to mirror the image"); + DMLC_DECLARE_FIELD(rotate_).set_default(-1.0f) + .describe("Rotate angle"); + DMLC_DECLARE_FIELD(fill_value_).set_default(255) + .describe("Filled value while padding"); +}; + +/*! \brief helper class to do image augmentation */ +class ImageAugmenter { + public: + // contructor + ImageAugmenter(void) + : tmpres(false), + rotateM(2, 3, CV_32F) { + } + virtual ~ImageAugmenter() { + } + // TODO: Hack the shape and rotate list, didn't use param + virtual void Init(const std::vector >& kwargs) { + std::vector > kwargs_left; + kwargs_left = param_.InitAllowUnknown(kwargs); + for (size_t i = 0; i < kwargs_left.size(); i++) { + if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { + CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[0], &shape_[1], &shape_[2]) == 3) + << "input_shape must be three consecutive integers without space example: 1,1,200 "; + } + if (!strcmp(kwargs_left[i].first.c_str(), "rotate_list")) { + char* val = kwargs_left[i].second.c_str(); + const char *end = val + strlen(val); + char buf[128]; + while (val < end) { + sscanf(val, "%[^,]", buf); + val += strlen(buf) + 1; + rotate_list_.push_back(atoi(buf)); + } + } + } + } + /*! + * \brief augment src image, store result into dst + * this function is not thread safe, and will only be called by one thread + * however, it will tries to re-use memory space as much as possible + * \param src the source image + * \param source of random number + * \param dst the pointer to the place where we want to store the result + */ + virtual cv::Mat Process(const cv::Mat &src, + common::RANDOM_ENGINE *prnd) { + // shear + float s = common::NextDouble(prnd) * param_.max_shear_ratio_ * 2 - param_.max_shear_ratio_; + // rotate + int angle = common::NextUInt32(param_.max_rotate_angle_ * 2, prnd) - param_.max_rotate_angle_; + if (param_.rotate_ > 0) angle = param_.rotate_; + if (rotate_list_.size() > 0) { + angle = rotate_list_[NextUInt32(rotate_list_.size() - 1, prnd)]; + } + float a = cos(angle / 180.0 * M_PI); + float b = sin(angle / 180.0 * M_PI); + // scale + float scale = NextDouble(prnd) * (param_.max_random_scale_ - param_.min_random_scale_) + param_.min_random_scale_; + // aspect ratio + float ratio = NextDouble(prnd) * param_.max_aspect_ratio_ * 2 - param_.max_aspect_ratio_ + 1; + float hs = 2 * scale / (1 + ratio); + float ws = ratio * hs; + // new width and height + float new_width = std::max(param_.min_img_size_, std::min(param_.max_img_size_, scale * src.cols)); + float new_height = std::max(param_.min_img_size_, std::min(param_.max_img_size_, scale * src.rows)); + //printf("%f %f %f %f %f %f %f %f %f\n", s, a, b, scale, ratio, hs, ws, new_width, new_height); + cv::Mat M(2, 3, CV_32F); + M.at(0, 0) = hs * a - s * b * ws; + M.at(1, 0) = -b * ws; + M.at(0, 1) = hs * b + s * a * ws; + M.at(1, 1) = a * ws; + float ori_center_width = M.at(0, 0) * src.cols + M.at(0, 1) * src.rows; + float ori_center_height = M.at(1, 0) * src.cols + M.at(1, 1) * src.rows; + M.at(0, 2) = (new_width - ori_center_width) / 2; + M.at(1, 2) = (new_height - ori_center_height) / 2; + cv::warpAffine(src, temp, M, cv::Size(new_width, new_height), + cv::INTER_LINEAR, + cv::BORDER_CONSTANT, + cv::Scalar(param_.fill_value_, param_.fill_value_, param_.fill_value_)); + cv::Mat res = temp; + if (param_.max_crop_size_ != -1 || param_.min_crop_size_ != -1){ + CHECK(res.cols >= param_.max_crop_size_ && res.rows >= param_.max_crop_size_&& param_.max_crop_size_ >= param_.min_crop_size_) + << "input image size smaller than max_crop_size"; + mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size_- param_.min_crop_size_+1, prnd)+ param_.min_crop_size_; + mshadow::index_t y = res.rows - rand_crop_size; + mshadow::index_t x = res.cols - rand_crop_size; + if (rand_crop_ != 0) { + y = NextUInt32(y + 1, prnd); + x = NextUInt32(x + 1, prnd); + } + else { + y /= 2; x /= 2; + } + cv::Rect roi(x, y, rand_crop_size, rand_crop_size); + cv::resize(res(roi), res, cv::Size(shape_[1], shape_[2])); + } + else{ + utils::Check(static_cast(res.cols) >= shape_[1] && static_cast(res.rows) >= shape_[2], + "input image size smaller than input shape"); + mshadow::index_t y = res.rows - shape_[2]; + mshadow::index_t x = res.cols - shape_[1]; + if (param_.rand_crop_ != 0) { + y = NextUInt32(y + 1, prnd); + x = NextUInt32(x + 1, prnd); + } + else { + y /= 2; x /= 2; + } + cv::Rect roi(x, y, shape_[1], shape_[2]); + res = res(roi); + } + return res; + } + /*! + * \brief augment src image, store result into dst + * this function is not thread safe, and will only be called by one thread + * however, it will tries to re-use memory space as much as possible + * \param src the source image + * \param source of random number + * \param dst the pointer to the place where we want to store the result + */ + virtual mshadow::Tensor Process(mshadow::Tensor data, + common::RANDOM_ENGINE *prnd) { + if (!NeedProcess()) return data; + cv::Mat res(data.size(1), data.size(2), CV_8UC3); + for (index_t i = 0; i < data.size(1); ++i) { + for (index_t j = 0; j < data.size(2); ++j) { + res.at(i, j)[0] = data[2][i][j]; + res.at(i, j)[1] = data[1][i][j]; + res.at(i, j)[2] = data[0][i][j]; + } + } + res = this->Process(res, prnd); + tmpres.Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < tmpres.size(1); ++i) { + for (index_t j = 0; j < tmpres.size(2); ++j) { + cv::Vec3b bgr = res.at(i, j); + tmpres[0][i][j] = bgr[2]; + tmpres[1][i][j] = bgr[1]; + tmpres[2][i][j] = bgr[0]; + } + } + return tmpres; + } + + virtual void Process(unsigned char *dptr, size_t sz, + mshadow::TensorContainer *p_data, + common::RANDOM_ENGINE *prnd) { + cv::Mat buf(1, sz, CV_8U, dptr); + cv::Mat res = cv::imdecode(buf, 1); + res = this->Process(res, prnd); + p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < p_data->size(1); ++i) { + for (index_t j = 0; j < p_data->size(2); ++j) { + cv::Vec3b bgr = res.at(i, j); + (*p_data)[0][i][j] = bgr[2]; + (*p_data)[1][i][j] = bgr[1]; + (*p_data)[2][i][j] = bgr[0]; + } + } + res.release(); + } + + private: + // whether skip processing + inline bool NeedProcess(void) const { + if (max_rotate_angle_ > 0 || max_shear_ratio_ > 0.0f + || rotate_ > 0 || rotate_list_.size() > 0) return true; + if (min_crop_size_ > 0 && max_crop_size_ > 0) return true; + return false; + } + // temp input space + mshadow::TensorContainer tmpres; + // temporal space + cv::Mat temp0, temp, temp2; + // rotation param + cv::Mat rotateM; + // parameters + /*! \brief input shape */ + mshadow::Shape<4> shape_; + /*! \brief list of possible rotate angle */ + std::vector rotate_list_; +}; +} // namespace io +} // namespace cxxnet +#endif diff --git a/src/io/image_recordio.h b/src/io/image_recordio.h new file mode 100644 index 000000000000..4aea8aabcb47 --- /dev/null +++ b/src/io/image_recordio.h @@ -0,0 +1,75 @@ +/*! + * \file image_recordio.h + * \brief image recordio struct + */ +#ifndef MXNET_IO_IMAGE_RECORDIO_H_ +#define MXNET_IO_IMAGE_RECORDIO_H_ + +#include +#include + +namespace mxnet { +namespace io { +/*! \brief image recordio struct */ +struct ImageRecordIO { + /*! \brief header in image recordio */ + struct Header { + /*! + * \brief flag of the header, + * used for future extension purposes + */ + uint32_t flag; + /*! + * \brief label field that returns label of images + * when image list was not presented, + * + * NOTE: user do not need to repack recordio just to + * change label field, just supply a list file that + * maps image id to new labels + */ + float label; + /*! + * \brief unique image index + * image_id[1] is always set to 0, + * reserved for future purposes for 128bit id + * image_id[0] is used to store image id + */ + uint64_t image_id[2]; + }; + /*! \brief header of image recordio */ + Header header; + /*! \brief pointer to data content */ + uint8_t *content; + /*! \brief size of the content */ + size_t content_size; + /*! \brief constructor */ + ImageRecordIO(void) + : content(NULL), content_size(0) { + memset(&header, 0, sizeof(header)); + } + /*! \brief get image id from record */ + inline uint64_t image_index(void) const { + return header.image_id[0]; + } + /*! + * \brief load header from a record content + * \param buf the head of record + * \param size the size of the entire record + */ + inline void Load(void *buf, size_t size) { + CHECK(size >= sizeof(header)); + std::memcpy(&header, buf, sizeof(header)); + content = reinterpret_cast(buf) + sizeof(header); + content_size = size - sizeof(header); + } + /*! + * \brief save the record header + */ + inline void SaveHeader(std::string *blob) const { + blob->resize(sizeof(header)); + std::memcpy(dmlc::BeginPtr(*blob), &header, sizeof(header)); + } +}; +} // namespace io +} // namespace mxnet +#endif // MXNET_IO_IMAGE_RECORDIO_H_ diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h new file mode 100644 index 000000000000..a0e4ab7e7ba5 --- /dev/null +++ b/src/io/iter_batch.h @@ -0,0 +1,162 @@ +/*! + * \file iter_batch_proc-inl.hpp + * \brief definition of preprocessing iterators that takes an iterator and do some preprocessing + * \author Tianqi Chen + */ +#ifndef MXNET_IO_ITER_BATCH_H_ +#define MXNET_IO_ITER_BATCH_H_ + +#include +#include +#include +#include + +namespace mxnet { +namespace io { +// Batch parameters +struct BatchParam : public dmlc::Parameter { + /*! \brief label width */ + index_t batch_size_; + /*! \brief label width */ + index_t label_width_; + /*! \brief use round roubin to handle overflow batch */ + bool round_batch_; + /*! \brief skip read */ + bool test_skipread_; + /*! \brief silent */ + bool silent_; + // declare parameters + DMLC_DECLARE_PARAMETER(BatchParam) { + DMLC_DECLARE_FIELD(batch_size_).set_default(1) + .describe("Batch size."); + DMLC_DECLARE_FIELD(label_width_).set_default(1) + .describe("Label width."); + DMLC_DECLARE_FIELD(round_batch_).set_default(false) + .describe("Use round robin to handle overflow batch."); + DMLC_DECLARE_FIELD(test_skipread_).set_default(false) + .describe("Skip read for testing."); + DMLC_DECLARE_FIELD(silent_).set_default(false) + .describe("Whether to print batch information.") + } +}; + +/*! \brief create a batch iterator from single instance iterator */ +class BatchAdaptIter: public IIterator { +public: + BatchAdaptIter(IIterator *base): base_(base) { + num_overflow_ = 0; + } + virtual ~BatchAdaptIter(void) { + delete base_; + out_.FreeSpaceDense(); + } + virtual void Init(const std::vector >& kwargs) { + std::vector > kwargs_left; + // init batch param, it could have similar param with + kwargs_left = param_.InitAllowUnknown(kwargs); + for (size_t i = 0; i < kwargs_left.size(); i++) { + if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { + CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[1], &shape_[2], &shape_[3]) == 3) + << "input_shape must be three consecutive integers without space example: 1,1,200 ") + } + } + // init base iterator + base_->Init(kwargs); + mshadow::Shape<4> tshape = shape_; + tshape[0] = param_.batch_size_; + AllocSpaceDense(false); + } + virtual void BeforeFirst(void) { + if (param_.round_batch_ == 0 || num_overflow_ == 0) { + // otherise, we already called before first + base_->BeforeFirst(); + } else { + num_overflow_ = 0; + } + head_ = 1; + } + virtual bool Next(void) { + out_.num_batch_padd = 0; + + // skip read if in head version + if (param_.test_skipread_ != 0 && head_ == 0) return true; + else this->head_ = 0; + + // if overflow from previous round, directly return false, until before first is called + if (num_overflow_ != 0) return false; + index_t top = 0; + + while (base_->Next()) { + const DataInst& d = base_->Value(); + mshadow::Copy(label[top], d.data[1].get()); + out_.inst_index[top] = d.index; + mshadow::Copy(data[top], d.data[0].get()); + + if (++ top >= param_.batch_size_) { + out.data[0] = TBlob(data); + out.data[1] = TBlob(label); + return true; + } + } + if (top != 0) { + if (param_.round_batch_ != 0) { + num_overflow_ = 0; + base_->BeforeFirst(); + for (; top < param_.batch_size_; ++top, ++num_overflow_) { + CHECK(base_->Next()) << "number of input must be bigger than batch size"; + const DataInst& d = base_->Value(); + mshadow::Copy(label[top], d.data[1].get()); + out_.inst_index[top] = d.index; + mshadow::Copy(data[top], d.data[0].get()); + } + out_.num_batch_padd = num_overflow_; + } else { + out_.num_batch_padd = batch_size_ - top; + } + out.data[0] = TBlob(data); + out.data[1] = TBlob(label); + return true; + } + return false; + } + virtual const DataBatch &Value(void) const { + CHECK(head_ == 0) << "must call Next to get value"; + return out_; + } +private: + /*! \brief base iterator */ + IIterator *base_; + /*! \brief input shape */ + mshadow::Shape<4> shape_; + /*! \brief output data */ + DataBatch out_; + /*! \brief on first */ + int head_; + /*! \brief number of overflow instances that readed in round_batch mode */ + int num_overflow_; + /*! \brief label information of the data*/ + mshadow::Tensor label; + /*! \brief content of dense data, if this DataBatch is dense */ + mshadow::Tensor data; + // Functions that allocate and free tensor space + inline void AllocSpaceDense(bool pad = false) { + data = mshadow::NewTensor(shape_, 0.0f, pad); + mshadow::Shape<2> lshape = mshadow::Shape2(batch_size, label_width); + label = mshadow::NewTensor(lshape, 0.0f, pad); + out_.inst_index = new unsigned[batch_size]; + out_.batch_size = batch_size; + out_.data.resize(2); + } + /*! \brief auxiliary function to free space, if needed, dense only */ + inline void FreeSpaceDense(void) { + if (label.dptr_ != NULL) { + delete [] inst_index; + mshadow::FreeSpace(&label); + mshadow::FreeSpace(&data); + label.dptr_ = NULL; + } + } +}; // class BatchAdaptIter +} // namespace io +} // namespace cxxnet +#endif // MXNET_IO_ITER_BATCH_H_ \ No newline at end of file diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc new file mode 100644 index 000000000000..2ab1aa8958cb --- /dev/null +++ b/src/io/iter_image_recordio.cc @@ -0,0 +1,369 @@ +/*! + * \file iter_image_recordio-inl.hpp + * \brief recordio data +iterator + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "./inst_vector.h" +#include "./image_recordio.h" +#include "./image_augmenter.h" +#include "../utils/decoder.h" +namespace mxnet { +namespace io { +/*! \brief data structure to hold labels for images */ +class ImageLabelMap { + public: + /*! + * \brief initialize the label list into memory + * \param path_imglist path to the image list + * \param label_width predefined label_width + */ + explicit ImageLabelMap(const char *path_imglist, + mshadow::index_t label_width, + bool silent) { + label_width_ = label_width; + image_index_.clear(); + label_.clear(); + idx2label_.clear(); + dmlc::InputSplit *fi = dmlc::InputSplit::Create + (path_imglist, 0, 1, "text"); + dmlc::InputSplit::Blob rec; + while (fi->NextRecord(&rec)) { + // quick manual parsing + char *p = reinterpret_cast(rec.dptr); + char *end = p + rec.size; + // skip space + while (isspace(*p) && p != end) ++p; + image_index_.push_back(static_cast(atol(p))); + for (size_t i = 0; i < label_width_; ++i) { + // skip till space + while (!isspace(*p) && p != end) ++p; + // skip space + while (isspace(*p) && p != end) ++p; + CHECK(p != end) << "Bad ImageList format"; + label_.push_back(static_cast(atof(p))); + } + } + delete fi; + // be careful not to resize label_ afterwards + idx2label_.reserve(image_index_.size()); + for (size_t i = 0; i < image_index_.size(); ++i) { + idx2label_[image_index_[i]] = BeginPtr(label_) + i * label_width_; + } + if (!silent) { + LOG(INFO) << "Loaded ImageList from " << path_imglist << ' ' + << image_index_.size() << " Image records"; + } + } + /*! \brief find a label for corresponding index */ + inline mshadow::Tensor Find(size_t imid) const { + std::unordered_map::const_iterator it + = idx2label_.find(imid); + CHECK(it != idx2label_.end()) << "fail to find imagelabel for id " << imid; + return mshadow::Tensor(it->second, mshadow::Shape1(label_width_)); + } + + private: + // label with_ + mshadow::index_t label_width_; + // image index of each record + std::vector image_index_; + // real label content + std::vector label_; + // map index to label + std::unordered_map idx2label_; +}; + +// Define image record parser parameters +struct ImageRecParserParam : public dmlc::Parameter { + /*! \brief path to image list */ + std::string path_imglist_; + /*! \brief path to image recordio */ + std::string path_imgrec_; + /*! \brief number of threads */ + int nthread_; + /*! \brief whether to remain silent */ + bool silent_; + /*! \brief number of distributed worker */ + int dist_num_worker_, dist_worker_rank_; + /*! \brief label-width */ + int label_width_; + // declare parameters + DMLC_DECLARE_PARAMETER(ImageRecParserParam) { + DMLC_DECLARE_FIELD(path_imglist_).set_default("") + .describe("Path to image list."); + DMLC_DECLARE_FIELD(path_imagrec_).set_default("./data/imgrec.rec") + .describe("Path to image record file."); + DMLC_DECLARE_FIELD(nthread_).set_lower_bound(1).set_default(4) + .describe("Number of thread to do parsing."); + DMLC_DECLARE_FIELD(label_width_).set_lower_bound(1).set_default(1) + .describe("How many labels for an image."); + DMLC_DECLARE_FIELD(silent_).set_default(false) + .describe("Whether to output parser information."); + DMLC_DECLARE_FIELD(dist_num_worker_).set_lower_bound(1).set_default(1) + .describe("Dist worker number."); + DMLC_DECLARE_FIELD(dist_worker_rank_).set_default(0) + .describe("Dist worker rank."); + } +}; + +// parser to parse image recordio +class ImageRecordIOParser { + public: + ImageRecordIOParser(void) + : source_(NULL), + label_map_(NULL) { + } + ~ImageRecordIOParser(void) { + // can be NULL + delete label_map_; + delete source_; + for (size_t i = 0; i < augmenters_.size(); ++i) { + delete augmenters_[i]; + } + for (size_t i = 0; i < prnds_.size(); ++i) { + delete prnds_[i]; + } + } + // initialize the parser + inline void Init(const std::vector >& kwargs); + + // set record to the head + inline void BeforeFirst(void) { + return source_->BeforeFirst(); + } + // parse next set of records, return an array of + // instance vector to the user + inline bool ParseNext(std::vector *out); + private: + // magic nyumber to see prng + static const int kRandMagic = 111; + /*! \brief parameters */ + ImageRecParserParam param_; + /*! \brief augmenters */ + std::vector augmenters_; + /*! \brief random samplers */ + std::vector prnds_; + /*! \brief data source */ + dmlc::InputSplit *source_; + /*! \brief label information, if any */ + ImageLabelMap *label_map_; +}; + +inline void ImageRecordIOParser::Init(const std::vector >& kwargs) { + // initialize parameter + std::vector > kwargs_left; + // init image rec param + kwargs_left = param_.InitAllowUnknown(kwargs); + int maxthread, threadget; + #pragma omp parallel + { + maxthread = std::max(omp_get_num_procs() / 2 - 1, 1); + } + param_.nthread_ = std::min(maxthread, param_.nthread_); + #pragma omp parallel num_threads(param_.nthread_) + { + threadget = omp_get_num_threads(); + } + param_.nthread_ = threadget; + // setup decoders + for (int i = 0; i < threadget; ++i) { + augmenters_.push_back(new ImageAugmenter()); + augmenters_[i].init(kwargs_left); + prnds_.push_back(new common::RANDOM_ENGINE((i + 1) * kRandMagic)); + } + + // handling for hadoop + // TODO, hack + const char *ps_rank = getenv("PS_RANK"); + if (ps_rank != NULL) { + param_.dist_worker_rank = atoi(ps_rank); + } + + if (param_.path_imglist_.length() != 0) { + label_map_ = new ImageLabelMap(param_.path_imglist_.c_str(), + param_.label_width_, silent_ != 0); + } else { + param_.label_width_ = 1; + } + CHECK(path_imgrec_.length() != 0) + << "ImageRecordIOIterator: must specify image_rec"; +#if MSHADOW_DIST_PS + // TODO move to a better place + param_.dist_num_worker_ = ::ps::RankSize(); + param_.dist_worker_rank_ = ::ps::MyRank(); + LOG(INFO) << "rank " << param_.dist_worker_rank_ + << " in " << param_.dist_num_worker_; +#endif + source_ = dmlc::InputSplit::Create + (param_.path_imgrec_.c_str(), param_.dist_worker_rank_, + param_.dist_num_worker_, "recordio"); + // use 64 MB chunk when possible + source_->HintChunkSize(8 << 20UL); +} + +inline bool ImageRecordIOParser:: +ParseNext(std::vector *out_vec) { + CHECK(source_ != NULL); + dmlc::InputSplit::Blob chunk; + if (!source_->NextChunk(&chunk)) return false; + out_vec->resize(param_.nthread_); + #pragma omp parallel num_threads(param_.nthread_) + { + CHECK(omp_get_num_threads() == param_.nthread_); + int tid = omp_get_thread_num(); + dmlc::RecordIOChunkReader reader(chunk, tid, parser_.nthread_); + mxnet::ImageRecordIO rec; + dmlc::InputSplit::Blob blob; + // image data + InstVector &out = (*out_vec)[tid]; + out.Clear(); + while (reader.NextRecord(&blob)) { + // result holder + cv::Mat res; + rec.Load(blob.dptr, blob.size); + cv::Mat buf(1, rec.content_size, CV_8U, rec.content); + res = cv::imdecode(buf, 1); + res = augmenters_[tid]->Process(res, prnds_[tid]); + out.Push(static_cast(rec.image_index()), + mshadow::Shape3(3, res.rows, res.cols), + mshadow::Shape1(param_.label_width_)); + DataInst inst = out.Back(); + for (int i = 0; i < res.rows; ++i) { + for (int j = 0; j < res.cols; ++j) { + cv::Vec3b bgr = res.at(i, j); + inst.data[0][i][j] = bgr[2]; + inst.data[1][i][j] = bgr[1]; + inst.data[2][i][j] = bgr[0]; + } + } + if (label_map_ != NULL) { + mshadow::Copy(inst.label, label_map_->Find(rec.image_index())); + } else { + inst.label[0] = rec.header.label; + } + res.release(); + } + } + return true; +} + +// Define image record parameters +struct ImageRecordParam: public dmlc::Parameter { + /*! \brief whether to do shuffle */ + bool shuffle; + /*! \brief random seed */ + int seed; + // declare parameters + DMLC_DECLARE_PARAMETER(ImageRecordParam) { + DMLC_DECLARE_FIELD(shuffle).set_default(true) + .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(seed).set_default(0) + .describe("Random Seed."); + } +}; + +// iterator on image recordio +class ImageRecordIter : public IIterator { + public: + ImageRecordIter() + : data_(NULL) { + } + virtual ~ImageRecordIter(void) { + iter_.Destroy(); + // data can be NULL + delete data_; + } + virtual void Init(const std::vector >& kwargs) { + std::vector > kwargs_left; + // init image rec param + kwargs_left = param_.InitAllowUnknown(kwargs); + // use the left kwarg to init parser + parser_.Init(kwargs_left); + // init thread iter + iter_.set_max_capacity(4); + iter_.Init([this](std::vector **dptr) { + if (*dptr == NULL) { + *dptr = new std::vector(); + } + return parser_.ParseNext(*dptr); + }, + [this]() { parser_.BeforeFirst(); }); + inst_ptr_ = 0; + } + virtual void BeforeFirst(void) { + iter_.BeforeFirst(); + inst_order_.clear(); + inst_ptr_ = 0; + } + virtual bool Next(void) { + while (true) { + if (inst_ptr_ < inst_order_.size()) { + std::pair p = inst_order_[inst_ptr_]; + out_ = (*data_)[p.first][p.second]; + ++inst_ptr_; + return true; + } else { + if (data_ != NULL) iter_.Recycle(&data_); + if (!iter_.Next(&data_)) return false; + inst_order_.clear(); + for (unsigned i = 0; i < data_->size(); ++i) { + const InstVector &tmp = (*data_)[i]; + for (unsigned j = 0; j < tmp.Size(); ++j) { + inst_order_.push_back(std::make_pair(i, j)); + } + } + // shuffle instance order if needed + if (shuffle_ != 0) { + std::shuffle(inst_order_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); + } + inst_ptr_ = 0; + } + } + return false; + } + virtual const DataInst &Value(void) const { + return out_; + } + + private: + // random magic + static const int kRandMagic = 111; + // output instance + DataInst out_; + // whether shuffle data + int shuffle_; + // data ptr + size_t inst_ptr_; + // internal instance order + std::vector > inst_order_; + // data + std::vector *data_; + // internal parser + ImageRecordIOParser parser_; + // backend thread + dmlc::ThreadedIter > iter_; + // parameters + ImageRecParserParam param_; +}; +DMLC_REGISTER_PARAMETER(ImageRecParserParam); +DMLC_REGISTER_PARAMETER(ImageRecordParam); +MXNET_REGISTER_IO_ITER(MNISTIter, MNISTIter) +MXNET_REGISTER_IO_CHAINED_ITER(ImageRecordIter, ImageRecordIter, BatchAdaptIter) + .describe("Create iterator for dataset packed in recordio.") + .add_arguments(ImageRecordParam::__FIELDS__()) + .add_arguments(ImageRecParserParam::__FIELDS__()) + .add_arguments(BatchParam::__FIELDS__()) + .add_arguments(ImageAugmenterParam::__FIELDS__()); +} // namespace io +} // namespace mxnet +#endif // ITER_IMAGE_RECORDIO_INL_HPP_ diff --git a/src/utils/decoder.h b/src/utils/decoder.h new file mode 100644 index 000000000000..17203392cc60 --- /dev/null +++ b/src/utils/decoder.h @@ -0,0 +1,128 @@ +#ifndef MXNET_UTILS_DECODER_H_ +#define MXNET_UTILS_DECODER_H_ + +#include +#if MXNET_USE_OPENCV_DECODER == 0 + #include + #include + #include +#endif +#include +#include +#if MXNET_USE_OPENCV + #include +#endif + +namespace cxxnet { +namespace utils { + +#if MXNET_USE_OPENCV_DECODER == 0 +struct JpegDecoder { +public: + JpegDecoder(void) { + cinfo.err = jpeg_std_error(&jerr.base); + jerr.base.error_exit = jerror_exit; + jerr.base.output_message = joutput_message; + jpeg_create_decompress(&cinfo); + } + // destructor + ~JpegDecoder(void) { + jpeg_destroy_decompress(&cinfo); + } + + inline void Decode(unsigned char *ptr, size_t sz, + mshadow::TensorContainer *p_data) { + if(setjmp(jerr.jmp)) { + jpeg_destroy_decompress(&cinfo); + dmlc::Error("Libjpeg fail to decode"); + } + this->jpeg_mem_src(&cinfo, ptr, sz); + CHECK(jpeg_read_header(&cinfo, TRUE) == JPEG_HEADER_OK) << "libjpeg: failed to decode"; + CHECK(jpeg_start_decompress(&cinfo) == true) << "libjpeg: failed to decode"; + p_data->Resize(mshadow::Shape3(cinfo.output_height, cinfo.output_width, cinfo.output_components)); + JSAMPROW jptr = &((*p_data)[0][0][0]); + while (cinfo.output_scanline < cinfo.output_height) { + CHECK(jpeg_read_scanlines(&cinfo, &jptr, 1) == true) << "libjpeg: failed to decode"; + jptr += cinfo.output_width * cinfo.output_components; + } + CHECK(jpeg_finish_decompress(&cinfo) == true) << "libjpeg: failed to decode"); + } +private: + struct jerror_mgr { + jpeg_error_mgr base; + jmp_buf jmp; + }; + + METHODDEF(void) jerror_exit(j_common_ptr jinfo) { + jerror_mgr* err = (jerror_mgr*)jinfo->err; + longjmp(err->jmp, 1); + } + + METHODDEF(void) joutput_message(j_common_ptr) {} + + static boolean mem_fill_input_buffer_ (j_decompress_ptr cinfo) { + dmlc::Error("JpegDecoder: bad jpeg image"); + return true; + } + + static void mem_skip_input_data_ (j_decompress_ptr cinfo, long num_bytes_) { + jpeg_source_mgr *src = cinfo->src; + size_t num_bytes = static_cast(num_bytes_); + if (num_bytes > 0) { + src->next_input_byte += num_bytes; + CHECK(src->bytes_in_buffer >= num_bytes) << "fail to decode"; + src->bytes_in_buffer -= num_bytes; + } else { + dmlc::Error("JpegDecoder: bad jpeg image"); + + } + } + + static void mem_term_source_ (j_decompress_ptr cinfo) {} + static void mem_init_source_ (j_decompress_ptr cinfo) {} + static boolean jpeg_resync_to_restart_(j_decompress_ptr cinfo, int desired) { + dmlc::Error("JpegDecoder: bad jpeg image"); + return true; + } + void jpeg_mem_src (j_decompress_ptr cinfo, void* buffer, long nbytes) { + src.init_source = mem_init_source_; + src.fill_input_buffer = mem_fill_input_buffer_; + src.skip_input_data = mem_skip_input_data_; + src.resync_to_restart = jpeg_resync_to_restart_; + src.term_source = mem_term_source_; + src.bytes_in_buffer = nbytes; + src.next_input_byte = static_cast(buffer); + cinfo->src = &src; + } + +private: + jpeg_decompress_struct cinfo; + jpeg_source_mgr src; + jerror_mgr jerr; +}; +#endif + +#if MXNET_USE_OPENCV +struct OpenCVDecoder { + void Decode(unsigned char *ptr, size_t sz, mshadow::TensorContainer *p_data) { + cv::Mat buf(1, sz, CV_8U, ptr); + cv::Mat res = cv::imdecode(buf, 1); + CHECK(res.data != NULL) << "decoding fail"; + p_data->Resize(mshadow::Shape3(res.rows, res.cols, 3)); + for (int y = 0; y < res.rows; ++y) { + for (int x = 0; x < res.cols; ++x) { + cv::Vec3b bgr = res.at(y, x); + // store in RGB order + (*p_data)[y][x][2] = bgr[0]; + (*p_data)[y][x][1] = bgr[1]; + (*p_data)[y][x][0] = bgr[2]; + } + } + res.release(); + } +}; +#endif +} // namespace utils +} // namespace mxnet + +#endif // DECODER_H diff --git a/src/utils/io.h b/src/utils/io.h new file mode 100644 index 000000000000..3781ce98b012 --- /dev/null +++ b/src/utils/io.h @@ -0,0 +1,175 @@ +#ifndef CXXNET_UTILS_IO_H_ +#define CXXNET_UTILS_IO_H_ +/*! + * \file io.h + * \brief definition of abstract stream interface for IO + * \author Bing Xu Tianqi Chen + */ +#include "./utils.h" +#include +#include +#include +#include + +namespace cxxnet { +namespace utils { +typedef dmlc::Stream IStream; +typedef dmlc::SeekStream ISeekStream; + +/*! \brief a in memory buffer that can be read and write as stream interface */ +struct MemoryBufferStream : public ISeekStream { + public: + MemoryBufferStream(std::string *p_buffer) + : p_buffer_(p_buffer) { + curr_ptr_ = 0; + } + virtual ~MemoryBufferStream(void) {} + virtual size_t Read(void *ptr, size_t size) { + CHECK(curr_ptr_ <= p_buffer_->length()) + << " read can not have position excceed buffer length"; + size_t nread = std::min(p_buffer_->length() - curr_ptr_, size); + if (nread != 0) memcpy(ptr, &(*p_buffer_)[0] + curr_ptr_, nread); + curr_ptr_ += nread; + return nread; + } + virtual void Write(const void *ptr, size_t size) { + if (size == 0) return; + if (curr_ptr_ + size > p_buffer_->length()) { + p_buffer_->resize(curr_ptr_+size); + } + memcpy(&(*p_buffer_)[0] + curr_ptr_, ptr, size); + curr_ptr_ += size; + } + virtual void Seek(size_t pos) { + curr_ptr_ = static_cast(pos); + } + virtual size_t Tell(void) { + return curr_ptr_; + } + + private: + /*! \brief in memory buffer */ + std::string *p_buffer_; + /*! \brief current pointer */ + size_t curr_ptr_; +}; // class MemoryBufferStream + +/*! \brief implementation of file i/o stream */ +class StdFile: public ISeekStream { + public: + /*! \brief constructor */ + StdFile(const char *fname, const char *mode) { + Open(fname, mode); + } + StdFile() {} + virtual ~StdFile(void) { + this->Close(); + } + virtual void Open(const char *fname, const char *mode) { + fp_ = utils::FopenCheck(fname, mode); + fseek(fp_, 0L, SEEK_END); + sz_ = ftell(fp_); + fseek(fp_, 0L, SEEK_SET); + } + virtual size_t Read(void *ptr, size_t size) { + return fread(ptr, size, 1, fp_); + } + virtual void Write(const void *ptr, size_t size) { + fwrite(ptr, size, 1, fp_); + } + virtual void Seek(size_t pos) { + fseek(fp_, pos, SEEK_SET); + } + virtual size_t Tell(void) { + return static_cast(ftell(fp_)); + } + inline void Close(void) { + if (fp_ != NULL){ + fclose(fp_); fp_ = NULL; + } + } + inline size_t Size() { + return sz_; + } + private: + FILE *fp_; + size_t sz_; +}; // class StdFile + +/*! \brief Basic page class */ +class BinaryPage { + public: + /*! \brief page size 64 MB */ + static const size_t kPageSize = 64 << 18; + public: + /*! \brief memory data object */ + struct Obj{ + /*! \brief pointer to the data*/ + void *dptr; + /*! \brief size */ + size_t sz; + Obj(void * dptr, size_t sz) : dptr(dptr), sz(sz){} + }; + public: + /*! \brief constructor of page */ + BinaryPage(void) { + data_ = new int[kPageSize]; + utils::Check(data_ != NULL, "fail to allocate page, out of space"); + this->Clear(); + }; + ~BinaryPage() { + if (data_) delete [] data_; + } + /*! + * \brief load one page form instream + * \return true if loading is successful + */ + inline bool Load(utils::IStream &fi) { + return fi.Read(&data_[0], sizeof(int)*kPageSize) !=0; + } + /*! \brief save one page into outstream */ + inline void Save(utils::IStream &fo) { + fo.Write(&data_[0], sizeof(int)*kPageSize); + } + /*! \return number of elements */ + inline int Size(void){ + return data_[0]; + } + /*! \brief Push one binary object into page + * \param fname file name of obj need to be pushed into + * \return false or true to push into + */ + inline bool Push(const Obj &dat) { + if(this->FreeBytes() < dat.sz + sizeof(int)) return false; + data_[ Size() + 2 ] = data_[ Size() + 1 ] + dat.sz; + memcpy(this->offset(data_[ Size() + 2 ]), dat.dptr, dat.sz); + ++ data_[0]; + return true; + } + /*! \brief Clear the page */ + inline void Clear(void) { + memset(&data_[0], 0, sizeof(int) * kPageSize); + } + /*! + * \brief Get one binary object from page + * \param r r th obj in the page + */ + inline Obj operator[](int r) { + CHECK(r < Size()); + return Obj(this->offset(data_[ r + 2 ]), data_[ r + 2 ] - data_[ r + 1 ]); + } + private: + /*! \return number of elements */ + inline size_t FreeBytes(void) { + return (kPageSize - (Size() + 2)) * sizeof(int) - data_[ Size() + 1 ]; + } + inline void* offset(int pos) { + return (char*)(&data_[0]) + (kPageSize*sizeof(int) - pos); + } + private: + //int data_[ kPageSize ]; + int *data_; +}; // class BinaryPage +} // namespace utils +} // namespace cxxnet +#endif diff --git a/src/utils/thread_buffer.h b/src/utils/thread_buffer.h new file mode 100644 index 000000000000..7df1ae17aa56 --- /dev/null +++ b/src/utils/thread_buffer.h @@ -0,0 +1,205 @@ +#ifndef CXXNET_UTILS_THREAD_BUFFER_H_ +#define CXXNET_UTILS_THREAD_BUFFER_H_ +/*! + * \file thread_buffer.h + * \brief multi-thread buffer, iterator, can be used to create parallel pipeline + * \author Tianqi Chen + */ +#include +#include +#include +#include "./utils.h" +#include "./thread.h" +namespace cxxnet { +namespace utils { +/*! + * \brief buffered loading iterator that uses multithread + * this template method will assume the following paramters + * \tparam Elem elememt type to be buffered + * \tparam ElemFactory factory type to implement in order to use thread buffer + */ +template +class ThreadBuffer { + public: + /*!\brief constructor */ + ThreadBuffer(void) { + this->init_end = false; + this->buf_size = 30; + } + ~ThreadBuffer(void) { + if(init_end) this->Destroy(); + } + /*!\brief set parameter, will also pass the parameter to factory */ + inline void SetParam(const char *name, const char *val) { + if (!strcmp( name, "buffer_size")) buf_size = atoi(val); + factory.SetParam(name, val); + } + /*! + * \brief initalize the buffered iterator + * \param param a initialize parameter that will pass to factory, ignore it if not necessary + * \return false if the initlization can't be done, e.g. buffer file hasn't been created + */ + inline bool Init(void) { + if (!factory.Init()) return false; + bufA.reserve(buf_size); + bufB.reserve(buf_size); + for (int i = 0; i < buf_size; ++i) { + bufA.push_back(factory.Create()); + bufB.push_back(factory.Create()); + } + this->init_end = true; + this->StartLoader(); + return true; + } + /*!\brief place the iterator before first value */ + inline void BeforeFirst(void) { + // wait till last loader end + loading_end.Wait(); + // critcal zone + current_buf = 1; + factory.BeforeFirst(); + // reset terminate limit + endA = endB = buf_size; + // wake up loader for first part + loading_need.Post(); + // wait til first part is loaded + loading_end.Wait(); + // set current buf to right value + current_buf = 0; + // wake loader for next part + data_loaded = false; + loading_need.Post(); + // set buffer value + buf_index = 0; + } + /*! \brief destroy the buffer iterator, will deallocate the buffer */ + inline void Destroy(void) { + // wait until the signal is consumed + this->destroy_signal = true; + loading_need.Post(); + loader_thread.Join(); + loading_need.Destroy(); + loading_end.Destroy(); + for (size_t i = 0; i < bufA.size(); ++i) { + factory.FreeSpace(bufA[i]); + } + for (size_t i = 0; i < bufB.size(); ++i) { + factory.FreeSpace(bufB[i]); + } + bufA.clear(); bufB.clear(); + factory.Destroy(); + this->init_end = false; + } + /*! + * \brief get the next element needed in buffer + * \param elem element to store into + * \return whether reaches end of data + */ + inline bool Next(Elem &elem) { + // end of buffer try to switch + if (buf_index == buf_size) { + this->SwitchBuffer(); + buf_index = 0; + } + if (buf_index >= (current_buf ? endA : endB)) { + return false; + } + std::vector &buf = current_buf ? bufA : bufB; + elem = buf[buf_index]; + ++buf_index; + return true; + } + /*! + * \brief get the factory object + */ + inline ElemFactory &get_factory(void) { + return factory; + } + inline const ElemFactory &get_factory(void) const{ + return factory; + } + // size of buffer + int buf_size; + private: + // factory object used to load configures + ElemFactory factory; + // index in current buffer + int buf_index; + // indicate which one is current buffer + int current_buf; + // max limit of visit, also marks termination + int endA, endB; + // double buffer, one is accessed by loader + // the other is accessed by consumer + // buffer of the data + std::vector bufA, bufB; + // initialization end + bool init_end; + // singal whether the data is loaded + bool data_loaded; + // signal to kill the thread + bool destroy_signal; + // thread object + Thread loader_thread; + // signal of the buffer + Semaphore loading_end, loading_need; + /*! + * \brief slave thread + * this implementation is like producer-consumer style + */ + inline void RunLoader(void) { + while(!destroy_signal) { + // sleep until loading is needed + loading_need.Wait(); + std::vector &buf = current_buf ? bufB : bufA; + int i; + for (i = 0; i < buf_size ; ++i) { + if (!factory.LoadNext(buf[i])) { + int &end = current_buf ? endB : endA; + end = i; // marks the termination + break; + } + } + // signal that loading is done + data_loaded = true; + loading_end.Post(); + } + } + /*!\brief entry point of loader thread */ + inline static CXXNET_THREAD_PREFIX LoaderEntry(void *pthread) { + static_cast< ThreadBuffer* >(pthread)->RunLoader(); + ThreadExit(NULL); + return NULL; + } + /*!\brief start loader thread */ + inline void StartLoader(void) { + destroy_signal = false; + // set param + current_buf = 1; + loading_need.Init(1); + loading_end .Init(0); + // reset terminate limit + endA = endB = buf_size; + loader_thread.Start(LoaderEntry, this); + // wait until first part of data is loaded + loading_end.Wait(); + // set current buf to right value + current_buf = 0; + // wake loader for next part + data_loaded = false; + loading_need.Post(); + buf_index = 0; + } + /*!\brief switch double buffer */ + inline void SwitchBuffer(void) { + loading_end.Wait(); + // loader shall be sleep now, critcal zone! + current_buf = !current_buf; + // wake up loader + data_loaded = false; + loading_need.Post(); + } +}; +} // namespace utils +} // namespace cxxnet +#endif From 9cad774d5c46fd154700a2504c370f6aa74b32ac Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Fri, 4 Sep 2015 01:15:31 +0800 Subject: [PATCH 09/15] pass compilation, not tested --- Makefile | 23 +++- include/mxnet/io.h | 2 +- make/config.mk | 4 +- src/common/utils.h | 5 +- src/io/image_augmenter.h | 21 ++-- src/io/inst_vector.h | 11 +- src/io/io.cc | 14 ++- src/io/iter_batch.h | 36 +++--- src/io/iter_image_recordio.cc | 38 ++++--- src/utils/decoder.h | 2 +- src/utils/io.h | 175 ----------------------------- src/utils/thread_buffer.h | 205 ---------------------------------- tests/python/test_io.py | 15 +++ 13 files changed, 106 insertions(+), 445 deletions(-) delete mode 100644 src/utils/io.h delete mode 100644 src/utils/thread_buffer.h diff --git a/Makefile b/Makefile index d758c443241e..5f9dcb83c3d7 100644 --- a/Makefile +++ b/Makefile @@ -13,10 +13,15 @@ ifndef DMLC_CORE endif +ifneq ($(USE_OPENMP_ITER), 1) + export NO_OPENMP = 1 +endif + # use customized config file include $(config) include mshadow/make/mshadow.mk include $(DMLC_CORE)/make/dmlc.mk +unexport NO_OPENMP # all tge possible warning tread WARNFLAGS= -Wall @@ -39,10 +44,21 @@ endif # setup opencv ifeq ($(USE_OPENCV),1) - CFLAGS+= -DCXXNET_USE_OPENCV=1 + CFLAGS+= -DMXNET_USE_OPENCV=1 LDFLAGS+= `pkg-config --libs opencv` else - CFLAGS+= -DCXXNET_USE_OPENCV=0 + CFLAGS+= -DMXNET_USE_OPENCV=0 +endif + +# setup opencv +ifeq ($(USE_OPENCV_DECODER),1) + CFLAGS+= -DMXNET_USE_OPENCV_DECODER=1 +else + CFLAGS+= -DMXNET_USE_OPENCV_DECODER=0 +endif + +ifeq ($(USE_OPENMP_ITER), 1) + CFLAGS += -fopenmp endif ifeq ($(USE_CUDNN), 1) @@ -62,7 +78,7 @@ endif ENGINE=naive_engine.o BIN = tests/test_simple_engine OBJ = narray_function_cpu.o -OBJCXX11 = narray.o c_api.o operator.o symbol.o storage.o static_graph.o graph_executor.o io.o iter_mnist.o $(ENGINE) +OBJCXX11 = narray.o c_api.o operator.o symbol.o storage.o static_graph.o graph_executor.o io.o iter_mnist.o iter_image_recordio.o $(ENGINE) CUOBJ = narray_function_gpu.o SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -92,6 +108,7 @@ operator.o: src/operator/operator.cc c_api.o: src/c_api.cc io.o: src/io/io.cc iter_mnist.o: src/io/iter_mnist.cc src/io/*.h +iter_image_recordio.o: src/io/iter_image_recordio.cc # Rules for operators OPERATOR_HDR=$(wildcard src/operator/*-inl.h) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 5a8267befc1c..7bb86f4eece3 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -121,7 +121,7 @@ struct DataIteratorReg */ #define MXNET_REGISTER_IO_CHAINED_ITER(name, ChainedDataIterType, HoldingDataIterType) \ static ::mxnet::IIterator* __create__ ## ChainedDataIteratorType ## __() { \ - return new HoldingDataIteratorType(new ChainedDataIterType); \ + return new HoldingDataIterType(new ChainedDataIterType); \ } \ DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ .set_body(__create__ ## ChainedDataIteratorType ## __) diff --git a/make/config.mk b/make/config.mk index cd04b146180c..3e93e240e493 100644 --- a/make/config.mk +++ b/make/config.mk @@ -27,8 +27,8 @@ USE_CUDA_PATH = NONE # whether use opencv during compilation # you can disable it, however, you will not able to use # imbin iterator -USE_OPENCV = 0 -USE_OPENCV_DECODER = 0 +USE_OPENCV = 1 +USE_OPENCV_DECODER = 1 # whether use CUDNN R3 library USE_CUDNN = 0 # add the path to CUDNN libary to link and compile flag diff --git a/src/common/utils.h b/src/common/utils.h index f7a2dcce0470..b5edb78bd6f9 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -21,12 +21,11 @@ namespace common { * \brief Random Engine */ typedef std::mt19937 RANDOM_ENGINE; - // Get a double float, prnd is the pointer to a Random Engine #define NextDouble(prnd) std::generate_canonical(*prnd) +// Get a random int in [0, range) +#define NextUInt32(range, prnd) static_cast(floor(std::generate_canonical(*prnd) * range)) -#define NextUInt32(range, prnd) static_cast(\ - floor(std::generate_canonical(*prnd) * range)) /*! * \brief Helper functions. */ diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h index d33464c4a889..3ca373d768b0 100644 --- a/src/io/image_augmenter.h +++ b/src/io/image_augmenter.h @@ -19,7 +19,6 @@ struct ImageAugmentParam : public dmlc::Parameter { int crop_y_start_; /*! \brief whether we do nonrandom croping */ int crop_x_start_; - /*! \brief Indicate the max ratation angle for augmentation, we will random rotate */ /*! \brief [-max_rotate_angle, max_rotate_angle] */ int max_rotate_angle_; /*! \brief max aspect ratio */ @@ -77,6 +76,7 @@ struct ImageAugmentParam : public dmlc::Parameter { .describe("Rotate angle"); DMLC_DECLARE_FIELD(fill_value_).set_default(255) .describe("Filled value while padding"); + } }; /*! \brief helper class to do image augmentation */ @@ -99,7 +99,7 @@ class ImageAugmenter { << "input_shape must be three consecutive integers without space example: 1,1,200 "; } if (!strcmp(kwargs_left[i].first.c_str(), "rotate_list")) { - char* val = kwargs_left[i].second.c_str(); + const char* val = kwargs_left[i].second.c_str(); const char *end = val + strlen(val); char buf[128]; while (val < end) { @@ -121,9 +121,9 @@ class ImageAugmenter { virtual cv::Mat Process(const cv::Mat &src, common::RANDOM_ENGINE *prnd) { // shear - float s = common::NextDouble(prnd) * param_.max_shear_ratio_ * 2 - param_.max_shear_ratio_; + float s = NextDouble(prnd) * param_.max_shear_ratio_ * 2 - param_.max_shear_ratio_; // rotate - int angle = common::NextUInt32(param_.max_rotate_angle_ * 2, prnd) - param_.max_rotate_angle_; + int angle = NextUInt32(param_.max_rotate_angle_ * 2, prnd) - param_.max_rotate_angle_; if (param_.rotate_ > 0) angle = param_.rotate_; if (rotate_list_.size() > 0) { angle = rotate_list_[NextUInt32(rotate_list_.size() - 1, prnd)]; @@ -160,7 +160,7 @@ class ImageAugmenter { mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size_- param_.min_crop_size_+1, prnd)+ param_.min_crop_size_; mshadow::index_t y = res.rows - rand_crop_size; mshadow::index_t x = res.cols - rand_crop_size; - if (rand_crop_ != 0) { + if (param_.rand_crop_ != 0) { y = NextUInt32(y + 1, prnd); x = NextUInt32(x + 1, prnd); } @@ -171,8 +171,8 @@ class ImageAugmenter { cv::resize(res(roi), res, cv::Size(shape_[1], shape_[2])); } else{ - utils::Check(static_cast(res.cols) >= shape_[1] && static_cast(res.rows) >= shape_[2], - "input image size smaller than input shape"); + CHECK(static_cast(res.cols) >= shape_[1] && static_cast(res.rows) >= shape_[2]) + << "input image size smaller than input shape"; mshadow::index_t y = res.rows - shape_[2]; mshadow::index_t x = res.cols - shape_[1]; if (param_.rand_crop_ != 0) { @@ -240,9 +240,9 @@ class ImageAugmenter { private: // whether skip processing inline bool NeedProcess(void) const { - if (max_rotate_angle_ > 0 || max_shear_ratio_ > 0.0f - || rotate_ > 0 || rotate_list_.size() > 0) return true; - if (min_crop_size_ > 0 && max_crop_size_ > 0) return true; + if (param_.max_rotate_angle_ > 0 || param_.max_shear_ratio_ > 0.0f + || param_.rotate_ > 0 || rotate_list_.size() > 0) return true; + if (param_.min_crop_size_ > 0 && param_.max_crop_size_ > 0) return true; return false; } // temp input space @@ -252,6 +252,7 @@ class ImageAugmenter { // rotation param cv::Mat rotateM; // parameters + ImageAugmentParam param_; /*! \brief input shape */ mshadow::Shape<4> shape_; /*! \brief list of possible rotate angle */ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h index 9490ceab94c1..4ced7dd64c63 100644 --- a/src/io/inst_vector.h +++ b/src/io/inst_vector.h @@ -7,10 +7,11 @@ #ifndef MXNET_INST_VECTOR_H_ #define MXNET_INST_VECTOR_H_ -#include "./data.h" -#include +#include +#include #include #include +#include namespace mxnet { namespace io { @@ -30,7 +31,7 @@ class TensorVector { CHECK(i + 1 < offset_.size()); CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); return mshadow::Tensor - ((DType*)BeginPtr(content_) + offset_[i], shape_[i]); + ((DType*)dmlc::BeginPtr(content_) + offset_[i], shape_[i]); } inline mshadow::Tensor Back() const { return (*this)[Size() - 1]; @@ -73,8 +74,8 @@ class InstVector { inline DataInst operator[](size_t i) const { DataInst inst; inst.index = index_[i]; - inst.data = data_[i]; - inst.label = label_[i]; + inst.data.push_back(TBlob(data_[i])); + inst.data.push_back(TBlob(label_[i])); return inst; } // get back of instance vector diff --git a/src/io/io.cc b/src/io/io.cc index 9095f4089c92..b2dbc9f8c2c5 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -4,14 +4,18 @@ #include #include -#include -#include <> -#include +#include "./image_augmenter.h" +#include "./iter_batch.h" // Registers namespace dmlc { DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); +} // namespace dmlc + +namespace mxnet { +namespace io { // Register parameters in header files DMLC_REGISTER_PARAMETER(BatchParam); -DMLC_REGISTER_PARAMETER(ImageAugmenterParam); -} // namespace dmlc \ No newline at end of file +DMLC_REGISTER_PARAMETER(ImageAugmentParam); +} // namespace mxnet +} // namespace io diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h index a0e4ab7e7ba5..f258bc2d6afd 100644 --- a/src/io/iter_batch.h +++ b/src/io/iter_batch.h @@ -36,7 +36,7 @@ struct BatchParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(test_skipread_).set_default(false) .describe("Skip read for testing."); DMLC_DECLARE_FIELD(silent_).set_default(false) - .describe("Whether to print batch information.") + .describe("Whether to print batch information."); } }; @@ -48,7 +48,7 @@ class BatchAdaptIter: public IIterator { } virtual ~BatchAdaptIter(void) { delete base_; - out_.FreeSpaceDense(); + FreeSpaceDense(); } virtual void Init(const std::vector >& kwargs) { std::vector > kwargs_left; @@ -57,7 +57,7 @@ class BatchAdaptIter: public IIterator { for (size_t i = 0; i < kwargs_left.size(); i++) { if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[1], &shape_[2], &shape_[3]) == 3) - << "input_shape must be three consecutive integers without space example: 1,1,200 ") + << "input_shape must be three consecutive integers without space example: 1,1,200 "; } } // init base iterator @@ -88,13 +88,13 @@ class BatchAdaptIter: public IIterator { while (base_->Next()) { const DataInst& d = base_->Value(); - mshadow::Copy(label[top], d.data[1].get()); + mshadow::Copy(label[top], d.data[1].get()); out_.inst_index[top] = d.index; - mshadow::Copy(data[top], d.data[0].get()); + mshadow::Copy(data[top], d.data[0].get()); if (++ top >= param_.batch_size_) { - out.data[0] = TBlob(data); - out.data[1] = TBlob(label); + out_.data[0] = TBlob(data); + out_.data[1] = TBlob(label); return true; } } @@ -105,16 +105,16 @@ class BatchAdaptIter: public IIterator { for (; top < param_.batch_size_; ++top, ++num_overflow_) { CHECK(base_->Next()) << "number of input must be bigger than batch size"; const DataInst& d = base_->Value(); - mshadow::Copy(label[top], d.data[1].get()); + mshadow::Copy(label[top], d.data[1].get()); out_.inst_index[top] = d.index; - mshadow::Copy(data[top], d.data[0].get()); + mshadow::Copy(data[top], d.data[0].get()); } out_.num_batch_padd = num_overflow_; } else { - out_.num_batch_padd = batch_size_ - top; + out_.num_batch_padd = param_.batch_size_ - top; } - out.data[0] = TBlob(data); - out.data[1] = TBlob(label); + out_.data[0] = TBlob(data); + out_.data[1] = TBlob(label); return true; } return false; @@ -124,6 +124,8 @@ class BatchAdaptIter: public IIterator { return out_; } private: + /*! \brief batch parameters */ + BatchParam param_; /*! \brief base iterator */ IIterator *base_; /*! \brief input shape */ @@ -141,16 +143,16 @@ class BatchAdaptIter: public IIterator { // Functions that allocate and free tensor space inline void AllocSpaceDense(bool pad = false) { data = mshadow::NewTensor(shape_, 0.0f, pad); - mshadow::Shape<2> lshape = mshadow::Shape2(batch_size, label_width); + mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size_, param_.label_width_); label = mshadow::NewTensor(lshape, 0.0f, pad); - out_.inst_index = new unsigned[batch_size]; - out_.batch_size = batch_size; + out_.inst_index = new unsigned[param_.batch_size_]; + out_.batch_size = param_.batch_size_; out_.data.resize(2); } /*! \brief auxiliary function to free space, if needed, dense only */ inline void FreeSpaceDense(void) { if (label.dptr_ != NULL) { - delete [] inst_index; + delete [] out_.inst_index; mshadow::FreeSpace(&label); mshadow::FreeSpace(&data); label.dptr_ = NULL; @@ -159,4 +161,4 @@ class BatchAdaptIter: public IIterator { }; // class BatchAdaptIter } // namespace io } // namespace cxxnet -#endif // MXNET_IO_ITER_BATCH_H_ \ No newline at end of file +#endif // MXNET_IO_ITER_BATCH_H_ diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc index 2ab1aa8958cb..9977ddd2290c 100644 --- a/src/io/iter_image_recordio.cc +++ b/src/io/iter_image_recordio.cc @@ -16,6 +16,7 @@ iterator #include "./inst_vector.h" #include "./image_recordio.h" #include "./image_augmenter.h" +#include "./iter_batch.h" #include "../utils/decoder.h" namespace mxnet { namespace io { @@ -57,7 +58,7 @@ class ImageLabelMap { // be careful not to resize label_ afterwards idx2label_.reserve(image_index_.size()); for (size_t i = 0; i < image_index_.size(); ++i) { - idx2label_[image_index_[i]] = BeginPtr(label_) + i * label_width_; + idx2label_[image_index_[i]] = dmlc::BeginPtr(label_) + i * label_width_; } if (!silent) { LOG(INFO) << "Loaded ImageList from " << path_imglist << ' ' @@ -101,7 +102,7 @@ struct ImageRecParserParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(ImageRecParserParam) { DMLC_DECLARE_FIELD(path_imglist_).set_default("") .describe("Path to image list."); - DMLC_DECLARE_FIELD(path_imagrec_).set_default("./data/imgrec.rec") + DMLC_DECLARE_FIELD(path_imgrec_).set_default("./data/imgrec.rec") .describe("Path to image record file."); DMLC_DECLARE_FIELD(nthread_).set_lower_bound(1).set_default(4) .describe("Number of thread to do parsing."); @@ -178,7 +179,7 @@ inline void ImageRecordIOParser::Init(const std::vectorInit(kwargs_left); prnds_.push_back(new common::RANDOM_ENGINE((i + 1) * kRandMagic)); } @@ -186,16 +187,16 @@ inline void ImageRecordIOParser::Init(const std::vector *out_vec) { { CHECK(omp_get_num_threads() == param_.nthread_); int tid = omp_get_thread_num(); - dmlc::RecordIOChunkReader reader(chunk, tid, parser_.nthread_); - mxnet::ImageRecordIO rec; + dmlc::RecordIOChunkReader reader(chunk, tid, param_.nthread_); + ImageRecordIO rec; dmlc::InputSplit::Blob blob; // image data InstVector &out = (*out_vec)[tid]; @@ -238,18 +239,21 @@ ParseNext(std::vector *out_vec) { mshadow::Shape3(3, res.rows, res.cols), mshadow::Shape1(param_.label_width_)); DataInst inst = out.Back(); + // turn datainst into tensor + mshadow::Tensor data = inst.data[0].get(); + mshadow::Tensor label = inst.data[1].get(); for (int i = 0; i < res.rows; ++i) { for (int j = 0; j < res.cols; ++j) { cv::Vec3b bgr = res.at(i, j); - inst.data[0][i][j] = bgr[2]; - inst.data[1][i][j] = bgr[1]; - inst.data[2][i][j] = bgr[0]; + data[0][i][j] = bgr[2]; + data[1][i][j] = bgr[1]; + data[2][i][j] = bgr[0]; } } if (label_map_ != NULL) { - mshadow::Copy(inst.label, label_map_->Find(rec.image_index())); + mshadow::Copy(label, label_map_->Find(rec.image_index())); } else { - inst.label[0] = rec.header.label; + label[0] = rec.header.label; } res.release(); } @@ -324,7 +328,7 @@ class ImageRecordIter : public IIterator { } // shuffle instance order if needed if (shuffle_ != 0) { - std::shuffle(inst_order_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); + std::shuffle(inst_order_.begin(), inst_order_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); } inst_ptr_ = 0; } @@ -353,17 +357,15 @@ class ImageRecordIter : public IIterator { // backend thread dmlc::ThreadedIter > iter_; // parameters - ImageRecParserParam param_; + ImageRecordParam param_; }; DMLC_REGISTER_PARAMETER(ImageRecParserParam); DMLC_REGISTER_PARAMETER(ImageRecordParam); -MXNET_REGISTER_IO_ITER(MNISTIter, MNISTIter) MXNET_REGISTER_IO_CHAINED_ITER(ImageRecordIter, ImageRecordIter, BatchAdaptIter) .describe("Create iterator for dataset packed in recordio.") .add_arguments(ImageRecordParam::__FIELDS__()) .add_arguments(ImageRecParserParam::__FIELDS__()) .add_arguments(BatchParam::__FIELDS__()) - .add_arguments(ImageAugmenterParam::__FIELDS__()); + .add_arguments(ImageAugmentParam::__FIELDS__()); } // namespace io } // namespace mxnet -#endif // ITER_IMAGE_RECORDIO_INL_HPP_ diff --git a/src/utils/decoder.h b/src/utils/decoder.h index 17203392cc60..52db01edee23 100644 --- a/src/utils/decoder.h +++ b/src/utils/decoder.h @@ -13,7 +13,7 @@ #include #endif -namespace cxxnet { +namespace mxnet { namespace utils { #if MXNET_USE_OPENCV_DECODER == 0 diff --git a/src/utils/io.h b/src/utils/io.h deleted file mode 100644 index 3781ce98b012..000000000000 --- a/src/utils/io.h +++ /dev/null @@ -1,175 +0,0 @@ -#ifndef CXXNET_UTILS_IO_H_ -#define CXXNET_UTILS_IO_H_ -/*! - * \file io.h - * \brief definition of abstract stream interface for IO - * \author Bing Xu Tianqi Chen - */ -#include "./utils.h" -#include -#include -#include -#include - -namespace cxxnet { -namespace utils { -typedef dmlc::Stream IStream; -typedef dmlc::SeekStream ISeekStream; - -/*! \brief a in memory buffer that can be read and write as stream interface */ -struct MemoryBufferStream : public ISeekStream { - public: - MemoryBufferStream(std::string *p_buffer) - : p_buffer_(p_buffer) { - curr_ptr_ = 0; - } - virtual ~MemoryBufferStream(void) {} - virtual size_t Read(void *ptr, size_t size) { - CHECK(curr_ptr_ <= p_buffer_->length()) - << " read can not have position excceed buffer length"; - size_t nread = std::min(p_buffer_->length() - curr_ptr_, size); - if (nread != 0) memcpy(ptr, &(*p_buffer_)[0] + curr_ptr_, nread); - curr_ptr_ += nread; - return nread; - } - virtual void Write(const void *ptr, size_t size) { - if (size == 0) return; - if (curr_ptr_ + size > p_buffer_->length()) { - p_buffer_->resize(curr_ptr_+size); - } - memcpy(&(*p_buffer_)[0] + curr_ptr_, ptr, size); - curr_ptr_ += size; - } - virtual void Seek(size_t pos) { - curr_ptr_ = static_cast(pos); - } - virtual size_t Tell(void) { - return curr_ptr_; - } - - private: - /*! \brief in memory buffer */ - std::string *p_buffer_; - /*! \brief current pointer */ - size_t curr_ptr_; -}; // class MemoryBufferStream - -/*! \brief implementation of file i/o stream */ -class StdFile: public ISeekStream { - public: - /*! \brief constructor */ - StdFile(const char *fname, const char *mode) { - Open(fname, mode); - } - StdFile() {} - virtual ~StdFile(void) { - this->Close(); - } - virtual void Open(const char *fname, const char *mode) { - fp_ = utils::FopenCheck(fname, mode); - fseek(fp_, 0L, SEEK_END); - sz_ = ftell(fp_); - fseek(fp_, 0L, SEEK_SET); - } - virtual size_t Read(void *ptr, size_t size) { - return fread(ptr, size, 1, fp_); - } - virtual void Write(const void *ptr, size_t size) { - fwrite(ptr, size, 1, fp_); - } - virtual void Seek(size_t pos) { - fseek(fp_, pos, SEEK_SET); - } - virtual size_t Tell(void) { - return static_cast(ftell(fp_)); - } - inline void Close(void) { - if (fp_ != NULL){ - fclose(fp_); fp_ = NULL; - } - } - inline size_t Size() { - return sz_; - } - private: - FILE *fp_; - size_t sz_; -}; // class StdFile - -/*! \brief Basic page class */ -class BinaryPage { - public: - /*! \brief page size 64 MB */ - static const size_t kPageSize = 64 << 18; - public: - /*! \brief memory data object */ - struct Obj{ - /*! \brief pointer to the data*/ - void *dptr; - /*! \brief size */ - size_t sz; - Obj(void * dptr, size_t sz) : dptr(dptr), sz(sz){} - }; - public: - /*! \brief constructor of page */ - BinaryPage(void) { - data_ = new int[kPageSize]; - utils::Check(data_ != NULL, "fail to allocate page, out of space"); - this->Clear(); - }; - ~BinaryPage() { - if (data_) delete [] data_; - } - /*! - * \brief load one page form instream - * \return true if loading is successful - */ - inline bool Load(utils::IStream &fi) { - return fi.Read(&data_[0], sizeof(int)*kPageSize) !=0; - } - /*! \brief save one page into outstream */ - inline void Save(utils::IStream &fo) { - fo.Write(&data_[0], sizeof(int)*kPageSize); - } - /*! \return number of elements */ - inline int Size(void){ - return data_[0]; - } - /*! \brief Push one binary object into page - * \param fname file name of obj need to be pushed into - * \return false or true to push into - */ - inline bool Push(const Obj &dat) { - if(this->FreeBytes() < dat.sz + sizeof(int)) return false; - data_[ Size() + 2 ] = data_[ Size() + 1 ] + dat.sz; - memcpy(this->offset(data_[ Size() + 2 ]), dat.dptr, dat.sz); - ++ data_[0]; - return true; - } - /*! \brief Clear the page */ - inline void Clear(void) { - memset(&data_[0], 0, sizeof(int) * kPageSize); - } - /*! - * \brief Get one binary object from page - * \param r r th obj in the page - */ - inline Obj operator[](int r) { - CHECK(r < Size()); - return Obj(this->offset(data_[ r + 2 ]), data_[ r + 2 ] - data_[ r + 1 ]); - } - private: - /*! \return number of elements */ - inline size_t FreeBytes(void) { - return (kPageSize - (Size() + 2)) * sizeof(int) - data_[ Size() + 1 ]; - } - inline void* offset(int pos) { - return (char*)(&data_[0]) + (kPageSize*sizeof(int) - pos); - } - private: - //int data_[ kPageSize ]; - int *data_; -}; // class BinaryPage -} // namespace utils -} // namespace cxxnet -#endif diff --git a/src/utils/thread_buffer.h b/src/utils/thread_buffer.h deleted file mode 100644 index 7df1ae17aa56..000000000000 --- a/src/utils/thread_buffer.h +++ /dev/null @@ -1,205 +0,0 @@ -#ifndef CXXNET_UTILS_THREAD_BUFFER_H_ -#define CXXNET_UTILS_THREAD_BUFFER_H_ -/*! - * \file thread_buffer.h - * \brief multi-thread buffer, iterator, can be used to create parallel pipeline - * \author Tianqi Chen - */ -#include -#include -#include -#include "./utils.h" -#include "./thread.h" -namespace cxxnet { -namespace utils { -/*! - * \brief buffered loading iterator that uses multithread - * this template method will assume the following paramters - * \tparam Elem elememt type to be buffered - * \tparam ElemFactory factory type to implement in order to use thread buffer - */ -template -class ThreadBuffer { - public: - /*!\brief constructor */ - ThreadBuffer(void) { - this->init_end = false; - this->buf_size = 30; - } - ~ThreadBuffer(void) { - if(init_end) this->Destroy(); - } - /*!\brief set parameter, will also pass the parameter to factory */ - inline void SetParam(const char *name, const char *val) { - if (!strcmp( name, "buffer_size")) buf_size = atoi(val); - factory.SetParam(name, val); - } - /*! - * \brief initalize the buffered iterator - * \param param a initialize parameter that will pass to factory, ignore it if not necessary - * \return false if the initlization can't be done, e.g. buffer file hasn't been created - */ - inline bool Init(void) { - if (!factory.Init()) return false; - bufA.reserve(buf_size); - bufB.reserve(buf_size); - for (int i = 0; i < buf_size; ++i) { - bufA.push_back(factory.Create()); - bufB.push_back(factory.Create()); - } - this->init_end = true; - this->StartLoader(); - return true; - } - /*!\brief place the iterator before first value */ - inline void BeforeFirst(void) { - // wait till last loader end - loading_end.Wait(); - // critcal zone - current_buf = 1; - factory.BeforeFirst(); - // reset terminate limit - endA = endB = buf_size; - // wake up loader for first part - loading_need.Post(); - // wait til first part is loaded - loading_end.Wait(); - // set current buf to right value - current_buf = 0; - // wake loader for next part - data_loaded = false; - loading_need.Post(); - // set buffer value - buf_index = 0; - } - /*! \brief destroy the buffer iterator, will deallocate the buffer */ - inline void Destroy(void) { - // wait until the signal is consumed - this->destroy_signal = true; - loading_need.Post(); - loader_thread.Join(); - loading_need.Destroy(); - loading_end.Destroy(); - for (size_t i = 0; i < bufA.size(); ++i) { - factory.FreeSpace(bufA[i]); - } - for (size_t i = 0; i < bufB.size(); ++i) { - factory.FreeSpace(bufB[i]); - } - bufA.clear(); bufB.clear(); - factory.Destroy(); - this->init_end = false; - } - /*! - * \brief get the next element needed in buffer - * \param elem element to store into - * \return whether reaches end of data - */ - inline bool Next(Elem &elem) { - // end of buffer try to switch - if (buf_index == buf_size) { - this->SwitchBuffer(); - buf_index = 0; - } - if (buf_index >= (current_buf ? endA : endB)) { - return false; - } - std::vector &buf = current_buf ? bufA : bufB; - elem = buf[buf_index]; - ++buf_index; - return true; - } - /*! - * \brief get the factory object - */ - inline ElemFactory &get_factory(void) { - return factory; - } - inline const ElemFactory &get_factory(void) const{ - return factory; - } - // size of buffer - int buf_size; - private: - // factory object used to load configures - ElemFactory factory; - // index in current buffer - int buf_index; - // indicate which one is current buffer - int current_buf; - // max limit of visit, also marks termination - int endA, endB; - // double buffer, one is accessed by loader - // the other is accessed by consumer - // buffer of the data - std::vector bufA, bufB; - // initialization end - bool init_end; - // singal whether the data is loaded - bool data_loaded; - // signal to kill the thread - bool destroy_signal; - // thread object - Thread loader_thread; - // signal of the buffer - Semaphore loading_end, loading_need; - /*! - * \brief slave thread - * this implementation is like producer-consumer style - */ - inline void RunLoader(void) { - while(!destroy_signal) { - // sleep until loading is needed - loading_need.Wait(); - std::vector &buf = current_buf ? bufB : bufA; - int i; - for (i = 0; i < buf_size ; ++i) { - if (!factory.LoadNext(buf[i])) { - int &end = current_buf ? endB : endA; - end = i; // marks the termination - break; - } - } - // signal that loading is done - data_loaded = true; - loading_end.Post(); - } - } - /*!\brief entry point of loader thread */ - inline static CXXNET_THREAD_PREFIX LoaderEntry(void *pthread) { - static_cast< ThreadBuffer* >(pthread)->RunLoader(); - ThreadExit(NULL); - return NULL; - } - /*!\brief start loader thread */ - inline void StartLoader(void) { - destroy_signal = false; - // set param - current_buf = 1; - loading_need.Init(1); - loading_end .Init(0); - // reset terminate limit - endA = endB = buf_size; - loader_thread.Start(LoaderEntry, this); - // wait until first part of data is loaded - loading_end.Wait(); - // set current buf to right value - current_buf = 0; - // wake loader for next part - data_loaded = false; - loading_need.Post(); - buf_index = 0; - } - /*!\brief switch double buffer */ - inline void SwitchBuffer(void) { - loading_end.Wait(); - // loader shall be sleep now, critcal zone! - current_buf = !current_buf; - // wake up loader - data_loaded = false; - loading_need.Post(); - } -}; -} // namespace utils -} // namespace cxxnet -#endif diff --git a/tests/python/test_io.py b/tests/python/test_io.py index dfeb3f67c293..991a4813033e 100644 --- a/tests/python/test_io.py +++ b/tests/python/test_io.py @@ -39,3 +39,18 @@ def test_MNISTIter_reset(): label_1 = train_dataiter.getlabel().numpy.flatten() assert(sum(label_0 - label_1) == 0) +def test_ImageRecIter(): + dataiter = mx.io.ImageRecordIter(path_imgrec="data/val_cxxnet.rec", + image_mean="data/val_cxxnet_mean.bin", + rand_crop=True, + rand_mirror=True, + input_shape="3,224,224", + batch_size=128) + + + + + + + + From 2566b3e9dcb90e9322a5f9cfa24c27ffba1d0c29 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 6 Sep 2015 07:47:04 +0800 Subject: [PATCH 10/15] merge augmenter, modify param attribute --- src/io/image_augmenter.h | 279 +++++++++++++++++++++++++--------- src/io/iter_batch.h | 55 ++++--- src/io/iter_image_recordio.cc | 76 ++++----- 3 files changed, 275 insertions(+), 135 deletions(-) diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h index 3ca373d768b0..a81e5297d5b3 100644 --- a/src/io/image_augmenter.h +++ b/src/io/image_augmenter.h @@ -1,7 +1,7 @@ /*! * \file image_augmenter_opencv.hpp * \brief threaded version of page iterator - * \author Naiyan Wang, Tianqi Chen + * \author Naiyan Wang, Tianqi Chen, Tianjun Xiao */ #ifndef MXNET_IO_IMAGE_AUGMENTER_H_ #define MXNET_IO_IMAGE_AUGMENTER_H_ @@ -14,68 +14,102 @@ namespace io { /*! \brief image augmentation parameters*/ struct ImageAugmentParam : public dmlc::Parameter { /*! \brief whether we do random cropping */ - bool rand_crop_; + bool rand_crop; /*! \brief whether we do nonrandom croping */ - int crop_y_start_; + int crop_y_start; /*! \brief whether we do nonrandom croping */ - int crop_x_start_; + int crop_x_start; /*! \brief [-max_rotate_angle, max_rotate_angle] */ - int max_rotate_angle_; + int max_rotate_angle; /*! \brief max aspect ratio */ - float max_aspect_ratio_; + float max_aspect_ratio; /*! \brief random shear the image [-max_shear_ratio, max_shear_ratio] */ - float max_shear_ratio_; + float max_shear_ratio; /*! \brief max crop size */ - int max_crop_size_; + int max_crop_size; /*! \brief min crop size */ - int min_crop_size_; + int min_crop_size; /*! \brief max scale ratio */ - float max_random_scale_; + float max_random_scale; /*! \brief min scale_ratio */ - float min_random_scale_; + float min_random_scale; /*! \brief min image size */ - float min_img_size_; + float min_img_size; /*! \brief max image size */ - float max_img_size_; - /*! \brief whether to mirror the image */ - bool mirror_; + float max_img_size; /*! \brief rotate angle */ - int rotate_; + int rotate; /*! \brief filled color while padding */ - int fill_value_; + int fill_value; + /*! \brief whether to mirror the image */ + bool mirror; + /*! \brief whether to perform rand mirror the image */ + bool rand_mirror; + /*! \brief mean file string*/ + std::string mean_img; + /*! \brief mean value for r channel */ + float mean_r; + /*! \brief mean value for g channel */ + float mean_g; + /*! \brief mean value for b channel */ + float mean_b; + /*! \brief shape of the image data*/ + TShape input_shape; + /*! \brief maximum ratio of contrast variation */ + float max_random_contrast_; + /*! \brief maximum value of illumination variation */ + float max_random_illumination_; // declare parameters // TODO: didn't understand the range for some params DMLC_DECLARE_PARAMETER(ImageAugmentParam) { DMLC_DECLARE_FIELD(rand_crop_).set_default(true) .describe("Whether we de random cropping"); - DMLC_DECLARE_FIELD(crop_y_start_).set_default(-1) + DMLC_DECLARE_FIELD(crop_y_start).set_default(-1) .describe("Where to nonrandom crop on y"); - DMLC_DECLARE_FIELD(crop_x_start_).set_default(-1) + DMLC_DECLARE_FIELD(crop_x_start).set_default(-1) .describe("Where to nonrandom crop on x"); - DMLC_DECLARE_FIELD(max_rotate_angle_).set_default(0.0f) + DMLC_DECLARE_FIELD(max_rotate_angle).set_default(0.0f) .describe("Rotate can be [-max_rotate_angle, max_rotate_angle]"); - DMLC_DECLARE_FIELD(max_aspect_ratio_).set_default(0.0f) + DMLC_DECLARE_FIELD(max_aspect_ratio).set_default(0.0f) .describe("Max aspect ratio"); - DMLC_DECLARE_FIELD(max_shear_ratio_).set_default(0.0f) + DMLC_DECLARE_FIELD(max_shear_ratio).set_default(0.0f) .describe("Shear rotate can be made between [-max_shear_ratio_, max_shear_ratio_]"); - DMLC_DECLARE_FIELD(max_crop_size_).set_default(-1) + DMLC_DECLARE_FIELD(max_crop_size).set_default(-1) .describe("Maximum crop size"); - DMLC_DECLARE_FIELD(min_crop_size_).set_default(-1) + DMLC_DECLARE_FIELD(min_crop_size).set_default(-1) .describe("Minimum crop size"); - DMLC_DECLARE_FIELD(max_random_scale_).set_default(1.0f) + DMLC_DECLARE_FIELD(max_random_scale).set_default(1.0f) .describe("Maxmum scale ratio"); - DMLC_DECLARE_FIELD(min_random_scale_).set_default(1.0f) + DMLC_DECLARE_FIELD(min_random_scale).set_default(1.0f) .describe("Minimum scale ratio"); - DMLC_DECLARE_FIELD(max_img_size_).set_default(1e10f) + DMLC_DECLARE_FIELD(max_img_size).set_default(1e10f) .describe("Maxmum image size"); - DMLC_DECLARE_FIELD(min_img_size_).set_default(0.0f) + DMLC_DECLARE_FIELD(min_img_size).set_default(0.0f) .describe("Minimum image size"); - DMLC_DECLARE_FIELD(mirror_).set_default(false) - .describe("Whether to mirror the image"); - DMLC_DECLARE_FIELD(rotate_).set_default(-1.0f) + DMLC_DECLARE_FIELD(rotate).set_default(-1.0f) .describe("Rotate angle"); - DMLC_DECLARE_FIELD(fill_value_).set_default(255) + DMLC_DECLARE_FIELD(fill_value).set_default(255) .describe("Filled value while padding"); + DMLC_DECLARE_FIELD(mirror).set_default(false) + .describe("Whether to mirror the image"); + DMLC_DECLARE_FIELD(rand_mirror).set_default(false) + .describe("Whether to mirror the image randomly"); + DMLC_DECLARE_FIELD(mean_img).set_default("") + .describe("Mean Image to be subtracted"); + DMLC_DECLARE_FIELD(mean_r).set_default(0.0f) + .describe("Mean value on R channel"); + DMLC_DECLARE_FIELD(mean_g).set_default(0.0f) + .describe("Mean value on G channel"); + DMLC_DECLARE_FIELD(mean_b).set_default(0.0f) + .describe("Mean value on B channel"); + float input_shape_default = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + .set_expect_ndim(3).enforce_nonzero() + .describe("Input shape of the neural net"); + DMLC_DECLARE_FIELD(max_random_contrast).set_default(0.0f) + .describe("Maximum ratio of contrast variation"); + DMLC_DECLARE_FIELD(max_random_illumination).set_default(0.0f) + .describe("Maximum value of illumination variation"); } }; @@ -84,8 +118,8 @@ class ImageAugmenter { public: // contructor ImageAugmenter(void) - : tmpres(false), - rotateM(2, 3, CV_32F) { + : tmpres_(false), + rotateM_(2, 3, CV_32F) { } virtual ~ImageAugmenter() { } @@ -94,10 +128,6 @@ class ImageAugmenter { std::vector > kwargs_left; kwargs_left = param_.InitAllowUnknown(kwargs); for (size_t i = 0; i < kwargs_left.size(); i++) { - if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { - CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[0], &shape_[1], &shape_[2]) == 3) - << "input_shape must be three consecutive integers without space example: 1,1,200 "; - } if (!strcmp(kwargs_left[i].first.c_str(), "rotate_list")) { const char* val = kwargs_left[i].second.c_str(); const char *end = val + strlen(val); @@ -109,6 +139,19 @@ class ImageAugmenter { } } } + if (param_.mean_img.length() != 0) { + dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); + if (fi == NULL) { + this->CreateMeanImg(); + } else { + if (param_.silent == 0) { + printf("loading mean image from %s\n", param_.mean_img.c_str()); + } + meanimg_.LoadBinary(*fi); + delete fi; + meanfile_ready_ = true; + } + } } /*! * \brief augment src image, store result into dst @@ -118,27 +161,27 @@ class ImageAugmenter { * \param source of random number * \param dst the pointer to the place where we want to store the result */ - virtual cv::Mat Process(const cv::Mat &src, + virtual cv::Mat OpencvProcess(const cv::Mat &src, common::RANDOM_ENGINE *prnd) { // shear - float s = NextDouble(prnd) * param_.max_shear_ratio_ * 2 - param_.max_shear_ratio_; + float s = NextDouble(prnd) * param_.max_shear_ratio * 2 - param_.max_shear_ratio; // rotate - int angle = NextUInt32(param_.max_rotate_angle_ * 2, prnd) - param_.max_rotate_angle_; - if (param_.rotate_ > 0) angle = param_.rotate_; + int angle = NextUInt32(param_.max_rotate_angle * 2, prnd) - param_.max_rotate_angle; + if (param_.rotate > 0) angle = param_.rotate; if (rotate_list_.size() > 0) { angle = rotate_list_[NextUInt32(rotate_list_.size() - 1, prnd)]; } float a = cos(angle / 180.0 * M_PI); float b = sin(angle / 180.0 * M_PI); // scale - float scale = NextDouble(prnd) * (param_.max_random_scale_ - param_.min_random_scale_) + param_.min_random_scale_; + float scale = NextDouble(prnd) * (param_.max_random_scale - param_.min_random_scale) + param_.min_random_scale; // aspect ratio - float ratio = NextDouble(prnd) * param_.max_aspect_ratio_ * 2 - param_.max_aspect_ratio_ + 1; + float ratio = NextDouble(prnd) * param_.max_aspect_ratio * 2 - param_.max_aspect_ratio + 1; float hs = 2 * scale / (1 + ratio); float ws = ratio * hs; // new width and height - float new_width = std::max(param_.min_img_size_, std::min(param_.max_img_size_, scale * src.cols)); - float new_height = std::max(param_.min_img_size_, std::min(param_.max_img_size_, scale * src.rows)); + float new_width = std::max(param_.min_img_size, std::min(param_.max_img_size, scale * src.cols)); + float new_height = std::max(param_.min_img_size, std::min(param_.max_img_size, scale * src.rows)); //printf("%f %f %f %f %f %f %f %f %f\n", s, a, b, scale, ratio, hs, ws, new_width, new_height); cv::Mat M(2, 3, CV_32F); M.at(0, 0) = hs * a - s * b * ws; @@ -152,15 +195,16 @@ class ImageAugmenter { cv::warpAffine(src, temp, M, cv::Size(new_width, new_height), cv::INTER_LINEAR, cv::BORDER_CONSTANT, - cv::Scalar(param_.fill_value_, param_.fill_value_, param_.fill_value_)); + cv::Scalar(param_.fill_value, param_.fill_value, param_.fill_value)); cv::Mat res = temp; - if (param_.max_crop_size_ != -1 || param_.min_crop_size_ != -1){ - CHECK(res.cols >= param_.max_crop_size_ && res.rows >= param_.max_crop_size_&& param_.max_crop_size_ >= param_.min_crop_size_) + // crop + if (param_.max_crop_size != -1 || param_.min_crop_size != -1){ + CHECK(res.cols >= param_.max_crop_size && res.rows >= param_.max_crop_size && param_.max_crop_size >= param_.min_crop_size) << "input image size smaller than max_crop_size"; - mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size_- param_.min_crop_size_+1, prnd)+ param_.min_crop_size_; + mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size- param_.min_crop_size+1, prnd)+ param_.min_crop_size; mshadow::index_t y = res.rows - rand_crop_size; mshadow::index_t x = res.cols - rand_crop_size; - if (param_.rand_crop_ != 0) { + if (param_.rand_crop != 0) { y = NextUInt32(y + 1, prnd); x = NextUInt32(x + 1, prnd); } @@ -168,13 +212,13 @@ class ImageAugmenter { y /= 2; x /= 2; } cv::Rect roi(x, y, rand_crop_size, rand_crop_size); - cv::resize(res(roi), res, cv::Size(shape_[1], shape_[2])); + cv::resize(res(roi), res, cv::Size(param_.input_shape[1], param_.input_shape[2])); } else{ - CHECK(static_cast(res.cols) >= shape_[1] && static_cast(res.rows) >= shape_[2]) + CHECK(static_cast(res.cols) >= param_.input_shape[1] && static_cast(res.rows) >= param_.input_shape[2]) << "input image size smaller than input shape"; - mshadow::index_t y = res.rows - shape_[2]; - mshadow::index_t x = res.cols - shape_[1]; + mshadow::index_t y = res.rows - param_.input_shape[2]; + mshadow::index_t x = res.cols - param_.input_shape[1]; if (param_.rand_crop_ != 0) { y = NextUInt32(y + 1, prnd); x = NextUInt32(x + 1, prnd); @@ -182,7 +226,7 @@ class ImageAugmenter { else { y /= 2; x /= 2; } - cv::Rect roi(x, y, shape_[1], shape_[2]); + cv::Rect roi(x, y, param_.input_shape[1], param_.input_shape[2]); res = res(roi); } return res; @@ -195,9 +239,9 @@ class ImageAugmenter { * \param source of random number * \param dst the pointer to the place where we want to store the result */ - virtual mshadow::Tensor Process(mshadow::Tensor data, + virtual mshadow::Tensor OpencvProcess(mshadow::Tensor data, common::RANDOM_ENGINE *prnd) { - if (!NeedProcess()) return data; + if (!NeedOpencvProcess()) return data; cv::Mat res(data.size(1), data.size(2), CV_8UC3); for (index_t i = 0; i < data.size(1); ++i) { for (index_t j = 0; j < data.size(2); ++j) { @@ -206,7 +250,7 @@ class ImageAugmenter { res.at(i, j)[2] = data[0][i][j]; } } - res = this->Process(res, prnd); + res = this->OpencvProcess(res, prnd); tmpres.Resize(mshadow::Shape3(3, res.rows, res.cols)); for (index_t i = 0; i < tmpres.size(1); ++i) { for (index_t j = 0; j < tmpres.size(2); ++j) { @@ -219,12 +263,12 @@ class ImageAugmenter { return tmpres; } - virtual void Process(unsigned char *dptr, size_t sz, + virtual void OpencvProcess(unsigned char *dptr, size_t sz, mshadow::TensorContainer *p_data, common::RANDOM_ENGINE *prnd) { cv::Mat buf(1, sz, CV_8U, dptr); cv::Mat res = cv::imdecode(buf, 1); - res = this->Process(res, prnd); + res = this->OpencvProcess(res, prnd); p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); for (index_t i = 0; i < p_data->size(1); ++i) { for (index_t j = 0; j < p_data->size(2); ++j) { @@ -237,20 +281,117 @@ class ImageAugmenter { res.release(); } + void TensorProcess(mshadow::TensorContainer *p_data, + common::RANDOM_ENGINE *prnd) { + img_.Resize(mshadow::Shape3((*p_data).shape_[0], param_.input_shape[1], param_.input_shape[2])); + if (param_.input_shape[1] == 1) { + img_ = (*p_data) * param_.scale; + } else { + CHECK(p_data->size(1) >= param_.input_shape[1] && p_data->size(2) >= param_.input_shape[2]) + << "Data size must be bigger than the input size to net."; + mshadow::index_t yy = p_data->size(1) - param_.input_shape[1]; + mshadow::index_t xx = p_data->size(2) - param_.input_shape[2]; + if (param_.rand_crop != 0 && (yy != 0 || xx != 0)) { + yy = NextUInt32(yy + 1, prnd); + xx = NextUInt32(xx + 1, prnd); + } else { + yy /= 2; xx /= 2; + } + if (p_data->size(1) != param_.input_shape[1] && param_.crop_y_start != -1) { + yy = param_.crop_y_start; + } + if (p_data->size(2) != param_.input_shape[2] && param_.crop_x_start != -1) { + xx = param_.crop_x_start; + } + float contrast = NextDouble(prnd) * param_.max_random_contrast * 2 - param_.max_random_contrast + 1; + float illumination = NextDouble(prnd) * param_.max_random_illumination * 2 - param_.max_random_illumination; + if (param_.mean_r > 0.0f || param_.mean_g > 0.0f || param_.mean_b > 0.0f) { + // substract mean value + (*p_data)[0] -= param_.mean_b; (*p_data)[1] -= param_.mean_g; (*p_data)[2] -= param_.mean_r; + if ((param_.rand_mirror != 0 && NextDouble(rnd) < 0.5f) || param_.mirror == 1) { + img_ = mirror(crop((*p_data) * contrast + illumination, img_[0].shape_, yy, xx)) * param_.scale; + } else { + img_ = crop((*p_data) * contrast + illumination, img_[0].shape_, yy, xx) * param_.scale ; + } + } else if (!meanfile_ready_ || param_.mean_img.length() == 0) { + // do not substract anything + if (param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) { + img_ = mirror(crop((*p_data), img_[0].shape_, yy, xx)) * param_.scale; + } else { + img_ = crop((*p_data), img_[0].shape_, yy, xx) * param_.scale ; + } + } else { + // substract mean image + if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { + if (p_data->shape_ == meanimg_.shape_) { + img_ = mirror(crop(((*p_data) - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx)) * param_.scale; + } else { + img_ = (mirror(crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * param_.scale; + } + } else { + if (p_data->shape_ == meanimg_.shape_){ + img_ = crop(((*p_data) - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx) * param_.scale; + } else { + img_ = ((crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * param_.scale; + } + } + } + } + out_.data = img_; + } + + inline void CreateMeanImg(void) { + if (silent_ == 0) { + printf("cannot find %s: create mean image, this will take some time...\n", name_meanimg_.c_str()); + } + time_t start = time(NULL); + unsigned long elapsed = 0; + size_t imcnt = 1; + + CHECK(this->Next_()) << "input iterator failed."; + meanimg_.Resize(mshadow::Shape3(shape_[0], shape_[1], shape_[2])); + mshadow::Copy(meanimg_, img_); + while (this->Next()) { + meanimg_ += img_; imcnt += 1; + elapsed = (long)(time(NULL) - start); + if (imcnt % 1000 == 0 && silent_ == 0) { + printf("\r \r"); + printf("[%8lu] images processed, %ld sec elapsed", imcnt, elapsed); + fflush(stdout); + } + } + meanimg_ *= (1.0f / imcnt); + + dmlc::Stream *fo = dmlc::Stream::Create(name_meanimg_.c_str(), "w"); + meanimg_.SaveBinary(*fo); + delete fo; + if (silent_ == 0) { + printf("save mean image to %s..\n", name_meanimg_.c_str()); + } + meanfile_ready_ = true; + } + + private: - // whether skip processing - inline bool NeedProcess(void) const { - if (param_.max_rotate_angle_ > 0 || param_.max_shear_ratio_ > 0.0f - || param_.rotate_ > 0 || rotate_list_.size() > 0) return true; - if (param_.min_crop_size_ > 0 && param_.max_crop_size_ > 0) return true; + // whether skip opencv processing + inline bool NeedOpencvProcess(void) const { + if (param_.max_rotate_angle > 0 || param_.max_shear_ratio > 0.0f + || param_.rotate > 0 || rotate_list_.size() > 0) return true; + if (param_.min_crop_size > 0 && param_.max_crop_size > 0) return true; return false; } // temp input space - mshadow::TensorContainer tmpres; + mshadow::TensorContainer tmpres_; + // mean image + mshadow::TensorContainer meanimg_; + /*! \brief temp space */ + mshadow::TensorContainer img_; // temporal space - cv::Mat temp0, temp, temp2; + cv::Mat temp_; // rotation param - cv::Mat rotateM; + cv::Mat rotateM_; + // whether the mean file is ready + bool menafile_ready_; // parameters ImageAugmentParam param_; /*! \brief input shape */ diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h index f258bc2d6afd..4d95b92cce1e 100644 --- a/src/io/iter_batch.h +++ b/src/io/iter_batch.h @@ -16,26 +16,33 @@ namespace io { // Batch parameters struct BatchParam : public dmlc::Parameter { /*! \brief label width */ - index_t batch_size_; + index_t batch_size; + /*! \brief input shape */ + // TODO: haven't modify all shape_ + TShape input_shape; /*! \brief label width */ - index_t label_width_; + index_t label_width; /*! \brief use round roubin to handle overflow batch */ - bool round_batch_; + bool round_batch; /*! \brief skip read */ - bool test_skipread_; + bool test_skipread; /*! \brief silent */ - bool silent_; + bool silent; // declare parameters DMLC_DECLARE_PARAMETER(BatchParam) { - DMLC_DECLARE_FIELD(batch_size_).set_default(1) + DMLC_DECLARE_FIELD(batch_size) .describe("Batch size."); - DMLC_DECLARE_FIELD(label_width_).set_default(1) + float input_shape_default = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + .set_expect_ndim(3).enforce_nonzero() + .describe("Input shape of the neural net"); + DMLC_DECLARE_FIELD(label_width).set_default(1) .describe("Label width."); - DMLC_DECLARE_FIELD(round_batch_).set_default(false) + DMLC_DECLARE_FIELD(round_batch).set_default(false) .describe("Use round robin to handle overflow batch."); - DMLC_DECLARE_FIELD(test_skipread_).set_default(false) + DMLC_DECLARE_FIELD(test_skipread).set_default(false) .describe("Skip read for testing."); - DMLC_DECLARE_FIELD(silent_).set_default(false) + DMLC_DECLARE_FIELD(silent).set_default(false) .describe("Whether to print batch information."); } }; @@ -54,20 +61,14 @@ class BatchAdaptIter: public IIterator { std::vector > kwargs_left; // init batch param, it could have similar param with kwargs_left = param_.InitAllowUnknown(kwargs); - for (size_t i = 0; i < kwargs_left.size(); i++) { - if (!strcmp(kwargs_left[i].first.c_str(), "input_shape")) { - CHECK(sscanf(kwargs_left[i].second.c_str(), "%u,%u,%u", &shape_[1], &shape_[2], &shape_[3]) == 3) - << "input_shape must be three consecutive integers without space example: 1,1,200 "; - } - } // init base iterator base_->Init(kwargs); mshadow::Shape<4> tshape = shape_; - tshape[0] = param_.batch_size_; + tshape[0] = param_.batch_size; AllocSpaceDense(false); } virtual void BeforeFirst(void) { - if (param_.round_batch_ == 0 || num_overflow_ == 0) { + if (param_.round_batch == 0 || num_overflow_ == 0) { // otherise, we already called before first base_->BeforeFirst(); } else { @@ -79,7 +80,7 @@ class BatchAdaptIter: public IIterator { out_.num_batch_padd = 0; // skip read if in head version - if (param_.test_skipread_ != 0 && head_ == 0) return true; + if (param_.test_skipread != 0 && head_ == 0) return true; else this->head_ = 0; // if overflow from previous round, directly return false, until before first is called @@ -92,17 +93,17 @@ class BatchAdaptIter: public IIterator { out_.inst_index[top] = d.index; mshadow::Copy(data[top], d.data[0].get()); - if (++ top >= param_.batch_size_) { + if (++ top >= param_.batch_size) { out_.data[0] = TBlob(data); out_.data[1] = TBlob(label); return true; } } if (top != 0) { - if (param_.round_batch_ != 0) { + if (param_.round_batch != 0) { num_overflow_ = 0; base_->BeforeFirst(); - for (; top < param_.batch_size_; ++top, ++num_overflow_) { + for (; top < param_.batch_size; ++top, ++num_overflow_) { CHECK(base_->Next()) << "number of input must be bigger than batch size"; const DataInst& d = base_->Value(); mshadow::Copy(label[top], d.data[1].get()); @@ -111,7 +112,7 @@ class BatchAdaptIter: public IIterator { } out_.num_batch_padd = num_overflow_; } else { - out_.num_batch_padd = param_.batch_size_ - top; + out_.num_batch_padd = param_.batch_size - top; } out_.data[0] = TBlob(data); out_.data[1] = TBlob(label); @@ -128,8 +129,6 @@ class BatchAdaptIter: public IIterator { BatchParam param_; /*! \brief base iterator */ IIterator *base_; - /*! \brief input shape */ - mshadow::Shape<4> shape_; /*! \brief output data */ DataBatch out_; /*! \brief on first */ @@ -143,10 +142,10 @@ class BatchAdaptIter: public IIterator { // Functions that allocate and free tensor space inline void AllocSpaceDense(bool pad = false) { data = mshadow::NewTensor(shape_, 0.0f, pad); - mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size_, param_.label_width_); + mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size, param_.label_width); label = mshadow::NewTensor(lshape, 0.0f, pad); - out_.inst_index = new unsigned[param_.batch_size_]; - out_.batch_size = param_.batch_size_; + out_.inst_index = new unsigned[param_.batch_size]; + out_.batch_size = param_.batch_size; out_.data.resize(2); } /*! \brief auxiliary function to free space, if needed, dense only */ diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc index 9977ddd2290c..1589fd5ad6c7 100644 --- a/src/io/iter_image_recordio.cc +++ b/src/io/iter_image_recordio.cc @@ -31,7 +31,7 @@ class ImageLabelMap { explicit ImageLabelMap(const char *path_imglist, mshadow::index_t label_width, bool silent) { - label_width_ = label_width; + label_width = label_width; image_index_.clear(); label_.clear(); idx2label_.clear(); @@ -45,7 +45,7 @@ class ImageLabelMap { // skip space while (isspace(*p) && p != end) ++p; image_index_.push_back(static_cast(atol(p))); - for (size_t i = 0; i < label_width_; ++i) { + for (size_t i = 0; i < label_width; ++i) { // skip till space while (!isspace(*p) && p != end) ++p; // skip space @@ -58,7 +58,7 @@ class ImageLabelMap { // be careful not to resize label_ afterwards idx2label_.reserve(image_index_.size()); for (size_t i = 0; i < image_index_.size(); ++i) { - idx2label_[image_index_[i]] = dmlc::BeginPtr(label_) + i * label_width_; + idx2label_[image_index_[i]] = dmlc::BeginPtr(label_) + i * label_width; } if (!silent) { LOG(INFO) << "Loaded ImageList from " << path_imglist << ' ' @@ -70,12 +70,12 @@ class ImageLabelMap { std::unordered_map::const_iterator it = idx2label_.find(imid); CHECK(it != idx2label_.end()) << "fail to find imagelabel for id " << imid; - return mshadow::Tensor(it->second, mshadow::Shape1(label_width_)); + return mshadow::Tensor(it->second, mshadow::Shape1(label_width)); } private: // label with_ - mshadow::index_t label_width_; + mshadow::index_t label_width; // image index of each record std::vector image_index_; // real label content @@ -87,32 +87,32 @@ class ImageLabelMap { // Define image record parser parameters struct ImageRecParserParam : public dmlc::Parameter { /*! \brief path to image list */ - std::string path_imglist_; + std::string path_imglist; /*! \brief path to image recordio */ - std::string path_imgrec_; + std::string path_imgrec; /*! \brief number of threads */ - int nthread_; + int nthread; /*! \brief whether to remain silent */ - bool silent_; + bool silent; /*! \brief number of distributed worker */ - int dist_num_worker_, dist_worker_rank_; + int dist_num_worker, dist_worker_rank; /*! \brief label-width */ - int label_width_; + int label_width; // declare parameters DMLC_DECLARE_PARAMETER(ImageRecParserParam) { - DMLC_DECLARE_FIELD(path_imglist_).set_default("") + DMLC_DECLARE_FIELD(path_imglist).set_default("") .describe("Path to image list."); - DMLC_DECLARE_FIELD(path_imgrec_).set_default("./data/imgrec.rec") + DMLC_DECLARE_FIELD(path_imgrec).set_default("./data/imgrec.rec") .describe("Path to image record file."); - DMLC_DECLARE_FIELD(nthread_).set_lower_bound(1).set_default(4) + DMLC_DECLARE_FIELD(nthread).set_lower_bound(1).set_default(4) .describe("Number of thread to do parsing."); - DMLC_DECLARE_FIELD(label_width_).set_lower_bound(1).set_default(1) + DMLC_DECLARE_FIELD(label_width).set_lower_bound(1).set_default(1) .describe("How many labels for an image."); - DMLC_DECLARE_FIELD(silent_).set_default(false) + DMLC_DECLARE_FIELD(silent).set_default(false) .describe("Whether to output parser information."); - DMLC_DECLARE_FIELD(dist_num_worker_).set_lower_bound(1).set_default(1) + DMLC_DECLARE_FIELD(dist_num_worker).set_lower_bound(1).set_default(1) .describe("Dist worker number."); - DMLC_DECLARE_FIELD(dist_worker_rank_).set_default(0) + DMLC_DECLARE_FIELD(dist_worker_rank).set_default(0) .describe("Dist worker rank."); } }; @@ -170,12 +170,12 @@ inline void ImageRecordIOParser::Init(const std::vectorHintChunkSize(8 << 20UL); } @@ -217,12 +217,12 @@ ParseNext(std::vector *out_vec) { CHECK(source_ != NULL); dmlc::InputSplit::Blob chunk; if (!source_->NextChunk(&chunk)) return false; - out_vec->resize(param_.nthread_); - #pragma omp parallel num_threads(param_.nthread_) + out_vec->resize(param_.nthread); + #pragma omp parallel num_threads(param_.nthread) { - CHECK(omp_get_num_threads() == param_.nthread_); + CHECK(omp_get_num_threads() == param_.nthread); int tid = omp_get_thread_num(); - dmlc::RecordIOChunkReader reader(chunk, tid, param_.nthread_); + dmlc::RecordIOChunkReader reader(chunk, tid, param_.nthread); ImageRecordIO rec; dmlc::InputSplit::Blob blob; // image data @@ -237,7 +237,7 @@ ParseNext(std::vector *out_vec) { res = augmenters_[tid]->Process(res, prnds_[tid]); out.Push(static_cast(rec.image_index()), mshadow::Shape3(3, res.rows, res.cols), - mshadow::Shape1(param_.label_width_)); + mshadow::Shape1(param_.label_width)); DataInst inst = out.Back(); // turn datainst into tensor mshadow::Tensor data = inst.data[0].get(); From bb6006376e3c431371ac1e0a358950ff0625235b Mon Sep 17 00:00:00 2001 From: tianjun Date: Sun, 6 Sep 2015 10:05:36 +0800 Subject: [PATCH 11/15] call augprocess in base iter --- src/io/image_augmenter.h | 40 +++++++++++++++++------------------ src/io/iter_batch.h | 4 ++-- src/io/iter_image_recordio.cc | 24 +++++++-------------- 3 files changed, 30 insertions(+), 38 deletions(-) diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h index a81e5297d5b3..38efcc58e61a 100644 --- a/src/io/image_augmenter.h +++ b/src/io/image_augmenter.h @@ -263,24 +263,6 @@ class ImageAugmenter { return tmpres; } - virtual void OpencvProcess(unsigned char *dptr, size_t sz, - mshadow::TensorContainer *p_data, - common::RANDOM_ENGINE *prnd) { - cv::Mat buf(1, sz, CV_8U, dptr); - cv::Mat res = cv::imdecode(buf, 1); - res = this->OpencvProcess(res, prnd); - p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); - for (index_t i = 0; i < p_data->size(1); ++i) { - for (index_t j = 0; j < p_data->size(2); ++j) { - cv::Vec3b bgr = res.at(i, j); - (*p_data)[0][i][j] = bgr[2]; - (*p_data)[1][i][j] = bgr[1]; - (*p_data)[2][i][j] = bgr[0]; - } - } - res.release(); - } - void TensorProcess(mshadow::TensorContainer *p_data, common::RANDOM_ENGINE *prnd) { img_.Resize(mshadow::Shape3((*p_data).shape_[0], param_.input_shape[1], param_.input_shape[2])); @@ -337,7 +319,7 @@ class ImageAugmenter { } } } - out_.data = img_; + (*p_data) = img_; } inline void CreateMeanImg(void) { @@ -371,7 +353,25 @@ class ImageAugmenter { meanfile_ready_ = true; } - + virtual void Process(unsigned char *dptr, size_t sz, + mshadow::TensorContainer *p_data, + common::RANDOM_ENGINE *prnd) { + cv::Mat buf(1, sz, CV_8U, dptr); + cv::Mat res = cv::imdecode(buf, 1); + res = this->OpencvProcess(res, prnd); + p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < p_data->size(1); ++i) { + for (index_t j = 0; j < p_data->size(2); ++j) { + cv::Vec3b bgr = res.at(i, j); + (*p_data)[0][i][j] = bgr[2]; + (*p_data)[1][i][j] = bgr[1]; + (*p_data)[2][i][j] = bgr[0]; + } + } + res.release(); + this->TensorProcess(p_data, prnd); + } + private: // whether skip opencv processing inline bool NeedOpencvProcess(void) const { diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h index 4d95b92cce1e..7fe8f4440513 100644 --- a/src/io/iter_batch.h +++ b/src/io/iter_batch.h @@ -63,7 +63,7 @@ class BatchAdaptIter: public IIterator { kwargs_left = param_.InitAllowUnknown(kwargs); // init base iterator base_->Init(kwargs); - mshadow::Shape<4> tshape = shape_; + mshadow::Shape<4> tshape = param_.input_shape; tshape[0] = param_.batch_size; AllocSpaceDense(false); } @@ -141,7 +141,7 @@ class BatchAdaptIter: public IIterator { mshadow::Tensor data; // Functions that allocate and free tensor space inline void AllocSpaceDense(bool pad = false) { - data = mshadow::NewTensor(shape_, 0.0f, pad); + data = mshadow::NewTensor(param_.input_shape, 0.0f, pad); mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size, param_.label_width); label = mshadow::NewTensor(lshape, 0.0f, pad); out_.inst_index = new unsigned[param_.batch_size]; diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc index 1589fd5ad6c7..0c44a2346e4a 100644 --- a/src/io/iter_image_recordio.cc +++ b/src/io/iter_image_recordio.cc @@ -98,6 +98,8 @@ struct ImageRecParserParam : public dmlc::Parameter { int dist_num_worker, dist_worker_rank; /*! \brief label-width */ int label_width; + /*! \brief input shape */ + TShape input_shape; // declare parameters DMLC_DECLARE_PARAMETER(ImageRecParserParam) { DMLC_DECLARE_FIELD(path_imglist).set_default("") @@ -114,6 +116,10 @@ struct ImageRecParserParam : public dmlc::Parameter { .describe("Dist worker number."); DMLC_DECLARE_FIELD(dist_worker_rank).set_default(0) .describe("Dist worker rank."); + float input_shape_default = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + .set_expect_ndim(3).enforce_nonzero() + .describe("Input shape of the neural net"); } }; @@ -229,33 +235,19 @@ ParseNext(std::vector *out_vec) { InstVector &out = (*out_vec)[tid]; out.Clear(); while (reader.NextRecord(&blob)) { - // result holder - cv::Mat res; - rec.Load(blob.dptr, blob.size); - cv::Mat buf(1, rec.content_size, CV_8U, rec.content); - res = cv::imdecode(buf, 1); - res = augmenters_[tid]->Process(res, prnds_[tid]); out.Push(static_cast(rec.image_index()), - mshadow::Shape3(3, res.rows, res.cols), + mshadow::Shape3(param_.input_shape[0], param_.input_shape[0], param_.input_shape[0]), mshadow::Shape1(param_.label_width)); DataInst inst = out.Back(); // turn datainst into tensor mshadow::Tensor data = inst.data[0].get(); mshadow::Tensor label = inst.data[1].get(); - for (int i = 0; i < res.rows; ++i) { - for (int j = 0; j < res.cols; ++j) { - cv::Vec3b bgr = res.at(i, j); - data[0][i][j] = bgr[2]; - data[1][i][j] = bgr[1]; - data[2][i][j] = bgr[0]; - } - } + augmenters_[tid]->Process(rec.content, rec.content_size, &data, prnd); if (label_map_ != NULL) { mshadow::Copy(label, label_map_->Find(rec.image_index())); } else { label[0] = rec.header.label; } - res.release(); } } return true; From 0913273a7ad6154218db920de4950d2e2b7466d7 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Tue, 8 Sep 2015 01:13:59 +0800 Subject: [PATCH 12/15] recio works --- src/common/utils.h | 5 +- src/io/image_augmenter.h | 192 ++++++++++++++++++---------------- src/io/image_recordio.h | 8 +- src/io/inst_vector.h | 16 +-- src/io/io.cc | 2 +- src/io/iter_batch.h | 51 +++++---- src/io/iter_image_recordio.cc | 99 ++++++++++++++---- src/utils/decoder.h | 128 ----------------------- tests/python/test_io.py | 70 +++++++++---- 9 files changed, 273 insertions(+), 298 deletions(-) delete mode 100644 src/utils/decoder.h diff --git a/src/common/utils.h b/src/common/utils.h index b5edb78bd6f9..29cb9f0e2f2a 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -22,9 +22,10 @@ namespace common { */ typedef std::mt19937 RANDOM_ENGINE; // Get a double float, prnd is the pointer to a Random Engine -#define NextDouble(prnd) std::generate_canonical(*prnd) +#define NextDouble(prnd) std::generate_canonical(*prnd) // Get a random int in [0, range) -#define NextUInt32(range, prnd) static_cast(floor(std::generate_canonical(*prnd) * range)) +#define NextUInt32(range, prnd) static_cast \ +(floor(std::generate_canonical(*prnd) * range)) /*! * \brief Helper functions. diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h index 38efcc58e61a..a4b77f5a41df 100644 --- a/src/io/image_augmenter.h +++ b/src/io/image_augmenter.h @@ -1,4 +1,5 @@ /*! + * Copyright (c) 2015 by Contributors * \file image_augmenter_opencv.hpp * \brief threaded version of page iterator * \author Naiyan Wang, Tianqi Chen, Tianjun Xiao @@ -7,6 +8,10 @@ #define MXNET_IO_IMAGE_AUGMENTER_H_ #include +#include +#include +#include +#include #include "../common/utils.h" namespace mxnet { @@ -41,6 +46,7 @@ struct ImageAugmentParam : public dmlc::Parameter { int rotate; /*! \brief filled color while padding */ int fill_value; + // The following are params for tensor process /*! \brief whether to mirror the image */ bool mirror; /*! \brief whether to perform rand mirror the image */ @@ -55,14 +61,17 @@ struct ImageAugmentParam : public dmlc::Parameter { float mean_b; /*! \brief shape of the image data*/ TShape input_shape; + /*! \brief scale on color space */ + float scale; /*! \brief maximum ratio of contrast variation */ - float max_random_contrast_; + float max_random_contrast; /*! \brief maximum value of illumination variation */ - float max_random_illumination_; + float max_random_illumination; + /*! \brief whether to print augment info */ + bool silent; // declare parameters - // TODO: didn't understand the range for some params DMLC_DECLARE_PARAMETER(ImageAugmentParam) { - DMLC_DECLARE_FIELD(rand_crop_).set_default(true) + DMLC_DECLARE_FIELD(rand_crop).set_default(true) .describe("Whether we de random cropping"); DMLC_DECLARE_FIELD(crop_y_start).set_default(-1) .describe("Where to nonrandom crop on y"); @@ -81,7 +90,7 @@ struct ImageAugmentParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(max_random_scale).set_default(1.0f) .describe("Maxmum scale ratio"); DMLC_DECLARE_FIELD(min_random_scale).set_default(1.0f) - .describe("Minimum scale ratio"); + .describe("Minimum scale ratio"); DMLC_DECLARE_FIELD(max_img_size).set_default(1e10f) .describe("Maxmum image size"); DMLC_DECLARE_FIELD(min_img_size).set_default(0.0f) @@ -99,13 +108,16 @@ struct ImageAugmentParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(mean_r).set_default(0.0f) .describe("Mean value on R channel"); DMLC_DECLARE_FIELD(mean_g).set_default(0.0f) - .describe("Mean value on G channel"); + .describe("Mean value on G channel"); DMLC_DECLARE_FIELD(mean_b).set_default(0.0f) .describe("Mean value on B channel"); - float input_shape_default = {3, 224, 224}; - DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + index_t input_shape_default[] = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape) + .set_default(TShape(input_shape_default, input_shape_default + 3)) .set_expect_ndim(3).enforce_nonzero() .describe("Input shape of the neural net"); + DMLC_DECLARE_FIELD(scale).set_default(1.0f) + .describe("Scale in color space"); DMLC_DECLARE_FIELD(max_random_contrast).set_default(0.0f) .describe("Maximum ratio of contrast variation"); DMLC_DECLARE_FIELD(max_random_illumination).set_default(0.0f) @@ -123,7 +135,6 @@ class ImageAugmenter { } virtual ~ImageAugmenter() { } - // TODO: Hack the shape and rotate list, didn't use param virtual void Init(const std::vector >& kwargs) { std::vector > kwargs_left; kwargs_left = param_.InitAllowUnknown(kwargs); @@ -142,7 +153,7 @@ class ImageAugmenter { if (param_.mean_img.length() != 0) { dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); if (fi == NULL) { - this->CreateMeanImg(); + meanfile_ready_ = false; } else { if (param_.silent == 0) { printf("loading mean image from %s\n", param_.mean_img.c_str()); @@ -174,15 +185,18 @@ class ImageAugmenter { float a = cos(angle / 180.0 * M_PI); float b = sin(angle / 180.0 * M_PI); // scale - float scale = NextDouble(prnd) * (param_.max_random_scale - param_.min_random_scale) + param_.min_random_scale; + float scale = NextDouble(prnd) * \ + (param_.max_random_scale - param_.min_random_scale) + param_.min_random_scale; // aspect ratio - float ratio = NextDouble(prnd) * param_.max_aspect_ratio * 2 - param_.max_aspect_ratio + 1; + float ratio = NextDouble(prnd) * \ + param_.max_aspect_ratio * 2 - param_.max_aspect_ratio + 1; float hs = 2 * scale / (1 + ratio); float ws = ratio * hs; // new width and height - float new_width = std::max(param_.min_img_size, std::min(param_.max_img_size, scale * src.cols)); - float new_height = std::max(param_.min_img_size, std::min(param_.max_img_size, scale * src.rows)); - //printf("%f %f %f %f %f %f %f %f %f\n", s, a, b, scale, ratio, hs, ws, new_width, new_height); + float new_width = std::max(param_.min_img_size, \ + std::min(param_.max_img_size, scale * src.cols)); + float new_height = std::max(param_.min_img_size, \ + std::min(param_.max_img_size, scale * src.rows)); cv::Mat M(2, 3, CV_32F); M.at(0, 0) = hs * a - s * b * ws; M.at(1, 0) = -b * ws; @@ -192,42 +206,42 @@ class ImageAugmenter { float ori_center_height = M.at(1, 0) * src.cols + M.at(1, 1) * src.rows; M.at(0, 2) = (new_width - ori_center_width) / 2; M.at(1, 2) = (new_height - ori_center_height) / 2; - cv::warpAffine(src, temp, M, cv::Size(new_width, new_height), + cv::warpAffine(src, temp_, M, cv::Size(new_width, new_height), cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar(param_.fill_value, param_.fill_value, param_.fill_value)); - cv::Mat res = temp; + cv::Mat res = temp_; // crop - if (param_.max_crop_size != -1 || param_.min_crop_size != -1){ - CHECK(res.cols >= param_.max_crop_size && res.rows >= param_.max_crop_size && param_.max_crop_size >= param_.min_crop_size) + if (param_.max_crop_size != -1 || param_.min_crop_size != -1) { + CHECK(res.cols >= param_.max_crop_size && res.rows >= \ + param_.max_crop_size && param_.max_crop_size >= param_.min_crop_size) << "input image size smaller than max_crop_size"; - mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size- param_.min_crop_size+1, prnd)+ param_.min_crop_size; + mshadow::index_t rand_crop_size = NextUInt32(param_.max_crop_size \ + - param_.min_crop_size+1, prnd)+ param_.min_crop_size; mshadow::index_t y = res.rows - rand_crop_size; mshadow::index_t x = res.cols - rand_crop_size; if (param_.rand_crop != 0) { y = NextUInt32(y + 1, prnd); x = NextUInt32(x + 1, prnd); - } - else { + } else { y /= 2; x /= 2; } cv::Rect roi(x, y, rand_crop_size, rand_crop_size); cv::resize(res(roi), res, cv::Size(param_.input_shape[1], param_.input_shape[2])); - } - else{ - CHECK(static_cast(res.cols) >= param_.input_shape[1] && static_cast(res.rows) >= param_.input_shape[2]) - << "input image size smaller than input shape"; - mshadow::index_t y = res.rows - param_.input_shape[2]; - mshadow::index_t x = res.cols - param_.input_shape[1]; - if (param_.rand_crop_ != 0) { - y = NextUInt32(y + 1, prnd); - x = NextUInt32(x + 1, prnd); - } - else { - y /= 2; x /= 2; - } - cv::Rect roi(x, y, param_.input_shape[1], param_.input_shape[2]); - res = res(roi); + } else { + CHECK(static_cast(res.cols) >= param_.input_shape[1] \ + && static_cast(res.rows) >= param_.input_shape[2]) + << "input image size smaller than input shape"; + mshadow::index_t y = res.rows - param_.input_shape[2]; + mshadow::index_t x = res.cols - param_.input_shape[1]; + if (param_.rand_crop != 0) { + y = NextUInt32(y + 1, prnd); + x = NextUInt32(x + 1, prnd); + } else { + y /= 2; x /= 2; + } + cv::Rect roi(x, y, param_.input_shape[1], param_.input_shape[2]); + res = res(roi); } return res; } @@ -251,20 +265,32 @@ class ImageAugmenter { } } res = this->OpencvProcess(res, prnd); - tmpres.Resize(mshadow::Shape3(3, res.rows, res.cols)); - for (index_t i = 0; i < tmpres.size(1); ++i) { - for (index_t j = 0; j < tmpres.size(2); ++j) { + tmpres_.Resize(mshadow::Shape3(3, res.rows, res.cols)); + for (index_t i = 0; i < tmpres_.size(1); ++i) { + for (index_t j = 0; j < tmpres_.size(2); ++j) { cv::Vec3b bgr = res.at(i, j); - tmpres[0][i][j] = bgr[2]; - tmpres[1][i][j] = bgr[1]; - tmpres[2][i][j] = bgr[0]; + tmpres_[0][i][j] = bgr[2]; + tmpres_[1][i][j] = bgr[1]; + tmpres_[2][i][j] = bgr[0]; } } - return tmpres; + return tmpres_; } void TensorProcess(mshadow::TensorContainer *p_data, common::RANDOM_ENGINE *prnd) { + // Check Newly Created mean image + if (meanfile_ready_ == false && param_.mean_img.length() != 0) { + dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); + if (fi != NULL) { + if (param_.silent == 0) { + printf("loading mean image from %s\n", param_.mean_img.c_str()); + } + meanimg_.LoadBinary(*fi); + delete fi; + meanfile_ready_ = true; + } + } img_.Resize(mshadow::Shape3((*p_data).shape_[0], param_.input_shape[1], param_.input_shape[2])); if (param_.input_shape[1] == 1) { img_ = (*p_data) * param_.scale; @@ -285,72 +311,51 @@ class ImageAugmenter { if (p_data->size(2) != param_.input_shape[2] && param_.crop_x_start != -1) { xx = param_.crop_x_start; } - float contrast = NextDouble(prnd) * param_.max_random_contrast * 2 - param_.max_random_contrast + 1; - float illumination = NextDouble(prnd) * param_.max_random_illumination * 2 - param_.max_random_illumination; + float contrast = NextDouble(prnd) * param_.max_random_contrast \ + * 2 - param_.max_random_contrast + 1; + float illumination = NextDouble(prnd) * param_.max_random_illumination \ + * 2 - param_.max_random_illumination; if (param_.mean_r > 0.0f || param_.mean_g > 0.0f || param_.mean_b > 0.0f) { // substract mean value - (*p_data)[0] -= param_.mean_b; (*p_data)[1] -= param_.mean_g; (*p_data)[2] -= param_.mean_r; - if ((param_.rand_mirror != 0 && NextDouble(rnd) < 0.5f) || param_.mirror == 1) { - img_ = mirror(crop((*p_data) * contrast + illumination, img_[0].shape_, yy, xx)) * param_.scale; + (*p_data)[0] -= param_.mean_b; + (*p_data)[1] -= param_.mean_g; + (*p_data)[2] -= param_.mean_r; + if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { + img_ = mirror(crop((*p_data) * contrast + illumination, \ + img_[0].shape_, yy, xx)) * param_.scale; } else { - img_ = crop((*p_data) * contrast + illumination, img_[0].shape_, yy, xx) * param_.scale ; + img_ = crop((*p_data) * contrast + illumination, \ + img_[0].shape_, yy, xx) * param_.scale; } } else if (!meanfile_ready_ || param_.mean_img.length() == 0) { // do not substract anything - if (param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) { + if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { img_ = mirror(crop((*p_data), img_[0].shape_, yy, xx)) * param_.scale; } else { - img_ = crop((*p_data), img_[0].shape_, yy, xx) * param_.scale ; + img_ = crop((*p_data), img_[0].shape_, yy, xx) * param_.scale; } } else { // substract mean image if ((param_.rand_mirror != 0 && NextDouble(prnd) < 0.5f) || param_.mirror == 1) { if (p_data->shape_ == meanimg_.shape_) { - img_ = mirror(crop(((*p_data) - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx)) * param_.scale; + img_ = mirror(crop(((*p_data) - meanimg_) * contrast \ + + illumination, img_[0].shape_, yy, xx)) * param_.scale; } else { - img_ = (mirror(crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * param_.scale; + img_ = (mirror(crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) \ + * contrast + illumination) * param_.scale; } } else { - if (p_data->shape_ == meanimg_.shape_){ - img_ = crop(((*p_data) - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx) * param_.scale; + if (p_data->shape_ == meanimg_.shape_) { + img_ = crop(((*p_data) - meanimg_) * contrast + illumination, \ + img_[0].shape_, yy, xx) * param_.scale; } else { - img_ = ((crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * param_.scale; + img_ = ((crop((*p_data), img_[0].shape_, yy, xx) - meanimg_) * \ + contrast + illumination) * param_.scale; } } } } (*p_data) = img_; - } - - inline void CreateMeanImg(void) { - if (silent_ == 0) { - printf("cannot find %s: create mean image, this will take some time...\n", name_meanimg_.c_str()); - } - time_t start = time(NULL); - unsigned long elapsed = 0; - size_t imcnt = 1; - - CHECK(this->Next_()) << "input iterator failed."; - meanimg_.Resize(mshadow::Shape3(shape_[0], shape_[1], shape_[2])); - mshadow::Copy(meanimg_, img_); - while (this->Next()) { - meanimg_ += img_; imcnt += 1; - elapsed = (long)(time(NULL) - start); - if (imcnt % 1000 == 0 && silent_ == 0) { - printf("\r \r"); - printf("[%8lu] images processed, %ld sec elapsed", imcnt, elapsed); - fflush(stdout); - } - } - meanimg_ *= (1.0f / imcnt); - - dmlc::Stream *fo = dmlc::Stream::Create(name_meanimg_.c_str(), "w"); - meanimg_.SaveBinary(*fo); - delete fo; - if (silent_ == 0) { - printf("save mean image to %s..\n", name_meanimg_.c_str()); - } - meanfile_ready_ = true; } virtual void Process(unsigned char *dptr, size_t sz, @@ -358,7 +363,8 @@ class ImageAugmenter { common::RANDOM_ENGINE *prnd) { cv::Mat buf(1, sz, CV_8U, dptr); cv::Mat res = cv::imdecode(buf, 1); - res = this->OpencvProcess(res, prnd); + if (NeedOpencvProcess()) + res = this->OpencvProcess(res, prnd); p_data->Resize(mshadow::Shape3(3, res.rows, res.cols)); for (index_t i = 0; i < p_data->size(1); ++i) { for (index_t j = 0; j < p_data->size(2); ++j) { @@ -371,7 +377,7 @@ class ImageAugmenter { res.release(); this->TensorProcess(p_data, prnd); } - + private: // whether skip opencv processing inline bool NeedOpencvProcess(void) const { @@ -391,7 +397,7 @@ class ImageAugmenter { // rotation param cv::Mat rotateM_; // whether the mean file is ready - bool menafile_ready_; + bool meanfile_ready_; // parameters ImageAugmentParam param_; /*! \brief input shape */ @@ -400,5 +406,5 @@ class ImageAugmenter { std::vector rotate_list_; }; } // namespace io -} // namespace cxxnet -#endif +} // namespace mxnet +#endif // MXNET_IO_IMAGE_AUGMENTER_H_ diff --git a/src/io/image_recordio.h b/src/io/image_recordio.h index 4aea8aabcb47..3b4fa0302435 100644 --- a/src/io/image_recordio.h +++ b/src/io/image_recordio.h @@ -1,4 +1,5 @@ /*! + * Copyright (c) 2015 by Contributors * \file image_recordio.h * \brief image recordio struct */ @@ -7,6 +8,7 @@ #include #include +#include namespace mxnet { namespace io { @@ -67,9 +69,9 @@ struct ImageRecordIO { */ inline void SaveHeader(std::string *blob) const { blob->resize(sizeof(header)); - std::memcpy(dmlc::BeginPtr(*blob), &header, sizeof(header)); - } -}; + std::memcpy(dmlc::BeginPtr(*blob), &header, sizeof(header)); + } +}; } // namespace io } // namespace mxnet #endif // MXNET_IO_IMAGE_RECORDIO_H_ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h index 4ced7dd64c63..ed560fc2b5da 100644 --- a/src/io/inst_vector.h +++ b/src/io/inst_vector.h @@ -1,11 +1,12 @@ /*! + * Copyright (c) 2015 by Contributors * \file inst_vector.h * \brief holder of a sequence of DataInst in CPU * that are not necessarily of same shape */ -#ifndef MXNET_INST_VECTOR_H_ -#define MXNET_INST_VECTOR_H_ +#ifndef MXNET_IO_INST_VECTOR_H_ +#define MXNET_IO_INST_VECTOR_H_ #include #include @@ -31,7 +32,7 @@ class TensorVector { CHECK(i + 1 < offset_.size()); CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); return mshadow::Tensor - ((DType*)dmlc::BeginPtr(content_) + offset_[i], shape_[i]); + ((DType*)dmlc::BeginPtr(content_) + offset_[i], shape_[i]); // NOLINT(*) } inline mshadow::Tensor Back() const { return (*this)[Size() - 1]; @@ -52,6 +53,7 @@ class TensorVector { content_.clear(); shape_.clear(); } + private: // offset of the data content std::vector offset_; @@ -66,7 +68,7 @@ class TensorVector { * non-uniform shape data instance in a shape efficient way */ class InstVector { - public: + public: inline size_t Size(void) const { return index_.size(); } @@ -94,8 +96,8 @@ class InstVector { data_.Push(dshape); label_.Push(lshape); } - - private: + + private: /*! \brief index of the data */ std::vector index_; // label @@ -105,4 +107,4 @@ class InstVector { }; } // namespace io } // namespace mxnet -#endif // MXNET_TENSOR_VECTOR_H_ +#endif // MXNET_IO_INST_VECTOR_H_ diff --git a/src/io/io.cc b/src/io/io.cc index b2dbc9f8c2c5..8bfb5dbdd570 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -17,5 +17,5 @@ namespace io { // Register parameters in header files DMLC_REGISTER_PARAMETER(BatchParam); DMLC_REGISTER_PARAMETER(ImageAugmentParam); -} // namespace mxnet } // namespace io +} // namespace mxnet diff --git a/src/io/iter_batch.h b/src/io/iter_batch.h index 7fe8f4440513..b45dfd3328e1 100644 --- a/src/io/iter_batch.h +++ b/src/io/iter_batch.h @@ -1,7 +1,8 @@ /*! + * Copyright (c) 2015 by Contributors * \file iter_batch_proc-inl.hpp * \brief definition of preprocessing iterators that takes an iterator and do some preprocessing - * \author Tianqi Chen + * \author Tianqi Chen, Tianjun Xiao */ #ifndef MXNET_IO_ITER_BATCH_H_ #define MXNET_IO_ITER_BATCH_H_ @@ -10,6 +11,9 @@ #include #include #include +#include +#include +#include namespace mxnet { namespace io { @@ -18,7 +22,6 @@ struct BatchParam : public dmlc::Parameter { /*! \brief label width */ index_t batch_size; /*! \brief input shape */ - // TODO: haven't modify all shape_ TShape input_shape; /*! \brief label width */ index_t label_width; @@ -32,13 +35,14 @@ struct BatchParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(BatchParam) { DMLC_DECLARE_FIELD(batch_size) .describe("Batch size."); - float input_shape_default = {3, 224, 224}; - DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + index_t input_shape_default[] = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape) + .set_default(TShape(input_shape_default, input_shape_default + 3)) .set_expect_ndim(3).enforce_nonzero() - .describe("Input shape of the neural net"); + .describe("Input shape of the neural net"); DMLC_DECLARE_FIELD(label_width).set_default(1) .describe("Label width."); - DMLC_DECLARE_FIELD(round_batch).set_default(false) + DMLC_DECLARE_FIELD(round_batch).set_default(true) .describe("Use round robin to handle overflow batch."); DMLC_DECLARE_FIELD(test_skipread).set_default(false) .describe("Skip read for testing."); @@ -46,25 +50,25 @@ struct BatchParam : public dmlc::Parameter { .describe("Whether to print batch information."); } }; - + /*! \brief create a batch iterator from single instance iterator */ class BatchAdaptIter: public IIterator { -public: - BatchAdaptIter(IIterator *base): base_(base) { - num_overflow_ = 0; - } + public: + explicit BatchAdaptIter(IIterator *base): base_(base), num_overflow_(0) {} virtual ~BatchAdaptIter(void) { delete base_; FreeSpaceDense(); } virtual void Init(const std::vector >& kwargs) { std::vector > kwargs_left; - // init batch param, it could have similar param with + // init batch param, it could have similar param with kwargs_left = param_.InitAllowUnknown(kwargs); // init base iterator base_->Init(kwargs); - mshadow::Shape<4> tshape = param_.input_shape; - tshape[0] = param_.batch_size; + data_shape_[1] = param_.input_shape[0]; + data_shape_[2] = param_.input_shape[1]; + data_shape_[3] = param_.input_shape[2]; + data_shape_[0] = param_.batch_size; AllocSpaceDense(false); } virtual void BeforeFirst(void) { @@ -80,8 +84,10 @@ class BatchAdaptIter: public IIterator { out_.num_batch_padd = 0; // skip read if in head version - if (param_.test_skipread != 0 && head_ == 0) return true; - else this->head_ = 0; + if (param_.test_skipread != 0 && head_ == 0) + return true; + else + this->head_ = 0; // if overflow from previous round, directly return false, until before first is called if (num_overflow_ != 0) return false; @@ -124,7 +130,8 @@ class BatchAdaptIter: public IIterator { CHECK(head_ == 0) << "must call Next to get value"; return out_; } -private: + + private: /*! \brief batch parameters */ BatchParam param_; /*! \brief base iterator */ @@ -139,9 +146,11 @@ class BatchAdaptIter: public IIterator { mshadow::Tensor label; /*! \brief content of dense data, if this DataBatch is dense */ mshadow::Tensor data; + /*! \brief data shape */ + mshadow::Shape<4> data_shape_; // Functions that allocate and free tensor space - inline void AllocSpaceDense(bool pad = false) { - data = mshadow::NewTensor(param_.input_shape, 0.0f, pad); + inline void AllocSpaceDense(bool pad = false) { + data = mshadow::NewTensor(data_shape_, 0.0f, pad); mshadow::Shape<2> lshape = mshadow::Shape2(param_.batch_size, param_.label_width); label = mshadow::NewTensor(lshape, 0.0f, pad); out_.inst_index = new unsigned[param_.batch_size]; @@ -157,7 +166,7 @@ class BatchAdaptIter: public IIterator { label.dptr_ = NULL; } } -}; // class BatchAdaptIter +}; // class BatchAdaptIter } // namespace io -} // namespace cxxnet +} // namespace mxnet #endif // MXNET_IO_ITER_BATCH_H_ diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc index 0c44a2346e4a..701c28deb4c9 100644 --- a/src/io/iter_image_recordio.cc +++ b/src/io/iter_image_recordio.cc @@ -1,9 +1,9 @@ /*! + * Copyright (c) 2015 by Contributors * \file iter_image_recordio-inl.hpp * \brief recordio data iterator */ -#include #include #include #include @@ -13,11 +13,11 @@ iterator #include #include #include +#include #include "./inst_vector.h" #include "./image_recordio.h" #include "./image_augmenter.h" #include "./iter_batch.h" -#include "../utils/decoder.h" namespace mxnet { namespace io { /*! \brief data structure to hold labels for images */ @@ -31,7 +31,7 @@ class ImageLabelMap { explicit ImageLabelMap(const char *path_imglist, mshadow::index_t label_width, bool silent) { - label_width = label_width; + this->label_width = label_width; image_index_.clear(); label_.clear(); idx2label_.clear(); @@ -116,10 +116,11 @@ struct ImageRecParserParam : public dmlc::Parameter { .describe("Dist worker number."); DMLC_DECLARE_FIELD(dist_worker_rank).set_default(0) .describe("Dist worker rank."); - float input_shape_default = {3, 224, 224}; - DMLC_DECLARE_FIELD(input_shape).set_default(TShape(input_shape_default, input_shape_default + 3)) + index_t input_shape_default[] = {3, 224, 224}; + DMLC_DECLARE_FIELD(input_shape) + .set_default(TShape(input_shape_default, input_shape_default + 3)) .set_expect_ndim(3).enforce_nonzero() - .describe("Input shape of the neural net"); + .describe("Input shape of the neural net"); } }; @@ -143,7 +144,7 @@ class ImageRecordIOParser { } // initialize the parser inline void Init(const std::vector >& kwargs); - + // set record to the head inline void BeforeFirst(void) { return source_->BeforeFirst(); @@ -151,11 +152,12 @@ class ImageRecordIOParser { // parse next set of records, return an array of // instance vector to the user inline bool ParseNext(std::vector *out); + private: // magic nyumber to see prng static const int kRandMagic = 111; /*! \brief parameters */ - ImageRecParserParam param_; + ImageRecParserParam param_; /*! \brief augmenters */ std::vector augmenters_; /*! \brief random samplers */ @@ -164,9 +166,12 @@ class ImageRecordIOParser { dmlc::InputSplit *source_; /*! \brief label information, if any */ ImageLabelMap *label_map_; + /*! \brief temp space */ + mshadow::TensorContainer img_; }; -inline void ImageRecordIOParser::Init(const std::vector >& kwargs) { +inline void ImageRecordIOParser::Init( + const std::vector >& kwargs) { // initialize parameter std::vector > kwargs_left; // init image rec param @@ -185,12 +190,11 @@ inline void ImageRecordIOParser::Init(const std::vectorInit(kwargs_left); + augmenters_[i]->Init(kwargs); prnds_.push_back(new common::RANDOM_ENGINE((i + 1) * kRandMagic)); } - + // handling for hadoop - // TODO, hack const char *ps_rank = getenv("PS_RANK"); if (ps_rank != NULL) { param_.dist_worker_rank = atoi(ps_rank); @@ -205,7 +209,6 @@ inline void ImageRecordIOParser::Init(const std::vector *out_vec) { InstVector &out = (*out_vec)[tid]; out.Clear(); while (reader.NextRecord(&blob)) { + rec.Load(blob.dptr, blob.size); out.Push(static_cast(rec.image_index()), - mshadow::Shape3(param_.input_shape[0], param_.input_shape[0], param_.input_shape[0]), + mshadow::Shape3(param_.input_shape[0], param_.input_shape[1], param_.input_shape[2]), mshadow::Shape1(param_.label_width)); DataInst inst = out.Back(); // turn datainst into tensor - mshadow::Tensor data = inst.data[0].get(); - mshadow::Tensor label = inst.data[1].get(); - augmenters_[tid]->Process(rec.content, rec.content_size, &data, prnd); + mshadow::Tensor data = inst.data[0].get(); + mshadow::Tensor label = inst.data[1].get(); + augmenters_[tid]->Process(rec.content, rec.content_size, &img_, prnds_[tid]); + mshadow::Copy(data, img_); if (label_map_ != NULL) { mshadow::Copy(label, label_map_->Find(rec.image_index())); } else { @@ -259,12 +264,20 @@ struct ImageRecordParam: public dmlc::Parameter { bool shuffle; /*! \brief random seed */ int seed; + /*! \brief mean file string*/ + std::string mean_img; + /*! \brief whether to remain silent */ + bool silent; // declare parameters DMLC_DECLARE_PARAMETER(ImageRecordParam) { DMLC_DECLARE_FIELD(shuffle).set_default(true) .describe("Whether to shuffle data."); DMLC_DECLARE_FIELD(seed).set_default(0) .describe("Random Seed."); + DMLC_DECLARE_FIELD(mean_img).set_default("./data/mean.bin") + .describe("Path to image mean file."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to output information."); } }; @@ -283,8 +296,8 @@ class ImageRecordIter : public IIterator { std::vector > kwargs_left; // init image rec param kwargs_left = param_.InitAllowUnknown(kwargs); - // use the left kwarg to init parser - parser_.Init(kwargs_left); + // use the kwarg to init parser + parser_.Init(kwargs); // init thread iter iter_.set_max_capacity(4); iter_.Init([this](std::vector **dptr) { @@ -294,6 +307,15 @@ class ImageRecordIter : public IIterator { return parser_.ParseNext(*dptr); }, [this]() { parser_.BeforeFirst(); }); + // Check Meanfile + if (param_.mean_img.length() != 0) { + dmlc::Stream *fi = dmlc::Stream::Create(param_.mean_img.c_str(), "r", true); + if (fi == NULL) { + this->CreateMeanImg(); + } else { + delete fi; + } + } inst_ptr_ = 0; } virtual void BeforeFirst(void) { @@ -320,7 +342,8 @@ class ImageRecordIter : public IIterator { } // shuffle instance order if needed if (shuffle_ != 0) { - std::shuffle(inst_order_.begin(), inst_order_.end(), common::RANDOM_ENGINE(kRandMagic + param_.seed)); + std::shuffle(inst_order_.begin(), inst_order_.end(), \ + common::RANDOM_ENGINE(kRandMagic + param_.seed)); } inst_ptr_ = 0; } @@ -332,6 +355,40 @@ class ImageRecordIter : public IIterator { } private: + inline void CreateMeanImg(void) { + if (param_.silent == 0) { + printf("cannot find %s: create mean image, this will take some time...\n", + param_.mean_img.c_str()); + } + time_t start = time(NULL); + uint64_t elapsed = 0; + size_t imcnt = 1; + this->BeforeFirst(); + CHECK(this->Next()) << "input iterator failed."; + // Get the first data + mshadow::Tensor img_tensor = out_.data[0].get(); + meanimg_.Resize(img_tensor.shape_); + mshadow::Copy(meanimg_, img_tensor); + while (this->Next()) { + mshadow::Tensor img_tensor = out_.data[0].get(); + meanimg_ += img_tensor; imcnt += 1; + elapsed = (uint64_t)(time(NULL) - start); + if (imcnt % 1000 == 0 && param_.silent == 0) { + printf("\r \r"); + printf("[%8lu] images processed, %ld sec elapsed", imcnt, elapsed); + fflush(stdout); + } + } + meanimg_ *= (1.0f / imcnt); + + dmlc::Stream *fo = dmlc::Stream::Create(param_.mean_img.c_str(), "w"); + meanimg_.SaveBinary(*fo); + delete fo; + if (param_.silent == 0) { + printf("save mean image to %s..\n", param_.mean_img.c_str()); + } + } + // random magic static const int kRandMagic = 111; // output instance @@ -350,6 +407,8 @@ class ImageRecordIter : public IIterator { dmlc::ThreadedIter > iter_; // parameters ImageRecordParam param_; + // mean image + mshadow::TensorContainer meanimg_; }; DMLC_REGISTER_PARAMETER(ImageRecParserParam); DMLC_REGISTER_PARAMETER(ImageRecordParam); diff --git a/src/utils/decoder.h b/src/utils/decoder.h deleted file mode 100644 index 52db01edee23..000000000000 --- a/src/utils/decoder.h +++ /dev/null @@ -1,128 +0,0 @@ -#ifndef MXNET_UTILS_DECODER_H_ -#define MXNET_UTILS_DECODER_H_ - -#include -#if MXNET_USE_OPENCV_DECODER == 0 - #include - #include - #include -#endif -#include -#include -#if MXNET_USE_OPENCV - #include -#endif - -namespace mxnet { -namespace utils { - -#if MXNET_USE_OPENCV_DECODER == 0 -struct JpegDecoder { -public: - JpegDecoder(void) { - cinfo.err = jpeg_std_error(&jerr.base); - jerr.base.error_exit = jerror_exit; - jerr.base.output_message = joutput_message; - jpeg_create_decompress(&cinfo); - } - // destructor - ~JpegDecoder(void) { - jpeg_destroy_decompress(&cinfo); - } - - inline void Decode(unsigned char *ptr, size_t sz, - mshadow::TensorContainer *p_data) { - if(setjmp(jerr.jmp)) { - jpeg_destroy_decompress(&cinfo); - dmlc::Error("Libjpeg fail to decode"); - } - this->jpeg_mem_src(&cinfo, ptr, sz); - CHECK(jpeg_read_header(&cinfo, TRUE) == JPEG_HEADER_OK) << "libjpeg: failed to decode"; - CHECK(jpeg_start_decompress(&cinfo) == true) << "libjpeg: failed to decode"; - p_data->Resize(mshadow::Shape3(cinfo.output_height, cinfo.output_width, cinfo.output_components)); - JSAMPROW jptr = &((*p_data)[0][0][0]); - while (cinfo.output_scanline < cinfo.output_height) { - CHECK(jpeg_read_scanlines(&cinfo, &jptr, 1) == true) << "libjpeg: failed to decode"; - jptr += cinfo.output_width * cinfo.output_components; - } - CHECK(jpeg_finish_decompress(&cinfo) == true) << "libjpeg: failed to decode"); - } -private: - struct jerror_mgr { - jpeg_error_mgr base; - jmp_buf jmp; - }; - - METHODDEF(void) jerror_exit(j_common_ptr jinfo) { - jerror_mgr* err = (jerror_mgr*)jinfo->err; - longjmp(err->jmp, 1); - } - - METHODDEF(void) joutput_message(j_common_ptr) {} - - static boolean mem_fill_input_buffer_ (j_decompress_ptr cinfo) { - dmlc::Error("JpegDecoder: bad jpeg image"); - return true; - } - - static void mem_skip_input_data_ (j_decompress_ptr cinfo, long num_bytes_) { - jpeg_source_mgr *src = cinfo->src; - size_t num_bytes = static_cast(num_bytes_); - if (num_bytes > 0) { - src->next_input_byte += num_bytes; - CHECK(src->bytes_in_buffer >= num_bytes) << "fail to decode"; - src->bytes_in_buffer -= num_bytes; - } else { - dmlc::Error("JpegDecoder: bad jpeg image"); - - } - } - - static void mem_term_source_ (j_decompress_ptr cinfo) {} - static void mem_init_source_ (j_decompress_ptr cinfo) {} - static boolean jpeg_resync_to_restart_(j_decompress_ptr cinfo, int desired) { - dmlc::Error("JpegDecoder: bad jpeg image"); - return true; - } - void jpeg_mem_src (j_decompress_ptr cinfo, void* buffer, long nbytes) { - src.init_source = mem_init_source_; - src.fill_input_buffer = mem_fill_input_buffer_; - src.skip_input_data = mem_skip_input_data_; - src.resync_to_restart = jpeg_resync_to_restart_; - src.term_source = mem_term_source_; - src.bytes_in_buffer = nbytes; - src.next_input_byte = static_cast(buffer); - cinfo->src = &src; - } - -private: - jpeg_decompress_struct cinfo; - jpeg_source_mgr src; - jerror_mgr jerr; -}; -#endif - -#if MXNET_USE_OPENCV -struct OpenCVDecoder { - void Decode(unsigned char *ptr, size_t sz, mshadow::TensorContainer *p_data) { - cv::Mat buf(1, sz, CV_8U, ptr); - cv::Mat res = cv::imdecode(buf, 1); - CHECK(res.data != NULL) << "decoding fail"; - p_data->Resize(mshadow::Shape3(res.rows, res.cols, 3)); - for (int y = 0; y < res.rows; ++y) { - for (int x = 0; x < res.cols; ++x) { - cv::Vec3b bgr = res.at(y, x); - // store in RGB order - (*p_data)[y][x][2] = bgr[0]; - (*p_data)[y][x][1] = bgr[1]; - (*p_data)[y][x][0] = bgr[2]; - } - } - res.release(); - } -}; -#endif -} // namespace utils -} // namespace mxnet - -#endif // DECODER_H diff --git a/tests/python/test_io.py b/tests/python/test_io.py index 991a4813033e..8706b062e5d7 100644 --- a/tests/python/test_io.py +++ b/tests/python/test_io.py @@ -5,28 +5,29 @@ import pickle as pickle import sys import get_data +from PIL import Image -# prepare data -get_data.GetMNIST_ubyte() -batch_size = 100 -train_dataiter = mx.io.MNISTIter( - image="data/train-images-idx3-ubyte", - label="data/train-labels-idx1-ubyte", - batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) -val_dataiter = mx.io.MNISTIter( - image="data/t10k-images-idx3-ubyte", - label="data/t10k-labels-idx1-ubyte", - batch_size=batch_size, shuffle=0, flat=1, silent=0) +def test_MNISTIter(): + # prepare data + get_data.GetMNIST_ubyte() -def test_MNISTIter_loop(): + batch_size = 100 + train_dataiter = mx.io.MNISTIter( + image="data/train-images-idx3-ubyte", + label="data/train-labels-idx1-ubyte", + batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) + val_dataiter = mx.io.MNISTIter( + image="data/t10k-images-idx3-ubyte", + label="data/t10k-labels-idx1-ubyte", + batch_size=batch_size, shuffle=0, flat=1, silent=0) + # test_loop nbatch = 60000 / batch_size batch_count = 0 for data, label in train_dataiter: batch_count += 1 assert(nbatch == batch_count) - -def test_MNISTIter_reset(): + # test_reset train_dataiter.reset() train_dataiter.iter_next() label_0 = train_dataiter.getlabel().numpy.flatten() @@ -40,17 +41,40 @@ def test_MNISTIter_reset(): assert(sum(label_0 - label_1) == 0) def test_ImageRecIter(): - dataiter = mx.io.ImageRecordIter(path_imgrec="data/val_cxxnet.rec", - image_mean="data/val_cxxnet_mean.bin", + dataiter = mx.io.ImageRecordIter( + #path_imglist="data/smallset/val_cxxnet5000.txt", + path_imgrec="data/val_cxxnet.rec", + #mean_img="data/smallset/image_net_mean.bin", rand_crop=True, - rand_mirror=True, - input_shape="3,224,224", - batch_size=128) - - - - + mirror=True, + input_shape=(3,227,227), + batch_size=100, + nthread=1, + seed=10) + # Test label read + labelcount = [0 for i in range(1000)] + batchcount = 0 + for data, label in dataiter: + npdata = data.numpy + print npdata[0,:,:,:] + imgdata = np.zeros([227, 227, 3], dtype=np.uint8) + imgdata[:,:,0] = npdata[10,2,:,:] + imgdata[:,:,1] = npdata[10,1,:,:] + imgdata[:,:,2] = npdata[10,0,:,:] + img = Image.fromarray(imgdata) + imgpath = "data/smallset/test_3.jpg" + img.save(imgpath, format='JPEG') + exit(0) + print batchcount + sys.stdout.flush() + batchcount += 1 + nplabel = label.numpy + for i in range(nplabel.shape[0]): + labelcount[int(nplabel[i])] += 1 + # Test image +if __name__ == '__main__': + test_ImageRecIter() From 36aab11489acf3a9088b73abffbc18ca098fd534 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Tue, 8 Sep 2015 02:11:46 +0800 Subject: [PATCH 13/15] checked cifar, work when nthread=1 --- dmlc-core | 2 +- example/cifar10/cifar10.py | 101 +++++++++++++++++++++++++++++++++++++ tests/python/get_data.py | 7 +++ tests/python/test_io.py | 36 ++++++++++--- 4 files changed, 139 insertions(+), 7 deletions(-) diff --git a/dmlc-core b/dmlc-core index 75f1950d386d..7d3c78428819 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit 75f1950d386d033b0b64919017515d27e698962a +Subproject commit 7d3c78428819dc84c4da8ae1f302ba6c6a235a5d diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py index 20694b7064da..95d7810cb7e9 100644 --- a/example/cifar10/cifar10.py +++ b/example/cifar10/cifar10.py @@ -144,8 +144,109 @@ def RandomInit(narray): flatten = mx.symbol.Flatten(data=pool, name="flatten1") fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10, name="fc1") loss = mx.symbol.Softmax(data=fc, name="softmax") +args_list = loss.list_arguments() data_shape = (128, 3, 28, 28) arg_shapes, out_shapes, aux_shapes = loss.infer_shape(data=data_shape) +arg_narrays = [mx.narray.create(shape, ctx=mx.Context("gpu")) for shape in arg_shapes] +grad_narrays = [mx.narray.create(shape, ctx=mx.Context("gpu")) for shape in arg_shapes] +inputs = dict(zip(args_list, arg_narrays)) + +name2shape = dict(zip(args_list, arg_shapes)) +pred = mx.narray.create(out_shapes[0]) + +np.random.seed(0) +# set random weight +for name, narray in inputs.items(): + if "weight" in name: + tmp = mx.narray.create(name2shape[name]) + tmp.numpy[:] = np.random.uniform(-0.07, 0.07, name2shape[name]) + tmp.copyto(narray) + if "bias" in name: + narray[:] = 0.0 + +# bind executer +# TODO(bing): think of a better bind interface +executor = loss.bind(mx.Context('gpu'), arg_narrays, grad_narrays) +# update + +out_narray = executor.heads()[0] +grad_narray = mx.narray.create(out_narray.shape) + +epoch = 9 +lr = 0.1 +wd = 0.0004 + +def Update(grad, weight): + weight[:] -= lr * grad / batch_size + +block = list(zip(grad_narrays, arg_narrays)) + +#check data +get_data.GetCifar10() +train_dataiter = mx.io.ImageRecordIter( + path_imgrec="data/cifar/train.rec", + mean_img="data/cifar10/cifar_mean.bin", + rand_crop=True, + rand_mirror=True, + input_shape=(3,28,28), + batch_size=128, + nthread=1) +test_dataiter = mx.io.ImageRecordIter( + path_imgrec="data/cifar/test.rec", + mean_img="data/cifar/cifar_mean.bin", + rand_crop=True, + rand_mirror=True, + input_shape=(3,28,28), + batch_size=100, + nthread=1) + +tmp_label = mx.narray.create(name2shape["sm_label"]) + +def test_cifar(): + acc_train = 0. + acc_val = 0. + for i in range(epoch): + # train + print("Epoch %d" % i) + train_acc = 0.0 + val_acc = 0.0 + train_nbatch = 0 + val_nbatch = 0 + for data, label in train_dataiter: + data = data + tmp_label.numpy[:] = label.numpy.reshape(tmp_label.shape) + data.copyto(inputs["data"]) + tmp_label.copyto(inputs["sm_label"]) + executor.forward() + out_narray.copyto(pred) + train_acc += CalAcc(pred.numpy, label.numpy.flatten()) + train_nbatch += 1 + out_narray.copyto(grad_narray) + executor.backward([grad_narray]) + + for grad, weight in block: + Update(grad, weight) + + # evaluate + for data, label in val_dataiter: + data = data + label = label.numpy.flatten() + data.copyto(inputs["data"]) + executor.forward() + out_narray.copyto(pred) + val_acc += CalAcc(pred.numpy, label) + val_nbatch += 1 + acc_train = train_acc / train_nbatch + acc_val = val_acc / val_nbatch + print("Train Acc: ", train_acc / train_nbatch) + print("Valid Acc: ", val_acc / val_nbatch) + train_dataiter.reset() + val_dataiter.reset() + assert(acc_train > 0.98) + assert(acc_val > 0.97) + +if __name__ == "__main__": + test_cifar() diff --git a/tests/python/get_data.py b/tests/python/get_data.py index 82d25d9072fb..828809f3e757 100644 --- a/tests/python/get_data.py +++ b/tests/python/get_data.py @@ -27,3 +27,10 @@ def GetMNIST_ubyte(): os.system("wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz -P data/") os.system("gunzip data/t10k-labels-idx1-ubyte.gz") +# download cifar +def GetCifar10(): + if not os.path.isdir("data/"): + os.system("mkdir data/") + if not os.path.exists('data/cifar10.zip'): + os.system("wget http://webdocs.cs.ualberta.ca/~bx3/data/cifar10.zip -P data/") + os.system("unzip data/cifar10.zip") diff --git a/tests/python/test_io.py b/tests/python/test_io.py index 8706b062e5d7..a348725c4c72 100644 --- a/tests/python/test_io.py +++ b/tests/python/test_io.py @@ -42,16 +42,14 @@ def test_MNISTIter(): def test_ImageRecIter(): dataiter = mx.io.ImageRecordIter( - #path_imglist="data/smallset/val_cxxnet5000.txt", path_imgrec="data/val_cxxnet.rec", - #mean_img="data/smallset/image_net_mean.bin", + mean_img="data/smallset/image_net_mean.bin", rand_crop=True, mirror=True, input_shape=(3,227,227), batch_size=100, nthread=1, seed=10) - # Test label read labelcount = [0 for i in range(1000)] batchcount = 0 for data, label in dataiter: @@ -64,7 +62,6 @@ def test_ImageRecIter(): img = Image.fromarray(imgdata) imgpath = "data/smallset/test_3.jpg" img.save(imgpath, format='JPEG') - exit(0) print batchcount sys.stdout.flush() @@ -72,9 +69,36 @@ def test_ImageRecIter(): nplabel = label.numpy for i in range(nplabel.shape[0]): labelcount[int(nplabel[i])] += 1 - # Test image +def test_Cifar10Rec(): + dataiter = mx.io.ImageRecordIter( + path_imgrec="data/cifar/test.rec", + mean_img="data/cifar/cifar10_mean.bin", + rand_crop=True, + rand_mirror=True, + input_shape=(3,28,28), + batch_size=100, + nthread=1) + labelcount = [0 for i in range(10)] + batchcount = 0 + for data, label in dataiter: + npdata = data.numpy + print npdata[0,:,:,:] + imgdata = np.zeros([28, 28, 3], dtype=np.uint8) + imgdata[:,:,0] = npdata[0,2,:,:] + imgdata[:,:,1] = npdata[0,1,:,:] + imgdata[:,:,2] = npdata[0,0,:,:] + img = Image.fromarray(imgdata) + imgpath = "data/cifar/test.jpg" + img.save(imgpath, format='JPEG') + exit(0) + print batchcount + sys.stdout.flush() + batchcount += 1 + nplabel = label.numpy + for i in range(nplabel.shape[0]): + labelcount[int(nplabel[i])] += 1 if __name__ == '__main__': - test_ImageRecIter() + test_Cifar10Rec() From ff58b247bd65b6ea729faac1ca88543b822d58d9 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Tue, 8 Sep 2015 02:33:09 +0800 Subject: [PATCH 14/15] do not run auto test_io, just visualize test --- tests/python/test_io.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/python/test_io.py b/tests/python/test_io.py index 770caf76c070..1156782bdfef 100644 --- a/tests/python/test_io.py +++ b/tests/python/test_io.py @@ -5,7 +5,7 @@ import pickle as pickle import sys import get_data -from PIL import Image +#from PIL import Image def test_MNISTIter(): @@ -40,6 +40,7 @@ def test_MNISTIter(): label_1 = train_dataiter.getlabel().numpy.flatten() assert(sum(label_0 - label_1) == 0) +''' def test_ImageRecIter(): dataiter = mx.io.ImageRecordIter( path_imgrec="data/val_cxxnet.rec", @@ -82,7 +83,6 @@ def test_Cifar10Rec(): labelcount = [0 for i in range(10)] batchcount = 0 for data, label in dataiter: - ''' npdata = data.numpy print npdata[0,:,:,:] imgdata = np.zeros([28, 28, 3], dtype=np.uint8) @@ -93,7 +93,6 @@ def test_Cifar10Rec(): imgpath = "data/cifar/test.jpg" img.save(imgpath, format='JPEG') exit(0) - ''' print "Batch: ", batchcount sys.stdout.flush() batchcount += 1 @@ -102,5 +101,4 @@ def test_Cifar10Rec(): labelcount[int(nplabel[i])] += 1 for i in range(10): assert(labelcount[i] == 1000) -if __name__ == '__main__': - test_Cifar10Rec() +''' From 528b5a4f037224578372d3d0374edc3c82136a63 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Tue, 8 Sep 2015 02:43:28 +0800 Subject: [PATCH 15/15] one path bug --- example/cifar10/cifar10.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py index 95d7810cb7e9..14d9bd1b8971 100644 --- a/example/cifar10/cifar10.py +++ b/example/cifar10/cifar10.py @@ -188,7 +188,7 @@ def Update(grad, weight): get_data.GetCifar10() train_dataiter = mx.io.ImageRecordIter( path_imgrec="data/cifar/train.rec", - mean_img="data/cifar10/cifar_mean.bin", + mean_img="data/cifar/cifar_mean.bin", rand_crop=True, rand_mirror=True, input_shape=(3,28,28),