From aaccdc381958063b18013d88d347081762ad6fb9 Mon Sep 17 00:00:00 2001 From: gstu1130 Date: Wed, 3 Mar 2021 17:05:46 -0800 Subject: [PATCH] Add profiler doc --- docs/development/profiler.md | 62 ++++++++++++++++++++++++++++++++++++ docs/mkdocs.yml | 1 + 2 files changed, 63 insertions(+) create mode 100644 docs/development/profiler.md diff --git a/docs/development/profiler.md b/docs/development/profiler.md new file mode 100644 index 00000000000..3a02ae8f0e5 --- /dev/null +++ b/docs/development/profiler.md @@ -0,0 +1,62 @@ +## Profiler (Experimental) + +Currently, DJL supports experimental profilers for developers who want to +investigate the performance of operator execution as well as memory consumption. +The profilers come directly from the engines, and DJL just exposes them. +As a result, different engines have different APIs and produce different output formats. +This feature is still a work in progress. +In the future, we are considering designing a unified API and a unified output format. + +### MXNet + +Setting the following environment variable generates `profile.json` after the code is executed. + +``` +export MXNET_PROFILER_AUTOSTART=1 +``` + +You can view it in a browser using a trace consumer like `chrome://tracing`. Here is a snapshot that shows the sample output. +![img](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tutorials/python/profiler/profiler_output_chrome.png) + +### PyTorch + +DJL has integrated the PyTorch C++ profiler API and exposes the `JniUtils.startProfile` and `JniUtils.stopProfile(outputFile)` Java APIs. +`JniUtils.startProfile` takes the `useCuda(boolean)`, `recordShape(boolean)` and `profileMemory(boolean)` arguments, in that order. +`useCuda` indicates whether the profiler enables timing of CUDA events using the cudaEvent API. +`recordShape` indicates whether information about input dimensions will be collected. +`profileMemory` indicates whether the profiler reports memory usage. +`JniUtils.stopProfile` takes an `outputFile` argument of type String. 
+ +Wrap the code snippet you want to profile between `JniUtils.startProfile` and `JniUtils.stopProfile`. +Here is an example. + +``` +try (ZooModel model = ModelZoo.loadModel(criteria)) { + try (Predictor predictor = model.newPredictor()) { + Image image = ImageFactory.getInstance().fromNDArray(manager.zeros(new Shape(3, 224, 224), DataType.UINT8)); + + JniUtils.startProfile(false, true, true); + predictor.predict(image); + JniUtils.stopProfile(outputFile); + } catch (TranslateException e) { + e.printStackTrace(); + } +} +``` + +The output is composed of operator execution records. +Each record contains `name` (operator name), `dur` (time duration), `shape` (input shapes), and `cpu mem` (CPU memory footprint). + +``` +{ + "name": "aten::empty", + "ph": "X", + "ts": 24528.313000, + "dur": 5.246000, + "tid": 1, + "pid": "CPU Functions", + "shape": [[], [], [], [], [], []], + "cpu mem": "0 b", + "args": {} +} +``` diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 5699539d81a..6bf4f9c0cf7 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -76,6 +76,7 @@ nav: - 'docs/development/memory_management.md' - 'docs/development/inference_performance_optimization.md' - 'docs/development/benchmark_with_djl.md' + - 'docs/development/profiler.md' - DJL Community: - 'docs/forums.md' - 'leaders.md'