add custom init grad for backward function #31540
@@ -36,7 +36,7 @@ DECLARE_bool(sort_sum_gradient);
namespace paddle {
namespace imperative {

-void BasicEngine::Init(VarBase* var, bool retain_graph) {
+void BasicEngine::Init(VarBase* var, bool retain_graph, VarBase* grad_tensor) {
  retain_graph_ = retain_graph;
  init_node_ = var->GradVarBase()->GradNode();
  PADDLE_ENFORCE_EQ(var->GradVarBase()->GraphIsFreed(), false,
@@ -75,9 +75,15 @@ void BasicEngine::Init(VarBase* var, bool retain_graph) {
          << " as stop_gradient false";
  var->GradVarBase()->InnerSetOverridedStopGradient(false);
  auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place());
-  grad_var->Resize(fwd_var.dims());
-  grad_var->mutable_data(fwd_var.place(), fwd_var.type());
-  operators::math::set_constant(*dev_ctx, grad_var, 1.0);
+  if (grad_tensor == nullptr) {
+    grad_var->Resize(fwd_var.dims());
+    grad_var->mutable_data(fwd_var.place(), fwd_var.type());
+    operators::math::set_constant(*dev_ctx, grad_var, 1.0);
+  } else {
+    paddle::framework::TensorCopy(
+        grad_tensor->Var().Get<framework::LoDTensor>(), fwd_var.place(),
+        *dev_ctx, grad_var);
+  }
}

void BasicEngine::CheckBackwardInputs(const OpBase& op) {

Review comment (on the TensorCopy branch): Do we need a check here that the dimensions of grad_tensor are consistent with the dimensions of var?
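In user-facing terms, this hunk changes how the reverse pass is seeded: with no grad_tensor, the initial gradient of the starting Tensor is filled with 1.0 (same shape and dtype as the forward output); with a grad_tensor, that tensor is copied in as the seed, so the computed gradients become the vector-Jacobian product with that seed. A minimal dygraph sketch of the intended behavior (illustrative only, not part of the diff; printed values assume the gradient-accumulation semantics described in the docstring below):

```python
import paddle

x = paddle.to_tensor([1., 2., 3.], stop_gradient=False)
y = x * x                          # dy_i/dx_i = 2 * x_i

# Default seed: ones_like(y)  ->  x.grad == 2 * x
y.backward()
print(x.grad)                      # roughly [2., 4., 6.]

x.clear_grad()
y = x * x

# Custom seed: grad_tensor    ->  x.grad == seed * 2 * x
seed = paddle.to_tensor([10., 10., 10.])
y.backward(seed)
print(x.grad)                      # roughly [20., 40., 60.]
```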
@@ -133,7 +133,7 @@ def set_value(self, value):
                         framework._current_expected_place())

    @framework.dygraph_only
-    def backward(self, retain_graph=False):
+    def backward(self, grad_tensor=None, retain_graph=False):
        """
        Run backward of current Graph which starts from current Tensor.

Review comment: This can only handle a single tensor, right? What if multiple grad tensors need to be handled?
Reply: Is handling multiple grad tensors a hard requirement here?
Reply: Could they be processed in a loop? The standalone backward interface is used quite infrequently. (See the sketch after this file's diff.)
@@ -142,17 +142,22 @@ def backward(self, retain_graph=False):
        You can clear gradient by ``Tensor.clear_grad()`` .

        Args:
+            grad_tensor(Tensor, optional): initial gradient values of the current Tensor. If `grad_tensor` is None,
+                the initial gradient values of the current Tensor would be a Tensor filled with 1.0;
+                if `grad_tensor` is not None, it must have the same shape as the current Tensor.
+                The default value is None.
+
            retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would
                like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter
                :code:`retain_graph` to True, then the grads will be retained. Thus, setting it to False is much more memory-efficient.
                Defaults to False.

        Returns:
            NoneType: None

        Examples:
            .. code-block:: python

                import paddle
                x = paddle.to_tensor(5., stop_gradient=False)
                for i in range(5):
                    y = paddle.pow(x, 4.0)
@@ -168,15 +173,34 @@
                print("{}".format(x.grad))
                # 0.

+                grad_tensor = paddle.to_tensor(2.)
+                for i in range(5):
+                    y = paddle.pow(x, 4.0)
+                    y.backward(grad_tensor)
+                    print("{}: {}".format(i, x.grad))
+                # 0: [1000.]
+                # 1: [2000.]
+                # 2: [3000.]
+                # 3: [4000.]
+                # 4: [5000.]
+
        """
        if framework.in_dygraph_mode():
+            if grad_tensor is not None:
+                assert isinstance(
+                    grad_tensor, core.
+                    VarBase), "The type of grad_tensor must be paddle.VarBase"
+
+                assert grad_tensor.shape == self.shape, "Variable shape does not match: grad_tensor [ {} ] with shape {} does not match Variable [ {} ] with shape {}".format(
+                    grad_tensor.name, grad_tensor.shape, self.name, self.shape)
+
            if paddle.is_compiled_with_xpu():
                # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
                scaled_loss = scale_loss(self)
                scaled_loss._run_backward(framework._dygraph_tracer(),
-                                         retain_graph)
+                                         retain_graph, grad_tensor)
            else:
-                self._run_backward(framework._dygraph_tracer(), retain_graph)
+                self._run_backward(framework._dygraph_tracer(), retain_graph,
+                                   grad_tensor)
        else:
            raise ValueError(
                "Variable.backward() is only available in DyGraph mode")
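Regarding the review thread above about multiple grad tensors: gradients accumulate across backward calls (the docstring example relies on this), so multiple outputs can be handled by looping over (output, seed) pairs, as the reviewer suggests. A hedged sketch, not part of the PR; retain_graph=True is used defensively in case the outputs share parts of the graph:

```python
import paddle

x = paddle.to_tensor(3., stop_gradient=False)
outputs = [x * 2., x * x]                       # two outputs built from the same leaf
seeds = [paddle.to_tensor(1.), paddle.to_tensor(10.)]

# One backward call per (output, seed) pair; x.grad accumulates the contributions:
# d(2x)/dx * 1 + d(x^2)/dx * 10 = 2 + 6 * 10 = 62
for out, seed in zip(outputs, seeds):
    out.backward(seed, retain_graph=True)

print(x.grad)  # roughly 62.
```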
@@ -0,0 +1,53 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+
+import paddle
+import paddle.fluid.dygraph as dg
+from op_test import OpTest
+
+
+class TestBackward(unittest.TestCase):
+    def setUp(self):
+        self._dtypes = ["float32", "float64"]
+        self._places = [paddle.CPUPlace()]
+        if paddle.is_compiled_with_cuda():
+            self._places.append(paddle.CUDAPlace(0))
+
+    def test_all_positive(self):
+        for dtype in self._dtypes:
+            x = np.random.random([2, 100]).astype(dtype)
+            y = np.random.random([100, 2]).astype(dtype)
+            z = np.matmul(x, y)
+            grad = np.random.random(z.shape).astype(dtype)
+            for place in self._places:
+                with dg.guard(place):
+                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
+                    y_tensor = paddle.to_tensor(y)
+                    z_tensor = paddle.matmul(x_tensor, y_tensor)
+
+                    grad_tensor = paddle.to_tensor(grad)
+                    z_tensor.backward(grad_tensor)
+
+                    x_grad = np.matmul(grad, y.T)
+
+                    self.assertTrue(np.allclose(x_grad, x_tensor.grad))
+
+
+if __name__ == '__main__':
+    unittest.main()
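The unit test above only checks x's gradient. For completeness, a quick manual check (not part of the PR) of both matmul gradients under the same API, using the identities dL/dx = g @ y.T and dL/dy = x.T @ g for z = x @ y with upstream gradient g:

```python
import numpy as np
import paddle

x = np.random.random([2, 100]).astype("float32")
y = np.random.random([100, 2]).astype("float32")
g = np.random.random([2, 2]).astype("float32")

x_t = paddle.to_tensor(x, stop_gradient=False)
y_t = paddle.to_tensor(y, stop_gradient=False)   # unlike the test, y also requires grad
z_t = paddle.matmul(x_t, y_t)

z_t.backward(paddle.to_tensor(g))                # seed the backward pass with g

assert np.allclose(x_t.grad, np.matmul(g, y.T), atol=1e-5)   # dL/dx = g @ y.T
assert np.allclose(y_t.grad, np.matmul(x.T, g), atol=1e-5)   # dL/dy = x.T @ g
```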
Review comment (on the C++ change): grad_tensor could be given a default argument of nullptr.
Reply: The default argument is nullptr at the declaration.