Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Cherry-pick 2.2] Move the ASP training API to paddle.static.sparsity. (#36525) #36860

Merged
merged 1 commit into from
Oct 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions python/paddle/fluid/contrib/sparsity/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
from .utils import check_sparsity
from .utils import MaskAlgo
from .utils import CheckMethod
from .asp import decorate, prune_model
from .asp import set_excluded_layers, reset_excluded_layers
from .asp import decorate
from .asp import prune_model
from .asp import set_excluded_layers
from .asp import reset_excluded_layers

__all__ = [
'calculate_density', 'check_mask_1d', 'get_mask_1d', 'check_mask_2d',
Expand Down
162 changes: 111 additions & 51 deletions python/paddle/fluid/contrib/sparsity/asp.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@
import copy
import numpy as np
import paddle
from paddle.fluid import framework, global_scope, program_guard, layers
from paddle.fluid import global_scope, program_guard, layers
from paddle.fluid.initializer import ConstantInitializer
from paddle.fluid.contrib import sparsity
from paddle.fluid import core

__all__ = [
'decorate', 'prune_model', 'set_excluded_layers', 'reset_excluded_layers'
Expand All @@ -36,6 +35,35 @@ def set_excluded_layers(main_program, param_names):
Args:
main_program (Program): Program with model definition and its parameters.
param_names (list): A list containing the names of the parameters to exclude.
Examples:
.. code-block:: python

import paddle
from paddle.static import sparsity

paddle.enable_static()

main_program = paddle.static.Program()
startup_program = paddle.static.Program()

with paddle.static.program_guard(main_program, startup_program):
input_data = paddle.static.data(name='data', shape=[None, 128])
label = paddle.static.data(name='label', shape=[None, 10])
hidden = paddle.static.nn.fc(x=input_data, num_flatten_dims=-1, size=32, activation=None, name="need_sparse_fc")
hidden = paddle.static.nn.fc(x=hidden, num_flatten_dims=-1, size=32, activation=None, name="need_dense_fc")
prob = paddle.static.nn.fc(x=hidden, num_flatten_dims=-1, size=10, activation=None)
loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))

# Set up the layers to be excluded from the ASP workflow.
# Please note, excluded_layers must be set before calling `optimizer.minimize()`.
sparsity.set_excluded_layers(main_program, ["need_dense_fc"])

optimizer = paddle.optimizer.SGD(learning_rate=0.1)
optimizer = paddle.static.amp.decorate(optimizer )
# Calling sparsity.decorate() to wrap minimize() in optimizer, which
# will insert necessary masking operations for ASP workflow.
optimizer = sparsity.decorate(optimizer)
optimizer.minimize(loss, startup_program)
"""
ASPHelper.set_excluded_layers(
main_program=main_program, param_names=param_names)
Expand All @@ -48,6 +76,33 @@ def reset_excluded_layers(main_program=None):

Args:
main_program (Program, optional): Program with model definition and its parameters.
Examples:
.. code-block:: python

import paddle
from paddle.static import sparsity

paddle.enable_static()

main_program = paddle.static.Program()
startup_program = paddle.static.Program()

with paddle.static.program_guard(main_program, startup_program):
input_data = paddle.static.data(name='data', shape=[None, 128])
label = paddle.static.data(name='label', shape=[None, 10])
hidden = paddle.static.nn.fc(x=input_data, num_flatten_dims=-1, size=32, activation=None, name="my_first_fc")
hidden = paddle.static.nn.fc(x=hidden, num_flatten_dims=-1, size=32, activation=None, name="my_second_fc")
prob = paddle.static.nn.fc(x=hidden, num_flatten_dims=-1, size=10, activation=None)
loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))

# Set up the layers to be excluded from the ASP workflow.
# Please note, excluded_layers must be set before calling `optimizer.minimize()`.
sparsity.set_excluded_layers(main_program, ["my_second_fc"])
# Now the weights of "my_second_fc" would not be included in Automatic SParsity's workflow.

# Reset excluded_layers, all FC layers would be included into Automatic SParsity's workflow.
# Please note, reset_excluded_layers also must be called before calling `optimizer.minimize()`.
sparsity.reset_excluded_layers(main_program)
"""
ASPHelper.reset_excluded_layers(main_program=main_program)

Expand All @@ -65,22 +120,21 @@ def decorate(optimizer):
.. code-block:: python

import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib import sparsity
from paddle.static import sparsity

main_program = fluid.Program()
startup_program = fluid.Program()
main_program = paddle.static.Program()
startup_program = paddle.static.Program()

paddle.enable_static()

with fluid.program_guard(main_program, startup_program):
input_data = fluid.layers.data(name='data', shape=[None, 128])
label = fluid.layers.data(name='label', shape=[None, 10])
hidden = fluid.layers.fc(input=input_data, num_flatten_dims=-1, size=32, act=None)
prob = fluid.layers.fc(input=hidden, num_flatten_dims=-1, size=10, act=None)
loss = fluid.layers.mean(fluid.layers.square_error_cost(prob, label))
with paddle.static.program_guard(main_program, startup_program):
input_data = paddle.static.data(name='data', shape=[None, 128])
label = paddle.static.data(name='label', shape=[None, 10])
hidden = paddle.static.nn.fc(x=input_data, num_flatten_dims=-1, size=32, activation=None)
prob = paddle.static.nn.fc(x=hidden, num_flatten_dims=-1, size=10, activation=None)
loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))

optimizer = fluid.optimizer.SGD(learning_rate=0.1)
optimizer = paddle.optimizer.SGD(learning_rate=0.1)
optimizer = sparsity.decorate(optimizer)
# if do sparse training with Fleet, please replace above decorate with:
# strategy = paddle.distributed.fleet.DistributedStrategy()
Expand All @@ -92,15 +146,14 @@ def decorate(optimizer):
return ASPHelper.decorate(optimizer)


def prune_model(place,
main_program=None,
def prune_model(main_program=None,
n=2,
m=4,
func_name=sparsity.MaskAlgo.MASK_1D,
mask_algo='mask_1d',
with_mask=True):
r"""
Pruning parameters of supported layers in :attr:`main_program` via
specified mask generation function given by :attr:`func_name`. This
specified mask generation function given by :attr:`mask_algo`. This
function supports both training and inference controlled by :attr:`with_mask`.
If :attr:`with_mask` is True, it would also prune parameter related ASP mask Variables,
else only prunes parameters.
Expand All @@ -114,62 +167,70 @@ def prune_model(place,
inference only. To obtain OptimizerWithSparsityGuarantee, please see `sparsity.decorate()`.

Args:
place (fluid.CPUPlace()|fluid.CUDAPlace(N)): Device place for pruned parameter and mask Variables, and N means the GPU's id. It should be the same as created instance of Executor.
main_program (Program, optional): Program with model definition and its parameters. Default is `paddle.static.default_main_program()`.
n (int): n of `n:m` sparse pattern.
m (int): m of `n:m` sparse pattern.
func_name (MaskAlgo, optional): The function name to generate spase mask. Default is `MaskAlgo.MASK_1D`. All options please refer to `MaskAlgo`.
mask_algo (string, optional): The name of the function used to generate the sparse mask. Default is `mask_1d`.
The valid inputs are 'mask_1d', 'mask_2d_greedy' and 'mask_2d_best'.
with_mask (bool, optional): Whether to also prune the mask Variables related to the parameters. True prunes them as well; False does not. Default is True.
Returns:
dictionary: A dictionary with key: `parameter name` (string) and value: its corresponding mask Variable.
Examples:
.. code-block:: python

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.contrib import sparsity
from paddle.static import sparsity

paddle.enable_static()

main_program = fluid.Program()
startup_program = fluid.Program()
main_program = paddle.static.Program()
startup_program = paddle.static.Program()

place = paddle.CPUPlace()
if core.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)

with fluid.program_guard(main_program, startup_program):
input_data = fluid.layers.data(name='data', shape=[None, 128])
label = fluid.layers.data(name='label', shape=[None, 10])
hidden = fluid.layers.fc(input=input_data, num_flatten_dims=-1, size=32, act=None, name="need_sparse")
hidden = fluid.layers.fc(input=hidden, num_flatten_dims=-1, size=32, act=None, name="need_dense")
prob = fluid.layers.fc(input=hidden, num_flatten_dims=-1, size=10, act=None)
loss = fluid.layers.mean(fluid.layers.square_error_cost(prob, label))
with paddle.static.program_guard(main_program, startup_program):
input_data = paddle.static.data(name='data', shape=[None, 128])
label = paddle.static.data(name='label', shape=[None, 10])
hidden = paddle.static.nn.fc(x=input_data, num_flatten_dims=-1, size=32, activation=None, name="need_sparse_fc")
hidden = paddle.static.nn.fc(x=hidden, num_flatten_dims=-1, size=32, activation=None, name="need_dense_fc")
prob = paddle.static.nn.fc(x=hidden, num_flatten_dims=-1, size=10, activation=None)
loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))

# Set up the layers to be excluded from the ASP workflow.
# Please note, excluded_layers must be set before calling `optimizer.minimize()`.
sparsity.set_excluded_layers(main_program, ["need_dense"])
sparsity.set_excluded_layers(main_program, ["need_dense_fc"])

optimizer = fluid.optimizer.SGD(learning_rate=0.1)
optimizer = fluid.contrib.mixed_precision.decorator.decorate(optimizer )
optimizer = paddle.optimizer.SGD(learning_rate=0.1)
optimizer = paddle.static.amp.decorate(optimizer )
# Calling sparsity.decorate() to wrap minimize() in optimizer, which
# will insert necessary masking operations for ASP workflow.
optimizer = sparsity.decorate(optimizer)
optimizer.minimize(loss, startup_program)

exe = fluid.Executor(place)
device = paddle.device.get_device()
place = paddle.set_device(device)

exe = paddle.static.Executor(place)
exe.run(startup_program)

# Must call `exe.run(startup_program)` first before calling `sparsity.prune_model`
sparsity.prune_model(place, main_program, func_name=sparsity.MaskAlgo.MASK_2D_BEST)
sparsity.prune_model(main_program, mask_algo='mask_2d_best')
"""
device = paddle.device.get_device()
place = paddle.set_device(device)

MaskAlgo_mapping = {
'mask_1d': sparsity.MaskAlgo.MASK_1D,
'mask_2d_greedy': sparsity.MaskAlgo.MASK_2D_GREEDY,
'mask_2d_best': sparsity.MaskAlgo.MASK_2D_BEST
}
assert (mask_algo in MaskAlgo_mapping), \
'The "mask_algo" should be one of ["mask_1d", "mask_2d_greedy", "mask_2d_best"]'

return ASPHelper.prune_model(
place=place,
main_program=main_program,
n=n,
m=m,
func_name=func_name,
mask_algo=MaskAlgo_mapping[mask_algo],
with_mask=with_mask)


Expand Down Expand Up @@ -256,12 +317,12 @@ def prune_model(cls,
main_program=None,
n=2,
m=4,
func_name=sparsity.MaskAlgo.MASK_1D,
mask_algo=sparsity.MaskAlgo.MASK_1D,
with_mask=True):
r"""
This is the implementation of `sparsity.prune_model`, for details please see explanation in `sparsity.prune_model`.
"""
checked_func_name = sparsity.CheckMethod.get_checking_method(func_name)
checked_func_name = sparsity.CheckMethod.get_checking_method(mask_algo)

if main_program is None:
main_program = paddle.static.default_main_program()
Expand All @@ -284,7 +345,7 @@ def prune_model(cls,
# matrices before invoking create_mask. Then we transpose the resulting mask to make
# sure its shape is the same as the input weight.
weight_sparse_mask = sparsity.create_mask(
weight_nparray.T, func_name=func_name, n=n, m=m).T
weight_nparray.T, func_name=mask_algo, n=n, m=m).T
weight_pruned_nparray = np.multiply(weight_nparray,
weight_sparse_mask)
weight_tensor.set(weight_pruned_nparray, place)
Expand Down Expand Up @@ -347,15 +408,14 @@ def _is_supported_layer(cls, main_program, param_name):
Examples:
.. code-block:: python

import paddle.fluid as fluid
from paddle.fluid.contrib.sparsity.asp import ASPHelper
from paddle.static.sparsity.asp import ASPHelper

main_program = fluid.Program()
startup_program = fluid.Program()
main_program = paddle.static.Program()
startup_program = paddle.static.Program()

with fluid.program_guard(main_program, startup_program):
input_data = fluid.layers.data(name='data', shape=[None, 128])
fc = fluid.layers.fc(input=input_data, num_flatten_dims=-1, size=32, act=None)
with paddle.static.program_guard(main_program, startup_program):
input_data = paddle.static.data(name='data', shape=[None, 128])
fc = paddle.static.nn.fc(x=input_data, num_flatten_dims=-1, size=32, activation=None)

for param in main_program.global_block().all_parameters():
ASPHelper._is_supported_layer(main_program, param.name)
Expand Down
7 changes: 4 additions & 3 deletions python/paddle/fluid/contrib/sparsity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def get_checking_method(mask_algo):
.. code-block:: python

import numpy as np
from paddle.fluid.contrib.sparsity import MaskAlgo, CheckMethod
from paddle.static.sparsity import MaskAlgo
from paddle.fluid.contrib.sparsity import CheckMethod

CheckMethod.get_checking_method(MaskAlgo.MASK_1D)
# CheckMethod.CHECK_1D
Expand Down Expand Up @@ -95,7 +96,7 @@ def calculate_density(x):
.. code-block:: python

import numpy as np
import paddle.fluid.contrib.sparsity as sparsity
import paddle.static.sparsity as sparsity

x = np.array([[0, 1, 3, 0],
[1, 1, 0, 1]])
Expand Down Expand Up @@ -446,7 +447,7 @@ def get_mask_2d_best(mat, n, m):
[5, 6, 3, 9],
[2, 4, 6, 9]])
mask_greedy = sparsity.get_mask_2d_greedy(mat, 2, 4)
mask_greedy = sparsity.get_mask_2d_best(mat, 2, 4)
mask_best = sparsity.get_mask_2d_best(mat, 2, 4)
print("L1 norm of `greedy` sparse matrix", np.multiply(mat, mask_greedy).sum()) # 56
print("L1 norm of `best` sparse matrix", np.multiply(mat, mask_best).sum()) # 61
"""
Expand Down
9 changes: 3 additions & 6 deletions python/paddle/fluid/tests/unittests/asp/asp_pruning_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.contrib import sparsity
from paddle.static import sparsity
from paddle.fluid.contrib.sparsity.asp import ASPHelper
import numpy as np

Expand Down Expand Up @@ -76,14 +76,11 @@ def __pruning_and_checking(self, exe, place, mask_func_name,
check_func_name, with_mask):
exe.run(self.startup_program)
sparsity.prune_model(
place,
self.main_program,
func_name=mask_func_name,
with_mask=with_mask)
self.main_program, mask_algo=mask_func_name, with_mask=with_mask)
for param in self.main_program.global_block().all_parameters():
if ASPHelper._is_supported_layer(self.main_program, param.name):
mat = np.array(fluid.global_scope().find_var(param.name)
.get_tensor())
self.assertTrue(
sparsity.check_sparsity(
paddle.fluid.contrib.sparsity.check_sparsity(
mat.T, func_name=check_func_name, n=2, m=4))
14 changes: 9 additions & 5 deletions python/paddle/fluid/tests/unittests/asp/test_asp_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.contrib import sparsity
from paddle.static import sparsity
from paddle.fluid.contrib.sparsity.asp import ASPHelper
import numpy as np

Expand Down Expand Up @@ -129,7 +129,7 @@ def test_asp_training(self):
feeder = fluid.DataFeeder(feed_list=[self.img, self.label], place=place)

exe.run(self.startup_program)
sparsity.prune_model(place, self.main_program)
sparsity.prune_model(self.main_program)

data = (np.random.randn(64, 3, 32, 32), np.random.randint(
10, size=(64, 1)))
Expand All @@ -139,7 +139,9 @@ def test_asp_training(self):
if ASPHelper._is_supported_layer(self.main_program, param.name):
mat = np.array(fluid.global_scope().find_var(param.name)
.get_tensor())
self.assertTrue(sparsity.check_sparsity(mat.T, n=2, m=4))
self.assertTrue(
paddle.fluid.contrib.sparsity.check_sparsity(
mat.T, n=2, m=4))

def test_asp_training_with_amp(self):
if core.is_compiled_with_cuda():
Expand All @@ -155,7 +157,7 @@ def test_asp_training_with_amp(self):
feed_list=[self.img, self.label], place=place)

exe.run(self.startup_program)
sparsity.prune_model(place, self.main_program)
sparsity.prune_model(self.main_program)

data = (np.random.randn(64, 3, 32, 32), np.random.randint(
10, size=(64, 1)))
Expand All @@ -165,7 +167,9 @@ def test_asp_training_with_amp(self):
if ASPHelper._is_supported_layer(self.main_program, param.name):
mat = np.array(fluid.global_scope().find_var(param.name)
.get_tensor())
self.assertTrue(sparsity.check_sparsity(mat.T, n=2, m=4))
self.assertTrue(
paddle.fluid.contrib.sparsity.check_sparsity(
mat.T, n=2, m=4))

def __get_param_names(self, params):
param_names = []
Expand Down
Loading