This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Deepspeed integration #4693

Open: wants to merge 28 commits into base: main
28 commits
e2ac4b5  first draft of deepspeed trainer (jacobdanovitch, Oct 2, 2020)
619657e  delegating grad_clipping, grad_norm, grad_acculumation, etc. to deeps… (jacobdanovitch, Oct 2, 2020)
a329fd2  cleaning up deepspeed config interface (jacobdanovitch, Oct 2, 2020)
00666c2  idenifying bottleneck / start simplifying model engine (jacobdanovitch, Oct 7, 2020)
f0da3bf  1416 LOC -> 562 (jacobdanovitch, Oct 10, 2020)
d0e8a68  debugging memory leak (jacobdanovitch, Oct 28, 2020)
0a74573  functioning / cleaner prototype (jacobdanovitch, Oct 31, 2020)
eaf8aa5  Merge branch 'master' into jacobdanovitch/deepspeed (jacobdanovitch, Nov 2, 2020)
498d3a2  checkpointing works e2e (jacobdanovitch, Nov 2, 2020)
a211b5e  ready for review (jacobdanovitch, Nov 5, 2020)
3b30e21  Merge branch 'master' into jacobdanovitch/deepspeed (jacobdanovitch, Nov 5, 2020)
fdd888b  add new trainer/lazy changes (jacobdanovitch, Nov 5, 2020)
ef544c9  Merge branch 'master' into jacobdanovitch/deepspeed (jacobdanovitch, Nov 9, 2020)
083a6d0  dangling changes (jacobdanovitch, Nov 23, 2020)
0f8d5b7  Merge branch 'master' of https://github.com/allenai/allennlp into jac… (jacobdanovitch, Nov 23, 2020)
4e4f7d7  updating from master (jacobdanovitch, Nov 30, 2020)
f48ea19  typechecks passing! (jacobdanovitch, Nov 30, 2020)
b3328fc  init file (jacobdanovitch, Jan 3, 2021)
966e296  Merge remote-tracking branch 'upstream/main' into jacobdanovitch/deep… (jacobdanovitch, Jan 3, 2021)
2fdb7c0  save old tests in case (jacobdanovitch, Jan 8, 2021)
95a9e5f  tracking down dist barrier bug(s) (jacobdanovitch, Jan 8, 2021)
b152fe1  catch up (jacobdanovitch, Jan 19, 2021)
5b82534  Merge branch 'main' of https://github.com/allenai/allennlp into jacob… (jacobdanovitch, Jan 19, 2021)
4fb6604  moved master checks to checkpointer to accomodate deepspeed (jacobdanovitch, Jan 20, 2021)
e21fb1f  Merge branch 'main' of https://github.com/allenai/allennlp into jacob… (jacobdanovitch, Feb 10, 2021)
703843c  updating to 2.0 (jacobdanovitch, Feb 10, 2021)
e7b8825  checking in sparse attention (jacobdanovitch, Feb 18, 2021)
3fc1835  merge resolution (jacobdanovitch, Feb 18, 2021)
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

- Added `DeepspeedTrainer` and `FusedLambOptimizer`.

### Fixed

- Ensured that `MeanAbsoluteError` always returns a `float` metric value instead of a `Tensor`.
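For context on the new CHANGELOG entry above: the trainer is exposed as an AllenNLP plugin (see the `DEFAULT_PLUGINS` change in `allennlp/common/plugins.py` below), so it should be selectable from a training config. The registered name `"deepspeed"` and the `deepspeed_config` parameter in this sketch are assumptions for illustration and are not confirmed by this diff; only the nested keys (`fp16`, `zero_optimization`) are standard DeepSpeed options.

```python
# Hypothetical sketch only: the trainer type "deepspeed" and the
# "deepspeed_config" key are assumed names, not taken from this diff.
trainer_params = {
    "type": "deepspeed",
    "optimizer": {"type": "adam", "lr": 2e-5},
    "num_epochs": 3,
    "deepspeed_config": {
        # Standard DeepSpeed options: mixed precision and ZeRO stage 2.
        "fp16": {"enabled": True},
        "zero_optimization": {"stage": 2},
    },
}
```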
4 changes: 3 additions & 1 deletion allennlp/commands/__init__.py

@@ -50,7 +50,9 @@ def add_argument(self, *args, **kwargs):
super().add_argument(*args, **kwargs)


def parse_args(prog: Optional[str] = None) -> Tuple[argparse.ArgumentParser, argparse.Namespace]:
def parse_args(
prog: Optional[str] = None,
) -> Tuple[argparse.ArgumentParser, argparse.Namespace]:
"""
Creates the argument parser for the main program and uses it to parse the args.
"""
20 changes: 15 additions & 5 deletions allennlp/commands/evaluate.py

@@ -27,17 +27,23 @@ class Evaluate(Subcommand):
def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
description = """Evaluate the specified model + dataset"""
subparser = parser.add_parser(
self.name, description=description, help="Evaluate the specified model + dataset."
self.name,
description=description,
help="Evaluate the specified model + dataset.",
)

subparser.add_argument("archive_file", type=str, help="path to an archived trained model")

subparser.add_argument(
"input_file", type=str, help="path to the file containing the evaluation data"
"input_file",
type=str,
help="path to the file containing the evaluation data",
)

subparser.add_argument(
"--output-file", type=str, help="optional path to write the metrics to as JSON"
"--output-file",
type=str,
help="optional path to write the metrics to as JSON",
)

subparser.add_argument(
@@ -47,7 +53,9 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
)

subparser.add_argument(
"--weights-file", type=str, help="a path that overrides which weights file to use"
"--weights-file",
type=str,
help="a path that overrides which weights file to use",
)

cuda_device = subparser.add_mutually_exclusive_group(required=False)
@@ -68,7 +76,9 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
)

subparser.add_argument(
"--batch-size", type=int, help="If non-empty, the batch size to use during evaluation."
"--batch-size",
type=int,
help="If non-empty, the batch size to use during evaluation.",
)

subparser.add_argument(
14 changes: 11 additions & 3 deletions allennlp/commands/find_learning_rate.py

@@ -39,7 +39,9 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
)

subparser.add_argument(
"param_path", type=str, help="path to parameter file describing the model to be trained"
"param_path",
type=str,
help="path to parameter file describing the model to be trained",
)
subparser.add_argument(
"-s",
@@ -60,10 +62,16 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
),
)
subparser.add_argument(
"--start-lr", type=float, default=1e-5, help="learning rate to start the search"
"--start-lr",
type=float,
default=1e-5,
help="learning rate to start the search",
)
subparser.add_argument(
"--end-lr", type=float, default=10, help="learning rate up to which search is done"
"--end-lr",
type=float,
default=10,
help="learning rate up to which search is done",
)
subparser.add_argument(
"--num-batches",
17 changes: 13 additions & 4 deletions allennlp/commands/predict.py

@@ -28,7 +28,9 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument

description = """Run the specified model against a JSON-lines input file."""
subparser = parser.add_parser(
self.name, description=description, help="Use a trained model to make predictions."
self.name,
description=description,
help="Use a trained model to make predictions.",
)

subparser.add_argument(
@@ -38,12 +40,17 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument

subparser.add_argument("--output-file", type=str, help="path to output file")
subparser.add_argument(
"--weights-file", type=str, help="a path that overrides which weights file to use"
"--weights-file",
type=str,
help="a path that overrides which weights file to use",
)

batch_size = subparser.add_mutually_exclusive_group(required=False)
batch_size.add_argument(
"--batch-size", type=int, default=1, help="The batch size to use for processing"
"--batch-size",
type=int,
default=1,
help="The batch size to use for processing",
)

subparser.add_argument(
@@ -86,7 +93,9 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
)

subparser.add_argument(
"--predictor", type=str, help="optionally specify a specific predictor to use"
"--predictor",
type=str,
help="optionally specify a specific predictor to use",
)

subparser.add_argument(
5 changes: 4 additions & 1 deletion allennlp/commands/subcommand.py

@@ -39,7 +39,10 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
@classmethod
@overrides
def register(
cls: Type[T], name: str, constructor: Optional[str] = None, exist_ok: bool = False
cls: Type[T],
name: str,
constructor: Optional[str] = None,
exist_ok: bool = False,
) -> Callable[[Type[T]], Type[T]]:
super_register_fn = super().register(name, constructor=constructor, exist_ok=exist_ok)
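The reformatted `register` signature above is the hook third-party subcommands use. A minimal usage sketch with an illustrative command name (the decorator pattern and `add_subparser` signature match the diffs in this PR; the command itself is hypothetical):

```python
import argparse

from allennlp.commands.subcommand import Subcommand


@Subcommand.register("count-instances")  # illustrative name, not part of this PR
class CountInstances(Subcommand):
    def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
        subparser = parser.add_parser(
            self.name,
            description="Count instances in a dataset.",
            help="Count instances in a dataset.",
        )
        # argparse dispatches to this callable when the subcommand is invoked.
        subparser.set_defaults(func=lambda args: print("counting..."))
        return subparser
```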

15 changes: 12 additions & 3 deletions allennlp/commands/train.py

@@ -24,7 +24,11 @@
from allennlp.common.plugins import import_plugins
from allennlp.data import DatasetReader, Vocabulary
from allennlp.data import DataLoader
from allennlp.models.archival import archive_model, CONFIG_NAME, verify_include_in_archive
from allennlp.models.archival import (
archive_model,
CONFIG_NAME,
verify_include_in_archive,
)
from allennlp.models.model import _DEFAULT_WEIGHTS, Model
from allennlp.training.trainer import Trainer
from allennlp.training import util as training_util
@@ -40,7 +44,9 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
subparser = parser.add_parser(self.name, description=description, help="Train a model.")

subparser.add_argument(
"param_path", type=str, help="path to parameter file describing the model to be trained"
"param_path",
type=str,
help="path to parameter file describing the model to be trained",
)

subparser.add_argument(
@@ -80,7 +86,10 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
)

subparser.add_argument(
"--node-rank", type=int, default=0, help="rank of this node in the distributed setup"
"--node-rank",
type=int,
default=0,
help="rank of this node in the distributed setup",
)

subparser.add_argument(
4 changes: 3 additions & 1 deletion allennlp/common/cached_transformers.py

@@ -66,7 +66,9 @@ def strip_prefix(s):
}
if len(valid_keys) > 0:
logger.info(
"Loading %d tensors from %s", len(valid_keys), override_weights_file
"Loading %d tensors from %s",
len(valid_keys),
override_weights_file,
)
else:
raise ValueError(
14 changes: 11 additions & 3 deletions allennlp/common/file_utils.py

@@ -652,7 +652,10 @@ class CacheFile:
"""

def __init__(
self, cache_filename: Union[PathLike, str], mode: str = "w+b", suffix: str = ".tmp"
self,
cache_filename: Union[PathLike, str],
mode: str = "w+b",
suffix: str = ".tmp",
) -> None:
self.cache_filename = (
cache_filename if isinstance(cache_filename, Path) else Path(cache_filename)
@@ -671,7 +674,9 @@ def __exit__(self, exc_type, exc_value, traceback):
if exc_value is None:
# Success.
logger.debug(
"Renaming temp file %s to cache at %s", self.temp_file.name, self.cache_filename
"Renaming temp file %s to cache at %s",
self.temp_file.name,
self.cache_filename,
)
# Rename the temp file to the actual cache filename.
os.replace(self.temp_file.name, self.cache_filename)
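A short usage sketch of the context manager above. It assumes `__enter__` hands back the temporary file handle, which is not visible in this hunk; what the hunk does show is that the temp file is atomically renamed to the cache path only on a clean exit.

```python
from allennlp.common.file_utils import CacheFile

# Assumption: the context manager yields the underlying temp file object.
with CacheFile("example.cache", mode="w+b", suffix=".tmp") as temp_file:
    temp_file.write(b"downloaded bytes")
# On success, os.replace has moved the temp file to "example.cache".
# On an exception, the rename shown above is skipped.
```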
@@ -922,7 +927,10 @@ def get_file_extension(path: str, dot=True, lower: bool = True):


def open_compressed(
filename: Union[str, PathLike], mode: str = "rt", encoding: Optional[str] = "UTF-8", **kwargs
filename: Union[str, PathLike],
mode: str = "rt",
encoding: Optional[str] = "UTF-8",
**kwargs,
):
if not isinstance(filename, str):
filename = str(filename)
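For readers unfamiliar with the helper whose signature was reformatted above, a minimal sketch of what an extension-based dispatch of this kind typically looks like. This is an illustration under that assumption, not the function's actual body.

```python
import bz2
import gzip
from os import PathLike
from typing import Optional, Union


def open_compressed_sketch(
    filename: Union[str, PathLike],
    mode: str = "rt",
    encoding: Optional[str] = "UTF-8",
    **kwargs,
):
    filename = str(filename)
    open_kwargs = dict(mode=mode, **kwargs)
    if "b" not in mode:
        # encoding only applies in text mode.
        open_kwargs["encoding"] = encoding
    if filename.endswith(".gz"):
        return gzip.open(filename, **open_kwargs)
    if filename.endswith(".bz2"):
        return bz2.open(filename, **open_kwargs)
    return open(filename, **open_kwargs)
```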
24 changes: 19 additions & 5 deletions allennlp/common/from_params.py

@@ -86,7 +86,9 @@ def is_base_registrable(cls) -> bool:
Checks whether this is a class that directly inherits from Registrable, or is a subclass of such
a class.
"""
from allennlp.common.registrable import Registrable # import here to avoid circular imports
from allennlp.common.registrable import (
Registrable,
) # import here to avoid circular imports

if not issubclass(cls, Registrable):
return False
@@ -148,7 +150,10 @@ def infer_params(
else:
super_parameters = {}

return {**super_parameters, **parameters} # Subclass parameters overwrite superclass ones
return {
**super_parameters,
**parameters,
} # Subclass parameters overwrite superclass ones


def create_kwargs(
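The comment on the merge in `infer_params` above relies on standard dict-unpacking semantics: when keys collide, the later entries win. A one-line check with illustrative values:

```python
super_parameters = {"dropout": 0.1, "hidden_dim": 128}
parameters = {"dropout": 0.5}

# Later unpacking wins, so the subclass value for "dropout" overrides the superclass one.
assert {**super_parameters, **parameters} == {"dropout": 0.5, "hidden_dim": 128}
```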
@@ -245,7 +250,12 @@ def create_extras(cls: Type[T], extras: Dict[str, Any]) -> Dict[str, Any]:


def pop_and_construct_arg(
class_name: str, argument_name: str, annotation: Type, default: Any, params: Params, **extras
class_name: str,
argument_name: str,
annotation: Type,
default: Any,
params: Params,
**extras,
) -> Any:
"""
Does the work of actually constructing an individual argument for
@@ -261,7 +271,9 @@ def pop_and_construct_arg(
`inspect.Parameter` object directly, so that we can handle `Union` types using recursion on
this method, trying the different annotation types in the union in turn.
"""
from allennlp.models.archival import load_archive # import here to avoid circular imports
from allennlp.models.archival import (
load_archive,
) # import here to avoid circular imports

# We used `argument_name` as the method argument to avoid conflicts with 'name' being a key in
# `extras`, which isn't _that_ unlikely. Now that we are inside the method, we can switch back
@@ -536,7 +548,9 @@ def from_params(
constructor (because you inspect `__init__`, but call `cls()`).
"""

from allennlp.common.registrable import Registrable # import here to avoid circular imports
from allennlp.common.registrable import (
Registrable,
) # import here to avoid circular imports

logger.debug(
f"instantiating class {cls} from params {getattr(params, 'params', params)} "
11 changes: 9 additions & 2 deletions allennlp/common/plugins.py

@@ -33,14 +33,21 @@
The global plugins file will be found here.
"""

DEFAULT_PLUGINS = ("allennlp_models", "allennlp_semparse", "allennlp_server")
DEFAULT_PLUGINS = (
"allennlp_models",
"allennlp_semparse",
"allennlp_server",
"allennlp.training.deepspeed",
)
"""
Default plugins do not need to be declared in a plugins file. They will always
be imported when they are installed in the current Python environment.
"""


def discover_file_plugins(plugins_filename: str = LOCAL_PLUGINS_FILENAME) -> Iterable[str]:
def discover_file_plugins(
plugins_filename: str = LOCAL_PLUGINS_FILENAME,
) -> Iterable[str]:
"""
Returns an iterable of the plugins found, declared within a file whose path is `plugins_filename`.
"""
22 changes: 17 additions & 5 deletions allennlp/common/testing/model_test_case.py

@@ -54,7 +54,9 @@ def set_up_model(
self.vocab = vocab
self.instances = instances
self.model = Model.from_params(
vocab=self.vocab, params=params["model"], serialization_dir=serialization_dir
vocab=self.vocab,
params=params["model"],
serialization_dir=serialization_dir,
)

# TODO(joelgrus) get rid of these
@@ -149,13 +151,17 @@ def ensure_model_can_train_save_and_load(

print("Reading with original model")
data_loader = DataLoader.from_params(
params=data_loader_params, reader=reader, data_path=params["validation_data_path"]
params=data_loader_params,
reader=reader,
data_path=params["validation_data_path"],
)
data_loader.index_with(model.vocab)

print("Reading with loaded model")
data_loader2 = DataLoader.from_params(
params=data_loader_params2, reader=reader, data_path=params["validation_data_path"]
params=data_loader_params2,
reader=reader,
data_path=params["validation_data_path"],
)
data_loader2.index_with(loaded_model.vocab)

@@ -193,7 +199,10 @@ def ensure_model_can_train_save_and_load(
# Both outputs should have the same keys and the values for these keys should be close.
for key in model_predictions.keys():
self.assert_fields_equal(
model_predictions[key], loaded_model_predictions[key], name=key, tolerance=tolerance
model_predictions[key],
loaded_model_predictions[key],
name=key,
tolerance=tolerance,
)

# Check loaded model's loss exists and we can compute gradients, for continuing training.
@@ -277,7 +286,10 @@ def assert_fields_equal(self, field1, field2, name: str, tolerance: float = 1e-6
assert field1.keys() == field2.keys()
for key in field1:
self.assert_fields_equal(
field1[key], field2[key], tolerance=tolerance, name=name + "." + str(key)
field1[key],
field2[key],
tolerance=tolerance,
name=name + "." + str(key),
)
elif isinstance(field1, (list, tuple)):
assert len(field1) == len(field2)
3 changes: 2 additions & 1 deletion allennlp/common/testing/test_case.py

@@ -23,7 +23,8 @@ class AllenNlpTestCase:

def setup_method(self):
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", level=logging.DEBUG
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
level=logging.DEBUG,
)
# Disabling some of the more verbose logging statements that typically aren't very helpful
# in tests.
5 changes: 4 additions & 1 deletion allennlp/common/util.py

@@ -466,7 +466,10 @@ def int_to_device(device: Union[int, torch.device]) -> torch.device:


def log_frozen_and_tunable_parameter_names(model: torch.nn.Module) -> None:
frozen_parameter_names, tunable_parameter_names = get_frozen_and_tunable_parameter_names(model)
(
frozen_parameter_names,
tunable_parameter_names,
) = get_frozen_and_tunable_parameter_names(model)

logger.info("The following parameters are Frozen (without gradient):")
for name in frozen_parameter_names:
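The tuple unpacking reformatted above suggests the shape of the helper it calls. A hedged sketch of how such a frozen/tunable split is usually computed; the real `get_frozen_and_tunable_parameter_names` may differ in detail.

```python
from typing import List, Tuple

import torch


def get_frozen_and_tunable_parameter_names_sketch(
    model: torch.nn.Module,
) -> Tuple[List[str], List[str]]:
    # Parameters with requires_grad=False are "frozen"; the rest are tunable.
    frozen = [name for name, p in model.named_parameters() if not p.requires_grad]
    tunable = [name for name, p in model.named_parameters() if p.requires_grad]
    return frozen, tunable
```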