diff --git a/docs/api.rst b/docs/api.rst index 1dc066926..23f81cfcd 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -31,7 +31,7 @@ Datasets .. autoclass:: torchani.data.CachedDataset :members: .. autofunction:: torchani.data.load_ani_dataset -.. autoclass:: torchani.data.BatchedANIDataset +.. autoclass:: torchani.data.PaddedBatchChunkDataset diff --git a/torchani/data/__init__.py b/torchani/data/__init__.py index e4dabe124..bc90fedca 100644 --- a/torchani/data/__init__.py +++ b/torchani/data/__init__.py @@ -7,7 +7,6 @@ from ._pyanitools import anidataloader import torch from .. import utils -import warnings from .new import CachedDataset, ShuffledDataset, find_threshold default_device = 'cuda' if torch.cuda.is_available() else 'cpu' @@ -159,6 +158,13 @@ def split_whole_into_batches_and_chunks(atomic_properties, properties, batch_siz class PaddedBatchChunkDataset(Dataset): + r""" Dataset that contains batches in 'chunks', with padded structures + + This dataset acts as a container of batches to be used when training. Each + of the batches is broken up into 'chunks', each of which is a tensor that has + molecules with a similar number of atoms, but which have been padded with + dummy atoms in order for them to have the same tensor dimensions. + """ def __init__(self, atomic_properties, properties, batch_size, dtype=torch.get_default_dtype(), device=default_device): @@ -193,26 +199,6 @@ def __len__(self): return len(self.batches) -class BatchedANIDataset(PaddedBatchChunkDataset): - """Same as :func:`torchani.data.load_ani_dataset`. 
This API has been deprecated.""" - - def __init__(self, path, species_tensor_converter, batch_size, - shuffle=True, properties=('energies',), atomic_properties=(), transform=(), - dtype=torch.get_default_dtype(), device=default_device): - self.properties = properties - self.atomic_properties = atomic_properties - warnings.warn("BatchedANIDataset is deprecated; use load_ani_dataset()", DeprecationWarning) - - atomic_properties, properties = load_and_pad_whole_dataset( - path, species_tensor_converter, shuffle, properties, atomic_properties) - - # do transformations on data - for t in transform: - atomic_properties, properties = t(atomic_properties, properties) - - super().__init__(atomic_properties, properties, batch_size, dtype, device) - - def load_ani_dataset(path, species_tensor_converter, batch_size, shuffle=True, rm_outlier=False, properties=('energies',), atomic_properties=(), transform=(), dtype=torch.get_default_dtype(), device=default_device, @@ -361,4 +347,4 @@ def load_ani_dataset(path, species_tensor_converter, batch_size, shuffle=True, return tuple(ret) -__all__ = ['load_ani_dataset', 'BatchedANIDataset', 'CachedDataset', 'ShuffledDataset', 'find_threshold'] +__all__ = ['load_ani_dataset', 'PaddedBatchChunkDataset', 'CachedDataset', 'ShuffledDataset', 'find_threshold'] diff --git a/torchani/utils.py b/torchani/utils.py index 126d9407d..ac22621ff 100644 --- a/torchani/utils.py +++ b/torchani/utils.py @@ -172,9 +172,7 @@ def sae(self, species): return self_energies.sum(dim=1) + intercept def subtract_from_dataset(self, atomic_properties, properties): - """Transformer for :class:`torchani.data.BatchedANIDataset` that - subtract self energies. - """ + """Transformer that subtracts self energies from a dataset""" if self.self_energies is None: self_energies = self.sae_from_dataset(atomic_properties, properties) self.self_energies = torch.tensor(self_energies, dtype=torch.double)