Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve more on new dataset API #434

Merged
Merged 11 commits into the base branch on Mar 22, 2020
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ dist
*.qdstrm
*.zip
Untitled.ipynb
/nnp_training.py
/test*.py
4 changes: 1 addition & 3 deletions examples/nnp_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@
dspath = os.path.join(path, '../dataset/ani1-up_to_gdb4/ani_gdb_s01.h5')
batch_size = 2560

dataset = torchani.data.load(dspath).subtract_self_energies(energy_shifter).species_to_indices().shuffle()
size = len(dataset)
training, validation = dataset.split(int(0.8 * size), None)
training, validation = torchani.data.load(dspath).subtract_self_energies(energy_shifter).species_to_indices().shuffle().split(0.8, None)
training = training.collate(batch_size).cache()
validation = validation.collate(batch_size).cache()
print('Self atomic energies: ', energy_shifter.self_energies)
Expand Down
4 changes: 1 addition & 3 deletions examples/nnp_training_force.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@

batch_size = 2560

dataset = torchani.data.load(dspath).subtract_self_energies(energy_shifter).species_to_indices().shuffle()
size = len(dataset)
training, validation = dataset.split(int(0.8 * size), None)
training, validation = torchani.data.load(dspath).subtract_self_energies(energy_shifter).species_to_indices().shuffle().split(0.8, None)
training = training.collate(batch_size).cache()
validation = validation.collate(batch_size).cache()

Expand Down
84 changes: 83 additions & 1 deletion tests/test_data.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
import torch
import torchani
import unittest

path = os.path.dirname(os.path.realpath(__file__))
dataset_path = os.path.join(path, 'dataset/ani-1x/sample.h5')
dataset_path = os.path.join(path, '../dataset/ani-1x/sample.h5')
batch_size = 256
ani1x = torchani.models.ANI1x()
consts = ani1x.consts
Expand Down Expand Up @@ -34,6 +35,87 @@ def testNoUnnecessaryPadding(self):
non_padding = (species >= 0)[:, -1].nonzero()
self.assertGreater(non_padding.numel(), 0)

def testReEnter(self):
    """Every dataset transform stage must be iterable more than once.

    Regression test: if a stage returned a one-shot iterator, a second
    epoch over the data would be silently empty. Each stage of the
    pipeline (load -> subtract_self_energies -> species_to_indices ->
    shuffle -> collate -> cache) is checked in turn.
    """
    def assert_reenterable(ds):
        # Two full passes over `ds`; both must yield at least one item.
        first = False
        for _ in ds:
            first = True
        self.assertTrue(first)
        second = False
        for _ in ds:
            second = True
        self.assertTrue(second)

    ds = torchani.data.load(dataset_path)
    assert_reenterable(ds)

    ds = ds.subtract_self_energies(sae_dict)
    assert_reenterable(ds)

    ds = ds.species_to_indices()
    assert_reenterable(ds)

    ds = ds.shuffle()
    assert_reenterable(ds)

    ds = ds.collate(batch_size)
    assert_reenterable(ds)

    ds = ds.cache()
    assert_reenterable(ds)

def testShapeInference(self):
    """len() must be computable at every transform stage.

    The original test called ``len(ds)`` and discarded the result, so it
    only verified that no exception was raised; asserting a positive
    length also verifies the inferred size is sensible for a non-empty
    sample dataset.
    """
    shifter = torchani.EnergyShifter(None)
    ds = torchani.data.load(dataset_path).subtract_self_energies(shifter)
    self.assertGreater(len(ds), 0)
    ds = ds.species_to_indices()
    self.assertGreater(len(ds), 0)
    ds = ds.shuffle()
    self.assertGreater(len(ds), 0)
    ds = ds.collate(batch_size)
    self.assertGreater(len(ds), 0)

def testDataloader(self):
    """A materialized conformer list must work with torch DataLoader.

    Exercises ``torchani.data.collate_fn`` as the DataLoader's
    ``collate_fn`` and asserts at least one batch is produced.
    """
    shifter = torchani.EnergyShifter(None)
    dataset = list(
        torchani.data.load(dataset_path)
        .subtract_self_energies(shifter)
        .species_to_indices()
        .shuffle()
    )
    # Cap workers at the machine's CPU count: a hard-coded 64 workers
    # oversubscribes small CI machines and can exhaust OS limits on
    # open file descriptors / shared memory.
    num_workers = min(64, os.cpu_count() or 1)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        collate_fn=torchani.data.collate_fn,
        num_workers=num_workers,
    )
    entered = False
    for _ in loader:
        entered = True
    self.assertTrue(entered)


# Allow running this test module directly: `python tests/test_data.py`.
if __name__ == '__main__':
    unittest.main()
Loading