Merge remote-tracking branch 'origin' into avelez-cellxgene-dev

mims-harvard · Mar 6, 2024 · 8088dba · 8088dba
2 parents 95c2d19 + 22cdd74
commit 8088dba
Show file tree

Hide file tree

Showing 4 changed files with 36 additions and 0 deletions.
diff --git a/.github/workflows/conda-tests.yml b/.github/workflows/conda-tests.yml
@@ -10,7 +10,10 @@ on:
       - main
       - avelez-cellxgene-dev
       - avelez-dev
+      - avelez-cellxgene-dev
       - '*'
+  pull_request:
+    branches: [ "main" ]
   workflow_dispatch:
 
 jobs:

diff --git a/environment.yml b/environment.yml
@@ -4,6 +4,8 @@ channels:
   - defaults
   - pyg
   - pytorch
+  - pyg
+  - pytorch
 dependencies:
   - dataclasses=0.8
   - fuzzywuzzy=0.18.0
@@ -14,12 +16,16 @@ dependencies:
   - pandas=2.1.4
   - pyg=2.5.0
   - pytorch=2.2.1
+  - pyg=2.5.0
+  - pytorch=2.2.1
   - requests=2.31.0
   - scikit-learn=1.3.0
   - seaborn=0.12.2
   - tqdm=4.65.0
   - torchaudio=2.2.1
   - torchvision=0.17.1
+  - torchaudio=2.2.1
+  - torchvision=0.17.1
   - pip:
     - cellxgene-census==1.10.2 
     - gget==0.28.4

diff --git a/tdc/test/dev_tests/utils_tests/test_misc_utils.py b/tdc/test/dev_tests/utils_tests/test_misc_utils.py
@@ -13,14 +13,18 @@
 # if TDC is installed, no need to use the following line
 sys.path.append(
     os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")))
+sys.path.append(
+    os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")))
 
 
 class TestFunctions(unittest.TestCase):
 
+
     def setUp(self):
         print(os.getcwd())
         pass
 
+    @unittest.skip("long running test")
     @unittest.skip("long running test")
     def test_neg_sample(self):
         from tdc.multi_pred import PPI
@@ -91,6 +95,7 @@ def test_to_graph(self):
         )
         # output: {'pyg_graph': the PyG graph object, 'index_to_entities': a dict map from ID in the data to node ID in the PyG object, 'split': {'train': df, 'valid': df, 'test': df}}
 
+    #
     #
     def tearDown(self):
         print(os.getcwd())

diff --git a/tdc/utils/label.py b/tdc/utils/label.py
@@ -22,6 +22,7 @@ def convert_y_unit(y, from_, to_):
         y = y
     elif from_ == "p":
         y = (10**(-y) - 1e-10) / 1e-9
+        y = (10**(-y) - 1e-10) / 1e-9
 
     if to_ == "p":
         y = -np.log10(y * 1e-9 + 1e-10)
@@ -31,6 +32,12 @@ def convert_y_unit(y, from_, to_):
     return y
 
 
+def label_transform(y,
+                    binary,
+                    threshold,
+                    convert_to_log,
+                    verbose=True,
+                    order="descending"):
 def label_transform(y,
                     binary,
                     threshold,
@@ -67,6 +74,8 @@ def label_transform(y,
         else:
             raise ValueError(
                 "Please select order from 'descending or ascending!")
+            raise ValueError(
+                "Please select order from 'descending or ascending!")
     else:
         if (len(np.unique(y)) > 2) and convert_to_log:
             if verbose:
@@ -148,6 +157,10 @@ def label_dist(y, name=None):
     median = np.median(y)
     mean = np.mean(y)
 
+    f, (ax_box,
+        ax_hist) = plt.subplots(2,
+                                sharex=True,
+                                gridspec_kw={"height_ratios": (0.15, 1)})
     f, (ax_box,
         ax_hist) = plt.subplots(2,
                                 sharex=True,
@@ -158,6 +171,8 @@ def label_dist(y, name=None):
     else:
         sns.boxplot(y, ax=ax_box).set_title("Label Distribution of " +
                                             str(name) + " Dataset")
+        sns.boxplot(y, ax=ax_box).set_title("Label Distribution of " +
+                                            str(name) + " Dataset")
     ax_box.axvline(median, color="b", linestyle="--")
     ax_box.axvline(mean, color="g", linestyle="--")
 
@@ -197,6 +212,8 @@ def NegSample(df, column_names, frac, two_types):
         samples = np.random.choice(df_unique, size=(x, 2), replace=True)
         neg_set = set([tuple([i[0], i[1]]) for i in samples if i[0] != i[1]
                       ]) - pos_set
+        neg_set = set([tuple([i[0], i[1]]) for i in samples if i[0] != i[1]
+                      ]) - pos_set
 
         while len(neg_set) < x:
             sample = np.random.choice(df_unique, 2, replace=False)
@@ -236,6 +253,11 @@ def NegSample(df, column_names, frac, two_types):
         sample_id1 = np.random.choice(df_unique_id1, size=len(df), replace=True)
         sample_id2 = np.random.choice(df_unique_id2, size=len(df), replace=True)
 
+        neg_set = (set([
+            tuple([sample_id1[i], sample_id2[i]])
+            for i in range(len(df))
+            if sample_id1[i] != sample_id2[i]
+        ]) - pos_set)
         neg_set = (set([
             tuple([sample_id1[i], sample_id2[i]])
             for i in range(len(df))