Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin' into avelez-cellxgene-dev
Browse files: browse the repository at this point in the history
  • Loading branch information
amva13 committed Mar 6, 2024
2 parents 95c2d19 + 22cdd74 commit 8088dba
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/conda-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ on:
- main
- avelez-cellxgene-dev
- avelez-dev
- avelez-cellxgene-dev
- '*'
pull_request:
branches: [ "main" ]
workflow_dispatch:

jobs:
Expand Down
6 changes: 6 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ channels:
- defaults
- pyg
- pytorch
- pyg
- pytorch
dependencies:
- dataclasses=0.8
- fuzzywuzzy=0.18.0
Expand All @@ -14,12 +16,16 @@ dependencies:
- pandas=2.1.4
- pyg=2.5.0
- pytorch=2.2.1
- pyg=2.5.0
- pytorch=2.2.1
- requests=2.31.0
- scikit-learn=1.3.0
- seaborn=0.12.2
- tqdm=4.65.0
- torchaudio=2.2.1
- torchvision=0.17.1
- torchaudio=2.2.1
- torchvision=0.17.1
- pip:
- cellxgene-census==1.10.2
- gget==0.28.4
Expand Down
5 changes: 5 additions & 0 deletions tdc/test/dev_tests/utils_tests/test_misc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,18 @@
# if TDC is installed, no need to use the following line
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")))
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")))


class TestFunctions(unittest.TestCase):


def setUp(self):
print(os.getcwd())
pass

@unittest.skip("long running test")
@unittest.skip("long running test")
def test_neg_sample(self):
from tdc.multi_pred import PPI
Expand Down Expand Up @@ -91,6 +95,7 @@ def test_to_graph(self):
)
# output: {'pyg_graph': the PyG graph object, 'index_to_entities': a dict map from ID in the data to node ID in the PyG object, 'split': {'train': df, 'valid': df, 'test': df}}

#
#
def tearDown(self):
print(os.getcwd())
Expand Down
22 changes: 22 additions & 0 deletions tdc/utils/label.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def convert_y_unit(y, from_, to_):
y = y
elif from_ == "p":
y = (10**(-y) - 1e-10) / 1e-9
y = (10**(-y) - 1e-10) / 1e-9

if to_ == "p":
y = -np.log10(y * 1e-9 + 1e-10)
Expand All @@ -31,6 +32,12 @@ def convert_y_unit(y, from_, to_):
return y


def label_transform(y,
binary,
threshold,
convert_to_log,
verbose=True,
order="descending"):
def label_transform(y,
binary,
threshold,
Expand Down Expand Up @@ -67,6 +74,8 @@ def label_transform(y,
else:
raise ValueError(
"Please select order from 'descending or ascending!")
raise ValueError(
"Please select order from 'descending or ascending!")
else:
if (len(np.unique(y)) > 2) and convert_to_log:
if verbose:
Expand Down Expand Up @@ -148,6 +157,10 @@ def label_dist(y, name=None):
median = np.median(y)
mean = np.mean(y)

f, (ax_box,
ax_hist) = plt.subplots(2,
sharex=True,
gridspec_kw={"height_ratios": (0.15, 1)})
f, (ax_box,
ax_hist) = plt.subplots(2,
sharex=True,
Expand All @@ -158,6 +171,8 @@ def label_dist(y, name=None):
else:
sns.boxplot(y, ax=ax_box).set_title("Label Distribution of " +
str(name) + " Dataset")
sns.boxplot(y, ax=ax_box).set_title("Label Distribution of " +
str(name) + " Dataset")
ax_box.axvline(median, color="b", linestyle="--")
ax_box.axvline(mean, color="g", linestyle="--")

Expand Down Expand Up @@ -197,6 +212,8 @@ def NegSample(df, column_names, frac, two_types):
samples = np.random.choice(df_unique, size=(x, 2), replace=True)
neg_set = set([tuple([i[0], i[1]]) for i in samples if i[0] != i[1]
]) - pos_set
neg_set = set([tuple([i[0], i[1]]) for i in samples if i[0] != i[1]
]) - pos_set

while len(neg_set) < x:
sample = np.random.choice(df_unique, 2, replace=False)
Expand Down Expand Up @@ -236,6 +253,11 @@ def NegSample(df, column_names, frac, two_types):
sample_id1 = np.random.choice(df_unique_id1, size=len(df), replace=True)
sample_id2 = np.random.choice(df_unique_id2, size=len(df), replace=True)

neg_set = (set([
tuple([sample_id1[i], sample_id2[i]])
for i in range(len(df))
if sample_id1[i] != sample_id2[i]
]) - pos_set)
neg_set = (set([
tuple([sample_id1[i], sample_id2[i]])
for i in range(len(df))
Expand Down

0 comments on commit 8088dba

Please sign in to comment.