-
Notifications
You must be signed in to change notification settings - Fork 0
/
gridsearch_ppgn_simple.py
93 lines (80 loc) · 4.15 KB
/
gridsearch_ppgn_simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from subprocess import Popen
from os import mkdir
import os
import subprocess
import yaml
import copy
import numpy as np
import datetime
import itertools
## The script will run a gridsearch on all the chosen parameters, so try not to
## specify multiple values for many different parameters at once.
## If you wish to test multiple values for a parameter then simply specify them
## as a list with multiple elements.
# Specify how many noise levels we should use during training
noise_nums = [64]
# Specify the ppgn model dimensions as a tuple of (hiddenlayers, dimension of each hidden layer)
networksizes = [(8, 128)]
# Specify which models to test on from the following: ["planar_30_200_pkl","planar_60_200_pkl","planar_90_200_pkl","trees_30_200_pkl","trees_60_200_pkl","trees_90_200_pkl","sbm_27_200_pkl","ego_18_small","community_20_small"]
datasets = ["ego_18_small"]
# Choose the batchsize for training
batchsizes = [64]
# Choose model name (default is ppgn for normal ppgn implementation)
modelname = "ppgn"
# Choose random seeds to use
seeds = [1234]
## LEAVE THESE VALUES as these are deprecated and some functionality may produce an error if changed
noisetypes = ["switched"]
weighted_losses = [True]
# Create fresh directories (configs / slurm scripts / slurm job numbers) for
# this run, namespaced by a day.month_hour:minute timestamp.
# Grab the timestamp ONCE: the original called datetime.now() four times, so
# the components could straddle a minute (or midnight) boundary and produce
# an inconsistent directory name.
now = datetime.datetime.now()
testdir = f"consec_ppgn_{now.day}.{now.month}_{now.hour}:{now.minute}"
# os.makedirs avoids the shell round-trip of os.system("mkdir ...") and
# creates missing parent directories instead of silently failing.
os.makedirs(f"config/gridsearch/{testdir}", exist_ok=True)
os.makedirs(f"scripts/gridsearch/{testdir}", exist_ok=True)
os.makedirs(f"gridsearch/{testdir}", exist_ok=True)
# Base config; every grid point below starts from a copy of this dict.
with open("config/gridsearch_ppgn_final.yaml") as f:
    data_base = yaml.load(f, Loader=yaml.FullLoader)
# One grid point per element of the cartesian product of all parameter lists.
for (
    batchsize,
    dataset,
    networksize,
    noisetype,
    weighted_loss,
    seed,
) in itertools.product(
    batchsizes, datasets, networksizes, noisetypes, weighted_losses, seeds
):
    # BUG FIX: copy.copy() is a SHALLOW copy, so mutating nested dicts such
    # as data["dataset"] / data["train"] / data["model"] wrote through to
    # data_base and leaked settings across grid points (e.g. the community
    # dataset_size=100 stuck for every later dataset). deepcopy isolates
    # each run's config.
    data = copy.deepcopy(data_base)
    if "community" in dataset:
        data["dataset"]["dataset_size"] = 100
    data["num_layers"] = networksize[0]
    data["train"]["batch_size"] = batchsize
    data["hidden"] = networksize[1]
    data["hidden_final"] = networksize[1]
    # Node count is embedded in the dataset name after the first underscore,
    # e.g. "planar_30_200_pkl" -> 30. Assumes a two-digit count — TODO confirm.
    data["dataset"]["max_node_num"] = int(
        dataset[dataset.find("_") + 1 : dataset.find("_") + 3]
    )
    data["dataset"]["name"] = dataset
    # NOTE(review): this stores the whole noise_nums LIST, not a count,
    # despite the name "num_levels" — presumably what the trainer expects;
    # verify against ppgn_simple.py.
    data["num_levels"] = noise_nums
    data["noisetype"] = noisetype
    data["weighted_loss"] = weighted_loss
    data["model"]["name"] = modelname
    data["seed"] = seed
    data["model"]["models"]["model_1"]["name"] = modelname
    # Shared file stem for this grid point's config / script / job-id files
    # (the original repeated this long f-string four times).
    runname = (
        f"gridsearch_ppgn_consec_{dataset}_{networksize[0]},{networksize[1]}"
        f"_{len(noise_nums)}_{batchsize}_{noisetype}_{weighted_loss}_{seed}"
    )
    with open(f"config/gridsearch/{testdir}/{runname}.yaml", "w+") as g:
        yaml.dump(data, g)
    commandstring = (
        f"python3 ppgn_simple.py -c config/gridsearch/{testdir}/{runname}.yaml"
    )
    # Assemble the slurm script: shared preamble, then the training command.
    with open("scripts/gridsearch.sh", "r") as firstfile, open(
        f"scripts/gridsearch/{testdir}/{runname}.sh", "a+"
    ) as secondfile:
        for line in firstfile:
            secondfile.write(line)
        secondfile.write(f"{commandstring}\n")
        secondfile.write("exit 0;")
    out = subprocess.check_output(
        f"sbatch scripts/gridsearch/{testdir}/{runname}.sh",
        shell=True,
    )
    # sbatch prints "Submitted batch job <id>"; take the trailing integer.
    # BUG FIX: the original did str(out).find("\n") on a bytes repr (where
    # the newline is escaped to "\\n"), so find() returned -1 and the slice
    # out[-7:-1] only accidentally worked for 5-digit job ids.
    jobnumber = int(out.decode().strip().split()[-1])
    # Record the slurm job id for this grid point.
    with open(f"gridsearch/{testdir}/{runname}.txt", "w+") as idfile:
        idfile.write(f"{jobnumber}")