forked from EleutherAI/gpt-neox
-
Notifications
You must be signed in to change notification settings - Fork 0
/
local_setup.yml
30 lines (25 loc) · 1.15 KB
/
local_setup.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Suggested data paths when using GPT-NeoX locally
{
  # Path used when training on a single dataset.
  "data-path": "data/enron/enron_text_document",

  # or for weighted datasets:
  # "train-data-paths": ["data/enron/enron_text_document", "data/enron/enron_text_document"],
  # "test-data-paths": ["data/enron/enron_text_document", "data/enron/enron_text_document"],
  # "valid-data-paths": ["data/enron/enron_text_document", "data/enron/enron_text_document"],
  # "train-data-weights": [1., 2.],
  # "test-data-weights": [2., 1.],
  # "valid-data-weights": [0.5, 0.4],

  # If weight_by_num_documents is true, dataset weights are built from a
  # multinomial distribution over groups of data according to the number of
  # documents in each group.
  # WARNING: setting this to true will override any user-provided weights.
  # "weight_by_num_documents": false,
  # "weighted_sampler_alpha": 0.3,

  "vocab-file": "data/gpt2-vocab.json",
  "merge-file": "data/gpt2-merges.txt",

  # "save" and "load" point at the same directory.
  "save": "checkpoints",
  "load": "checkpoints",
  # Booleans are written in canonical lowercase (true/false) so that every
  # YAML loader reads them as booleans rather than strings.
  "checkpoint_validation_with_forward_pass": false,

  "tensorboard-dir": "tensorboard",
  "log-dir": "logs",
  "use_wandb": true,
  "wandb_host": "https://api.wandb.ai",
  "wandb_project": "neox"
}