forked from XME-anonymous/XME
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ft_mend_bloom_mid_layers.txt
86 lines (83 loc) · 1.99 KB
/
ft_mend_bloom_mid_layers.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
[2023-03-26 20:04:44,846][__main__][INFO] -
alg: mend
lr: 1.0e-06
edit_lr: 0.0001
seed: 0
debug: false
model_save_pt: 5000
edit_bs: 1
silent: false
max_iters: 1000000
log_interval: 100
val_interval: 5000
lr_lr: 0.0001
batch_size: 2
val_batch_size: 5
accumulate_bs: 10
cedit: 0.1
cloc: 1.0
cbase: 1.0
val_steps: 500
device: cuda
base_loss: distill
oracle: false
train: true
train_base: false
opt: Adam
single_batch: false
archive: null
grad_clip: 100.0
ref: null
early_stop_patience: 20000
early_stop_key: loss/total_edit_val
dropout: 0.0
tokenizer: null
results_dir: null
no_grad_layers: null
eval_only: false
half: false
save: false
model:
pt: ${hydra:runtime.cwd}data/fever/bloom-finetuned-on-ft/models/bloom-560m.2023-03-26_12-54-45_6262973532.bk
name: bigscience/bloom-560m
class_name: BloomForSequenceClassification
tokenizer_class: BloomTokenizerFast
tokenizer_name: bigscience/bloom-560m
inner_params:
- transformer.h.20.mlp.dense_h_to_4h.weight
- transformer.h.20.mlp.dense_4h_to_h.weight
- transformer.h.21.mlp.dense_h_to_4h.weight
- transformer.h.21.mlp.dense_4h_to_h.weight
- transformer.h.22.mlp.dense_h_to_4h.weight
- transformer.h.22.mlp.dense_4h_to_h.weight
- transformer.h.23.mlp.dense_h_to_4h.weight
- transformer.h.23.mlp.dense_4h_to_h.weight
data:
path: null
rephrase: true
zsre_nq: true
nq_path: ${hydra:runtime.cwd}/data/nq
wiki_webtext: true
n_edits: 1
eval:
verbose: true
log_interval: 100
final_eval: true
mend:
one_sided: false
n_hidden: 1
hidden_dim: null
init: id
norm: true
combine: true
x_only: false
delta_only: false
act: relu
rank: 1920
mlp_class: IDMLP
shared: true
task: fc
dataset: fever
tests: false
[2023-03-26 20:04:44,846][__main__][INFO] - Project base directory: /home/anonymous-xme/mend/mend
[2023-03-26 20:04:44,885][models][INFO] - Loading model class <class 'transformers.models.bloom.modeling_bloom.BloomForSequenceClassification'> with name bigscience/bloom-560m from cache dir /home/anonymous-xme/mend/mend/cache/