-
Notifications
You must be signed in to change notification settings - Fork 57
/
main.py
107 lines (79 loc) · 3.77 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved
Author: Dejiao Zhang ([email protected])
Date: 02/26/2021
"""
import os
import sys
sys.path.append( './' )
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import torch
import argparse
from models.Transformers import SCCLBert
import dataloader.dataloader as dataloader
from training import SCCLvTrainer
from utils.kmeans import get_kmeans_centers
from utils.logger import setup_path, set_global_random_seed
from utils.optimizer import get_optimizer, get_bert
import numpy as np
def run(args):
    """Build the data loader, model and optimizer from ``args`` and train.

    Steps, in order: resolve the output paths via ``setup_path`` (stored back
    on ``args.resPath`` / ``args.tensorboard``), seed the RNGs, create the
    training loader selected by ``args.augtype``, load the backbone and its
    tokenizer, compute initial cluster centers with ``get_kmeans_centers``,
    wrap everything in ``SCCLBert`` and hand it to ``SCCLvTrainer``.

    NOTE(review): CUDA is used unconditionally in this function
    (``torch.cuda.set_device`` and ``.cuda()``); ``args.use_gpu`` is not
    consulted here.

    Args:
        args: Parsed options as returned by ``get_args``; mutated in place
            (``resPath`` and ``tensorboard`` are overwritten).

    Returns:
        None.
    """
    args.resPath, args.tensorboard = setup_path(args)
    set_global_random_seed(args.seed)

    # Data: "explicit" uses the explicit-augmentation loader, anything else
    # falls back to the virtual-augmentation loader.
    if args.augtype == "explicit":
        train_loader = dataloader.explict_augmentation_loader(args)
    else:
        train_loader = dataloader.virtual_augmentation_loader(args)

    # Backbone: select the first listed GPU before loading the encoder.
    torch.cuda.set_device(args.gpuid[0])
    bert, tokenizer = get_bert(args)

    # Clustering head is initialised from k-means centers.
    cluster_centers = get_kmeans_centers(
        bert, tokenizer, train_loader, args.num_classes, args.max_length
    )
    model = SCCLBert(
        bert, tokenizer, cluster_centers=cluster_centers, alpha=args.alpha
    ).cuda()

    # Optimisation and training loop.
    optimizer = get_optimizer(model, args)
    SCCLvTrainer(model, tokenizer, optimizer, train_loader, args).train()
def get_args(argv):
    """Parse the command-line options for a training run.

    Args:
        argv: List of argument strings (typically ``sys.argv[1:]``).

    Returns:
        The ``argparse.Namespace`` produced by the parser, extended with:
        ``use_gpu`` (True when the first ``--gpuid`` value is non-negative),
        and ``resPath`` / ``tensorboard``, both initialised to ``None``.
    """
    # One (flag, add_argument keyword options) pair per option, listed in the
    # order in which they are registered with the parser.
    option_table = (
        ("--train_instance", dict(type=str, default="local")),
        ("--gpuid", dict(nargs="+", type=int, default=[0],
                         help="The list of gpuid, ex:--gpuid 3 1. Negative value means cpu-only")),
        ("--seed", dict(type=int, default=0, help="")),
        ("--print_freq", dict(type=float, default=100, help="")),
        ("--resdir", dict(type=str, default="./results/")),
        ("--s3_resdir", dict(type=str, default="./results")),
        ("--bert", dict(type=str, default="distilroberta", help="")),
        ("--use_pretrain", dict(type=str, default="BERT",
                                choices=["BERT", "SBERT", "PAIRSUPCON"])),
        # Dataset
        ("--datapath", dict(type=str, default="../datasets/")),
        ("--dataname", dict(type=str, default="searchsnippets", help="")),
        ("--num_classes", dict(type=int, default=8, help="")),
        ("--max_length", dict(type=int, default=32)),
        ("--label", dict(type=str, default="label")),
        ("--text", dict(type=str, default="text")),
        ("--augmentation_1", dict(type=str, default="text1")),
        ("--augmentation_2", dict(type=str, default="text2")),
        # Learning parameters
        ("--lr", dict(type=float, default=1e-5, help="")),
        ("--lr_scale", dict(type=int, default=100, help="")),
        ("--max_iter", dict(type=int, default=1000)),
        # Contrastive learning
        ("--objective", dict(type=str, default="contrastive")),
        ("--augtype", dict(type=str, default="virtual",
                           choices=["virtual", "explicit"])),
        ("--batch_size", dict(type=int, default=400)),
        ("--temperature", dict(type=float, default=0.5,
                               help="temperature required by contrastive loss")),
        ("--eta", dict(type=float, default=1, help="")),
        # Clustering
        ("--alpha", dict(type=float, default=1.0)),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    parsed = cli.parse_args(argv)

    # Derived / placeholder fields that are not command-line options.
    parsed.use_gpu = parsed.gpuid[0] >= 0
    parsed.resPath = None
    parsed.tensorboard = None
    return parsed
if __name__ == '__main__':
    import subprocess

    args = get_args(sys.argv[1:])

    # Training is the same for every instance type; only the post-step differs.
    run(args)

    if args.train_instance == "sagemaker":
        # Copy the result directory to the configured S3 location.
        # check=True makes a non-zero exit status of the AWS CLI raise
        # CalledProcessError instead of being silently ignored, so a failed
        # upload of the results is visible to whoever launched the job.
        subprocess.run(
            ["aws", "s3", "cp", "--recursive", args.resdir, args.s3_resdir],
            check=True,
        )