Skip to content

Commit

Permalink
add RNN models
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Aug 25, 2023
1 parent e293cbf commit c04af16
Show file tree
Hide file tree
Showing 7 changed files with 648 additions and 363,748 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{
"model_name": "funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo",
"architecture": "BidLSTM_CRF_FEATURES",
"embeddings_name": "glove-840B",
"char_vocab_size": 148,
"case_vocab_size": 8,
"char_embedding_size": 25,
"num_char_lstm_units": 25,
"max_char_length": 30,
"features_vocabulary_size": 12,
"features_indices": [
9,
10,
11,
12,
13,
14
],
"features_embedding_size": 4,
"features_lstm_units": 4,
"max_sequence_length": 500,
"word_embedding_size": 1324,
"num_word_lstm_units": 100,
"case_embedding_size": 5,
"dropout": 0.5,
"recurrent_dropout": 0.5,
"use_crf": true,
"use_chain_crf": false,
"fold_number": 1,
"batch_size": 30,
"transformer_name": null,
"use_ELMo": true,
"features_map_to_index": {
"9": {
"LINEEND": 1,
"LINEIN": 2,
"LINESTART": 3
},
"10": {
"ALLCAP": 13,
"INITCAP": 14,
"NOCAPS": 15
},
"11": {
"ALLDIGIT": 25,
"CONTAINSDIGITS": 26,
"NODIGIT": 27
},
"12": {
"0": 37,
"1": 38
},
"13": {
"0": 49,
"1": 50
},
"14": {
"COMMA": 61,
"DOT": 62,
"ENDBRACKET": 63,
"HYPHEN": 64,
"NOPUNCT": 65,
"OPENBRACKET": 66,
"PUNCT": 67,
"QUOTE": 68
}
}
}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
{
"padding": true,
"return_lengths": true,
"return_word_embeddings": true,
"return_casing": false,
"return_features": true,
"return_chars": true,
"return_bert_embeddings": false,
"vocab_char": {
"<PAD>": 0,
"<UNK>": 1,
"\"": 2,
"#": 3,
"%": 4,
"&": 5,
"'": 6,
"(": 7,
")": 8,
"*": 9,
"+": 10,
",": 11,
"-": 12,
".": 13,
"/": 14,
"0": 15,
"1": 16,
"2": 17,
"3": 18,
"4": 19,
"5": 20,
"6": 21,
"7": 22,
"8": 23,
"9": 24,
":": 25,
";": 26,
"<": 27,
"=": 28,
">": 29,
"@": 30,
"A": 31,
"B": 32,
"C": 33,
"D": 34,
"E": 35,
"F": 36,
"G": 37,
"H": 38,
"I": 39,
"J": 40,
"K": 41,
"L": 42,
"M": 43,
"N": 44,
"O": 45,
"P": 46,
"Q": 47,
"R": 48,
"S": 49,
"T": 50,
"U": 51,
"V": 52,
"W": 53,
"X": 54,
"Y": 55,
"Z": 56,
"[": 57,
"\\": 58,
"]": 59,
"_": 60,
"`": 61,
"a": 62,
"b": 63,
"c": 64,
"d": 65,
"e": 66,
"f": 67,
"g": 68,
"h": 69,
"i": 70,
"j": 71,
"k": 72,
"l": 73,
"m": 74,
"n": 75,
"o": 76,
"p": 77,
"q": 78,
"r": 79,
"s": 80,
"t": 81,
"u": 82,
"v": 83,
"w": 84,
"x": 85,
"y": 86,
"z": 87,
"{": 88,
"\u00a3": 89,
"\u00a8": 90,
"\u00ab": 91,
"\u00ae": 92,
"\u00b0": 93,
"\u00b4": 94,
"\u00b5": 95,
"\u00b8": 96,
"\u00bb": 97,
"\u00c7": 98,
"\u00c9": 99,
"\u00ce": 100,
"\u00d6": 101,
"\u00d8": 102,
"\u00df": 103,
"\u00e0": 104,
"\u00e1": 105,
"\u00e2": 106,
"\u00e3": 107,
"\u00e4": 108,
"\u00e5": 109,
"\u00e7": 110,
"\u00e8": 111,
"\u00e9": 112,
"\u00ea": 113,
"\u00eb": 114,
"\u00ed": 115,
"\u00ee": 116,
"\u00f1": 117,
"\u00f2": 118,
"\u00f3": 119,
"\u00f4": 120,
"\u00f5": 121,
"\u00f6": 122,
"\u00f8": 123,
"\u00fa": 124,
"\u00fc": 125,
"\u0101": 126,
"\u0105": 127,
"\u0107": 128,
"\u011b": 129,
"\u011f": 130,
"\u0131": 131,
"\u0142": 132,
"\u0159": 133,
"\u015f": 134,
"\u016f": 135,
"\u0229": 136,
"\u03b2": 137,
"\u03b3": 138,
"\u0430": 139,
"\u1e4d": 140,
"\u2019": 141,
"\u201c": 142,
"\u201d": 143,
"\u2020": 144,
"\u20ac": 145,
"\u2116": 146,
"\u25a1": 147
},
"vocab_tag": {
"<PAD>": 0,
"B-<affiliation>": 1,
"B-<funderName>": 2,
"B-<grantName>": 3,
"B-<grantNumber>": 4,
"B-<institution>": 5,
"B-<person>": 6,
"B-<programName>": 7,
"B-<projectName>": 8,
"I-<affiliation>": 9,
"I-<funderName>": 10,
"I-<grantName>": 11,
"I-<grantNumber>": 12,
"I-<institution>": 13,
"I-<person>": 14,
"I-<programName>": 15,
"I-<projectName>": 16,
"O": 17
},
"vocab_case": [
"<PAD>",
"numeric",
"allLower",
"allUpper",
"initialUpper",
"other",
"mainly_numeric",
"contains_digit"
],
"max_char_length": 30,
"feature_preprocessor": {
"features_vocabulary_size": 12,
"features_indices": [
9,
10,
11,
12,
13,
14
],
"features_map_to_index": {
"9": {
"LINEEND": 1,
"LINEIN": 2,
"LINESTART": 3
},
"10": {
"ALLCAP": 13,
"INITCAP": 14,
"NOCAPS": 15
},
"11": {
"ALLDIGIT": 25,
"CONTAINSDIGITS": 26,
"NODIGIT": 27
},
"12": {
"0": 37,
"1": 38
},
"13": {
"0": 49,
"1": 50
},
"14": {
"COMMA": 61,
"DOT": 62,
"ENDBRACKET": 63,
"HYPHEN": 64,
"NOPUNCT": 65,
"OPENBRACKET": 66,
"PUNCT": 67,
"QUOTE": 68
}
}
},
"indice_tag": {
"0": "<PAD>",
"1": "B-<affiliation>",
"2": "B-<funderName>",
"3": "B-<grantName>",
"4": "B-<grantNumber>",
"5": "B-<institution>",
"6": "B-<person>",
"7": "B-<programName>",
"8": "B-<projectName>",
"9": "I-<affiliation>",
"10": "I-<funderName>",
"11": "I-<grantName>",
"12": "I-<grantNumber>",
"13": "I-<institution>",
"14": "I-<person>",
"15": "I-<programName>",
"16": "I-<projectName>",
"17": "O"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{
"model_name": "funding-acknowledgement-BidLSTM_CRF_FEATURES",
"architecture": "BidLSTM_CRF_FEATURES",
"embeddings_name": "glove-840B",
"char_vocab_size": 148,
"case_vocab_size": 8,
"char_embedding_size": 25,
"num_char_lstm_units": 25,
"max_char_length": 30,
"features_vocabulary_size": 12,
"features_indices": [
9,
10,
11,
12,
13,
14
],
"features_embedding_size": 4,
"features_lstm_units": 4,
"max_sequence_length": 800,
"word_embedding_size": 300,
"num_word_lstm_units": 100,
"case_embedding_size": 5,
"dropout": 0.5,
"recurrent_dropout": 0.5,
"use_crf": true,
"use_chain_crf": false,
"fold_number": 1,
"batch_size": 30,
"transformer_name": null,
"use_ELMo": false,
"features_map_to_index": {
"9": {
"LINEEND": 1,
"LINEIN": 2,
"LINESTART": 3
},
"10": {
"ALLCAP": 13,
"INITCAP": 14,
"NOCAPS": 15
},
"11": {
"ALLDIGIT": 25,
"CONTAINSDIGITS": 26,
"NODIGIT": 27
},
"12": {
"0": 37,
"1": 38
},
"13": {
"0": 49,
"1": 50
},
"14": {
"COMMA": 61,
"DOT": 62,
"ENDBRACKET": 63,
"HYPHEN": 64,
"NOPUNCT": 65,
"OPENBRACKET": 66,
"PUNCT": 67,
"QUOTE": 68
}
}
}
Binary file not shown.
Loading

0 comments on commit c04af16

Please sign in to comment.