-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.d.ts
174 lines (160 loc) · 4.07 KB
/
index.d.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/**
* The part-of-speech of a {@link KoreanToken Korean token}.
*
* Every member's value is the string spelling its own name (for example
* `KoreanPos.Noun` has the value `"Noun"`). Members are grouped below into
* word-level, chunk-level and functional tags.
*
* NOTE(review): this is declared as a `const enum`; whether a runtime
* `KoreanPos` object is also exported is not visible from this declaration.
* Confirm before relying on it in projects compiled with `isolatedModules`.
*/
export const enum KoreanPos {
// Word-level POS
Noun = "Noun",
Verb = "Verb",
Adjective = "Adjective",
Adverb = "Adverb",
Determiner = "Determiner",
Exclamation = "Exclamation",
Josa = "Josa",
Eomi = "Eomi",
PreEomi = "PreEomi",
Conjunction = "Conjunction",
Modifier = "Modifier",
VerbPrefix = "VerbPrefix",
Suffix = "Suffix",
Unknown = "Unknown",
// Chunk-level POS
Korean = "Korean",
Foreign = "Foreign",
Number = "Number",
KoreanParticle = "KoreanParticle",
Alpha = "Alpha",
Punctuation = "Punctuation",
Hashtag = "Hashtag",
ScreenName = "ScreenName",
Email = "Email",
URL = "URL",
CashTag = "CashTag",
// Functional POS
Space = "Space",
Others = "Others",
}
/**
* A token extracted by {@link tokenize}.
*
* @typeParam Pos The part-of-speech carried by the token. Defaults to the
* whole {@link KoreanPos} enum, i.e. a token of any part-of-speech.
*/
export interface KoreanToken<Pos extends KoreanPos = KoreanPos> {
/**
* Korean {@link KoreanPos part-of-speech}.
*/
readonly pos: Pos;
/**
* The text which makes up the token.
*/
readonly text: string;
/**
* The offset from the start of the input string where the token starts.
*/
readonly offset: number;
/**
* The length of the token, equivalent to `text.length`.
*/
readonly length: number;
/**
* The stem of the adjective, adverb, or verb represented by the token.
*
* The type is `string | undefined` only when `Pos` is
* {@link KoreanPos.Adjective}, {@link KoreanPos.Adverb} or
* {@link KoreanPos.Verb}; for every other part-of-speech it is always
* `undefined`. With the default `Pos` it resolves to `string | undefined`.
*/
readonly stem: Pos extends
KoreanPos.Adjective | KoreanPos.Adverb | KoreanPos.Verb
? string | undefined
: undefined;
}
/**
* A sentence extracted by {@link splitSentences}.
*
* All fields are read-only.
*/
export interface KoreanSentence {
/**
* The text which makes up the sentence.
*/
readonly text: string;
/**
* The offset from the start of the input string where the sentence starts.
*/
readonly offset: number;
/**
* The length of the sentence, equivalent to `text.length`.
*/
readonly length: number;
}
/**
* A phrase extracted by {@link extractPhrases}.
*
* All fields are read-only.
*/
export interface KoreanPhrase {
/**
* The {@link KoreanToken tokens} making up the phrase. Each token uses the
* default type parameter, so it may carry any {@link KoreanPos}.
*/
readonly tokens: KoreanToken[];
/**
* The {@link KoreanPos part-of-speech} of the phrase in its sentence.
*/
readonly pos: KoreanPos;
/**
* The text which makes up the phrase, equal to the concatenation of all of
* its tokens.
*/
readonly text: string;
/**
* The offset from the start of the input string where the phrase starts.
*/
readonly offset: number;
/**
* The length of the phrase, equivalent to `text.length`.
*/
readonly length: number;
}
/**
* Options given to {@link extractPhrases}.
*
* Both fields are optional; the behavior when a field is omitted is decided
* by the implementation and is not specified by this declaration.
*/
export interface ExtractPhrasesOptions {
/**
* NOTE(review): presumably asks the extractor to drop phrases it considers
* spam; exact semantics and default are not visible here - confirm against
* the implementation.
*/
readonly filterSpam?: boolean;
/**
* NOTE(review): presumably controls whether hashtags are reported as
* phrases; exact semantics and default are not visible here - confirm
* against the implementation.
*/
readonly enableHashtags?: boolean;
}
/**
* Initializes the Open Korean Text API.
*
* Calling this function is not necessary as the initialization will be
* performed automatically, but some users may wish to initialize the API
* when they choose to do so.
*
* Takes no arguments and returns nothing.
*/
export function init(): void;
/**
* Normalizes the given text for further processing by e.g.
* {@link tokenize}.
*
* @param text The text to normalize.
* @returns The normalized text.
*/
export function normalize(text: string): string;
/**
* Tokenizes the given text into a sequence of {@link KoreanToken tokens},
* which include {@link KoreanPos part-of-speech} information.
*
* @param text The text to tokenize.
* @returns The tokens found in `text`.
*/
export function tokenize(text: string): KoreanToken[];
/**
* Same as {@link tokenize}, but returns the top `n` candidates instead of
* the single best candidate.
*
* @param text The text to tokenize.
* @param n The number of candidates to return.
* @returns An array of token sequences.
* NOTE(review): presumably each inner array is one candidate tokenization;
* confirm the nesting against the implementation.
*/
export function tokenizeTopN(text: string, n: number): KoreanToken[][];
/**
* Transforms a list of strings back into a string.
*
* @param tokens The strings to join back together; any iterable of strings
* is accepted, not only arrays.
* @returns The resulting text.
*/
export function detokenize(tokens: Iterable<string>): string;
/**
* Splits the given text into a sequence of
* {@link KoreanSentence sentences}.
*
* @param text The text to split.
* @returns The sentences found in `text`.
*/
export function splitSentences(text: string): KoreanSentence[];
/**
 * Extracts the {@link KoreanPhrase phrases} in the given input.
 *
 * The input is either raw text or a sequence of already-produced
 * {@link KoreanToken tokens}. Both forms share a single signature with a
 * union-typed first parameter, so a value typed as
 * `string | Iterable<KoreanToken>` can be passed without narrowing it first.
 *
 * @param input The text, or the tokens, from which phrases are extracted.
 * @param options Optional {@link ExtractPhrasesOptions settings} for the
 * extraction.
 * @returns The phrases found in `input`.
 */
export function extractPhrases(
  input: string | Iterable<KoreanToken>,
  options?: ExtractPhrasesOptions,
): KoreanPhrase[];