-
Notifications
You must be signed in to change notification settings - Fork 1
/
tests.py
40 lines (34 loc) · 1.49 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import codecs
# createTestFile reads the input file and creates a new one; every line of the new file holds one sentence.
# Arguments: string inputFileName, string outputFileName
from language_recognizer.langRecognizer import recognize_language, number_of_ngrams
def createTestFile(inputFileName, outputFileName):
    """Split a UTF-8 text file into sentences and write one sentence per line.

    The whole input is read, line breaks are removed (without inserting a
    space), double quotes become spaces and semicolons are treated as
    sentence terminators. The text is then split on '.', and every non-empty
    fragment is written to the output file followed by '.' and '\\r\\n'.
    Fragments that consist only of whitespace are still written.

    Args:
        inputFileName: path of the UTF-8 encoded text file to read.
        outputFileName: path of the UTF-8 file to create or overwrite.
    """
    # Read everything up front; the context manager closes the handle even
    # if decoding fails part-way through.
    with codecs.open(inputFileName, encoding="utf-8") as inputFile:
        text = inputFile.read()

    text = text.replace('. ', '.')  # sentences must not start with a space
    text = text.replace('\r\n', '')
    text = text.replace('\n', '')
    text = text.replace('"', ' ')
    text = text.replace(';', '.')  # a semicolon ends a sentence as well
    # NOTE(review): as written this replaces a single space with itself;
    # presumably it was meant to collapse double spaces - confirm.
    text = text.replace(' ', ' ')
    text = text.replace('\r', '')

    # The output is opened only after the input has been read, so an
    # unreadable input no longer leaves a truncated output file behind.
    with codecs.open(outputFileName, 'w', encoding="utf-8") as outputFile:
        for sentence in text.split('.'):
            if sentence != "":
                outputFile.write(sentence + '.' + '\r\n')
def testFile(inputFileName, outputFileName, vectors):
    """Run language recognition on every line of a file and store the results.

    Each line of the input file is passed to ``recognize_language`` together
    with ``vectors`` and the module-level ``number_of_ngrams``; the first
    element of the returned pair is written to the output file, one result
    per line, terminated by '\\r\\n'.

    Args:
        inputFileName: path of the UTF-8 encoded file to classify line by line.
        outputFileName: path of the UTF-8 file to create or overwrite.
        vectors: passed through unchanged to ``recognize_language``
            (presumably the language n-gram profiles - confirm against
            langRecognizer).
    """
    # Context managers close both files even when recognition or writing
    # raises in the middle of the loop.
    with codecs.open(inputFileName, encoding="utf-8") as inputFile:
        with codecs.open(outputFileName, 'w+', encoding="utf-8") as outputFile:
            for line in inputFile:
                # The second element of the result is not used here.
                language, _ = recognize_language(line, vectors, number_of_ngrams)
                outputFile.write(language + '\r\n')
# createTestFile("tests_create/cze1.txt", "tests_created/cze1.txt")
# Script entry point: classifies the prepared sentence file line by line.
# NOTE(review): testFile is declared with three parameters
# (inputFileName, outputFileName, vectors) but is called here with only two,
# so this call raises TypeError - the vectors argument must be supplied.
if __name__ == "__main__":
    testFile("tests_created/cze1.txt", "test_results/cze1.txt")