-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.ini
72 lines (70 loc) · 3.71 KB
/
config.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# OCR / text-recognition settings.
[TEXT]
# NOTE(review): the value is written as a Python raw-string literal (r'...').
# A parser that does not strip quoting returns the r'...' wrapper as part of
# the value - confirm the consuming code expects that.
text_recognize_custom_config = r'--oem 3 --psm 6'
# presumably a Tesseract language code - confirm
text_recognize_tesseract_language = fin
ocr_engine = tesseract
# Windows-style absolute path, whereas the model paths in the detection
# sections are POSIX paths; machine-specific, adjust per deployment.
text_recognize_tesseract_cmd = C:/Program Files/Tesseract-OCR/tesseract.exe
# Development switches.
[DEVELOPMENT]
# Boolean-like flag, currently off; presumably toggles debug behaviour in the
# consuming code - confirm what it enables.
DEBUG = False
# Class labels, merge rules and reading-order groups for lines and regions.
# Values that span several lines use indented continuation lines so that
# configparser-style parsers attach them to the preceding key.
[CLASSIFICATION]
# Line class name -> integer id (JSON-style object).
line_classes = {"content":1, "line": 2, "unknown": 3}
# Region class name -> integer id (JSON-style object).
region_classes = {"background":0, "paragraph":1, "header": 2, "heading": 3, "footnote": 4,"marginalia": 5,
    "page-number": 6, "catch-word": 7, "signature-mark": 8, "TOC-entry": 9, "separator": 10,
    "decoration": 11, "figure": 12, "other": 13, "unknown": 14}
# Commented-out Python expressions; presumably these values are derived in the
# consuming code rather than read from this file - TODO confirm.
#L_UNKNOWN = len(self.line_classes) - 1
#R_UNKNOWN = len(self.region_classes) - 1
#line_classLabels = [x for x in self.line_classes.keys()]
#region_classLabels = [x for x in self.region_classes.keys()]
#reverse_line_class = {v:k for k,v in self.line_classes.items()}
#reverse_region_class = {v:k for k,v in self.region_classes.items()}
# List of class-name triples.
# NOTE(review): presumably [first, second, merged result] - confirm against the consumer.
merged_line_elements = [["content","content","content" ],
    ["line","line","line" ]]
# Groups of line class names (list of lists).
RO_line_groups = [["content"], ["line"]]
# List of class-name triples, same layout as merged_line_elements.
# Every name used here must be a key of region_classes.
# NOTE(review): the heading/paragraph rules appear twice in this list; the
# repeated rows are redundant and kept only to leave the list length unchanged.
merged_region_elements = [["paragraph","paragraph","paragraph" ],
    ["heading", "paragraph", "paragraph"],
    ["paragraph", "heading", "paragraph"],
    ["heading", "header", "header" ],
    ["header", "heading", "header" ],
    ["footnote", "paragraph", "footnote" ],
    ["paragraph", "footnote", "footnote" ],
    ["heading", "paragraph", "paragraph"],
    ["paragraph", "heading", "paragraph"],
    ["TOC-entry", "TOC-entry", "TOC-entry"],
    ["TOC-entry", "heading", "TOC-entry"],
    ["heading", "TOC-entry", "TOC-entry"],
    ["TOC-entry", "paragraph", "TOC-entry"],
    ["paragraph", "TOC-entry", "TOC-entry"],
    ["TOC-entry", "catch-word", "TOC-entry"],
    ["catch-word", "TOC-entry", "TOC-entry"],
    ["marginalia", "marginalia", "marginalia"]]
# Groups of region class names (list of lists).
# NOTE(review): "index" has no id in region_classes, and "background", "other"
# and "unknown" appear in no group - confirm both are intended.
RO_region_groups = [["header"],
    ["page-number"],
    ["paragraph","heading","TOC-entry","separator", "index", "decoration"],
    ["footnote"],
    ["marginalia" ],
    ["signature-mark","catch-word"],
    ["figure"]]
one_page_per_image = False
# Numeric parameters for line processing; units are not stated in this file.
[LINE]
overlap_threshold = 0.1
dist_limit = 0.1
default_x_offset = 10
default_y_offset = 10
default_y_offset_multiplier = 0.9
baseline_sample_size = 200
baseline_offset_multiplier = 0.1
line_level_multiplier = 0.2
line_boundary_margin = 10
# NOTE(review): "heigth" is a misspelling of "height". The key is left as-is
# because the consuming code presumably looks it up by this exact name.
min_line_heigth = 40
min_line_width = 20
# Region detection model settings.
[REGION DETECTION]
# Relative path to a YAML model config; presumably a Detectron2 model-zoo
# name - confirm.
region_config_file_path = COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
# The key is named *_url but the value is an absolute local path under a
# user's home directory; machine-specific, adjust per deployment.
region_checkpoint_url = /home/arive/models/model_final_4to8vo_2311.pth
# Commented-out Python expression; presumably computed in code - confirm.
#region_num_classes = len(self.region_classLabels)
# presumably the compute device used for the model - confirm
device = cpu
# Unit is not stated here; presumably pixels - confirm.
min_area = 1000
# Line detection model settings.
[LINE DETECTION]
# Relative path to a YAML model config; presumably a Detectron2 model-zoo
# name - confirm.
line_config_file_path = COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
# The key is named *_url but the value is an absolute local path under a
# user's home directory; machine-specific, adjust per deployment.
line_checkpoint_url = /home/arive/models/model_line_lines_adjusted.pth
# Commented-out Python expression; presumably computed in code - confirm.
#line_num_classes = len(self.line_classLabels)
# Absolute path into a specific Python 3.10 site-packages directory;
# machine-specific, adjust per deployment.
kraken_model_path = /projappl/project_2005488/kraken-env/lib/python3.10/site-packages/kraken/blla.mlmodel
[TABLE DETECTION]