Merge pull request #141 from VikParuchuri/dev
Fix transformers bug
VikParuchuri authored Jun 30, 2024
2 parents 0d7c170 + 7301718 commit f7c6c04
Showing 11 changed files with 525 additions and 481 deletions.
12 changes: 8 additions & 4 deletions README.md
@@ -39,16 +39,20 @@ Surya is named for the [Hindu sun god](https://en.wikipedia.org/wiki/Surya), who
| Scanned Form | [Image](static/images/funsd.png) | [Image](static/images/funsd_text.jpg) | [Image](static/images/funsd_layout.jpg) | [Image](static/images/funsd_reading.jpg) |
| Textbook | [Image](static/images/textbook.jpg) | [Image](static/images/textbook_text.jpg) | [Image](static/images/textbook_layout.jpg) | [Image](static/images/textbook_order.jpg) |

+ # Hosted API
+
+ There is a hosted API for all surya models available [here](https://www.datalab.to/):
+
+ - Works with PDF, images, word docs, and powerpoints
+ - Consistent speed, with no latency spikes
+ - High reliability and uptime
+
# Commercial usage

I want surya to be as widely accessible as possible, while still funding my development/training costs. Research and personal usage is always okay, but there are some restrictions on commercial usage.

The weights for the models are licensed `cc-by-nc-sa-4.0`, but I will waive that for any organization under $5M USD in gross revenue in the most recent 12-month period AND under $5M in lifetime VC/angel funding raised. If you want to remove the GPL license requirements (dual-license) and/or use the weights commercially over the revenue limit, check out the options [here](https://www.datalab.to).

- # Hosted API
-
- There is a hosted API for all surya models available [here](https://www.datalab.to/). It's currently in beta, and I'm working on optimizing speed.
-
# Installation

You'll need python 3.9+ and PyTorch. You may need to install the CPU version of torch first if you're not using a Mac or a GPU machine. See [here](https://pytorch.org/get-started/locally/) for more details.
2 changes: 1 addition & 1 deletion ocr_text.py
@@ -1,3 +1,4 @@
+ import os
import argparse
import json
from collections import defaultdict
@@ -13,7 +14,6 @@
from surya.ocr import run_ocr
from surya.postprocessing.text import draw_text_on_image
from surya.settings import settings
- import os


def main():
964 changes: 498 additions & 466 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "surya-ocr"
version = "0.4.12"
version = "0.4.14"
description = "OCR, layout, reading order, and line detection in 90+ languages"
authors = ["Vik Paruchuri <[email protected]>"]
readme = "README.md"
2 changes: 1 addition & 1 deletion reading_order.py
@@ -1,3 +1,4 @@
+ import os
import argparse
import copy
import json
@@ -12,7 +13,6 @@
from surya.ordering import batch_ordering
from surya.postprocessing.heatmap import draw_polys_on_image
from surya.settings import settings
- import os


def main():
3 changes: 2 additions & 1 deletion surya/model/detection/segformer.py
@@ -435,7 +435,7 @@ def forward(
return encoder_outputs

class SegformerForRegressionMask(SegformerForSemanticSegmentation):
- def __init__(self, config):
+ def __init__(self, config, **kwargs):
super().__init__(config)
self.segformer = SegformerModel(config)
self.decode_head = SegformerForMaskDecodeHead(config)
@@ -446,6 +446,7 @@ def __init__(self, config):
def forward(
self,
pixel_values: torch.FloatTensor,
+ **kwargs
) -> Union[Tuple, SemanticSegmenterOutput]:

encoder_hidden_states = self.segformer(
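The same pattern recurs in the ordering and recognition model files below: each custom subclass gains a `**kwargs` catch-all on `__init__` and/or `forward`. As a hedged illustration of why that guards against the transformers bug (stand-in class and keyword names, not surya's or transformers' actual internals): when a newer library version starts passing extra keyword arguments into constructors or `forward`, a strict signature raises `TypeError`, while a `**kwargs` signature simply ignores the options the subclass does not use.

```python
# Minimal sketch, assuming the failure mode is an unexpected keyword argument.
# BaseModel, StrictModel, TolerantModel and the "attn_implementation" kwarg are
# illustrative stand-ins, not the actual surya or transformers classes.

class BaseModel:
    def __init__(self, config):
        self.config = config


class StrictModel(BaseModel):
    def __init__(self, config):            # breaks when callers add new kwargs
        super().__init__(config)


class TolerantModel(BaseModel):
    def __init__(self, config, **kwargs):  # extra kwargs are accepted and ignored
        super().__init__(config)

    def forward(self, pixel_values, **kwargs):
        return pixel_values


config = {"hidden_size": 256}
TolerantModel(config, attn_implementation="sdpa")    # works
try:
    StrictModel(config, attn_implementation="sdpa")  # raises TypeError
except TypeError as err:
    print(err)  # e.g. "__init__() got an unexpected keyword argument 'attn_implementation'"
```

As in the diff, the extra keywords are dropped rather than forwarded: the subclasses still pass only `config` and the existing named arguments on to their parents.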
3 changes: 2 additions & 1 deletion surya/model/ordering/decoder.py
@@ -487,7 +487,7 @@ class MBartOrder(MBartForCausalLM):
config_class = MBartOrderConfig
_tied_weights_keys = []

- def __init__(self, config):
+ def __init__(self, config, **kwargs):
config = copy.deepcopy(config)
config.is_decoder = True
config.is_encoder_decoder = False
@@ -515,6 +515,7 @@ def forward(
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
+ **kwargs
) -> Union[Tuple, CausalLMOutputWithCrossAttentions]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
6 changes: 3 additions & 3 deletions surya/model/ordering/encoder.py
@@ -15,7 +15,7 @@ class VariableDonutSwinEmbeddings(DonutSwinEmbeddings):
Construct the patch and position embeddings. Optionally, also the mask token.
"""

- def __init__(self, config, use_mask_token=False):
+ def __init__(self, config, use_mask_token=False, **kwargs):
super().__init__(config, use_mask_token)

self.patch_embeddings = DonutSwinPatchEmbeddings(config)
@@ -37,7 +37,7 @@ def __init__(self, config, use_mask_token=False):
self.dropout = nn.Dropout(config.hidden_dropout_prob)

def forward(
- self, pixel_values: Optional[torch.FloatTensor], bool_masked_pos: Optional[torch.BoolTensor] = None
+ self, pixel_values: Optional[torch.FloatTensor], bool_masked_pos: Optional[torch.BoolTensor] = None, **kwargs
) -> Tuple[torch.Tensor]:

embeddings, output_dimensions = self.patch_embeddings(pixel_values)
@@ -68,7 +68,7 @@ def forward(

class VariableDonutSwinModel(DonutSwinModel):
config_class = VariableDonutSwinConfig
- def __init__(self, config, add_pooling_layer=True, use_mask_token=False):
+ def __init__(self, config, add_pooling_layer=True, use_mask_token=False, **kwargs):
super().__init__(config)
self.config = config
self.num_layers = len(config.depths)
3 changes: 2 additions & 1 deletion surya/model/recognition/decoder.py
@@ -436,7 +436,7 @@ class MBartMoE(MBartForCausalLM):
config_class = MBartMoEConfig
_tied_weights_keys = ["lm_head.weight"]

- def __init__(self, config):
+ def __init__(self, config, **kwargs):
config = copy.deepcopy(config)
config.is_decoder = True
config.is_encoder_decoder = False
@@ -467,6 +467,7 @@ def forward(
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
+ **kwargs
) -> Union[Tuple, CausalLMOutputWithCrossAttentions]:
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

3 changes: 2 additions & 1 deletion surya/model/recognition/encoder.py
@@ -391,7 +391,7 @@ def forward(

class VariableDonutSwinModel(DonutSwinModel):
config_class = VariableDonutSwinConfig
- def __init__(self, config, add_pooling_layer=True, use_mask_token=False):
+ def __init__(self, config, add_pooling_layer=True, use_mask_token=False, **kwargs):
super().__init__(config)
self.config = config
self.num_layers = len(config.depths)
@@ -413,6 +413,7 @@ def forward(
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
+ **kwargs
) -> Union[Tuple, DonutSwinModelOutput]:
r"""
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`):
6 changes: 5 additions & 1 deletion surya/postprocessing/heatmap.py
@@ -108,7 +108,11 @@ def detect_boxes(linemap, text_threshold, low_text):
segmap[labels == k] = 255
x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
- niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
+ try:
+     niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
+ except ValueError:
+     # Overflow when size is too large
+     niter = 0
sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1

# boundary checks
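For context on the `detect_boxes` change: `stats` comes from `cv2.connectedComponentsWithStats`, whose values are int32, so for a very large component the product `size * min(w, h)` can presumably wrap around to a negative number, and `math.sqrt` of a negative value raises `ValueError` ("math domain error"), which is what the new `except` branch absorbs. A small standalone illustration with assumed numbers (numpy scalars standing in for the cv2 stats):

```python
import math
import numpy as np

# Assumed values chosen so the int32 product overflows; NumPy warns about the
# overflow and the wrapped result goes negative, as could happen for a very
# large connected component.
size = np.int32(2_000_000)              # component area
w, h = np.int32(1500), np.int32(1400)   # component width and height

product = size * min(w, h)              # wraps past 2**31 - 1 -> negative
try:
    niter = int(math.sqrt(product / (w * h)) * 2)
except ValueError:
    niter = 0                           # same fallback the patch introduces

print(product, niter)                   # -1494967296 0
```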
