neomatrix369 · neomatrix369 · Mar 12, 2023 · Mar 9, 2023 · Mar 9, 2023 · Mar 11, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -194,6 +194,15 @@ Enabled nightly run of build and test via Github actions
 
 [dde3172](https://github.com/neomatrix369/nlp_profiler/commit/dde31723b7cb1c1105b9df828bf7429094113de4) [@neomatrix369](https://github.com/neomatrix369) _Sun Nov 14 09:12:33 2021 +0000_
 
+---
+### GitHub branch `grammar_check` Grammar_quality_check: language tool replaced with Gingerit
+
+Implemented functionality via PR [#69](https://github.com/neomatrix369/nlp_profiler/pull/69) - details described in the body of the PR.
+
+Replaced LanguageTool (`language_tool_python`) with Gingerit to make the grammar check faster.
+
+[b5a5dda](https://github.com/neomatrix369/nlp_profiler/pull/69/commits/b5a5ddaad01f07230cf232712671d43dd9db9862) [@bitanb1999](https://github.com/bitanb1999) _Sun Mar 13 00:31:31 2023 +0000_
+
 ---
 
 Return to [README.md](README.md)
diff --git a/nlp_profiler/high_level_features/grammar_quality_check.py b/nlp_profiler/high_level_features/grammar_quality_check.py
@@ -1,36 +1,39 @@
-import language_tool_python
+# The grammar checker was changed from LanguageTool to Gingerit for better results
+from gingerit.gingerit import GingerIt
 
-language_tool = language_tool_python.LanguageTool('en-GB')
+parser = GingerIt()
 import pandas as pd
 import math
 
-from nlp_profiler.constants import NOT_APPLICABLE, NaN, DEFAULT_PARALLEL_METHOD, \
-    GRAMMAR_CHECK_SCORE_COL, GRAMMAR_CHECK_COL
+from nlp_profiler.constants import (
+    NOT_APPLICABLE,
+    NaN,
+    DEFAULT_PARALLEL_METHOD,
+    GRAMMAR_CHECK_SCORE_COL,
+    GRAMMAR_CHECK_COL,
+)
 from nlp_profiler.generate_features import generate_features
 
 
-def apply_grammar_check(heading: str,
-                        new_dataframe: pd.DataFrame,
-                        text_column: dict,
-                        parallelisation_method: str = DEFAULT_PARALLEL_METHOD):
+def apply_grammar_check(
+    heading: str, new_dataframe: pd.DataFrame, text_column: dict, parallelisation_method: str = DEFAULT_PARALLEL_METHOD
+):
     grammar_checks_steps = [
         (GRAMMAR_CHECK_SCORE_COL, text_column, grammar_check_score),
         (GRAMMAR_CHECK_COL, GRAMMAR_CHECK_SCORE_COL, grammar_quality),
     ]
-    generate_features(
-        heading, grammar_checks_steps,
-        new_dataframe, parallelisation_method
-    )
+    generate_features(heading, grammar_checks_steps, new_dataframe, parallelisation_method)
 
 
 ### Grammar check: this is a very slow process
 ### take a lot of time per text it analysis
 def grammar_check_score(text: str) -> int:
-    if (not isinstance(text, str)) or (len(text.strip()) == 0):
+    # non-string input (e.g. NaN) or blank text cannot be checked, so it gets no score
+    if not isinstance(text, str) or not text.strip():
         return NaN
-
-    matches = language_tool.check(text)
-    return len(matches)
+    matches = parser.parse(text)
+    # "corrections" holds one entry per change the parser suggests; their count is the score
+    return len(matches["corrections"])
 
 
 def grammar_quality(score: int) -> str:
@@ -40,6 +43,6 @@ def grammar_quality(score: int) -> str:
     if score == 1:
         return "1 issue"
     elif score > 1:
-        return f"{int(score)} issues"
+        return f"{score} issues"
 
     return "No issues"
diff --git a/requirements.txt b/requirements.txt
@@ -10,4 +10,5 @@ spacy >= 2.3.0,<3.0.0
 pandas < 1.3.0 # pinned to this version as higher versions conflicts with swifter version 1.0.5 and higher
 swifter >= 1.0.3
 en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.0/en_core_web_sm-2.3.0.tar.gz
-textstat >= 0.7.0
+textstat >= 0.7.0
+gingerit == 0.9.0