
708 replace fuzzywuzzy and textdistance with rapidfuzz for plain evaluation metrics #709

Merged
34 commits merged on Oct 2, 2024
Changes from 7 commits
21db4fd
replacing fuzzywuzzy with rapidfuzz
prvenk Sep 13, 2024
f7a69f6
adding types
prvenk Sep 14, 2024
bd12004
adding back testdistance
prvenk Sep 14, 2024
8052bee
adding back testdistance
prvenk Sep 14, 2024
79d965d
fixing tests
prvenk Sep 14, 2024
fab4d7f
Main branch update with a stable release (#704)
julia-meshcheryakova Sep 16, 2024
a1d0ceb
Merge branch 'development' into 708-replace-fuzzywuzzy-with-rapidfuzz
julia-meshcheryakova Sep 16, 2024
1b808d8
Merge branch 'development' into 708-replace-fuzzywuzzy-with-rapidfuzz
julia-meshcheryakova Sep 16, 2024
fff7a4f
Merge branch 'development' into 708-replace-fuzzywuzzy-with-rapidfuzz
prvenk Sep 24, 2024
27c8961
making edits based on comments
prvenk Sep 24, 2024
398a51c
making edits based on comments
prvenk Sep 24, 2024
fba2e46
addressing all comments
prvenk Sep 24, 2024
d1cb1aa
adding rouge
prvenk Sep 24, 2024
3ce0c10
adding rouge
prvenk Sep 24, 2024
a1fcf24
rouge tests
prvenk Sep 24, 2024
5a959c8
rouge tests
prvenk Sep 24, 2024
0cd3cc9
lint
prvenk Sep 24, 2024
8a0f487
fuzzy to fuzzy score
prvenk Sep 24, 2024
e968259
fuzzy to fuzzy score
prvenk Sep 24, 2024
70cb7f0
retaining cosine
prvenk Sep 25, 2024
99b2d74
adding cosine ochiai
prvenk Sep 25, 2024
2f3e385
adding docs and docstrings
prvenk Sep 25, 2024
cf47d10
adding cosine ochiai back in docs
prvenk Sep 25, 2024
a000fc5
reqs
prvenk Oct 1, 2024
73a4fcb
Revert "reqs"
prvenk Oct 1, 2024
0a5bb2a
Reapply "reqs"
prvenk Oct 1, 2024
6f084af
conflicts
prvenk Oct 1, 2024
1ecd5d6
conflicts
prvenk Oct 1, 2024
22b71e3
conflicts
prvenk Oct 1, 2024
2d2bb7e
conflicts
prvenk Oct 1, 2024
7cb4e4a
conflicts
prvenk Oct 2, 2024
6c8fc49
conflicts
prvenk Oct 2, 2024
a621fd7
conflicts
prvenk Oct 2, 2024
d117f17
req update
prvenk Oct 2, 2024
4 changes: 2 additions & 2 deletions docs/evaluation-metrics.md
@@ -100,13 +100,13 @@ of elements in the union of the two sets.
The Levenshtein distance is a measure of similarity between two strings. The Levenshtein distance is calculated as the
minimum number of insertions, deletions, or substitutions required to transform one string into the other.

### FuzzyWuzzy similarity
### RapidFuzz similarity

| Configuration Key | Calculation Base | Possible Values |
| ----------------- | -------------------- | --------------------- |
| `fuzzy` | `actual`, `expected` | Integer (Fuzzy score) |

This metric is backed by the [FuzzyWuzzy Python package](https://pypi.org/project/fuzzywuzzy/).
This metric is backed by the [RapidFuzz Python package](https://github.com/rapidfuzz/RapidFuzz).
Calculates the fuzzy score between two documents using the Levenshtein distance.

## BERT-based semantic similarity
2 changes: 1 addition & 1 deletion rag_experiment_accelerator/evaluation/eval.py
@@ -61,7 +61,7 @@ def compute_metrics(
- "hamming": Hamming distance
- "jaccard": Jaccard similarity
- "levenshtein": Levenshtein distance
- "fuzzy": FuzzyWuzzy similarity
- "fuzzy": RapidFuzz similarity
- "bert_all_MiniLM_L6_v2": BERT-based semantic similarity (MiniLM L6 v2 model)
- "bert_base_nli_mean_tokens": BERT-based semantic similarity (base model, mean tokens)
- "bert_large_nli_mean_tokens": BERT-based semantic similarity (large model, mean tokens)
116 changes: 63 additions & 53 deletions rag_experiment_accelerator/evaluation/plain_metrics.py
@@ -1,153 +1,163 @@
import evaluate
import textdistance
from fuzzywuzzy import fuzz

algorithms = textdistance.algorithms
from rapidfuzz import fuzz
from rapidfuzz import distance as d
from textdistance import algorithms as alg


# https://huggingface.co/spaces/evaluate-metric/bleu
def bleu(predictions, references):
bleu = evaluate.load("bleu")

results = bleu.compute(predictions=predictions, references=references, max_order=2)
# multiplying by 100 to maintain consistency with previous implementation
return results["bleu"] * 100


def fuzzy(doc1, doc2):
def bleu(predictions: list[str], references: list[str]) -> float:
"""
Calculates the fuzzy score between two documents.
Computes the BLEU score between a list of candidate translations and a list of reference translations.

Parameters:
doc1 (str): The first document to compare.
doc2 (str): The second document to compare.
Args:
predictions (list): A list of candidate translations.
references (list): A list of reference translations.

Returns:
int: The fuzzy score between the two documents.
float: The BLEU score between the candidate and reference translations.
"""
differences = fuzzy_compare_values(doc1, doc2)
bleu = evaluate.load("bleu")

return int(sum(differences) / len(differences))
results = bleu.compute(predictions=predictions, references=references, max_order=2)
# multiplying by 100 to maintain consistency with previous implementation
return results["bleu"] * 100


def fuzzy_compare_values(value1, value2) -> list[float]:
def fuzzy(str1: str, str2: str, match_type: str = "token_set_ratio") -> float:
"""
Compares two values using fuzzy string matching and appends the similarity score to a list of differences.
Compares two strings using fuzzy string matching and returns a similarity score.

Args:
value1 (str): The first value to compare.
value2 (str): The second value to compare.
str1 (str): The first string to compare.
str2 (str): The second string to compare.
match_type (str): The type of fuzzy string matching to use. Options include:
- 'ratio'
- 'token_set_ratio'
- 'token_sort_ratio'
- 'partial_ratio'
- 'partial_token_sort_ratio'
- 'partial_token_set_ratio'
- 'WRatio'
- 'QRatio'

Returns:
float: The similarity score between the two strings (0-100).

Raises:
ValueError: If the match type is not recognized.
"""
similarity_score = [fuzz.token_set_ratio(value1, value2)]
try:
fuzzy_match_fn = getattr(fuzz, match_type)
except AttributeError:
raise ValueError(f"Match type '{match_type}' is not recognized.")

similarity_score = fuzzy_match_fn(str1, str2)
return similarity_score


def levenshtein(value1, value2):
def levenshtein(str1: str, str2: str) -> int:
"""
Calculates the Levenshtein distance between two strings and returns the normalized similarity score as a percentage.

Args:
value1 (str): The first string to compare.
value2 (str): The second string to compare.
str1 (str): The first string to compare.
str2 (str): The second string to compare.

Returns:
int: The normalized similarity score as a percentage.
"""
score = int(algorithms.levenshtein.normalized_similarity(value1, value2) * 100)
score = int(d.Levenshtein.normalized_similarity(str1, str2) * 100)
return score


def jaccard(value1, value2):
def jaccard(str1: str, str2: str) -> int:
"""
Calculates the Jaccard similarity score between two sets of values.

Args:
value1 (set): The first set of values.
value2 (set): The second set of values.
str1 (set): The first set of values.
str2 (set): The second set of values.

Returns:
int: The Jaccard similarity score between the two sets of values, as a percentage.
"""
score = int(algorithms.jaccard.normalized_similarity(value1, value2) * 100)
score = int(alg.jaccard.normalized_similarity(str1, str2) * 100)
return score


def hamming(value1, value2):
def hamming(str1: str, str2: str) -> int:
"""
Calculates the Hamming similarity score between two values.

Args:
value1 (str): The first value to compare.
value2 (str): The second value to compare.
str1 (str): The first value to compare.
str2 (str): The second value to compare.

Returns:
int: The Hamming similarity score between the two values, as a percentage.
"""
score = int(algorithms.hamming.normalized_similarity(value1, value2) * 100)
score = int(d.Hamming.normalized_similarity(str1, str2) * 100)
return score


def jaro_winkler(value1, value2):
def jaro_winkler(str1: str, str2: str) -> int:
"""
Calculates the Jaro-Winkler similarity score between two strings.

Args:
value1 (str): The first string to compare.
value2 (str): The second string to compare.
str1 (str): The first string to compare.
str2 (str): The second string to compare.

Returns:
int: The Jaro-Winkler similarity score between the two strings, as an integer between 0 and 100.
"""
score = int(algorithms.jaro_winkler.normalized_similarity(value1, value2) * 100)
score = int(d.JaroWinkler.normalized_similarity(str1, str2) * 100)
return score


def cosine(value1, value2):
def cosine(str1: str, str2: str) -> int:
"""
Calculates the cosine similarity (Ochiai coefficient) between two strings
using token-frequency vectors

https://en.wikipedia.org/wiki/Cosine_similarity.

Args:
value1 (list): The first vector.
value2 (list): The second vector.
str1 (str): The first string to compare.
str2 (str): The second string to compare.

Returns:
int: The cosine similarity score between the two vectors, as a percentage.
"""
score = int(algorithms.cosine.normalized_similarity(value1, value2) * 100)
score = int(alg.cosine.normalized_similarity(str1, str2) * 100)
return score


def lcsseq(value1, value2):
def lcsseq(str1: str, str2: str) -> int:
"""
Computes the longest common subsequence (LCS) similarity score between two input strings.

Args:
value1 (str): The first input string.
value2 (str): The second input string.
str1 (str): The first input string.
str2 (str): The second input string.

Returns:
int: The LCS similarity score between the two input strings, as a percentage (0-100).
"""
score = int(algorithms.lcsseq.normalized_similarity(value1, value2) * 100)
score = int(d.LCSseq.normalized_similarity(str1, str2) * 100)
return score


def lcsstr(value1, value2):
def lcsstr(str1: str, str2: str) -> int:
"""
Calculates the longest common substring (LCS) similarity score between two strings.

Args:
value1 (str): The first string to compare.
value2 (str): The second string to compare.
str1 (str): The first string to compare.
str2 (str): The second string to compare.

Returns:
int: The LCS similarity score between the two strings, as a percentage (0-100).
"""
score = int(algorithms.lcsstr.normalized_similarity(value1, value2) * 100)
score = int(alg.lcsstr.normalized_similarity(str1, str2) * 100)
return score
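For readers unfamiliar with the metric these functions now delegate to, here is a stdlib-only sketch of normalized Levenshtein similarity, the quantity behind rapidfuzz's `Levenshtein.normalized_similarity` (this assumes normalization by the longer string's length, which is the convention for unit edit weights):

```python
def levenshtein_distance(s1: str, s2: str) -> int:
    # Classic dynamic program over edit operations (insert, delete, substitute),
    # keeping only the previous row of the DP table.
    prev = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1, 1):
        cur = [i]
        for j, c2 in enumerate(s2, 1):
            cur.append(min(prev[j] + 1,          # deletion
                           cur[j - 1] + 1,       # insertion
                           prev[j - 1] + (c1 != c2)))  # substitution
        prev = cur
    return prev[-1]


def normalized_similarity(s1: str, s2: str) -> float:
    # 1.0 for identical strings, 0.0 for maximally different ones.
    longest = max(len(s1), len(s2))
    return 1.0 if longest == 0 else 1.0 - levenshtein_distance(s1, s2) / longest
```

Multiplying this by 100, as the functions in `plain_metrics.py` do, yields the percentage-scale scores the tests assert on.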
@@ -17,7 +17,8 @@ def test_fuzzy():
value1 = "Room, 2 Double Beds (19th to 25th Floors)"
value2 = "Two Double Beds - Location Room (19th to 25th Floors)"

assert fuzzy(value1, value2) == 97
assert int(fuzzy(str1=value1, str2=value2)) == 89
assert int(fuzzy(str1=value1, str2=value2, match_type="partial_token_set_ratio")) == 100


def test_levenshtein():
2 changes: 1 addition & 1 deletion requirements.txt
@@ -12,7 +12,6 @@ beautifulsoup4==4.12.3
datasets==3.0.0
docx2txt==0.8
evaluate==0.4.3
fuzzywuzzy==0.18.0
hnswlib==0.8.0
jsonschema==4.23.0
kaleido==0.2.1
@@ -29,6 +28,7 @@ pytesseract==0.3.13
python-dotenv==1.0.1
PyMuPDF==1.24.10
PyPDF2~=3.0
rapidfuzz==3.9.7
rouge-score==0.1.2
scikit-learn==1.5.2
sentence-transformers==3.1.0