bihealth · gromdimon · Jul 29, 2024 · Jul 29, 2024 · Jul 29, 2024 · Jul 29, 2024
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,9 @@
 # Temporary comparison results files
 src/bench/tmp/*.csv
 
+# Temporary lock files from LibreOffice
+*.~lock.*
+
 # API cache
 cache/
 cache/**

diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -6,7 +6,7 @@
           "type": "debugpy",
           "request": "launch",
           "module": "src.cli",
-          "args": ["NM_000277.2(PAH):c.707-7A>T", "--genome-release", "grch38"],
+          "args": ["NM_005633.3(SOS1):c.1018C>T", "--genome-release", "grch38"],
           "console": "integratedTerminal"
       },
       {

diff --git a/cohens_kappa_results.csv b/cohens_kappa_results.csv
diff --git a/src/bench/cohens_kappa_results.csv b/src/bench/cohens_kappa_results.csv
@@ -1,16 +1,16 @@
 Criteria,AutoACMG Kappa,Intervar Kappa,Genebe Kappa
 PVS1,0.9762036524626453,0.25605536332179923,0.9753581661891118
-PS1,0.7313899010931806,0.0,0.847877358490566
-PM1,0.5512584407612031,0.3137208194378275,0.9707283866575902
-PM2,-0.5895844576362654,0.5330528030405864,0.7667796610169492
+PS1,0.6980942828485457,0.0,0.9029345372460497
+PM1,0.5804878048780487,0.28900071890726087,0.9422043010752688
+PM2,0.19073470865001807,0.37180423666910145,0.950836072602544
 PM4,1.0,0.0,1.0
-PM5,0.8425989476092427,0.10158501440922196,0.9250707906774123
+PM5,0.80807855389422,0.09442934782608703,0.9633496697208609
 PP2,-0.011521843494959105,-0.06284038542103065,0.9707283866575902
 PP3,0.7973946784922394,0.6239067055393586,1.0
-BA1,0.12165815560228288,0.08231707317073167,0.9488501189532117
-BS1,-0.02033898305084758,-0.07499999999999996,0.9168278529980658
-BS2,-0.02178217821782158,0.08687258687258681,0.974435196195006
-BP1,-0.15495867768595062,0.32321528948847666,1.0
+BA1,0.2680851063829788,0.08231707317073167,0.9488501189532117
+BS1,0.31474103585657365,-0.08403361344537807,0.8870402802101576
+BS2,0.38888888888888884,0.08687258687258681,0.974435196195006
+BP1,-0.12074933822032174,0.32321528948847666,1.0
 BP3,1.0,0.0,1.0
 BP4,0.41028571428571425,0.6082004555808656,1.0
 BP7,0.26637314254265265,0.7831021437578815,1.0
diff --git a/src/bench/comparison_v3.py b/src/bench/comparison_v3.py
@@ -240,11 +240,11 @@ def eval_genebe(resp, expected):
 )
 
 for i, var in enumerate(variants):
-    # Save the stats every 50 variants
-    if i % 50 == 0 and i > 0:
-        print(f"Processed {i} variants")
-        output_path = os.path.join(path_to_root, "src", "bench", "tmp", f"stats_{i}.csv")
-        stats.to_csv(output_path, index=False)
+    # # (Disabled) Save the stats every 50 variants
+    # if i % 50 == 0 and i > 0:
+    #     print(f"Processed {i} variants")
+    #     output_path = os.path.join(path_to_root, "src", "bench", "tmp", f"stats_{i}.csv")
+    #     stats.to_csv(output_path, index=False)
 
     record = {
         "Variant": var[0],

diff --git a/src/bench/results.csv b/src/bench/results.csv
diff --git a/src/bench/results_analysis.ipynb b/src/bench/results_analysis.ipynb
@@ -494,6 +494,39 @@
     "kappa_df.to_csv(\"cohens_kappa_results.csv\", index=False)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "41e54faf-0908-49de-a001-f3958ba9015d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def remove_conflicting_criteria(row):\n",
+    "    criteria_to_remove = {\n",
+    "        'PVS1': ['PP3', 'PM4', 'BP4'],\n",
+    "        'BA1': ['BS1', 'BS2'],\n",
+    "        'PM1': ['BP3', 'PP2'],\n",
+    "        'PM2': ['BS1', 'BS2'],\n",
+    "        'PM4': ['PP3']\n",
+    "    }\n",
+    "    \n",
+    "    autoacmg_criteria = row['AutoACMG Criteria'].split(';')\n",
+    "    autoacmg_false_positives = row['AutoACMG False Positives'].split(';')\n",
+    "    \n",
+    "    for key, values in criteria_to_remove.items():\n",
+    "        if key in autoacmg_criteria:\n",
+    "            autoacmg_criteria = [c for c in autoacmg_criteria if c not in values]\n",
+    "            autoacmg_false_positives = [c for c in autoacmg_false_positives if c not in values]\n",
+    "    \n",
+    "    row['AutoACMG Criteria'] = ';'.join(autoacmg_criteria)\n",
+    "    row['AutoACMG False Positives'] = ';'.join(autoacmg_false_positives)\n",
+    "    \n",
+    "    return row\n",
+    "\n",
+    "# Apply the function to each row of the dataframe (axis=1)\n",
+    "df = df.apply(remove_conflicting_criteria, axis=1)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 24,