Merge pull request #113 from rs-station/stats-refactor

resolves #107
rs-station · Jun 24, 2023 · 5a1dbf5 · 5a1dbf5
2 parents 11ce0f4 + 4f47e0c
commit 5a1dbf5
Show file tree

Hide file tree

Showing 8 changed files with 129 additions and 87 deletions.
diff --git a/careless/stats/ccanom.py b/careless/stats/ccanom.py
@@ -7,10 +7,10 @@
 import seaborn as sns
 
 
-class ArgumentParser(argparse.ArgumentParser):
+from careless.stats.parser import BaseParser
+class ArgumentParser(BaseParser):
     def __init__(self):
         super().__init__(
-            formatter_class=argparse.RawTextHelpFormatter, 
             description=__doc__
         )
 
@@ -27,24 +27,18 @@ def __init__(self):
             "--method",
             default="spearman",
             choices=["spearman", "pearson"],
-            help=("Method for computing correlation coefficient (spearman or pearson)"),
+            help="Method for computing correlation coefficient (spearman or pearson)",
         )
 
         self.add_argument(
             "-b",
             "--bins",
             default=10,
             type=int,
-            help=("Number of resolution bins to use, the default is 10."),
+            help="Number of resolution bins to use, the default is 10.",
         )
 
-        self.add_argument(
-            "-o",
-            "--output",
-            type=str,
-            default=None,
-            help=("Optionally save CCanom values to this file in csv format."),
-        )
+
 
 
 
@@ -92,7 +86,7 @@ def analyze_ccanom_mtz(mtzpath, bins=10, return_labels=True, method="spearman"):
         return result
 
 
-def run_analysis(args, show=True):
+def run_analysis(args):
     results = []
     labels = None
     for m in args.mtz:
@@ -114,6 +108,8 @@ def run_analysis(args, show=True):
 
     if args.output is not None:
         results.to_csv(args.output)
+    else:
+        print(results.to_string())
 
     sns.lineplot(
         data=results, x="bin", y="CCanom", hue="filename", palette="Dark2"
@@ -123,11 +119,14 @@ def run_analysis(args, show=True):
     plt.xlabel("Resolution ($\mathrm{\AA}$)")
     plt.grid(which='both', axis='both', ls='dashdot')
     plt.tight_layout()
-    if show:
-        print(results.to_string())
+
+    if args.image is not None:
+        plt.savefig(args.image)
+
+    if args.show:
         plt.show()
 
 def main():
     parser = ArgumentParser().parse_args()
-    run_analysis(parser, True)
+    run_analysis(parser)
 
diff --git a/careless/stats/cchalf.py b/careless/stats/cchalf.py
@@ -7,10 +7,10 @@
 import seaborn as sns
 
 
-class ArgumentParser(argparse.ArgumentParser):
+from careless.stats.parser import BaseParser
+class ArgumentParser(BaseParser):
     def __init__(self):
         super().__init__(
-            formatter_class=argparse.RawTextHelpFormatter, 
             description=__doc__
         )
 
@@ -37,15 +37,6 @@ def __init__(self):
             help=("Number of resolution bins to use, the default is 10."),
         )
 
-        self.add_argument(
-            "-o",
-            "--output",
-            type=str,
-            default=None,
-            help=("Optionally save CChalf values to this file in csv format."),
-        )
-
-
 def make_halves_cchalf(mtz, bins=10):
     """Construct half-datasets for computing CChalf"""
 
@@ -87,7 +78,7 @@ def analyze_cchalf_mtz(mtzpath, bins=10, return_labels=True, method="spearman"):
         return result
 
 
-def run_analysis(args, show=True):
+def run_analysis(args):
     results = []
     labels = None
     for m in args.mtz:
@@ -110,6 +101,8 @@ def run_analysis(args, show=True):
 
     if args.output is not None:
         results.to_csv(args.output)
+    else:
+        print(results.to_string())
 
     sns.lineplot(
         data=results, x="bin", y="CChalf", hue="filename", palette="viridis"
@@ -119,12 +112,15 @@ def run_analysis(args, show=True):
     plt.xlabel("Resolution ($\mathrm{\AA}$)")
     plt.grid(which='both', axis='both', ls='dashdot')
     plt.tight_layout()
-    if show:
-        print(results.to_string())
+
+    if args.image is not None:
+        plt.savefig(args.image)
+
+    if args.show:
         plt.show()
 
 
 def main():
     parser = ArgumentParser().parse_args()
-    run_analysis(parser, True)
+    run_analysis(parser)
 
diff --git a/careless/stats/ccpred.py b/careless/stats/ccpred.py
@@ -11,10 +11,10 @@
 import seaborn as sns
 
 
-class ArgumentParser(argparse.ArgumentParser):
+from careless.stats.parser import BaseParser
+class ArgumentParser(BaseParser):
     def __init__(self):
         super().__init__(
-            formatter_class=argparse.RawTextHelpFormatter, 
             description=__doc__
         )
 
@@ -48,20 +48,6 @@ def __init__(self):
             help="Pool all prediction mtz files into a single calculation rather than treating each file individually.",
         )
 
-        self.add_argument(
-            "-o",
-            "--output",
-            type=str,
-            default=None,
-            help="Optionally save CCpred values to this file in csv format.",
-        )
-
-        self.add_argument(
-            "--plot",
-            action="store_true",
-            help="Make a plot of the results with seaborn and display it using matplotlib.",
-        )
-
 
 def compute_ccpred(
     dataset, overall=False, bins=10, return_labels=True, method="spearman"
@@ -121,6 +107,8 @@ def run_analysis(args):
 
     if args.output is not None:
         results.to_csv(args.output)
+    else:
+        print(results.to_string())
 
     plot_kwargs = {
         'data' : results,
@@ -143,8 +131,11 @@ def run_analysis(args):
     plt.xlabel("Resolution ($\mathrm{\AA}$)")
     plt.grid(which='both', axis='both', ls='dashdot')
     plt.tight_layout()
-    print(results.to_string())
-    if args.plot:
+
+    if args.image is not None:
+        plt.savefig(args.image)
+
+    if args.show:
         plt.show()
 
 def main():

diff --git a/careless/stats/completeness.py b/careless/stats/completeness.py
@@ -7,11 +7,10 @@
 import seaborn as sns
 import numpy as np
 
-
-class ArgumentParser(argparse.ArgumentParser):
+from careless.stats.parser import BaseParser
+class ArgumentParser(BaseParser):
     def __init__(self):
         super().__init__(
-            formatter_class=argparse.RawTextHelpFormatter, 
             description=__doc__
         )
 
@@ -29,20 +28,14 @@ def __init__(self):
             help=("Number of resolution bins to use, the default is 10."),
         )
 
-        self.add_argument(
-            "-o",
-            "--output",
-            type=str,
-            default=None,
-            help=("Optionally save completeness values to this file in csv format."),
-        )
-
-def run_analysis(args, show=True):
+def run_analysis(args):
     ds = rs.read_mtz(args.mtz)
     results = rs.stats.compute_completeness(ds, bins=args.bins)
 
     if args.output is not None:
         results.to_csv(args.output)
+    else:
+        print(results.to_string())
 
     #Move overall to the beginning
     results = results.iloc[np.roll(np.arange(len(results)), 1)]
@@ -60,12 +53,14 @@ def run_analysis(args, show=True):
     plt.xlabel("Resolution ($\mathrm{\AA}$)")
     plt.grid(which='both', axis='both', ls='dashdot')
     plt.tight_layout()
-    if show:
-        print(results.to_string())
-        plt.show()
 
+    if args.image is not None:
+        plt.savefig(args.image)
+
+    if args.show:
+        plt.show()
 
 def main():
     parser = ArgumentParser().parse_args()
-    run_analysis(parser, True)
+    run_analysis(parser)
 
diff --git a/careless/stats/parser.py b/careless/stats/parser.py
@@ -0,0 +1,48 @@
+"""
+Shared base argument parser (--show, --image, --output) for the careless.stats tools.
+
+"""
+import argparse
+import numpy as np
+import reciprocalspaceship as rs
+import gemmi
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+
+class BaseParser(argparse.ArgumentParser):
+    def __init__(self, **kwargs):
+        super().__init__(
+            formatter_class=argparse.RawTextHelpFormatter, 
+            **kwargs
+        )
+
+        self.add_argument(
+            "-s",
+            "--show",
+            action="store_true",
+            help="Make a plot of the results and display it using matplotlib.",
+        )
+
+        self.add_argument(
+            "-i",
+            "--image",
+            type=str,
+            default=None,
+            help="Make a plot of the results and save it to this filename. "
+                 "The filetype will be determined from the filename. "
+                 "Any filetype supported by your matplotlib version will be available.",
+        )
+
+        self.add_argument(
+            "-o",
+            "--output",
+            type=str,
+            default=None,
+            help="Optionally save results to this file in csv format instead of printing "
+                 "them to the terminal.",
+        )
+
+
+
diff --git a/careless/stats/rsplit.py b/careless/stats/rsplit.py
@@ -9,10 +9,10 @@
 import numpy as np
 
 
-class ArgumentParser(argparse.ArgumentParser):
+from careless.stats.parser import BaseParser
+class ArgumentParser(BaseParser):
     def __init__(self):
         super().__init__(
-            formatter_class=argparse.RawTextHelpFormatter, 
             description=__doc__
         )
 
@@ -37,14 +37,6 @@ def __init__(self):
             help=("Optionally use intensities instead of structure factors to facilitate comparisons with other softwares."),
         )
 
-        self.add_argument(
-            "-o",
-            "--output",
-            type=str,
-            default=None,
-            help=("Optionally save Rsplit values to this file in csv format."),
-        )
-
 
 def make_halves_cchalf(mtz, bins=10):
     """Construct half-datasets for computing Rsplit"""
@@ -98,7 +90,7 @@ def analyze_cchalf_mtz(mtzpath, bins=10, return_labels=True, keys=("F1", "F2")):
         return result
 
 
-def run_analysis(args, show=True):
+def run_analysis(args):
     results = []
     labels = None
 
@@ -127,6 +119,8 @@ def run_analysis(args, show=True):
 
     if args.output is not None:
         results.to_csv(args.output)
+    else:
+        print(results.to_string())
 
     sns.lineplot(
         data=results, x="bin", y="Rsplit", hue="filename", palette="Dark2"
@@ -136,12 +130,15 @@ def run_analysis(args, show=True):
     plt.xlabel("Resolution ($\mathrm{\AA}$)")
     plt.grid(which='both', axis='both', ls='dashdot')
     plt.tight_layout()
-    if show:
-        print(results.to_string())
+
+    if args.image is not None:
+        plt.savefig(args.image)
+
+    if args.show:
         plt.show()
 
 
 def main():
     parser = ArgumentParser().parse_args()
-    run_analysis(parser, True)
+    run_analysis(parser)