Merge pull request #241 from madgik/fix_calibration_privacy_number

Fix privacy check for CB and add privacy test
madgik · Mar 27, 2020 · d27c28a
2 parents 4eb348b + 5442702
commit d27c28a
Show file tree

Hide file tree

Showing 3 changed files with 48 additions and 30 deletions.
diff --git a/Exareme-Docker/src/mip-algorithms/CALIBRATION_BELT/init/1/local.py b/Exareme-Docker/src/mip-algorithms/CALIBRATION_BELT/init/1/local.py
@@ -10,28 +10,17 @@
 from scipy.special import logit
 
 sys.path.append(
-    path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) + '/utils/')
-sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) +
-                '/CALIBRATION_BELT/')
+        path.dirname(path.dirname(
+                path.dirname(path.dirname(path.abspath(__file__))))) + '/utils/')
+sys.path.append(
+        path.dirname(
+            path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) +
+        '/CALIBRATION_BELT/')
 
-from algorithm_utils import StateData, PrivacyError
+from algorithm_utils import StateData, query_with_privacy
 from cb_lib import CBInit_Loc2Glob_TD
 
 
-# ======================= Remove after testing!! ======================= #
-PRIVACY_MAGIC_NUMBER = 1
-
-def query_with_privacy(fname_db, query):
-    conn = sqlite3.connect(fname_db)
-    cur = conn.cursor()
-    cur.execute(query)
-    schema = [description[0] for description in cur.description]
-    data = cur.fetchall()
-    if len(data) < PRIVACY_MAGIC_NUMBER:
-        raise PrivacyError('Query results in illegal number of datapoints.')
-    return schema, data
-# ====================================================================== #
-
 def cb_local_init(local_in):
     # Unpack local input
     e_vec, o_vec, e_name, o_name, max_deg = local_in
@@ -57,11 +46,13 @@ def main():
     parser = ArgumentParser()
     parser.add_argument('-x', required=True, help='Expected outcomes.')
     parser.add_argument('-y', required=True, help='Observed outcomes.')
-    parser.add_argument('-max_deg', required=True, help='Maximum degree of calibration curve.')
+    parser.add_argument('-max_deg', required=True,
+                        help='Maximum degree of calibration curve.')
     parser.add_argument('-cur_state_pkl', required=True,
                         help='Path to the pickle file holding the current state.')
     parser.add_argument('-input_local_DB', required=True, help='Path to local db.')
-    parser.add_argument('-db_query', required=True, help='Query to be executed on local db.')
+    parser.add_argument('-db_query', required=True,
+                        help='Query to be executed on local db.')
     args, unknown = parser.parse_known_args()
     fname_cur_state = path.abspath(args.cur_state_pkl)
     fname_loc_db = path.abspath(args.input_local_DB)
@@ -83,10 +74,13 @@ def main():
     mask_e = [ei is None for ei in e_vec]
     mask_o = [oi is None for oi in o_vec]
     mask = np.logical_or(mask_e, mask_o)
-    e_vec, o_vec = np.array(e_vec[~mask], dtype=np.float64), np.array(o_vec[~mask], dtype=np.int8)
+    e_vec, o_vec = np.array(e_vec[~mask], dtype=np.float64), np.array(o_vec[~mask],
+                                                                      dtype=np.int8)
     # todo perform privacy check here!
-    assert min(e_vec) >= 0. and max(e_vec) <= 1., "Variable e should take values only in [0, 1]"
-    assert set(o_vec).issubset({0, 1}), "Variable o should only contain values 0 and 1."
+    assert min(e_vec) >= 0. and max(
+            e_vec) <= 1., "Variable e should take values only in [0, 1]"
+    assert set(o_vec).issubset(
+            {0, 1}), "Variable o should only contain values 0 and 1."
 
     local_in = e_vec, o_vec, e_name, o_name, max_deg
     # Run algorithm local step

diff --git a/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_CalibrationBelt.py b/Exareme-Docker/src/mip-algorithms/tests/algorithm_tests/test_CalibrationBelt.py
@@ -2,7 +2,7 @@
 import json
 import requests
 import math
-from lib import vmUrl
+from tests.algorithm_tests.lib import vmUrl
 endpointUrl= vmUrl+'CALIBRATION_BELT'
 
 def get_test_params():

diff --git a/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_privacy.py b/Exareme-Docker/src/mip-algorithms/tests/exareme_tests/test_privacy.py
@@ -15,7 +15,8 @@
 from tests.algorithm_tests_with_privacy.test_MultipleHistograms import endpointUrl \
     as url_multi_hist
 from tests.algorithm_tests_with_privacy.test_NaiveBayes import url1
-from tests.algorithm_tests_with_privacy.test_NaiveBayes_Training_Standalone import endpointUrl as url_naive_bayes_standalone
+from tests.algorithm_tests_with_privacy.test_NaiveBayes_Training_Standalone import \
+    endpointUrl as url_naive_bayes_standalone
 from tests.algorithm_tests.test_PearsonCorrelation import endpointUrl as url_pearson
 from tests.algorithm_tests_with_privacy.test_ttest_independent import endpointUrl \
     as url_ttest_indep
@@ -24,6 +25,9 @@
 from tests.algorithm_tests_with_privacy.test_ttest_paired import endpointUrl \
     as url_ttest_paired
 
+from tests.algorithm_tests.lib import vmUrl
+url_calibration= vmUrl+'CALIBRATION_BELT'
+
 url_descr_stat += 'DESCRIPTIVE_STATS'
 
 
@@ -215,15 +219,16 @@ def test_NAIVEBAYES_privacy():
 
 def test_NaiveBayesStandalone_Privacy():
     logging.info("---------- TEST : Algorithms for Privacy Error")
-    data = [{"name": "pathology","value":"dementia"},
-            {"name": "dataset","value": "adni_9rows"},
+    data = [{"name": "pathology", "value": "dementia"},
+            {"name": "dataset", "value": "adni_9rows"},
             {"name": "x", "value": "lefthippocampus,righthippocampus"},
             {"name": "y", "value": "alzheimerbroadcategory"},
-            {"name": "alpha","value": "0.1"},
-            { "name": "filter", "value": ""}]
+            {"name": "alpha", "value": "0.1"},
+            {"name": "filter", "value": ""}]
 
     headers = {'Content-type': 'application/json', "Accept": "text/plain"}
-    r = requests.post(url_naive_bayes_standalone, data=json.dumps(data), headers=headers)
+    r = requests.post(url_naive_bayes_standalone, data=json.dumps(data),
+                      headers=headers)
     result = json.loads(r.text)
     check_privacy_result(r.text)
 
@@ -289,5 +294,24 @@ def test_pairedttest_Privacy():
     check_privacy_result(r.text)
 
 
+def test_calibration_Privacy():
+    logging.info("---------- TEST : Algorithms for Privacy Error")
+    data = [{"name": "x", "value": "probGiViTI_2018_Complessiva"},
+            {"name": "y", "value": "hospOutcomeLatest_RIC10"},
+            {"name": "devel", "value": "external"},
+            {"name": "max_deg", "value": "4"},
+            {"name": "confLevels", "value": "0.80, 0.95"},
+            {"name": "thres", "value": "0.95"},
+            {"name": "num_points", "value": "60"},
+            {"name": "dataset", "value": "adni_9rows"},
+            {"name" : "filter",
+             "value": "\n{\n  \"condition\": \"AND\",\n  \"rules\": [\n    {\n      \"id\": \"cb_var1\",\n      \"field\": \"cb_var1\",\n      \"type\": \"integer\",\n      \"input\": \"select\",\n      \"operator\": \"equal\",\n      \"value\": 7\n    }\n  ],\n  \"valid\": true\n}\n"},
+            {"name": "pathology", "value": "dementia"}]
+    headers = {'Content-type': 'application/json', "Accept": "text/plain"}
+    r = requests.post(url_calibration, data=json.dumps(data), headers=headers)
+    result = json.loads(r.text)
+    check_privacy_result(r.text)
+
+
 if __name__ == '__main__':
     unittest.main()