Skip to content

Commit

Permalink
Merge pull request #241 from madgik/fix_calibration_privacy_number
Browse files Browse the repository at this point in the history
Fix privacy check for CB and add privacy test
  • Loading branch information
ThanKarab authored Mar 27, 2020
2 parents 4eb348b + 5442702 commit d27c28a
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 30 deletions.
40 changes: 17 additions & 23 deletions Exareme-Docker/src/mip-algorithms/CALIBRATION_BELT/init/1/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,17 @@
from scipy.special import logit

sys.path.append(
path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) + '/utils/')
sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) +
'/CALIBRATION_BELT/')
path.dirname(path.dirname(
path.dirname(path.dirname(path.abspath(__file__))))) + '/utils/')
sys.path.append(
path.dirname(
path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) +
'/CALIBRATION_BELT/')

from algorithm_utils import StateData, PrivacyError
from algorithm_utils import StateData, query_with_privacy
from cb_lib import CBInit_Loc2Glob_TD


# ======================= Remove after testing!! ======================= #
# Minimum number of rows a query must return before its result may be used.
PRIVACY_MAGIC_NUMBER = 1


def query_with_privacy(fname_db, query):
    """Run ``query`` against the SQLite database ``fname_db`` with a row-count check.

    Parameters
    ----------
    fname_db : str
        Path handed to ``sqlite3.connect``.
    query : str
        SQL statement to execute.

    Returns
    -------
    tuple
        ``(schema, data)`` where ``schema`` is the list of result column names
        and ``data`` is the list of row tuples returned by ``fetchall``.

    Raises
    ------
    PrivacyError
        If fewer than ``PRIVACY_MAGIC_NUMBER`` rows are returned.
    """
    conn = sqlite3.connect(fname_db)
    try:
        cur = conn.cursor()
        cur.execute(query)
        # First entry of every description tuple is the column name.
        schema = [description[0] for description in cur.description]
        data = cur.fetchall()
    finally:
        # Always release the database handle, including when the query fails;
        # the rows are already materialised in ``data`` at this point.
        conn.close()
    if len(data) < PRIVACY_MAGIC_NUMBER:
        raise PrivacyError('Query results in illegal number of datapoints.')
    return schema, data
# ====================================================================== #

def cb_local_init(local_in):
# Unpack local input
e_vec, o_vec, e_name, o_name, max_deg = local_in
Expand All @@ -57,11 +46,13 @@ def main():
parser = ArgumentParser()
parser.add_argument('-x', required=True, help='Expected outcomes.')
parser.add_argument('-y', required=True, help='Observed outcomes.')
parser.add_argument('-max_deg', required=True, help='Maximum degree of calibration curve.')
parser.add_argument('-max_deg', required=True,
help='Maximum degree of calibration curve.')
parser.add_argument('-cur_state_pkl', required=True,
help='Path to the pickle file holding the current state.')
parser.add_argument('-input_local_DB', required=True, help='Path to local db.')
parser.add_argument('-db_query', required=True, help='Query to be executed on local db.')
parser.add_argument('-db_query', required=True,
help='Query to be executed on local db.')
args, unknown = parser.parse_known_args()
fname_cur_state = path.abspath(args.cur_state_pkl)
fname_loc_db = path.abspath(args.input_local_DB)
Expand All @@ -83,10 +74,13 @@ def main():
mask_e = [ei is None for ei in e_vec]
mask_o = [oi is None for oi in o_vec]
mask = np.logical_or(mask_e, mask_o)
e_vec, o_vec = np.array(e_vec[~mask], dtype=np.float64), np.array(o_vec[~mask], dtype=np.int8)
e_vec, o_vec = np.array(e_vec[~mask], dtype=np.float64), np.array(o_vec[~mask],
dtype=np.int8)
# todo perform privacy check here!
assert min(e_vec) >= 0. and max(e_vec) <= 1., "Variable e should take values only in [0, 1]"
assert set(o_vec).issubset({0, 1}), "Variable o should only contain values 0 and 1."
assert min(e_vec) >= 0. and max(
e_vec) <= 1., "Variable e should take values only in [0, 1]"
assert set(o_vec).issubset(
{0, 1}), "Variable o should only contain values 0 and 1."

local_in = e_vec, o_vec, e_name, o_name, max_deg
# Run algorithm local step
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import requests
import math
from lib import vmUrl
from tests.algorithm_tests.lib import vmUrl
endpointUrl= vmUrl+'CALIBRATION_BELT'

def get_test_params():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from tests.algorithm_tests_with_privacy.test_MultipleHistograms import endpointUrl \
as url_multi_hist
from tests.algorithm_tests_with_privacy.test_NaiveBayes import url1
from tests.algorithm_tests_with_privacy.test_NaiveBayes_Training_Standalone import endpointUrl as url_naive_bayes_standalone
from tests.algorithm_tests_with_privacy.test_NaiveBayes_Training_Standalone import \
endpointUrl as url_naive_bayes_standalone
from tests.algorithm_tests.test_PearsonCorrelation import endpointUrl as url_pearson
from tests.algorithm_tests_with_privacy.test_ttest_independent import endpointUrl \
as url_ttest_indep
Expand All @@ -24,6 +25,9 @@
from tests.algorithm_tests_with_privacy.test_ttest_paired import endpointUrl \
as url_ttest_paired

from tests.algorithm_tests.lib import vmUrl
url_calibration= vmUrl+'CALIBRATION_BELT'

url_descr_stat += 'DESCRIPTIVE_STATS'


Expand Down Expand Up @@ -215,15 +219,16 @@ def test_NAIVEBAYES_privacy():

def test_NaiveBayesStandalone_Privacy():
logging.info("---------- TEST : Algorithms for Privacy Error")
data = [{"name": "pathology","value":"dementia"},
{"name": "dataset","value": "adni_9rows"},
data = [{"name": "pathology", "value": "dementia"},
{"name": "dataset", "value": "adni_9rows"},
{"name": "x", "value": "lefthippocampus,righthippocampus"},
{"name": "y", "value": "alzheimerbroadcategory"},
{"name": "alpha","value": "0.1"},
{ "name": "filter", "value": ""}]
{"name": "alpha", "value": "0.1"},
{"name": "filter", "value": ""}]

headers = {'Content-type': 'application/json', "Accept": "text/plain"}
r = requests.post(url_naive_bayes_standalone, data=json.dumps(data), headers=headers)
r = requests.post(url_naive_bayes_standalone, data=json.dumps(data),
headers=headers)
result = json.loads(r.text)
check_privacy_result(r.text)

Expand Down Expand Up @@ -289,5 +294,24 @@ def test_pairedttest_Privacy():
check_privacy_result(r.text)


def test_calibration_Privacy():
    """Post a calibration-belt request on a small dataset and check the reply.

    The request targets ``url_calibration`` with the ``adni_9rows`` dataset and
    a filter on ``cb_var1``. The response body must parse as JSON and is then
    handed, as raw text, to ``check_privacy_result``.
    """
    logging.info("---------- TEST : Algorithms for Privacy Error")

    # Filter rule sent verbatim as a JSON-encoded string.
    rule_filter = "\n{\n \"condition\": \"AND\",\n \"rules\": [\n {\n \"id\": \"cb_var1\",\n \"field\": \"cb_var1\",\n \"type\": \"integer\",\n \"input\": \"select\",\n \"operator\": \"equal\",\n \"value\": 7\n }\n ],\n \"valid\": true\n}\n"

    payload = [
        {"name": "x", "value": "probGiViTI_2018_Complessiva"},
        {"name": "y", "value": "hospOutcomeLatest_RIC10"},
        {"name": "devel", "value": "external"},
        {"name": "max_deg", "value": "4"},
        {"name": "confLevels", "value": "0.80, 0.95"},
        {"name": "thres", "value": "0.95"},
        {"name": "num_points", "value": "60"},
        {"name": "dataset", "value": "adni_9rows"},
        {"name": "filter", "value": rule_filter},
        {"name": "pathology", "value": "dementia"},
    ]
    request_headers = {'Content-type': 'application/json', "Accept": "text/plain"}

    response = requests.post(
        url_calibration,
        data=json.dumps(payload),
        headers=request_headers,
    )
    # Parsed only to fail early on a non-JSON body; the value itself is unused.
    json.loads(response.text)
    check_privacy_result(response.text)


if __name__ == '__main__':
unittest.main()

0 comments on commit d27c28a

Please sign in to comment.