diff --git a/adaa.analytics.rules/build.gradle b/adaa.analytics.rules/build.gradle index c42ed1a9..e33f0077 100644 --- a/adaa.analytics.rules/build.gradle +++ b/adaa.analytics.rules/build.gradle @@ -5,7 +5,7 @@ plugins { id 'java' } -version = '2.1.17' +version = '2.1.18' java { sourceCompatibility = JavaVersion.VERSION_1_8 } diff --git a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/representation/condition/CompoundCondition.java b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/representation/condition/CompoundCondition.java index 0b25d48a..03a04f02 100644 --- a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/representation/condition/CompoundCondition.java +++ b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/representation/condition/CompoundCondition.java @@ -108,8 +108,9 @@ protected void internalEvaluate(IExampleSet set, Set outIndices) { */ public String toString() { String s = ""; - String op = operator == LogicalOperator.ALTERNATIVE ? " OR " : " AND "; - + String op = operator == LogicalOperator.ALTERNATIVE ? " OR " : " AND "; + + /* Map shortened = new HashMap(); Set unshortened = new LinkedHashSet<>(); @@ -137,8 +138,37 @@ public String toString() { // add shortened conditions for (ConditionBase cnd : shortened.values()) { s += cnd.toString() + op; + }*/ + + List outConditions = new ArrayList<>(); + Map attr2position = new HashMap<>(); + + for (ConditionBase cnd : subconditions) { + if (cnd instanceof ElementaryCondition && cnd.isPrunable()) { + ElementaryCondition ec = (ElementaryCondition)cnd; + String attr = ec.getAttribute(); + + if (attr2position.containsKey(attr)) { + // if condition built upon current attribute exists - replace with intersection + int pos = attr2position.get(attr); + ElementaryCondition parent = (ElementaryCondition) outConditions.get(pos); + outConditions.set(pos, parent.intersect(ec)); + } else { + // otherwise - add condition and save positon of the attribute + attr2position.put(attr, outConditions.size()); + outConditions.add(ec); + } + } else { + // if not elementary condition - add it as it is + outConditions.add(cnd); + } } - + + for (ConditionBase cnd : outConditions) { + s += cnd.toString() + op; + } + + s = s.substring(0, Math.max(0, s.length() - op.length())); if (type == Type.FORCED) { diff --git a/adaa.analytics.rules/test/resources/reports/ClassificationExpertSnCTest/test_seismic-bumps.guided-c2, gimpuls_750 extended.seismic-bumps-train-minimal.txt b/adaa.analytics.rules/test/resources/reports/ClassificationExpertSnCTest/test_seismic-bumps.guided-c2, gimpuls_750 extended.seismic-bumps-train-minimal.txt index 61614d09..46e79421 100644 --- a/adaa.analytics.rules/test/resources/reports/ClassificationExpertSnCTest/test_seismic-bumps.guided-c2, gimpuls_750 extended.seismic-bumps-train-minimal.txt +++ b/adaa.analytics.rules/test/resources/reports/ClassificationExpertSnCTest/test_seismic-bumps.guided-c2, gimpuls_750 extended.seismic-bumps-train-minimal.txt @@ -2,5 +2,5 @@ Rules IF [gimpuls = (-inf, 1350.50)] THEN class = {0} - IF goenergy = <-32.50, inf) AND gimpuls = (-inf, 2784) AND goimpuls = <-39, 111) AND nbumps = (-inf, 5.50) THEN class = {0} - IF [gimpuls = <408.50, inf)] AND ghazard = {a} AND goenergy = (-inf, 94.50) AND maxenergy = (-inf, 6500) AND genergy = <35165, 294530) AND senergy = (-inf, 8750) AND nbumps3 = (-inf, 2.50) THEN class = {1} + IF gimpuls = (-inf, 2784) AND nbumps = (-inf, 5.50) AND goimpuls = <-39, 111) AND goenergy = <-32.50, inf) THEN class = {0} + IF [gimpuls = <408.50, inf)] AND genergy = <35165, 294530) AND goenergy = (-inf, 94.50) AND ghazard = {a} AND senergy = (-inf, 8750) AND nbumps3 = (-inf, 2.50) AND maxenergy = (-inf, 6500) THEN class = {1} diff --git a/adaa.analytics.rules/test/resources/reports/ClassificationSnCTest/test_deals.mincov=8, no_pruning.deals-train.txt b/adaa.analytics.rules/test/resources/reports/ClassificationSnCTest/test_deals.mincov=8, no_pruning.deals-train.txt index e195a532..09bf601e 100644 --- a/adaa.analytics.rules/test/resources/reports/ClassificationSnCTest/test_deals.mincov=8, no_pruning.deals-train.txt +++ b/adaa.analytics.rules/test/resources/reports/ClassificationSnCTest/test_deals.mincov=8, no_pruning.deals-train.txt @@ -11,7 +11,6 @@ Rules IF Gender = {female} AND Age = <27.50, inf) THEN Future Customer = {no} IF Gender = {female} AND Age = <24.50, inf) THEN Future Customer = {no} IF Gender = {female} AND Age = <19.50, inf) THEN Future Customer = {no} - IF Gender = {female} THEN Future Customer = {no} IF Payment Method = {credit card} AND Age = (-inf, 30.50) THEN Future Customer = {yes} IF Gender = {male} AND Payment Method = {credit card} AND Age = (-inf, 32.50) THEN Future Customer = {yes} IF Gender = {male} AND Age = <18.50, 21.50) THEN Future Customer = {yes} diff --git a/adaa.analytics.rules/test/resources/reports/ClassificationSnCTest/test_deals.mincov=8.deals-train.txt b/adaa.analytics.rules/test/resources/reports/ClassificationSnCTest/test_deals.mincov=8.deals-train.txt index c064dd61..281d59c8 100644 --- a/adaa.analytics.rules/test/resources/reports/ClassificationSnCTest/test_deals.mincov=8.deals-train.txt +++ b/adaa.analytics.rules/test/resources/reports/ClassificationSnCTest/test_deals.mincov=8.deals-train.txt @@ -7,7 +7,6 @@ Rules IF Gender = {female} AND Age = <28.50, inf) THEN Future Customer = {no} IF Gender = {female} AND Age = <23.50, inf) THEN Future Customer = {no} IF Gender = {female} AND Age = <17.50, inf) THEN Future Customer = {no} - IF Gender = {female} THEN Future Customer = {no} IF Payment Method = {credit card} AND Age = (-inf, 32.50) THEN Future Customer = {yes} IF Age = (-inf, 34.50) THEN Future Customer = {yes} IF Gender = {male} AND Age = (-inf, 36.50) THEN Future Customer = {yes} diff --git a/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r1, PD=0 or PD=1.methane-train-minimal.txt b/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r1, PD=0 or PD=1.methane-train-minimal.txt index 087f037c..01addc8e 100644 --- a/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r1, PD=0 or PD=1.methane-train-minimal.txt +++ b/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r1, PD=0 or PD=1.methane-train-minimal.txt @@ -3,11 +3,11 @@ Rules IF [PD = (-inf, 0.50)] AND MM31 = (-inf, 0.22) THEN MM116_pred = {0.40} IF [PD = (-inf, 0.50)] AND PG072 = <1.75, 1.95) AND BA13 = <1074.50, 1075.50) THEN MM116_pred = {0.50} - IF [PD = (-inf, 0.50)] AND MM116 = (-inf, 0.65) AND MM31 = <0.23, 0.28) THEN MM116_pred = {0.50} + IF [PD = (-inf, 0.50)] AND MM31 = <0.23, 0.28) AND MM116 = (-inf, 0.65) THEN MM116_pred = {0.50} IF [PD = <0.50, inf)] AND MM116 = <1.25, inf) THEN MM116_pred = {1.40} IF [PD = <0.50, inf)] AND MM116 = <1.05, inf) THEN MM116_pred = {1.20} IF [PD = <0.50, inf)] AND MM116 = <0.95, inf) THEN MM116_pred = {1.10} IF MM116 = (-inf, 0.75) AND MM31 = <0.23, inf) THEN MM116_pred = {0.60} - IF MM116 = (-inf, 0.85) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.33) THEN MM116_pred = {0.50} + IF MM31 = (-inf, 0.33) AND MM116 = (-inf, 0.85) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.45) THEN MM116_pred = {0.50} IF PD = (-inf, 0.50) AND MM116 = <0.75, inf) THEN MM116_pred = {0.90} - IF MM116 = (-inf, 0.95) AND MM31 = <0.32, inf) THEN MM116_pred = {0.80} + IF MM31 = <0.32, inf) AND MM116 = (-inf, 0.95) THEN MM116_pred = {0.80} diff --git a/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r2, PD=1 and MM116 le 1.methane-train-minimal.txt b/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r2, PD=1 and MM116 le 1.methane-train-minimal.txt index 672490ad..6abcfe66 100644 --- a/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r2, PD=1 and MM116 le 1.methane-train-minimal.txt +++ b/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r2, PD=1 and MM116 le 1.methane-train-minimal.txt @@ -19,4 +19,3 @@ Rules IF MM116 = (-inf, 0.85) AND DMM116 = (-inf, 0.05) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.36) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} IF MM31 = (-inf, 0.36) THEN MM116_pred = {0.50} IF MM116 = (-inf, 0.95) AND MM31 = <0.32, inf) THEN MM116_pred = {0.80} - IF MM116 = <1.05, inf) AND AS038 = <2.15, inf) AND MM31 = (-inf, 0.67) THEN MM116_pred = {1.20} diff --git a/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r4, DMM and MM116 and PD.methane-train-minimal.txt b/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r4, DMM and MM116 and PD.methane-train-minimal.txt index f1ca274f..9a646ce7 100644 --- a/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r4, DMM and MM116 and PD.methane-train-minimal.txt +++ b/adaa.analytics.rules/test/resources/reports/RegressionExpertSnCTest/test_methane.guided-r4, DMM and MM116 and PD.methane-train-minimal.txt @@ -19,4 +19,3 @@ Rules IF [MM116 = <0.55, inf)] AND [DMM116 = <-0.05, inf)] AND [PD = (-inf, 0.50)] AND MM31 = (-inf, 0.33) THEN MM116_pred = {0.60} IF [MM116 = <0.55, inf)] AND [DMM116 = <-0.05, inf)] AND [PD = (-inf, 0.50)] AND MM31 = <0.33, inf) THEN MM116_pred = {0.90} IF [MM116 = <0.55, inf)] AND [DMM116 = (-inf, 0.05)] AND [PD = (-inf, 0.50)] AND MM31 = <0.40, 0.56) AND BA13 = (-inf, 1074.50) THEN MM116_pred = {0.80} - IF [MM116 = <0.55, inf)] AND [DMM116 = (-inf, 0.05)] AND AS038 = (-inf, 2.35) AND MM31 = <0.31, 0.51) AND BA13 = (-inf, 1075.50) THEN MM116_pred = {0.80} diff --git a/adaa.analytics.rules/test/resources/reports/RegressionSnCTest/test_methane.auto.methane-train-minimal.txt b/adaa.analytics.rules/test/resources/reports/RegressionSnCTest/test_methane.auto.methane-train-minimal.txt index fbff3194..a4e27642 100644 --- a/adaa.analytics.rules/test/resources/reports/RegressionSnCTest/test_methane.auto.methane-train-minimal.txt +++ b/adaa.analytics.rules/test/resources/reports/RegressionSnCTest/test_methane.auto.methane-train-minimal.txt @@ -1,17 +1,17 @@ Rules - IF MM116 = (-inf, 0.60) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} + IF MM31 = (-inf, 0.24) AND MM116 = (-inf, 0.60) THEN MM116_pred = {0.40} IF MM31 = <0.24, 0.31) AND PG072 = (-inf, 1.95) AND BA13 = (-inf, 1075.50) THEN MM116_pred = {0.50} - IF MM116 = (-inf, 0.65) AND MM31 = <0.24, 0.30) AND BA13 = <1073.50, inf) THEN MM116_pred = {0.50} - IF MM116 = <0.55, 0.85) AND DMM116 = <-0.05, inf) AND AS038 = <2.25, inf) AND MM31 = <0.26, inf) AND PG072 = (-inf, 1.95) AND BA13 = <1074.50, inf) THEN MM116_pred = {0.70} - IF PD = (-inf, 0.50) AND MM116 = <0.55, 0.85) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.33) THEN MM116_pred = {0.60} + IF MM31 = <0.24, 0.30) AND BA13 = <1073.50, inf) AND MM116 = (-inf, 0.65) THEN MM116_pred = {0.50} + IF MM116 = <0.55, 0.85) AND DMM116 = <-0.05, inf) AND PG072 = (-inf, 1.95) AND AS038 = <2.25, inf) AND MM31 = <0.26, inf) AND BA13 = <1074.50, inf) THEN MM116_pred = {0.70} + IF MM31 = (-inf, 0.33) AND MM116 = <0.55, 0.85) AND AS038 = (-inf, 2.45) AND DMM116 = <-0.05, inf) AND PD = (-inf, 0.50) THEN MM116_pred = {0.60} IF MM31 = (-inf, 0.33) THEN MM116_pred = {0.50} IF MM116 = (-inf, 0.75) AND MM31 = <0.24, inf) THEN MM116_pred = {0.60} IF MM116 = <1.05, 1.25) AND AS038 = (-inf, 2.45) AND MM31 = <0.39, inf) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {1.20} IF MM116 = <1.05, 1.25) AND MM31 = <0.37, 0.57) AND PG072 = <1.75, inf) AND BA13 = <1069.50, inf) THEN MM116_pred = {1.20} IF MM116 = <0.95, 1.25) THEN MM116_pred = {1.10} IF MM116 = <0.95, 1.35) THEN MM116_pred = {1.10} - IF MM116 = <0.75, 0.85) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.45) THEN MM116_pred = {0.80} - IF MM116 = (-inf, 0.95) AND MM31 = <0.32, inf) THEN MM116_pred = {0.80} - IF MM116 = <1.05, inf) AND AS038 = <2.15, inf) AND MM31 = (-inf, 0.67) THEN MM116_pred = {1.20} + IF MM116 = <0.75, 0.85) AND AS038 = (-inf, 2.45) AND DMM116 = <-0.05, inf) THEN MM116_pred = {0.80} + IF MM31 = <0.32, inf) AND MM116 = (-inf, 0.95) THEN MM116_pred = {0.80} + IF AS038 = <2.15, inf) AND MM31 = (-inf, 0.67) AND MM116 = <1.05, inf) THEN MM116_pred = {1.20} diff --git a/adaa.analytics.rules/test/resources/reports/SurvivalLogRankExpertSnCTest/bmt.guided-s1, +CD34 -PLT -ANC.bmt-train-0.txt b/adaa.analytics.rules/test/resources/reports/SurvivalLogRankExpertSnCTest/bmt.guided-s1, +CD34 -PLT -ANC.bmt-train-0.txt index 72720b62..07ad9574 100644 --- a/adaa.analytics.rules/test/resources/reports/SurvivalLogRankExpertSnCTest/bmt.guided-s1, +CD34 -PLT -ANC.bmt-train-0.txt +++ b/adaa.analytics.rules/test/resources/reports/SurvivalLogRankExpertSnCTest/bmt.guided-s1, +CD34 -PLT -ANC.bmt-train-0.txt @@ -1,18 +1,18 @@ Rules - IF [CD34kgx10d6 = (-inf, 11.86)] AND Relapse = {0} AND Donorage35 = {1} AND Recipientage = <17.85, inf) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND RecipientRh = {1} AND Recipientage = <17.85, inf) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Txpostrelapse = {0} AND CD3dCD34 = <6.25, inf) AND Rbodymass = <38.50, inf) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Donorage = <31.95, inf) AND CD3dCD34 = <4.34, 38.68) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Donorage = <33.15, inf) AND CD3dkgx10d8 = (-inf, 4.75) AND CD3dCD34 = <0.94, 51.97) AND Rbodymass = <33.50, inf) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Diseasegroup = {1} AND Relapse = {0} AND Donorage = <27.02, inf) AND Donorage35 = {0} AND CD3dCD34 = (-inf, 14.33) THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND Recipientage = <17.85, inf) AND Donorage35 = {1} AND Relapse = {0} THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND Recipientage = <17.85, inf) AND RecipientRh = {1} THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND Rbodymass = <38.50, inf) AND CD3dCD34 = <6.25, inf) AND Txpostrelapse = {0} THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND CD3dCD34 = <4.34, 38.68) AND Donorage = <31.95, inf) THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND CD3dkgx10d8 = (-inf, 4.75) AND Rbodymass = <33.50, inf) AND CD3dCD34 = <0.94, 51.97) AND Donorage = <33.15, inf) THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND Donorage = <27.02, inf) AND Diseasegroup = {1} AND Relapse = {0} AND CD3dCD34 = (-inf, 14.33) AND Donorage35 = {0} THEN IF [CD34kgx10d6 = <11.86, inf)] AND Relapse = {0} THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND CD3dkgx10d8 = (-inf, 4.65) AND Recipientage = <12.60, inf) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Donorage35 = {1} AND Rbodymass = <32.75, inf) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Donorage = <35.70, inf) AND CD3dCD34 = <1.30, inf) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Relapse = {0} AND Stemcellsource = {1} AND time_to_aGvHD_III_IV = <27, inf) AND Rbodymass = (-inf, 54.50) AND Recipientage = (-inf, 14.95) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Stemcellsource = {0} AND Donorage = <23.22, inf) AND extcGvHD = {1} AND Donorage35 = {0} AND Recipientage = <3.25, 8.40) THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Recipientgender = {1} AND ABOmatch = {1} AND Txpostrelapse = {0} AND Recipientage10 = {0} AND CD3dCD34 = (-inf, 4.30) THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND Recipientage = <12.60, inf) AND CD3dkgx10d8 = (-inf, 4.65) THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND Rbodymass = <32.75, inf) AND Donorage35 = {1} THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND CD3dCD34 = <1.30, inf) AND Donorage = <35.70, inf) THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND time_to_aGvHD_III_IV = <27, inf) AND Recipientage = (-inf, 14.95) AND Relapse = {0} AND Rbodymass = (-inf, 54.50) AND Stemcellsource = {1} THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND Recipientage = <3.25, 8.40) AND Donorage = <23.22, inf) AND Stemcellsource = {0} AND Donorage35 = {0} AND extcGvHD = {1} THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND Recipientgender = {1} AND CD3dCD34 = (-inf, 4.30) AND ABOmatch = {1} AND Txpostrelapse = {0} AND Recipientage10 = {0} THEN IF [CD34kgx10d6 = <11.86, inf)] THEN - IF [CD34kgx10d6 = (-inf, 11.86)] AND Relapse = {0} AND Donorage = (-inf, 37.64) AND extcGvHD = {1} AND Txpostrelapse = {0} AND HLAgrI = {0} THEN + IF [CD34kgx10d6 = (-inf, 11.86)] AND Txpostrelapse = {0} AND Donorage = (-inf, 37.64) AND HLAgrI = {0} AND Relapse = {0} AND extcGvHD = {1} THEN diff --git a/adaa.analytics.rules/test/resources/reports/SurvivalLogRankSnCTest/bmt.auto.bmt-train-0.txt b/adaa.analytics.rules/test/resources/reports/SurvivalLogRankSnCTest/bmt.auto.bmt-train-0.txt index d28cfcff..363c2a37 100644 --- a/adaa.analytics.rules/test/resources/reports/SurvivalLogRankSnCTest/bmt.auto.bmt-train-0.txt +++ b/adaa.analytics.rules/test/resources/reports/SurvivalLogRankSnCTest/bmt.auto.bmt-train-0.txt @@ -1,8 +1,8 @@ Rules - IF Relapse = {0} AND Donorage = (-inf, 45.53) AND Recipientage = (-inf, 17.45) THEN + IF Donorage = (-inf, 45.53) AND Relapse = {0} AND Recipientage = (-inf, 17.45) THEN IF HLAmismatch = {0} AND Relapse = {1} THEN - IF Relapse = {0} AND Rbodymass = (-inf, 69) AND Recipientage = (-inf, 18) THEN - IF aGvHDIIIIV = {1} AND ANCrecovery = (-inf, 19.50) AND Stemcellsource = {1} AND Txpostrelapse = {0} THEN + IF Rbodymass = (-inf, 69) AND Relapse = {0} AND Recipientage = (-inf, 18) THEN + IF ANCrecovery = (-inf, 19.50) AND Txpostrelapse = {0} AND Stemcellsource = {1} AND aGvHDIIIIV = {1} THEN IF Donorage = <28.03, inf) AND CD34kgx10d6 = <1.27, 6.72) AND CD3dCD34 = <0.89, inf) AND Rbodymass = <31.50, inf) AND Recipientage = <11.55, inf) THEN