diff --git a/adaa.analytics.rules/build.gradle b/adaa.analytics.rules/build.gradle index acc7b923..14207538 100644 --- a/adaa.analytics.rules/build.gradle +++ b/adaa.analytics.rules/build.gradle @@ -27,7 +27,7 @@ codeQuality { } sourceCompatibility = 1.8 -version = '1.7.5' +version = '1.7.6' jar { diff --git a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/representation/RuleParser.java b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/representation/RuleParser.java index 25bd7698..eb902624 100644 --- a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/representation/RuleParser.java +++ b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/logic/representation/RuleParser.java @@ -16,9 +16,11 @@ import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; +import org.apache.commons.lang.math.NumberUtils; import java.util.ArrayList; import java.util.List; +import java.util.logging.Level; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -39,45 +41,50 @@ public class RuleParser { */ public static Rule parseRule(String s, ExampleSetMetaData meta) { Rule rule = null; - - Pattern pattern = Pattern.compile("IF\\s+(?<premise>.+)\\s+THEN(?<consequence>\\s+.*|\\s*)"); - Matcher matcher = pattern.matcher(s); - - boolean isSurvival = false; + + Pattern pattern = Pattern.compile("IF\\s+(?<premise>.+)\\s+THEN(?<consequence>\\s+.*|\\s*)"); + Matcher matcher = pattern.matcher(s); + + boolean isSurvival = false; if (meta.getAttributeByRole(SurvivalRule.SURVIVAL_TIME_ROLE) != null) { isSurvival = true; } - - if (matcher.find()) { - String pre = matcher.group("premise"); - String con = matcher.group("consequence"); - - ElementaryCondition consequence; - CompoundCondition premise = parseCompoundCondition(pre, meta); - - if (con == null || con.trim().length() == 0) { - if (!meta.getLabelMetaData().isNumerical()) - throw new IllegalArgumentException("Empty conclusion for non-numeric label 
attribute"); - consequence = new ElementaryCondition(); - consequence.attribute = meta.getLabelMetaData().getName(); - consequence.valueSet = new SingletonSet(NaN, null); - consequence.adjustable = false; - consequence.disabled = false; + + if (matcher.find()) { + String pre = matcher.group("premise"); + String con = matcher.group("consequence"); + + ElementaryCondition consequence = null; + CompoundCondition premise = parseCompoundCondition(pre, meta); + + if (con == null || con.trim().length() == 0) { + if (!meta.getLabelMetaData().isNumerical()) { + Logger.log("Empty conclusion for nominal label"+ "\n", Level.WARNING); + } else { + consequence = new ElementaryCondition(); + consequence.attribute = meta.getLabelMetaData().getName(); + consequence.valueSet = new SingletonSet(NaN, null); + consequence.adjustable = false; + consequence.disabled = false; + } } else { consequence = parseElementaryCondition(con, meta); } - - if (premise == null || consequence == null) { - return null; - } - - rule = meta.getLabelMetaData().isNominal() - ? new ClassificationRule(premise, consequence) - : (isSurvival + + if (premise != null && consequence != null) { + + rule = meta.getLabelMetaData().isNominal() + ? new ClassificationRule(premise, consequence) + : (isSurvival ? new SurvivalRule(premise, consequence) : new RegressionRule(premise, consequence)); - } - + } + } + + if (rule == null) { + Logger.log("Omitting expert's knowledge entry: " + s + "\n", Level.WARNING); + } + return rule; } @@ -143,6 +150,10 @@ public static ElementaryCondition parseElementaryCondition(String s, ExampleSetM IValueSet valueSet = null; AttributeMetaData attributeMeta = meta.getAttributeByName(attribute); + if (attributeMeta == null) { + Logger.log("Attribute <" + attribute + "> not found"+ "\n", Level.WARNING); + return null; + } ConditionBase.Type type = (numBrackets == 0) ? ConditionBase.Type.NORMAL : ((numBrackets == 1) ? 
ConditionBase.Type.PREFERRED : ConditionBase.Type.FORCED); @@ -159,7 +170,8 @@ public static ElementaryCondition parseElementaryCondition(String s, ExampleSetM mapping.addAll(attributeMeta.getValueSet()); double v = mapping.indexOf(value); if (v == -1) { - return null; + Logger.log("Invalid value <" + value + "> of the nominal attribute <" + attribute + ">"+ "\n", Level.WARNING); + return null; } valueSet = new SingletonSet(v, mapping); @@ -180,14 +192,36 @@ public static ElementaryCondition parseElementaryCondition(String s, ExampleSetM matcher = regex.matcher(valueString); if (matcher.find()) { + String lo = matcher.group("lo"); String hi = matcher.group("hi"); - - valueSet = new Interval( - lo.equals("-inf") ? Interval.MINUS_INF : Double.parseDouble(lo), - hi.equals("inf")? Interval.INF : Double.parseDouble(hi), - leftClosed, rightClosed); - } + + double numLo = Double.NaN; + double numHi = Double.NaN; + + if (lo.equals("-inf")) { + numLo = Interval.MINUS_INF; + } else if (NumberUtils.isNumber(lo)) { + numLo = Double.parseDouble(lo); + } else { + Logger.log("Invalid lower interval bound: " + lo + "\n" , Level.WARNING); + return null; + } + + if (hi.equals("inf")) { + numHi = Interval.INF; + } else if (NumberUtils.isNumber(hi)) { + numHi = Double.parseDouble(hi); + } else { + Logger.log("Invalid upper interval bound: " + hi + "\n", Level.WARNING ); + return null; + } + + valueSet = new Interval(numLo, numHi, leftClosed, rightClosed); + } else { + Logger.log("Invalid interval: " + valueString, Level.WARNING ); + return null; + } } } @@ -196,8 +230,11 @@ public static ElementaryCondition parseElementaryCondition(String s, ExampleSetM out.setType(type); out.setAdjustable(adjustable); } - } + } else { + Logger.log("Invalid elementary condition: " + s + "\n", Level.WARNING ); + } return out; } + } diff --git a/examples/guider-errors.xml b/examples/guider-errors.xml new file mode 100644 index 00000000..90098070 --- /dev/null +++ b/examples/guider-errors.xml @@ -0,0 
+1,172 @@ + + + + + + + + true + + IF [[gimpul = (-inf, 750)]] THEN class = {0} + + + + + + + + + + + true + + + + 1: IF [[gimpuls = Any]] THEN class = {14} + + + + + + + + + true + + + + inf: IF [[seismic = {q}]] THEN class = {0} + + + + + + + + + true + + + + inf: IF [[seismic - {q}]] THEN class = {0} + + + + + + + + + true + + + + inf: IFF [[seismic = {a}]] THEN class = {0} + + + + + + + + + true + + IF [[gimpuls = (-inf 750)]] THEN class = {0} + + + + + + + + + + + true + + IF [[gimpuls = -inf, 750)]] THEN class = {0} + + + + + + + + + + + true + + IF [[gimpuls = (-inf, 750]] THEN class = {0} + + + + + + + + + + + true + + IF [[gimpuls = (-inff, 750)]] THEN class = {0} + + + + + + + + + + + true + + IF [[gimpuls = (-inf, a750)]] THEN class = {0} + + + + + + + + + + + true + + IF [[gimpuls = (-inf, 0.5.4)]] THEN class = {0} + + + + + + + + + + + + + ./results-guider/errors + + + training.txt + + + ../data/seismic-bumps/seismic-bumps.arff + seismic-bumps-full.mdl + + + + + + + + + + + + \ No newline at end of file