Skip to content

Commit

Permalink
Merge pull request #233 from opencb/TASK-2342
Browse files (browse the repository at this point in the history)
TASK-2342 - Error at variant-index "SQLException: ERROR 201 (22000): Illegal data"
  • Loading branch information
j-coll authored Dec 20, 2022
2 parents e153533 + f428897 commit 049f37c
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,13 @@ public boolean sameGenomicVariant(Object o) {
if (getAlternate() != null ? !getAlternate().equals(variant.getAlternate()) : variant.getAlternate() != null) {
return false;
}
return getType() == variant.getType();
if (getType() != variant.getType()) {
return false;
}
if (getSv() != null ? !getSv().equals(variant.getSv()) : variant.getSv() != null) {
return false;
}
return true;

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,15 @@ public class VariantBuilder {

protected static Logger logger = LoggerFactory.getLogger(VariantBuilder.class);

private static final String CHROMOSOME_REGGEX = "[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*";
private static final String POSITION_REGGEX = "("
+ "\\p{Digit}+|\\p{Digit}+<\\p{Digit}+<\\p{Digit}+"
private static final String CHROMOSOME_REGEX = "[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*";
private static final String POSITION_REGEX = "("
+ "\\p{Digit}+|(-)?\\p{Digit}+<\\p{Digit}+<\\p{Digit}+"
+ ")";
private static final String REFERENCE_REGGEX = "("
private static final String REFERENCE_REGEX = "("
+ "[ACGTN]+|" // Simple reference
+ "-" // No reference
+ ")";
private static final String ALTERNATE_REGGEX = "("
private static final String ALTERNATE_REGEX = "("
+ "\\.|" // No variation
+ "-|" // No alternate
+ "[ACGTN]+|" // Simple alternate
Expand All @@ -88,13 +88,17 @@ public class VariantBuilder {
+ PARTIAL_INS_SEQ_SEPARATOR + "[ACGT]+|" // Partial long insertion, no left
+ "\\*|" // Span deletion
+ "<[^<>]+>|" // Symbolic alternate
+ "([ACGTN]*|\\.)([\\[\\]])(" + CHROMOSOME_REGGEX + "):(\\p{Digit}+)([\\[\\]])([ACGTN]*|\\.)" // Breakend
+ "([ACGTN]*|\\.)([\\[\\]])(" + CHROMOSOME_REGEX + "):(\\p{Digit}+)([\\[\\]])([ACGTN]*|\\.)" // Breakend
+ ")";
private static final Pattern ALTERNATE_PATTERN = Pattern.compile(ALTERNATE_REGGEX);
protected static final Pattern VARIANT_PATTERN = Pattern.compile("(?<chromosome>" + CHROMOSOME_REGGEX + ")"
+ ":(?<start>" + POSITION_REGGEX + ")(-(?<end>" + POSITION_REGGEX + "))?"
+ "(:(?<reference>" + REFERENCE_REGGEX +")?)?"
+ ":(?<alternate>" + ALTERNATE_REGGEX + "(,"+ALTERNATE_REGGEX+")*)");
private static final String START_END_REGEX = ""
+ "(?<start>" + POSITION_REGEX + ")"
+ "(-(?<end>" + POSITION_REGEX + "))?";
private static final Pattern ALTERNATE_PATTERN = Pattern.compile(ALTERNATE_REGEX);
private static final Pattern START_END_PATTERN = Pattern.compile(START_END_REGEX);
protected static final Pattern VARIANT_PATTERN = Pattern.compile("(?<chromosome>" + CHROMOSOME_REGEX + ")"
+ ":" + START_END_REGEX
+ "(:(?<reference>" + REFERENCE_REGEX +")?)?"
+ ":(?<alternate>" + ALTERNATE_REGEX + "(,"+ALTERNATE_REGEX+")*)");

static {
SV_TYPES = EnumSet.copyOf(Variant.SV_SUBTYPES);
Expand Down Expand Up @@ -186,12 +190,29 @@ public VariantBuilder(String variantString) {
parseAlternate(fields[3]);

// Structural variant (except <INS>) needs start-end coords (<INS> may be missing end)
if (fields[1].contains("-")) {
String[] coordinatesParts = fields[1].split("-");
parseStart(coordinatesParts[0], variantString);
parseEnd(coordinatesParts[1], variantString);
String startEnd = fields[1];
if (startEnd.contains("-")) {
String[] coordinatesParts = startEnd.split("-");
if (coordinatesParts.length == 2 && !coordinatesParts[0].isEmpty()) {
parseStart(coordinatesParts[0], variantString);
parseEnd(coordinatesParts[1], variantString);
} else {
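// Unusual case: splitting on '-' did not yield exactly "<start>-<end>" with a non-empty start
// (e.g. a negative lower bound such as "-100<100<200" introduces an extra '-').
// Fall back to the start-end REGEX to parse the coordinates.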
Matcher matcher = START_END_PATTERN.matcher(startEnd);
if (matcher.matches()) {
parseStart(matcher.group("start"), variantString);
// End might not be defined
String end = matcher.group("end");
if (end != null) {
parseEnd(end, variantString);
}
} else {
throw new IllegalArgumentException("Invalid coordinates position '" + startEnd + "' for variant "
+ variantString);
}
}
} else {
parseStart(fields[1], variantString);
parseStart(startEnd, variantString);
}
} else {
regexParse(variantString);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,19 @@ public void parseVariantTest() {
map.put("1:1000:ACACAC...", new Variant("1", 1000, 999, "", "<INS>").setLength(Variant.UNKNOWN_LENGTH).setType(VariantType.INSERTION).setSv(new StructuralVariation(null, null, null, null, null, "ACACAC", "", null, null)));
map.put("1:799984<800001<800022:-:TGTGGTGTGTGTGGTGTG...ACCACACCCACACAACACACA", new Variant("1", 800001, 800000, "", "<INS>").setLength(Variant.UNKNOWN_LENGTH).setType(VariantType.INSERTION).setSv(new StructuralVariation(799984, 800022, null, null, null, "TGTGGTGTGTGTGGTGTG", "ACCACACCCACACAACACACA", null, null)));

// Negative CIPOS (the second case also has a negative lower bound on the end coordinate)
map.put("1:-100<100<200-20<150<200:-:<DEL>", new Variant("1", 100, 150, "", "<DEL>")
.setLength(51)
.setType(VariantType.DELETION)
.setSv(new StructuralVariation(-100, 200, 20, 200, null, null, null, null, null)));
map.put("1:-100<100<200--20<150<200:-:<DEL>", new Variant("1", 100, 150, "", "<DEL>")
.setLength(51)
.setType(VariantType.DELETION)
.setSv(new StructuralVariation(-100, 200, -20, 200, null, null, null, null, null)));
map.put("1:-100<100<200:-:<INS>", new Variant("1", 100, 99, "", "<INS>")
.setType(VariantType.INSERTION)
.setSv(new StructuralVariation(-100, 200, null, null, null, null, null, null, null)));

// Breakends
map.put("1:1000:A:A.", new Variant("1", 1000, 999, "A", "A.").setLength(Variant.UNKNOWN_LENGTH).setType(VariantType.BREAKEND).setSv(new StructuralVariation(null, null, null, null, null, null, null, null, null)));
map.put("1:800001:A:A[2:321681[", new Variant("1", 800001, 800000, "A", "A[2:321681[").setLength(Variant.UNKNOWN_LENGTH).setType(VariantType.BREAKEND).setSv(new StructuralVariation(null, null, null, null, null, null, null, null, new Breakend(new BreakendMate("2", 321681, null, null), BreakendOrientation.SE, null))));
Expand Down

0 comments on commit 049f37c

Please sign in to comment.