Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid assertion error crashes during import #4829

Merged
merged 1 commit into from
Sep 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,85 +54,90 @@ private void importData(File file, int cancerStudyId) throws IOException, DaoExc
FileReader reader = new FileReader(file);
BufferedReader buf = new BufferedReader(reader);
try {
String line = buf.readLine(); // skip header line
long segId = DaoCopyNumberSegment.getLargestId();
while ((line=buf.readLine()) != null) {
ProgressMonitor.incrementCurValue();
ConsoleUtil.showProgress();
String[] strs = line.split("\t");
if (strs.length<6) {
System.err.println("wrong format: "+line);
}
CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(cancerStudyId);
//TODO - lines below should be removed. Agreed with JJ to remove this as soon as MSK moves to new validation
//procedure. In this new procedure, Patients and Samples should only be added
//via the corresponding ImportClinicalData process. Furthermore, the code below is wrong as it assumes one
//sample per patient, which is not always the case.
String barCode = strs[0];
Sample sample = DaoSample.getSampleByCancerStudyAndSampleId(cancerStudyId,
String line = buf.readLine(); // skip header line
long segId = DaoCopyNumberSegment.getLargestId();
while ((line=buf.readLine()) != null) {
ProgressMonitor.incrementCurValue();
ConsoleUtil.showProgress();
String[] strs = line.split("\t");
if (strs.length<6) {
System.err.println("wrong format: "+line);
}
CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(cancerStudyId);
//TODO - lines below should be removed. Agreed with JJ to remove this as soon as MSK moves to new validation
//procedure. In this new procedure, Patients and Samples should only be added
//via the corresponding ImportClinicalData process. Furthermore, the code below is wrong as it assumes one
//sample per patient, which is not always the case.
String barCode = strs[0];
Sample sample = DaoSample.getSampleByCancerStudyAndSampleId(cancerStudyId,
StableIdUtil.getSampleId(barCode));
if (sample == null ) {
ImportDataUtil.addPatients(new String[] { barCode }, cancerStudy);
ImportDataUtil.addSamples(new String[] { barCode }, cancerStudy);
ProgressMonitor.logWarning("WARNING: Sample added on the fly because it was missing in clinical data");
}

String sampleId = StableIdUtil.getSampleId(barCode);
String chrom = strs[1].trim();
//validate in same way as GisticReader:
ValidationUtils.validateChromosome(chrom);

long start = Double.valueOf(strs[2]).longValue();
long end = Double.valueOf(strs[3]).longValue();
if (start >= end) {
//workaround to skip with warning, according to https://github.com/cBioPortal/cbioportal/issues/839#issuecomment-203452415
ProgressMonitor.logWarning("Start position of segment is not lower than end position. Skipping this entry.");
entriesSkipped++;
continue;
}
int numProbes = new BigDecimal((strs[4])).intValue();
double segMean = Double.parseDouble(strs[5]);

Sample s = DaoSample.getSampleByCancerStudyAndSampleId(cancerStudyId, sampleId);
if (s == null) {
assert StableIdUtil.isNormal(sampleId);
entriesSkipped++;
continue;
}
CopyNumberSegment cns = new CopyNumberSegment(cancerStudyId, s.getInternalId(), chrom, start, end, numProbes, segMean);
cns.setSegId(++segId);
ImportDataUtil.addPatients(new String[] { barCode }, cancerStudy);
ImportDataUtil.addSamples(new String[] { barCode }, cancerStudy);
ProgressMonitor.logWarning("WARNING: Sample added on the fly because it was missing in clinical data");
}

String sampleId = StableIdUtil.getSampleId(barCode);
String chrom = strs[1].trim();
//validate in same way as GisticReader:
ValidationUtils.validateChromosome(chrom);

long start = Double.valueOf(strs[2]).longValue();
long end = Double.valueOf(strs[3]).longValue();
if (start >= end) {
//workaround to skip with warning, according to https://github.com/cBioPortal/cbioportal/issues/839#issuecomment-203452415
ProgressMonitor.logWarning("Start position of segment is not lower than end position. Skipping this entry.");
entriesSkipped++;
continue;
}
int numProbes = new BigDecimal((strs[4])).intValue();
double segMean = Double.parseDouble(strs[5]);

Sample s = DaoSample.getSampleByCancerStudyAndSampleId(cancerStudyId, sampleId);
if (s == null) {
if (StableIdUtil.isNormal(sampleId)) {
entriesSkipped++;
continue;
}
else {
//this likely will not be reached since samples are added on the fly above if not known to database
throw new RuntimeException("Unknown sample id '" + sampleId + "' found in seg file: " + file.getCanonicalPath());
}
}
CopyNumberSegment cns = new CopyNumberSegment(cancerStudyId, s.getInternalId(), chrom, start, end, numProbes, segMean);
cns.setSegId(++segId);
DaoCopyNumberSegment.addCopyNumberSegment(cns);
}
MySQLbulkLoader.flushAll();
}
MySQLbulkLoader.flushAll();
}
finally {
buf.close();
buf.close();
}
}

public void run() {
try {
String description = "Import 'segment data' files";
OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true);
String dataFile = (String) options.valueOf("data");
File descriptorFile = new File((String) options.valueOf("meta"));
String description = "Import 'segment data' files";
OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true);
String dataFile = (String) options.valueOf("data");
File descriptorFile = new File((String) options.valueOf("meta"));

Properties properties = new Properties();
properties.load(new FileInputStream(descriptorFile));
Properties properties = new Properties();
properties.load(new FileInputStream(descriptorFile));

ProgressMonitor.setCurrentMessage("Reading data from: " + dataFile);

SpringUtil.initDataSource();
CancerStudy cancerStudy = getCancerStudy(properties);
if (segmentDataExistsForCancerStudy(cancerStudy)) {
throw new IllegalArgumentException("Seg data for cancer study " + cancerStudy.getCancerStudyStableId() + " has already been imported: " + dataFile);
}
importCopyNumberSegmentFileMetadata(cancerStudy, properties);
SpringUtil.initDataSource();
CancerStudy cancerStudy = getCancerStudy(properties);
if (segmentDataExistsForCancerStudy(cancerStudy)) {
throw new IllegalArgumentException("Seg data for cancer study " + cancerStudy.getCancerStudyStableId() + " has already been imported: " + dataFile);
}
importCopyNumberSegmentFileMetadata(cancerStudy, properties);
importCopyNumberSegmentFileData(cancerStudy, dataFile);
importFractionGenomeAltered(cancerStudy);
} catch (RuntimeException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,15 @@ public void importData() throws IOException, DaoException {
// can be null in case of 'normal' sample:
// (if data files are run through validator, this condition should be minimal)
if (sample == null) {
assert StableIdUtil.isNormal(barCode);
//if new sample:
if (sampleSet.add(barCode))
samplesSkipped++;
continue;
if (StableIdUtil.isNormal(barCode)) {
//if new sample:
if (sampleSet.add(barCode))
samplesSkipped++;
continue;
}
else {
throw new RuntimeException("Unknown sample id '" + StableIdUtil.getSampleId(barCode) + "' found in MAF file: " + this.mutationFile.getCanonicalPath());
}
}

String validationStatus = record.getValidationStatus();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,13 @@ public void importData() throws IOException, DaoException {
StableIdUtil.getSampleId(barCode));
// can be null in case of 'normal' sample:
if (sample == null) {
assert StableIdUtil.isNormal(barCode);
line = buf.readLine();
continue;
if (StableIdUtil.isNormal(barCode)) {
line = buf.readLine();
continue;
}
else {
throw new RuntimeException("Unknown sample id '" + barCode + "' found in tab-delimited file: " + this.fusionFile.getCanonicalPath());
}
}
// Assume we are dealing with Entrez Gene Ids (this is the best / most stable option)
String geneSymbol = record.getHugoGeneSymbol();
Expand Down
Loading