diff --git a/core/build.gradle b/core/build.gradle index cb825f6771..a5de15b596 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -68,6 +68,7 @@ dependencies { exclude group: "org.yaml" exclude module: "snakeyaml" exclude module: "commons-lang3" + exclude module: "commons-text" } // This was reported here https://github.com/neo4j-contrib/neo4j-apoc-procedures/issues/3048 @@ -120,7 +121,7 @@ dependencies { compileOnly group: 'com.amazonaws', name: 'aws-java-sdk-comprehend', version: '1.12.214' , withoutJacksons testImplementation group: 'com.amazonaws', name: 'aws-java-sdk-comprehend', version: '1.12.214' , withoutJacksons - compile group: 'com.opencsv', name: 'opencsv', version: '4.6' + compile group: 'com.opencsv', name: 'opencsv', version: '5.7.1' compile group: 'commons-beanutils', name: 'commons-beanutils', version: '1.9.4' compileOnly group: 'org.ow2.asm', name: 'asm', version: '5.0.2' diff --git a/core/src/main/java/apoc/export/csv/CsvEntityLoader.java b/core/src/main/java/apoc/export/csv/CsvEntityLoader.java index a761594a17..ef45b0cd90 100644 --- a/core/src/main/java/apoc/export/csv/CsvEntityLoader.java +++ b/core/src/main/java/apoc/export/csv/CsvEntityLoader.java @@ -187,61 +187,64 @@ public void loadRelationships( .collect(Collectors.toList()); final Map mapping = getMapping(fields); + final var parser = new CSVParserBuilder().withSeparator(clc.getDelimiter()).build(); - final CSVReader csv = new CSVReader(reader, clc.getDelimiter()); - final String[] loadCsvCompatibleHeader = fields.stream().map(f -> f.getName()).toArray(String[]::new); + try (final var csv = new CSVReaderBuilder(reader).withCSVParser(parser).build()) { - AtomicInteger lineNo = new AtomicInteger(); - BatchTransaction btx = new BatchTransaction(db, clc.getBatchSize(), reporter); - try { - csv.forEach(line -> { - lineNo.getAndIncrement(); + final String[] loadCsvCompatibleHeader = fields.stream().map(f -> f.getName()).toArray(String[]::new); - final EnumSet results = EnumSet.of(Results.map); - final CSVResult result = new CSVResult( - loadCsvCompatibleHeader, line, lineNo.get(), false, mapping, Collections.emptyList(), results - ); + AtomicInteger lineNo = new AtomicInteger(); + BatchTransaction btx = new BatchTransaction(db, clc.getBatchSize(), reporter); + try { + csv.forEach(line -> { + lineNo.getAndIncrement(); - final Object startId = result.map.get(CsvLoaderConstants.START_ID_ATTR); - final Object startInternalId = idMapping.get(startIdField.getIdSpace()).get(startId); - if (startInternalId == null) { - throw new IllegalStateException("Node for id space " + endIdField.getIdSpace() + " and id " + startId + " not found"); - } - final Node source = btx.getTransaction().getNodeById((long) startInternalId); + final EnumSet results = EnumSet.of(Results.map); + final CSVResult result = new CSVResult( + loadCsvCompatibleHeader, line, lineNo.get(), false, mapping, Collections.emptyList(), results + ); - final Object endId = result.map.get(CsvLoaderConstants.END_ID_ATTR); - final Object endInternalId = idMapping.get(endIdField.getIdSpace()).get(endId); - if (endInternalId == null) { - throw new IllegalStateException("Node for id space " + endIdField.getIdSpace() + " and id " + endId + " not found"); - } - final Node target = btx.getTransaction().getNodeById((long) endInternalId); - - final String currentType; - final Object overridingType = result.map.get(CsvLoaderConstants.TYPE_ATTR); - if (overridingType != null && !((String) overridingType).isEmpty()) { - currentType = (String) overridingType; - } else { - currentType = type; - } - final Relationship rel = source.createRelationshipTo(target, RelationshipType.withName(currentType)); + final Object startId = result.map.get(CsvLoaderConstants.START_ID_ATTR); + final Object startInternalId = idMapping.get(startIdField.getIdSpace()).get(startId); + if (startInternalId == null) { + throw new IllegalStateException("Node for id space " + endIdField.getIdSpace() + " and id " + startId + " not found"); + } + final Node source = btx.getTransaction().getNodeById((long) startInternalId); - // add properties - int props = 0; - for (CsvHeaderField field : edgePropertiesFields) { - final String name = field.getName(); - Object value = result.map.get(name); - boolean propertyAdded = CsvPropertyConverter.addPropertyToGraphEntity(rel, field, value, clc); - props += propertyAdded ? 1 : 0; - } - btx.increment(); - reporter.update(0, 1, props); - }); - btx.commit(); - } catch (RuntimeException e) { - btx.rollback(); - throw e; - } finally { - btx.close(); + final Object endId = result.map.get(CsvLoaderConstants.END_ID_ATTR); + final Object endInternalId = idMapping.get(endIdField.getIdSpace()).get(endId); + if (endInternalId == null) { + throw new IllegalStateException("Node for id space " + endIdField.getIdSpace() + " and id " + endId + " not found"); + } + final Node target = btx.getTransaction().getNodeById((long) endInternalId); + + final String currentType; + final Object overridingType = result.map.get(CsvLoaderConstants.TYPE_ATTR); + if (overridingType != null && !((String) overridingType).isEmpty()) { + currentType = (String) overridingType; + } else { + currentType = type; + } + final Relationship rel = source.createRelationshipTo(target, RelationshipType.withName(currentType)); + + // add properties + int props = 0; + for (CsvHeaderField field : edgePropertiesFields) { + final String name = field.getName(); + Object value = result.map.get(name); + boolean propertyAdded = CsvPropertyConverter.addPropertyToGraphEntity(rel, field, value, clc); + props += propertyAdded ? 1 : 0; + } + btx.increment(); + reporter.update(0, 1, props); + }); + btx.commit(); + } catch (RuntimeException e) { + btx.rollback(); + throw e; + } finally { + btx.close(); + } } } } diff --git a/full/build.gradle b/full/build.gradle index 4f52112d55..19714e92ac 100644 --- a/full/build.gradle +++ b/full/build.gradle @@ -84,6 +84,7 @@ dependencies { exclude group: "org.yaml" exclude module: "snakeyaml" exclude module: "commons-lang3" + exclude module: "commons-text" } compile group: 'org.yaml', name: 'snakeyaml', version: '1.32' testCompile group: 'com.github.stefanbirkner', name: 'system-rules', version: '1.19.0' @@ -140,7 +141,7 @@ dependencies { compileOnly group: 'com.amazonaws', name: 'aws-java-sdk-comprehend', version: '1.12.214' , withoutJacksons testImplementation group: 'com.amazonaws', name: 'aws-java-sdk-comprehend', version: '1.12.214' , withoutJacksons - compile group: 'com.opencsv', name: 'opencsv', version: '4.6' + compile group: 'com.opencsv', name: 'opencsv', version: '5.7.1' compile group: 'commons-beanutils', name: 'commons-beanutils', version: '1.9.4' compileOnly group: 'org.ow2.asm', name: 'asm', version: '5.0.2' compile group: 'com.github.javafaker', name: 'javafaker', version: '1.0.2' diff --git a/full/src/main/java/apoc/load/LoadCsv.java b/full/src/main/java/apoc/load/LoadCsv.java index 1213c10ec3..f514475b67 100644 --- a/full/src/main/java/apoc/load/LoadCsv.java +++ b/full/src/main/java/apoc/load/LoadCsv.java @@ -8,6 +8,7 @@ import com.opencsv.CSVParserBuilder; import com.opencsv.CSVReader; import com.opencsv.CSVReaderBuilder; +import com.opencsv.exceptions.CsvValidationException; import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.procedure.Context; import org.neo4j.procedure.Description; @@ -51,7 +52,7 @@ public Stream csvParams(@Name("urlOrBinary") Object urlOrBinary, @Nam } reader = FileUtils.readerFor(urlOrBinary, httpHeaders, payload, config.getCompressionAlgo()); return streamCsv(url, config, reader); - } catch (IOException e) { + } catch (IOException | CsvValidationException e) { closeReaderSafely(reader); if(!config.isFailOnError()) return Stream.of(new CSVResult(new String[0], new String[0], 0, true, Collections.emptyMap(), emptyList(), EnumSet.noneOf(Results.class))); @@ -60,7 +61,7 @@ public Stream csvParams(@Name("urlOrBinary") Object urlOrBinary, @Nam } } - public Stream streamCsv(@Name("url") String url, LoadCsvConfig config, CountingReader reader) throws IOException { + public Stream streamCsv(@Name("url") String url, LoadCsvConfig config, CountingReader reader) throws IOException, CsvValidationException { CSVReader csv = new CSVReaderBuilder(reader) .withCSVParser(new CSVParserBuilder() @@ -78,7 +79,7 @@ public Stream streamCsv(@Name("url") String url, LoadCsvConfig config .onClose(() -> closeReaderSafely(reader)); } - private String[] getHeader(CSVReader csv, LoadCsvConfig config) throws IOException { + private String[] getHeader(CSVReader csv, LoadCsvConfig config) throws IOException, CsvValidationException { if (!config.isHasHeader()) return null; String[] headers = csv.readNext(); List ignore = config.getIgnore(); @@ -106,7 +107,7 @@ private static class CSVSpliterator extends Spliterators.AbstractSpliterator mapping, List nullValues, EnumSet results, boolean ignoreErrors) throws IOException { + public CSVSpliterator(CSVReader csv, String[] header, String url, long skip, long limit, boolean ignore, Map mapping, List nullValues, EnumSet results, boolean ignoreErrors) throws IOException, CsvValidationException { super(Long.MAX_VALUE, Spliterator.ORDERED); this.csv = csv; this.header = header; @@ -133,8 +134,10 @@ public boolean tryAdvance(Consumer action) { return true; } return false; - } catch (IOException e) { + } catch (IOException | CsvValidationException e) { throw new RuntimeException("Error reading CSV from " + (url == null ? "binary" : " URL " + cleanUrl(url)) + " at " + lineNo, e); + } catch (ArrayIndexOutOfBoundsException e) { + throw new RuntimeException("Error reading CSV from " + (url == null ? "binary" : " URL " + cleanUrl(url)) + " at " + lineNo + ". Please check whether you included a delimiter before a column separator or forgot a column separator."); } } } diff --git a/full/src/test/java/apoc/load/LoadCsvTest.java b/full/src/test/java/apoc/load/LoadCsvTest.java index 85e864bbe3..8b0627aa68 100644 --- a/full/src/test/java/apoc/load/LoadCsvTest.java +++ b/full/src/test/java/apoc/load/LoadCsvTest.java @@ -182,7 +182,7 @@ static void assertRow(Result r, String name, String age, long lineNo) { public void testLoadCsvEscape() { String url = "test-escape.csv"; final List results = List.of("map", "list", "stringMap", "strings"); - testResult(db, "CALL apoc.load.csv($url, $config)", + testResult(db, "CALL apoc.load.csv($url, $config)", map("url", url, "config", map("results", results)), (r) -> { assertRow(r, 0L,"name", "Naruto", "surname","Uzumaki"); @@ -192,12 +192,47 @@ public void testLoadCsvEscape() { testResult(db, "CALL apoc.load.csv($url,$config)", map("url", url, "config", map("results", results, "escapeChar", "NONE")), (r) -> { - assertRow(r, 0L,"name", "Narut\\o\\", "surname","Uzu\\maki"); + assertRow(r, 0L,"name", "Narut\\o", "surname","Uzu\\maki"); assertRow(r, 1L,"name", "Minat\\o", "surname","Nami\\kaze"); assertFalse(r.hasNext()); }); } + @Test + public void testLoadCsvWithEscapedDelimiters() { + String url = "test-escaped-delimiters.csv"; + final List results = List.of("map", "list", "stringMap", "strings"); + + /* In OpenCSV library 5.1 -> 5.2 they corrected a bug: https://sourceforge.net/p/opencsv/bugs/212/ + Now when we have an escaping character before a delimiter, + the delimiter is ignored + + This means before a line: + one\,two + + with the escaping character '\' and ',' as separator would parse + as two columns. As in first it splits, then escapes. + Now it looks like the new version of the library first escapes + (so the ',' is escaped) + and then splits, so it parses as one column. + */ + var e = assertThrows(RuntimeException.class, () -> testResult(db, "CALL apoc.load.csv($url, $config)", + map("url", url, "config", map("results", results)), + (r) -> { + // Consume the stream so it throws the exception + r.stream().toArray(); + }) + ); + assertTrue(e.getMessage().contains("Please check whether you included a delimiter before a column separator or forgot a column separator.")); + + testResult(db, "CALL apoc.load.csv($url,$config)", + map("url", url, "config", map("results", results, "escapeChar", "NONE")), + (r) -> { + assertRow(r, 0L,"name", "Narut\\o\\", "surname","Uzu\\maki"); + assertFalse(r.hasNext()); + }); + } + @Test public void testLoadCsvNoHeader() throws Exception { String url = "test-no-header.csv"; testResult(db, "CALL apoc.load.csv($url,{header:false,results:['map','list','stringMap','strings']})", map("url",url), // 'file:test.csv' diff --git a/full/src/test/resources/test-escape.csv b/full/src/test/resources/test-escape.csv index 425c1cb179..60b1907ed5 100644 --- a/full/src/test/resources/test-escape.csv +++ b/full/src/test/resources/test-escape.csv @@ -1,3 +1,3 @@ name,surname -Narut\o\,Uzu\maki +Narut\o,Uzu\maki Minat\o,Nami\kaze diff --git a/full/src/test/resources/test-escaped-delimiters.csv b/full/src/test/resources/test-escaped-delimiters.csv new file mode 100644 index 0000000000..366b84a95e --- /dev/null +++ b/full/src/test/resources/test-escaped-delimiters.csv @@ -0,0 +1,2 @@ +name,surname +Narut\o\,Uzu\maki \ No newline at end of file