Skip to content

Commit

Permalink
Fix escaping by using RFC compliant parser (#496)
Browse files Browse the repository at this point in the history
By configuring the CSVReader with an RFC-compliant parser the escaping
is fixed.

- update opencsv dependency to version 5.9
- add test
  • Loading branch information
dr0i committed Oct 18, 2024
1 parent 71c367c commit 6f39fcc
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 6 deletions.
2 changes: 1 addition & 1 deletion metafacture-csv/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ description = 'Modules for processing comma-separated values'

dependencies {
api project(':metafacture-framework')
// The two opencsv lines are the removed/added pair of this diff: 3.10 is the
// version being replaced, 5.9 is the one that remains after the commit.
// CsvDecoder builds its reader via CSVReaderBuilder with an RFC4180Parser from this library.
implementation 'com.opencsv:opencsv:3.10'
implementation 'com.opencsv:opencsv:5.9'
testImplementation "junit:junit:${versions.junit}"
testImplementation "org.mockito:mockito-core:${versions.mockito}"
}
26 changes: 21 additions & 5 deletions metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
* Copyright 2013-2024 Deutsche Nationalbibliothek and hbz
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
Expand All @@ -24,6 +24,10 @@
import org.metafacture.framework.helpers.DefaultObjectPipe;

import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.RFC4180Parser;
import com.opencsv.RFC4180ParserBuilder;
import com.opencsv.exceptions.CsvException;

import java.io.IOException;
import java.io.StringReader;
Expand All @@ -48,6 +52,7 @@ public final class CsvDecoder extends DefaultObjectPipe<String, StreamReceiver>
private String[] header = new String[0];
private int count;
private boolean hasHeader;
private RFC4180Parser parser;

/**
* Creates an instance of {@link CsvDecoder} with a given separator.
Expand All @@ -56,6 +61,7 @@ public final class CsvDecoder extends DefaultObjectPipe<String, StreamReceiver>
*/
public CsvDecoder(final String separator) {
    // Only the first character of the string serves as separator; the char
    // constructor stores it and builds the RFC 4180 parser.
    this(separator.charAt(0));
}

/**
Expand All @@ -65,13 +71,21 @@ public CsvDecoder(final String separator) {
*/
public CsvDecoder(final char separator) {
this.separator = separator;
// Must run after the assignment above: initializeCsvParser() reads the separator field.
initializeCsvParser();
}

/**
* Creates an instance of {@link CsvDecoder}. The default separator is
* {@value #DEFAULT_SEP}.
*/
public CsvDecoder() {
// No separator argument here: the parser is built from the separator field's current value.
initializeCsvParser();
}

/**
 * (Re)builds the RFC 4180 parser from the current separator. Invoked by the
 * constructors and by {@code setSeparator} so that the parser always
 * matches the separator field.
 */
private void initializeCsvParser() {
    final RFC4180ParserBuilder builder = new RFC4180ParserBuilder().withSeparator(separator);
    parser = builder.build();
}

@Override
Expand Down Expand Up @@ -105,18 +119,19 @@ else if (parts.length == header.length) {
}
}

private String[] parseCsv(final String string) {
private String[] parseCsv(final String csv) {
String[] parts = new String[0];
try {
final CSVReader reader = new CSVReader(new StringReader(string),
separator);
final CSVReader reader = new CSVReaderBuilder(new StringReader(csv))
.withCSVParser(parser)
.build();
final List<String[]> lines = reader.readAll();
if (lines.size() > 0) {
parts = lines.get(0);
}
reader.close();
}
catch (final IOException e) {
catch (final IOException | CsvException e) {
e.printStackTrace();
}
return parts;
Expand All @@ -139,5 +154,6 @@ public void setHasHeader(final boolean hasHeader) {
*/
public void setSeparator(final String separator) {
    // Only the first character of the given string is used as separator.
    final char firstChar = separator.charAt(0);
    this.separator = firstChar;
    // The parser is built with the separator, so rebuild it for the new value.
    initializeCsvParser();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,22 @@ public void testTabSeparated() {
ordered.verify(receiver).endRecord();
}

/**
 * Checks that backslashes inside quoted fields are passed through
 * unchanged instead of being treated as escape characters (issue 496).
 *
 * Input record as seen at runtime (TAB stands for a tab character):
 *   "a","bTAB","c\t","\","\cd\"
 * Expected literals, in order:
 *   a | bTAB | c\t | \ | \cd\
 */
@Test
public void issue496_escaping() {
    final String record = "\"a\",\"b\t\",\"c\\t\",\"\\\",\"\\cd\\\"";

    decoder.setHasHeader(false);
    decoder.process(record);

    final InOrder sequence = inOrder(receiver);
    sequence.verify(receiver).startRecord("1");
    sequence.verify(receiver).literal("0", "a");
    sequence.verify(receiver).literal("1", "b\t");
    sequence.verify(receiver).literal("2", "c\\t");
    sequence.verify(receiver).literal("3", "\\");
    sequence.verify(receiver).literal("4", "\\cd\\");
    sequence.verify(receiver).endRecord();
}

}

0 comments on commit 6f39fcc

Please sign in to comment.