diff --git a/src/main/java/de/gwdg/metadataqa/marc/CsvUtils.java b/src/main/java/de/gwdg/metadataqa/marc/CsvUtils.java index e95f8d842..5c24375e6 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/CsvUtils.java +++ b/src/main/java/de/gwdg/metadataqa/marc/CsvUtils.java @@ -14,6 +14,10 @@ public static String createCsv(List values) { return createCsv(asArray(values)); } + public static String createCsvFromObjects(List values) { + return createCsv(asArrayFromObject(values)); + } + public static String createCsv(String[] values) { String csv = null; @@ -42,6 +46,20 @@ private static String[] asArray(List values) { return strings.toArray(new String[strings.size()]); } + private static String[] asArrayFromObject(List values) { + List strings = new ArrayList<>(); + for (Object value : values) { + if (value instanceof String) { + strings.add((String) value); + } else if (value == null) { + strings.add(""); + } else { + strings.add(value.toString()); + } + } + return strings.toArray(new String[strings.size()]); + } + private static String[] cleanRow(String[] values) { List quoted = new ArrayList<>(); diff --git a/src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java b/src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java index 23738bfdf..8cf1da6ae 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java +++ b/src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java @@ -193,10 +193,10 @@ private void saveLibraries003(String fileExtension, char separator) { logger.info("Saving libraries003..."); var path = Paths.get(parameters.getOutputDir(), "libraries003" + fileExtension); try (var writer = Files.newBufferedWriter(path)) { - writer.write("library" + separator + "count\n"); + writer.write(CsvUtils.createCsv(List.of("library", "count"))); completenessDAO.getLibrary003Counter().forEach((key, value) -> { try { - writer.write(String.format("\"%s\"%s%d%n", key, separator, value)); + writer.write(CsvUtils.createCsv(List.of(key, value))); } catch (IOException e) { logger.log(Level.SEVERE, "saveLibraries003", e); } @@ -209,15 +209,15 @@ private void saveLibraries003(String fileExtension, char separator) { private void saveMarcElements(String fileExtension, char separator) { Path path = Paths.get(parameters.getOutputDir(), "marc-elements" + fileExtension); try (var writer = Files.newBufferedWriter(path)) { - writer.write(createRow( + writer.write(CsvUtils.createCsv(List.of( "documenttype", "path", "packageid", "package", "tag", "subfield", "number-of-record", "number-of-instances", "min", "max", "mean", "stddev", "histogram" - )); + ))); completenessDAO.getElementCardinality().forEach((documentType, cardinalities) -> { cardinalities.forEach((marcPath, cardinality) -> { try { - writer.write(formatCardinality(separator, marcPath, cardinality, documentType, null)); + writer.write(formatCardinality(marcPath, cardinality, documentType, null)); } catch (IOException e) { logger.log(Level.SEVERE, "saveMarcElements", e); } @@ -241,7 +241,7 @@ private void saveGrouppedMarcElements(String fileExtension, char separator) { documentTypes.forEach((documentType, cardinalities) -> { cardinalities.forEach((marcPath, cardinality) -> { try { - writer.write(formatCardinality(separator, marcPath, cardinality, documentType, groupId)); + writer.write(formatCardinality(marcPath, cardinality, documentType, groupId)); } catch (IOException e) { logger.log(Level.SEVERE, "saveMarcElements", e); } @@ -257,7 +257,7 @@ private void savePackages(String fileExtension, char separator) { logger.info("saving packages..."); var path = Paths.get(parameters.getOutputDir(), "packages" + fileExtension); try (var writer = Files.newBufferedWriter(path)) { - writer.write(createRow(separator, "documenttype", "packageid", "name", "label", "iscoretag", "count")); + writer.write(CsvUtils.createCsv(List.of("documenttype", "packageid", "name", "label", "iscoretag", "count"))); completenessDAO.getPackageCounter().forEach((documentType, packages) -> { packages.forEach((packageName, count) -> { try { @@ -274,9 +274,7 @@ private void savePackages(String fileExtension, char separator) { } else { logger.severe(packageName + " has not been found in TagCategory"); } - writer.write(createRow( - separator, quote(documentType), id, quote(range), quote(label), isPartOfMarcScore, count - )); + writer.write(CsvUtils.createCsv(List.of(documentType, id, range, label, isPartOfMarcScore, count))); } catch (IOException e) { logger.log(Level.SEVERE, "savePackages", e); } @@ -291,7 +289,7 @@ private void saveGrouppedPackages(String fileExtension, char separator) { logger.info("saving groupped packages..."); var path = Paths.get(parameters.getOutputDir(), "completeness-groupped-packages" + fileExtension); try (var writer = Files.newBufferedWriter(path)) { - writer.write(createRow(separator, "group", "documenttype", "packageid", "name", "label", "iscoretag", "count")); + writer.write(CsvUtils.createCsv(List.of("group", "documenttype", "packageid", "name", "label", "iscoretag", "count"))); completenessDAO.getGrouppedPackageCounter().forEach((groupId, documentTypes) -> { documentTypes.forEach((documentType, packages) -> { packages.forEach((packageName, count) -> { @@ -309,9 +307,7 @@ private void saveGrouppedPackages(String fileExtension, char separator) { } else { logger.severe(packageName + " has not been found in TagCategory"); } - writer.write(createRow( - separator, quote(groupId), quote(documentType), id, quote(range), quote(label), isPartOfMarcScore, count - )); + writer.write(CsvUtils.createCsv(List.of(groupId, documentType, id, range, label, isPartOfMarcScore, count))); } catch (IOException e) { logger.log(Level.SEVERE, "savePackages", e); } @@ -327,11 +323,10 @@ private void saveLibraries(String fileExtension, char separator) { logger.info("Saving libraries..."); var path = Paths.get(parameters.getOutputDir(), "libraries" + fileExtension); try (var writer = Files.newBufferedWriter(path)) { - writer.write("library" + separator + "count\n"); + writer.write(CsvUtils.createCsv(List.of("library", "count"))); completenessDAO.getLibraryCounter().forEach((key, value) -> { try { writer.write(CsvUtils.createCsv(List.of(key, value))); - // writer.write(String.format("\"%s\"%s%d%n", key, separator, value)); } catch (IOException e) { logger.log(Level.SEVERE, "saveLibraries", e); } @@ -359,8 +354,7 @@ private void saveGroups(String fileExtension, char separator) { } } - private String formatCardinality(char separator, - String marcPath, + private String formatCardinality(String marcPath, int cardinality, String documentType, String groupId) { @@ -415,7 +409,8 @@ private String formatCardinality(char separator, if (groupId != null) values.add(0, groupId); - return StringUtils.join(values, separator) + "\n"; + return CsvUtils.createCsvFromObjects(values); + // return StringUtils.join(values, separator) + "\n"; } private char getSeparator(ValidationErrorFormat format) { diff --git a/src/test/java/de/gwdg/metadataqa/marc/cli/CompletenessTest.java b/src/test/java/de/gwdg/metadataqa/marc/cli/CompletenessTest.java index a33b4aec6..1996bd11d 100644 --- a/src/test/java/de/gwdg/metadataqa/marc/cli/CompletenessTest.java +++ b/src/test/java/de/gwdg/metadataqa/marc/cli/CompletenessTest.java @@ -124,7 +124,8 @@ public void completeness_pica_groupBy() throws Exception { int occurrences = Integer.parseInt(record[8]); assertTrue(records <= occurrences); int total = 0; - for (String expr : record[13].split("; ")) { + String histogram = record[13].replaceAll("^\"(.*)\"$", "$1"); + for (String expr : histogram.split("; ")) { String[] parts = expr.split("="); total += Integer.parseInt(parts[0]) * Integer.parseInt(parts[1]); } @@ -167,7 +168,8 @@ public void completeness_pica_groupBy_file() throws Exception { int occurrences = Integer.parseInt(record[8]); assertTrue(records <= occurrences); int total = 0; - for (String expr : record[13].split("; ")) { + String histogram = record[13].replaceAll("^\"(.*)\"$", "$1"); + for (String expr : histogram.split("; ")) { String[] parts = expr.split("="); total += Integer.parseInt(parts[0]) * Integer.parseInt(parts[1]); }