Skip to content

Commit

Permalink
Added dna_api and dna_evaluateBackboneSolution
Browse files Browse the repository at this point in the history
  • Loading branch information
leifeld committed Sep 20, 2023
1 parent 132a9d6 commit b583726
Show file tree
Hide file tree
Showing 17 changed files with 631 additions and 157 deletions.
4 changes: 2 additions & 2 deletions dna/src/main/java/dna/Dna.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ public class Dna {
public static Dna dna;
public static Logger logger;
public static Sql sql;
public static final String date = "2023-09-18";
public static final String version = "3.0.10";
public static final String date = "2023-09-20";
public static final String version = "3.0.11";
public static final String operatingSystem = System.getProperty("os.name");
public static File workingDirectory = null;
public MainWindow mainWindow;
Expand Down
140 changes: 140 additions & 0 deletions dna/src/main/java/dna/HeadlessDna.java
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,146 @@ public void rBackbone(String method, int backboneSize, double p, int T, String s
}
}

/**
* Compute the spectral loss for a given backbone set relative to the full network.
*
* @param backboneEntities An array of entities (e.g., concepts) for which the spectral loss should be computed relative to the full network.
* @param p The penalty parameter. Can be \code{0} to switch off the penalty.
* @param statementType Statement type as a {@link String}.
* @param variable1 First variable for export, provided as a {@link String}.
* @param variable1Document boolean indicating if the first variable is at the document level.
* @param variable2 Second variable for export, provided as a {@link String}.
* @param variable2Document boolean indicating if the second variable is at the document level.
* @param qualifier Qualifier variable as a {@link String}.
* @param qualifierDocument boolean indicating if the qualifier variable is at the document level.
* @param qualifierAggregation Aggregation rule for the qualifier variable (can be {@code "ignore"}, {@code "combine"}, {@code "subtract"}, {@code "congruence"}, or {@code "conflict"}). Note that negative values in the {@code "subtract"} case are replaced by {@code 0}.
* @param normalization Normalization setting as a {@link String}, as provided by rDNA (can be {@code "no"}, {@code "activity"}, {@code "prominence"}, {@code "average"}, {@code "jaccard"}, or {@code "cosine"}).
* @param duplicates An input {@link String} from rDNA that can be {@code "include"}, {@code "document"}, {@code "week"}, {@code "month"}, {@code "year"}, or {@code "acrossrange"}.
* @param startDate Start date for the export, provided as a {@link String} with format {@code "dd.MM.yyyy"}.
* @param stopDate Stop date for the export, provided as a {@link String} with format {@code "dd.MM.yyyy"}.
* @param startTime Start time for the export, provided as a {@link String} with format {@code "HH:mm:ss"}.
* @param stopTime Stop time for the export, provided as a {@link String} with format {@code "HH:mm:ss"}.
* @param excludeVariables A {@link String} array with n elements, indicating the variable of the n'th value.
* @param excludeValues A {@link String} array with n elements, indicating the value pertaining to the n'th variable {@link String}.
* @param excludeAuthors A {@link String} array of values to exclude in the {@code author} variable at the document level.
* @param excludeSources A {@link String} array of values to exclude in the {@code source} variable at the document level.
* @param excludeSections A {@link String} array of values to exclude in the {@code section} variable at the document level.
* @param excludeTypes A {@link String} array of values to exclude in the {@code "type"} variable at the document level.
* @param invertValues boolean indicating whether the statement-level exclude values should be included (= {@code true}) rather than excluded.
* @param invertAuthors boolean indicating whether the document-level author values should be included (= {@code true}) rather than excluded.
* @param invertSources boolean indicating whether the document-level source values should be included (= {@code true}) rather than excluded.
* @param invertSections boolean indicating whether the document-level section values should be included (= {@code true}) rather than excluded.
* @param invertTypes boolean indicating whether the document-level type values should be included (= {@code true}) rather than excluded.
* @return A double array with the loss for the backbone and redundant set.
*/
public double[] rEvaluateBackboneSolution(String[] backboneEntities, int p, String statementType, String variable1, boolean variable1Document, String variable2,
boolean variable2Document, String qualifier, boolean qualifierDocument, String qualifierAggregation, String normalization,
String duplicates, String startDate, String stopDate, String startTime, String stopTime,
String[] excludeVariables, String[] excludeValues, String[] excludeAuthors, String[] excludeSources, String[] excludeSections,
String[] excludeTypes, boolean invertValues, boolean invertAuthors, boolean invertSources, boolean invertSections,
boolean invertTypes) {

// step 1: preprocess arguments
StatementType st = Dna.sql.getStatementType(statementType); // format statement type

// format dates and times with input formats "dd.MM.yyyy" and "HH:mm:ss"
DateTimeFormatter dtf = DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm:ss");
LocalDateTime ldtStart, ldtStop;
LocalDateTime[] dateRange = Dna.sql.getDateTimeRange();
if (startTime == null || startTime.equals("")) {
startTime = "00:00:00";
}
if (startDate == null || startDate.equals("") || startDate.equals("01.01.1900")) {
ldtStart = dateRange[0];
} else {
String startString = startDate + " " + startTime;
ldtStart = LocalDateTime.parse(startString, dtf);
if (!startString.equals(dtf.format(ldtStart))) {
ldtStart = dateRange[0];
LogEvent le = new LogEvent(Logger.WARNING,
"Start date or time is invalid.",
"When computing the backbone and redundant set of the network, the start date or time (" + startString + ") did not conform to the format dd.MM.yyyy HH:mm:ss and could not be interpreted. Assuming earliest date and time in the dataset: " + ldtStart.format(dtf) + ".");
Dna.logger.log(le);
}
}
if (stopTime == null || stopTime.equals("")) {
stopTime = "23:59:59";
}
if (stopDate == null || stopDate.equals("") || stopDate.equals("31.12.2099")) {
ldtStop = dateRange[1];
} else {
String stopString = stopDate + " " + stopTime;
ldtStop = LocalDateTime.parse(stopString, dtf);
if (!stopString.equals(dtf.format(ldtStop))) {
ldtStop = dateRange[1];
LogEvent le = new LogEvent(Logger.WARNING,
"End date or time is invalid.",
"When computing the spectral loss of a backbone set, the end date or time (" + stopString + ") did not conform to the format dd.MM.yyyy HH:mm:ss and could not be interpreted. Assuming latest date and time in the dataset: " + ldtStop.format(dtf) + ".");
Dna.logger.log(le);
}
}

// process exclude variables: create HashMap with variable:value pairs
HashMap<String, ArrayList<String>> map = new HashMap<String, ArrayList<String>>();
if (excludeVariables.length > 0) {
for (int i = 0; i < excludeVariables.length; i++) {
ArrayList<String> values = map.get(excludeVariables[i]);
if (values == null) {
values = new ArrayList<String>();
}
if (!values.contains(excludeValues[i])) {
values.add(excludeValues[i]);
}
Collections.sort(values);
map.put(excludeVariables[i], values);
}
}

// initialize Exporter class
this.exporter = new Exporter(
"onemode",
st,
variable1,
variable1Document,
variable2,
variable2Document,
qualifier,
qualifierDocument,
qualifierAggregation,
normalization,
true,
duplicates,
ldtStart,
ldtStop,
"no",
1,
map,
Stream.of(excludeAuthors).collect(Collectors.toCollection(ArrayList::new)),
Stream.of(excludeSources).collect(Collectors.toCollection(ArrayList::new)),
Stream.of(excludeSections).collect(Collectors.toCollection(ArrayList::new)),
Stream.of(excludeTypes).collect(Collectors.toCollection(ArrayList::new)),
invertValues,
invertAuthors,
invertSources,
invertSections,
invertTypes,
null,
null);

// step 2: filter
this.exporter.loadData();
this.exporter.filterStatements();
if (exporter.getFilteredStatements().size() == 0) {
LogEvent le = new LogEvent(Logger.ERROR,
"No statements left after filtering.",
"Attempted to filter the statements by date and other criteria before finding backbone. But no statements were left after applying the filters. Perhaps the time period was mis-specified?");
Dna.logger.log(le);
}

// step 3: compute and return results
return this.exporter.evaluateBackboneSolution(backboneEntities, p);
}

private void saveJsonXml(String fileFormat, String outfile) {
if (fileFormat != null && outfile != null) {
if (fileFormat.equals("json") && !outfile.toLowerCase().endsWith(".json")) {
Expand Down
60 changes: 60 additions & 0 deletions dna/src/main/java/export/Exporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -3188,4 +3188,64 @@ public void iterateSimulatedAnnealingBackbone(boolean penalty) {
acceptanceRatioLastHundredIterationsLog.add(log / Math.min(100, t)); // log ratio of accepted candidates in the last 100 iterations
t = t + 1; // go to next iteration
}

/**
 * Compute the spectral distance between the full network and the network based only on the backbone set and only the redundant set. The penalty parameter can be switched off by setting it to zero.
 *
 * @param backboneEntities An array of entities (e.g., concepts) to construct a backbone set for computing the spectral distance.
 * @param p The penalty parameter. Can be {@code 0} to switch off the penalty parameter.
 * @return A double array with the penalized loss for the backbone set and the redundant set.
 */
public double[] evaluateBackboneSolution(String[] backboneEntities, int p) {
	this.p = p;
	double[] results = new double[2];
	this.isolates = false; // no isolates initially for full matrix; will be set to true after full matrix has been computed

	// initial values before the loss computations start
	this.originalStatements = this.filteredStatements; // to ensure not all isolates are included later

	// full set of concepts C
	fullConcepts = this.extractLabels(this.filteredStatements, this.variable2, this.variable2Document);

	// full network matrix Y against which both partial networks are compared
	fullMatrix = this.computeOneModeMatrix(this.filteredStatements, this.qualifierAggregation, this.startDateTime, this.stopDateTime);
	this.isolates = true; // include isolates from here on; dimensions will be adjusted to the full matrix manually each time

	// compute normalised eigenvalues for the full matrix; no need to recompute every time as they do not change
	eigenvaluesFull = computeNormalizedEigenvalues(fullMatrix.getMatrix());

	// partition the full concept set into the backbone set (entities supplied by the caller) and the redundant set (the rest)
	ArrayList<String> entityList = Stream.of(backboneEntities).collect(Collectors.toCollection(ArrayList<String>::new));
	ArrayList<String> backboneSet = new ArrayList<>();
	ArrayList<String> redundantSet = new ArrayList<>();
	for (int i = 0; i < fullConcepts.length; i++) {
		if (entityList.contains(fullConcepts[i])) {
			backboneSet.add(fullConcepts[i]);
		} else {
			redundantSet.add(fullConcepts[i]);
		}
	}

	// spectral distance between the full network and each partial network
	results[0] = spectralLossForEntitySet(backboneSet);
	results[1] = spectralLossForEntitySet(redundantSet);

	return results;
}

/**
 * Compute the penalized spectral loss of the network restricted to the statements whose second variable value is in a given entity set, relative to the full network.
 *
 * @param entitySet The entity values (e.g., concepts) to retain when building the candidate network.
 * @return The penalized spectral loss between the full matrix and the candidate matrix.
 */
private double spectralLossForEntitySet(ArrayList<String> entitySet) {
	// keep only statements whose variable2 entity is in the set, then build the candidate matrix
	candidateStatementList = this.filteredStatements
			.stream()
			.filter(s -> entitySet.contains(((Entity) s.get(this.variable2)).getValue()))
			.collect(Collectors.toCollection(ArrayList::new));
	candidateMatrix = this.computeOneModeMatrix(candidateStatementList, this.qualifierAggregation, this.startDateTime, this.stopDateTime);
	candidateMatrix = this.reduceCandidateMatrix(candidateMatrix, fullMatrix.getRowNames()); // ensure it has the right dimensions by purging isolates relative to the full matrix
	eigenvaluesCandidate = computeNormalizedEigenvalues(candidateMatrix.getMatrix()); // normalised eigenvalues for the candidate matrix
	return penalizedLoss(eigenvaluesFull, eigenvaluesCandidate, this.p, entitySet.size(), fullConcepts.length);
}
}
4 changes: 2 additions & 2 deletions rDNA/rDNA/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rDNA
Version: 3.0.10
Date: 2023-09-19
Version: 3.0.11
Date: 2023-09-20
Title: Discourse Network Analysis in R
Authors@R:
c(person(given = "Philip",
Expand Down
2 changes: 2 additions & 0 deletions rDNA/rDNA/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ S3method(print,dna_barplot)
S3method(print,dna_multiclust)
S3method(print,dna_network_onemode)
S3method(print,dna_network_twomode)
export(dna_api)
export(dna_backbone)
export(dna_barplot)
export(dna_closeDatabase)
export(dna_evaluateBackboneSolution)
export(dna_getAttributes)
export(dna_getVariables)
export(dna_init)
Expand Down
Loading

0 comments on commit b583726

Please sign in to comment.