Skip to content

Commit

Permalink
Added dna_api and dna_evaluateBackboneSolution
Browse files Browse the repository at this point in the history
  • Loading branch information
leifeld committed Sep 20, 2023
1 parent 132a9d6 commit b583726
Show file tree
Hide file tree
Showing 17 changed files with 631 additions and 157 deletions.
4 changes: 2 additions & 2 deletions dna/src/main/java/dna/Dna.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ public class Dna {
public static Dna dna;
public static Logger logger;
public static Sql sql;
public static final String date = "2023-09-18";
public static final String version = "3.0.10";
public static final String date = "2023-09-20";
public static final String version = "3.0.11";
public static final String operatingSystem = System.getProperty("os.name");
public static File workingDirectory = null;
public MainWindow mainWindow;
Expand Down
140 changes: 140 additions & 0 deletions dna/src/main/java/dna/HeadlessDna.java
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,146 @@ public void rBackbone(String method, int backboneSize, double p, int T, String s
}
}

/**
* Compute the spectral loss for a given backbone set relative to the full network.
*
* @param backboneEntities An array of entities (e.g., concepts) for which the spectral loss should be computed relative to the full network.
* @param p The penalty parameter. Can be \code{0} to switch off the penalty.
* @param statementType Statement type as a {@link String}.
* @param variable1 First variable for export, provided as a {@link String}.
* @param variable1Document boolean indicating if the first variable is at the document level.
* @param variable2 Second variable for export, provided as a {@link String}.
* @param variable2Document boolean indicating if the second variable is at the document level.
* @param qualifier Qualifier variable as a {@link String}.
* @param qualifierDocument boolean indicating if the qualifier variable is at the document level.
* @param qualifierAggregation Aggregation rule for the qualifier variable (can be {@code "ignore"}, {@code "combine"}, {@code "subtract"}, {@code "congruence"}, or {@code "conflict"}). Note that negative values in the {@code "subtract"} case are replaced by {@code 0}.
* @param normalization Normalization setting as a {@link String}, as provided by rDNA (can be {@code "no"}, {@code "activity"}, {@code "prominence"}, {@code "average"}, {@code "jaccard"}, or {@code "cosine"}).
* @param duplicates An input {@link String} from rDNA that can be {@code "include"}, {@code "document"}, {@code "week"}, {@code "month"}, {@code "year"}, or {@code "acrossrange"}.
* @param startDate Start date for the export, provided as a {@link String} with format {@code "dd.MM.yyyy"}.
* @param stopDate Stop date for the export, provided as a {@link String} with format {@code "dd.MM.yyyy"}.
* @param startTime Start time for the export, provided as a {@link String} with format {@code "HH:mm:ss"}.
* @param stopTime Stop time for the export, provided as a {@link String} with format {@code "HH:mm:ss"}.
* @param excludeVariables A {@link String} array with n elements, indicating the variable of the n'th value.
* @param excludeValues A {@link String} array with n elements, indicating the value pertaining to the n'th variable {@link String}.
* @param excludeAuthors A {@link String} array of values to exclude in the {@code author} variable at the document level.
* @param excludeSources A {@link String} array of values to exclude in the {@code source} variable at the document level.
* @param excludeSections A {@link String} array of values to exclude in the {@code section} variable at the document level.
* @param excludeTypes A {@link String} array of values to exclude in the {@code "type"} variable at the document level.
* @param invertValues boolean indicating whether the statement-level exclude values should be included (= {@code true}) rather than excluded.
* @param invertAuthors boolean indicating whether the document-level author values should be included (= {@code true}) rather than excluded.
* @param invertSources boolean indicating whether the document-level source values should be included (= {@code true}) rather than excluded.
* @param invertSections boolean indicating whether the document-level section values should be included (= {@code true}) rather than excluded.
* @param invertTypes boolean indicating whether the document-level type values should be included (= {@code true}) rather than excluded.
* @return A double array with the loss for the backbone and redundant set.
*/
public double[] rEvaluateBackboneSolution(String[] backboneEntities, int p, String statementType, String variable1, boolean variable1Document, String variable2,
boolean variable2Document, String qualifier, boolean qualifierDocument, String qualifierAggregation, String normalization,
String duplicates, String startDate, String stopDate, String startTime, String stopTime,
String[] excludeVariables, String[] excludeValues, String[] excludeAuthors, String[] excludeSources, String[] excludeSections,
String[] excludeTypes, boolean invertValues, boolean invertAuthors, boolean invertSources, boolean invertSections,
boolean invertTypes) {

// step 1: preprocess arguments
StatementType st = Dna.sql.getStatementType(statementType); // format statement type

// format dates and times with input formats "dd.MM.yyyy" and "HH:mm:ss"
DateTimeFormatter dtf = DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm:ss");
LocalDateTime ldtStart, ldtStop;
LocalDateTime[] dateRange = Dna.sql.getDateTimeRange();
if (startTime == null || startTime.equals("")) {
startTime = "00:00:00";
}
if (startDate == null || startDate.equals("") || startDate.equals("01.01.1900")) {
ldtStart = dateRange[0];
} else {
String startString = startDate + " " + startTime;
ldtStart = LocalDateTime.parse(startString, dtf);
if (!startString.equals(dtf.format(ldtStart))) {
ldtStart = dateRange[0];
LogEvent le = new LogEvent(Logger.WARNING,
"Start date or time is invalid.",
"When computing the backbone and redundant set of the network, the start date or time (" + startString + ") did not conform to the format dd.MM.yyyy HH:mm:ss and could not be interpreted. Assuming earliest date and time in the dataset: " + ldtStart.format(dtf) + ".");
Dna.logger.log(le);
}
}
if (stopTime == null || stopTime.equals("")) {
stopTime = "23:59:59";
}
if (stopDate == null || stopDate.equals("") || stopDate.equals("31.12.2099")) {
ldtStop = dateRange[1];
} else {
String stopString = stopDate + " " + stopTime;
ldtStop = LocalDateTime.parse(stopString, dtf);
if (!stopString.equals(dtf.format(ldtStop))) {
ldtStop = dateRange[1];
LogEvent le = new LogEvent(Logger.WARNING,
"End date or time is invalid.",
"When computing the spectral loss of a backbone set, the end date or time (" + stopString + ") did not conform to the format dd.MM.yyyy HH:mm:ss and could not be interpreted. Assuming latest date and time in the dataset: " + ldtStop.format(dtf) + ".");
Dna.logger.log(le);
}
}

// process exclude variables: create HashMap with variable:value pairs
HashMap<String, ArrayList<String>> map = new HashMap<String, ArrayList<String>>();
if (excludeVariables.length > 0) {
for (int i = 0; i < excludeVariables.length; i++) {
ArrayList<String> values = map.get(excludeVariables[i]);
if (values == null) {
values = new ArrayList<String>();
}
if (!values.contains(excludeValues[i])) {
values.add(excludeValues[i]);
}
Collections.sort(values);
map.put(excludeVariables[i], values);
}
}

// initialize Exporter class
this.exporter = new Exporter(
"onemode",
st,
variable1,
variable1Document,
variable2,
variable2Document,
qualifier,
qualifierDocument,
qualifierAggregation,
normalization,
true,
duplicates,
ldtStart,
ldtStop,
"no",
1,
map,
Stream.of(excludeAuthors).collect(Collectors.toCollection(ArrayList::new)),
Stream.of(excludeSources).collect(Collectors.toCollection(ArrayList::new)),
Stream.of(excludeSections).collect(Collectors.toCollection(ArrayList::new)),
Stream.of(excludeTypes).collect(Collectors.toCollection(ArrayList::new)),
invertValues,
invertAuthors,
invertSources,
invertSections,
invertTypes,
null,
null);

// step 2: filter
this.exporter.loadData();
this.exporter.filterStatements();
if (exporter.getFilteredStatements().size() == 0) {
LogEvent le = new LogEvent(Logger.ERROR,
"No statements left after filtering.",
"Attempted to filter the statements by date and other criteria before finding backbone. But no statements were left after applying the filters. Perhaps the time period was mis-specified?");
Dna.logger.log(le);
}

// step 3: compute and return results
return this.exporter.evaluateBackboneSolution(backboneEntities, p);
}

private void saveJsonXml(String fileFormat, String outfile) {
if (fileFormat != null && outfile != null) {
if (fileFormat.equals("json") && !outfile.toLowerCase().endsWith(".json")) {
Expand Down
60 changes: 60 additions & 0 deletions dna/src/main/java/export/Exporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -3188,4 +3188,64 @@ public void iterateSimulatedAnnealingBackbone(boolean penalty) {
acceptanceRatioLastHundredIterationsLog.add(log / Math.min(100, t)); // log ratio of accepted candidates in the last 100 iterations
t = t + 1; // go to next iteration
}

/**
 * Compute the spectral distance between the full network and the network based only on the backbone set and only the redundant set. The penalty parameter can be switched off by setting it to zero.
 *
 * @param backboneEntities An array of entities (e.g., concepts) to construct a backbone set for computing the spectral distance.
 * @param p The penalty parameter. Can be {@code 0} to switch off the penalty parameter.
 * @return A double array with the penalized loss for the backbone set and the redundant set.
 */
public double[] evaluateBackboneSolution(String[] backboneEntities, int p) {
	this.p = p;
	double[] results = new double[2];
	this.isolates = false; // no isolates initially for full matrix; will be set to true after full matrix has been computed

	// initial values before the loss computations start
	this.originalStatements = this.filteredStatements; // to ensure not all isolates are included later

	// full set of concepts C
	fullConcepts = this.extractLabels(this.filteredStatements, this.variable2, this.variable2Document);

	// full network matrix Y against which both partial networks are compared
	fullMatrix = this.computeOneModeMatrix(this.filteredStatements, this.qualifierAggregation, this.startDateTime, this.stopDateTime);
	this.isolates = true; // include isolates from here on; dimensions will be adjusted to the full matrix manually each time

	// compute normalised eigenvalues for the full matrix; no need to recompute every time as they do not change
	eigenvaluesFull = computeNormalizedEigenvalues(fullMatrix.getMatrix());

	// partition the full concept set into the backbone set (entities supplied by the caller) and the redundant set (the rest)
	ArrayList<String> entityList = Stream.of(backboneEntities).collect(Collectors.toCollection(ArrayList<String>::new));
	ArrayList<String> backboneSet = new ArrayList<>();
	ArrayList<String> redundantSet = new ArrayList<>();
	for (int i = 0; i < fullConcepts.length; i++) {
		if (entityList.contains(fullConcepts[i])) {
			backboneSet.add(fullConcepts[i]);
		} else {
			redundantSet.add(fullConcepts[i]);
		}
	}

	// spectral distance between the full network and each partial network
	results[0] = spectralLossForEntitySet(backboneSet);
	results[1] = spectralLossForEntitySet(redundantSet);

	return results;
}

/**
 * Compute the penalized spectral loss of the network restricted to the statements whose second variable value is in a given entity set, relative to the full network.
 *
 * @param entitySet The entity values (e.g., concepts) to retain when building the candidate network.
 * @return The penalized spectral loss between the full matrix and the candidate matrix.
 */
private double spectralLossForEntitySet(ArrayList<String> entitySet) {
	// keep only statements whose variable2 entity is in the set, then build the candidate matrix
	candidateStatementList = this.filteredStatements
			.stream()
			.filter(s -> entitySet.contains(((Entity) s.get(this.variable2)).getValue()))
			.collect(Collectors.toCollection(ArrayList::new));
	candidateMatrix = this.computeOneModeMatrix(candidateStatementList, this.qualifierAggregation, this.startDateTime, this.stopDateTime);
	candidateMatrix = this.reduceCandidateMatrix(candidateMatrix, fullMatrix.getRowNames()); // ensure it has the right dimensions by purging isolates relative to the full matrix
	eigenvaluesCandidate = computeNormalizedEigenvalues(candidateMatrix.getMatrix()); // normalised eigenvalues for the candidate matrix
	return penalizedLoss(eigenvaluesFull, eigenvaluesCandidate, this.p, entitySet.size(), fullConcepts.length);
}
}
4 changes: 2 additions & 2 deletions rDNA/rDNA/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rDNA
Version: 3.0.10
Date: 2023-09-19
Version: 3.0.11
Date: 2023-09-20
Title: Discourse Network Analysis in R
Authors@R:
c(person(given = "Philip",
Expand Down
2 changes: 2 additions & 0 deletions rDNA/rDNA/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ S3method(print,dna_barplot)
S3method(print,dna_multiclust)
S3method(print,dna_network_onemode)
S3method(print,dna_network_twomode)
export(dna_api)
export(dna_backbone)
export(dna_barplot)
export(dna_closeDatabase)
export(dna_evaluateBackboneSolution)
export(dna_getAttributes)
export(dna_getVariables)
export(dna_init)
Expand Down
Loading

0 comments on commit b583726

Please sign in to comment.