From b5837262783b0901cbe579285eec6acf39046a3b Mon Sep 17 00:00:00 2001 From: leifeld Date: Wed, 20 Sep 2023 10:08:58 +0100 Subject: [PATCH] Added dna_api and dna_evaluateBackboneSolution --- dna/src/main/java/dna/Dna.java | 4 +- dna/src/main/java/dna/HeadlessDna.java | 140 +++++++++++++++ dna/src/main/java/export/Exporter.java | 60 +++++++ rDNA/rDNA/DESCRIPTION | 4 +- rDNA/rDNA/NAMESPACE | 2 + rDNA/rDNA/R/rDNA.R | 189 ++++++++++++++++++++- rDNA/rDNA/man/autoplot.dna_barplot.Rd | 106 ------------ rDNA/rDNA/man/dna_api.Rd | 55 ++++++ rDNA/rDNA/man/dna_backbone.Rd | 64 +++++++ rDNA/rDNA/man/dna_barplot.Rd | 110 +++++++++++- rDNA/rDNA/man/dna_closeDatabase.Rd | 1 + rDNA/rDNA/man/dna_openConnectionProfile.Rd | 1 + rDNA/rDNA/man/dna_openDatabase.Rd | 1 + rDNA/rDNA/man/dna_printDetails.Rd | 1 + rDNA/rDNA/man/dna_saveConnectionProfile.Rd | 1 + rDNA/rDNA/man/print.dna_barplot.Rd | 36 ---- rDNA/rDNA/tests/testthat/test-backbone.R | 13 ++ 17 files changed, 631 insertions(+), 157 deletions(-) delete mode 100644 rDNA/rDNA/man/autoplot.dna_barplot.Rd create mode 100644 rDNA/rDNA/man/dna_api.Rd delete mode 100644 rDNA/rDNA/man/print.dna_barplot.Rd diff --git a/dna/src/main/java/dna/Dna.java b/dna/src/main/java/dna/Dna.java index 67afeb26..2c98de4c 100644 --- a/dna/src/main/java/dna/Dna.java +++ b/dna/src/main/java/dna/Dna.java @@ -27,8 +27,8 @@ public class Dna { public static Dna dna; public static Logger logger; public static Sql sql; - public static final String date = "2023-09-18"; - public static final String version = "3.0.10"; + public static final String date = "2023-09-20"; + public static final String version = "3.0.11"; public static final String operatingSystem = System.getProperty("os.name"); public static File workingDirectory = null; public MainWindow mainWindow; diff --git a/dna/src/main/java/dna/HeadlessDna.java b/dna/src/main/java/dna/HeadlessDna.java index 75e98f7c..b44683d4 100644 --- a/dna/src/main/java/dna/HeadlessDna.java +++ 
b/dna/src/main/java/dna/HeadlessDna.java @@ -719,6 +719,146 @@ public void rBackbone(String method, int backboneSize, double p, int T, String s } } + /** + * Compute the spectral loss for a given backbone set relative to the full network. + * + * @param backboneEntities An array of entities (e.g., concepts) for which the spectral loss should be computed relative to the full network. + * @param p The penalty parameter. Can be {@code 0} to switch off the penalty. + * @param statementType Statement type as a {@link String}. + * @param variable1 First variable for export, provided as a {@link String}. + * @param variable1Document boolean indicating if the first variable is at the document level. + * @param variable2 Second variable for export, provided as a {@link String}. + * @param variable2Document boolean indicating if the second variable is at the document level. + * @param qualifier Qualifier variable as a {@link String}. + * @param qualifierDocument boolean indicating if the qualifier variable is at the document level. + * @param qualifierAggregation Aggregation rule for the qualifier variable (can be {@code "ignore"}, {@code "combine"}, {@code "subtract"}, {@code "congruence"}, or {@code "conflict"}). Note that negative values in the {@code "subtract"} case are replaced by {@code 0}. + * @param normalization Normalization setting as a {@link String}, as provided by rDNA (can be {@code "no"}, {@code "activity"}, {@code "prominence"}, {@code "average"}, {@code "jaccard"}, or {@code "cosine"}). + * @param duplicates An input {@link String} from rDNA that can be {@code "include"}, {@code "document"}, {@code "week"}, {@code "month"}, {@code "year"}, or {@code "acrossrange"}. + * @param startDate Start date for the export, provided as a {@link String} with format {@code "dd.MM.yyyy"}. + * @param stopDate Stop date for the export, provided as a {@link String} with format {@code "dd.MM.yyyy"}. 
+ * @param startTime Start time for the export, provided as a {@link String} with format {@code "HH:mm:ss"}. + * @param stopTime Stop time for the export, provided as a {@link String} with format {@code "HH:mm:ss"}. + * @param excludeVariables A {@link String} array with n elements, indicating the variable of the n'th value. + * @param excludeValues A {@link String} array with n elements, indicating the value pertaining to the n'th variable {@link String}. + * @param excludeAuthors A {@link String} array of values to exclude in the {@code author} variable at the document level. + * @param excludeSources A {@link String} array of values to exclude in the {@code source} variable at the document level. + * @param excludeSections A {@link String} array of values to exclude in the {@code section} variable at the document level. + * @param excludeTypes A {@link String} array of values to exclude in the {@code "type"} variable at the document level. + * @param invertValues boolean indicating whether the statement-level exclude values should be included (= {@code true}) rather than excluded. + * @param invertAuthors boolean indicating whether the document-level author values should be included (= {@code true}) rather than excluded. + * @param invertSources boolean indicating whether the document-level source values should be included (= {@code true}) rather than excluded. + * @param invertSections boolean indicating whether the document-level section values should be included (= {@code true}) rather than excluded. + * @param invertTypes boolean indicating whether the document-level type values should be included (= {@code true}) rather than excluded. + * @return A double array with the loss for the backbone and redundant set. 
+ */ + public double[] rEvaluateBackboneSolution(String[] backboneEntities, int p, String statementType, String variable1, boolean variable1Document, String variable2, + boolean variable2Document, String qualifier, boolean qualifierDocument, String qualifierAggregation, String normalization, + String duplicates, String startDate, String stopDate, String startTime, String stopTime, + String[] excludeVariables, String[] excludeValues, String[] excludeAuthors, String[] excludeSources, String[] excludeSections, + String[] excludeTypes, boolean invertValues, boolean invertAuthors, boolean invertSources, boolean invertSections, + boolean invertTypes) { + + // step 1: preprocess arguments + StatementType st = Dna.sql.getStatementType(statementType); // format statement type + + // format dates and times with input formats "dd.MM.yyyy" and "HH:mm:ss" + DateTimeFormatter dtf = DateTimeFormatter.ofPattern("dd.MM.yyyy HH:mm:ss"); + LocalDateTime ldtStart, ldtStop; + LocalDateTime[] dateRange = Dna.sql.getDateTimeRange(); + if (startTime == null || startTime.equals("")) { + startTime = "00:00:00"; + } + if (startDate == null || startDate.equals("") || startDate.equals("01.01.1900")) { + ldtStart = dateRange[0]; + } else { + String startString = startDate + " " + startTime; + ldtStart = LocalDateTime.parse(startString, dtf); + if (!startString.equals(dtf.format(ldtStart))) { + ldtStart = dateRange[0]; + LogEvent le = new LogEvent(Logger.WARNING, + "Start date or time is invalid.", + "When computing the backbone and redundant set of the network, the start date or time (" + startString + ") did not conform to the format dd.MM.yyyy HH:mm:ss and could not be interpreted. 
Assuming earliest date and time in the dataset: " + ldtStart.format(dtf) + "."); + Dna.logger.log(le); + } + } + if (stopTime == null || stopTime.equals("")) { + stopTime = "23:59:59"; + } + if (stopDate == null || stopDate.equals("") || stopDate.equals("31.12.2099")) { + ldtStop = dateRange[1]; + } else { + String stopString = stopDate + " " + stopTime; + ldtStop = LocalDateTime.parse(stopString, dtf); + if (!stopString.equals(dtf.format(ldtStop))) { + ldtStop = dateRange[1]; + LogEvent le = new LogEvent(Logger.WARNING, + "End date or time is invalid.", + "When computing the spectral loss of a backbone set, the end date or time (" + stopString + ") did not conform to the format dd.MM.yyyy HH:mm:ss and could not be interpreted. Assuming latest date and time in the dataset: " + ldtStop.format(dtf) + "."); + Dna.logger.log(le); + } + } + + // process exclude variables: create HashMap with variable:value pairs + HashMap<String, ArrayList<String>> map = new HashMap<String, ArrayList<String>>(); + if (excludeVariables.length > 0) { + for (int i = 0; i < excludeVariables.length; i++) { + ArrayList<String> values = map.get(excludeVariables[i]); + if (values == null) { + values = new ArrayList<String>(); + } + if (!values.contains(excludeValues[i])) { + values.add(excludeValues[i]); + } + Collections.sort(values); + map.put(excludeVariables[i], values); + } + } + + // initialize Exporter class + this.exporter = new Exporter( + "onemode", + st, + variable1, + variable1Document, + variable2, + variable2Document, + qualifier, + qualifierDocument, + qualifierAggregation, + normalization, + true, + duplicates, + ldtStart, + ldtStop, + "no", + 1, + map, + Stream.of(excludeAuthors).collect(Collectors.toCollection(ArrayList::new)), + Stream.of(excludeSources).collect(Collectors.toCollection(ArrayList::new)), + Stream.of(excludeSections).collect(Collectors.toCollection(ArrayList::new)), + Stream.of(excludeTypes).collect(Collectors.toCollection(ArrayList::new)), + invertValues, + invertAuthors, + invertSources, + invertSections, + invertTypes, + 
null, + null); + + // step 2: filter + this.exporter.loadData(); + this.exporter.filterStatements(); + if (exporter.getFilteredStatements().size() == 0) { + LogEvent le = new LogEvent(Logger.ERROR, + "No statements left after filtering.", + "Attempted to filter the statements by date and other criteria before finding backbone. But no statements were left after applying the filters. Perhaps the time period was mis-specified?"); + Dna.logger.log(le); + } + + // step 3: compute and return results + return this.exporter.evaluateBackboneSolution(backboneEntities, p); + } + private void saveJsonXml(String fileFormat, String outfile) { if (fileFormat != null && outfile != null) { if (fileFormat.equals("json") && !outfile.toLowerCase().endsWith(".json")) { diff --git a/dna/src/main/java/export/Exporter.java b/dna/src/main/java/export/Exporter.java index 80f22fd3..6a697a1d 100644 --- a/dna/src/main/java/export/Exporter.java +++ b/dna/src/main/java/export/Exporter.java @@ -3188,4 +3188,64 @@ public void iterateSimulatedAnnealingBackbone(boolean penalty) { acceptanceRatioLastHundredIterationsLog.add(log / Math.min(100, t)); // log ratio of accepted candidates in the last 100 iterations t = t + 1; // go to next iteration } + + /** + * Compute the spectral distance between the full network and each of two reduced networks: one based only on the backbone set and one based only on the redundant set. The penalty parameter can be switched off by setting it to zero. + * + * @param backboneEntities An array of entities (e.g., concepts) to construct a backbone set for computing the spectral distance. + * @param p The penalty parameter. Can be {@code 0} to switch off the penalty parameter. + * @return A double array with the penalized loss for the backbone set and the redundant set. 
+ */ + public double[] evaluateBackboneSolution(String[] backboneEntities, int p) { + this.p = p; + double[] results = new double[2]; + this.isolates = false; // no isolates initially for full matrix; will be set to true after full matrix has been computed + + // initial values before iterations start + this.originalStatements = this.filteredStatements; // to ensure not all isolates are included later + + // full set of concepts C + fullConcepts = this.extractLabels(this.filteredStatements, this.variable2, this.variable2Document); + + // full network matrix Y against which we compare in every iteration + fullMatrix = this.computeOneModeMatrix(this.filteredStatements, this.qualifierAggregation, this.startDateTime, this.stopDateTime); + this.isolates = true; // include isolates in the iterations; will be adjusted to full matrix without isolates manually each time + + // compute normalised eigenvalues for the full matrix; no need to recompute every time as they do not change + eigenvaluesFull = computeNormalizedEigenvalues(fullMatrix.getMatrix()); + + // create copy of filtered statements and remove redundant entities + ArrayList entityList = Stream.of(backboneEntities).collect(Collectors.toCollection(ArrayList::new)); + ArrayList backboneSet = new ArrayList<>(); + ArrayList redundantSet = new ArrayList<>(); + for (int i = 0; i < fullConcepts.length; i++) { + if (entityList.contains(fullConcepts[i])) { + backboneSet.add(fullConcepts[i]); + } else { + redundantSet.add(fullConcepts[i]); + } + } + + // spectral distance between full and backbone set + candidateStatementList = this.filteredStatements + .stream() + .filter(s -> backboneSet.contains(((Entity) s.get(this.variable2)).getValue())) + .collect(Collectors.toCollection(ArrayList::new)); + candidateMatrix = this.computeOneModeMatrix(candidateStatementList, this.qualifierAggregation, this.startDateTime, this.stopDateTime); // create candidate matrix after filtering the statements based on the action that was 
executed + candidateMatrix = this.reduceCandidateMatrix(candidateMatrix, fullMatrix.getRowNames()); // ensure it has the right dimensions by purging isolates relative to the full matrix + eigenvaluesCandidate = computeNormalizedEigenvalues(candidateMatrix.getMatrix()); // normalised eigenvalues for the candidate matrix + results[0] = penalizedLoss(eigenvaluesFull, eigenvaluesCandidate, p, backboneSet.size(), fullConcepts.length); // spectral distance between full and candidate matrix + + // spectral distance between full and redundant set + candidateStatementList = this.filteredStatements + .stream() + .filter(s -> redundantSet.contains(((Entity) s.get(this.variable2)).getValue())) + .collect(Collectors.toCollection(ArrayList::new)); + candidateMatrix = this.computeOneModeMatrix(candidateStatementList, this.qualifierAggregation, this.startDateTime, this.stopDateTime); // create candidate matrix after filtering the statements based on the action that was executed + candidateMatrix = this.reduceCandidateMatrix(candidateMatrix, fullMatrix.getRowNames()); // ensure it has the right dimensions by purging isolates relative to the full matrix + eigenvaluesCandidate = computeNormalizedEigenvalues(candidateMatrix.getMatrix()); // normalised eigenvalues for the candidate matrix + results[1] = penalizedLoss(eigenvaluesFull, eigenvaluesCandidate, p, redundantSet.size(), fullConcepts.length); // spectral distance between full and candidate matrix + + return results; + } } \ No newline at end of file diff --git a/rDNA/rDNA/DESCRIPTION b/rDNA/rDNA/DESCRIPTION index dcb6338d..fb89dff7 100755 --- a/rDNA/rDNA/DESCRIPTION +++ b/rDNA/rDNA/DESCRIPTION @@ -1,6 +1,6 @@ Package: rDNA -Version: 3.0.10 -Date: 2023-09-19 +Version: 3.0.11 +Date: 2023-09-20 Title: Discourse Network Analysis in R Authors@R: c(person(given = "Philip", diff --git a/rDNA/rDNA/NAMESPACE b/rDNA/rDNA/NAMESPACE index 64291652..c8c4bb36 100644 --- a/rDNA/rDNA/NAMESPACE +++ b/rDNA/rDNA/NAMESPACE @@ -12,9 +12,11 @@ 
S3method(print,dna_barplot) S3method(print,dna_multiclust) S3method(print,dna_network_onemode) S3method(print,dna_network_twomode) +export(dna_api) export(dna_backbone) export(dna_barplot) export(dna_closeDatabase) +export(dna_evaluateBackboneSolution) export(dna_getAttributes) export(dna_getVariables) export(dna_init) diff --git a/rDNA/rDNA/R/rDNA.R b/rDNA/rDNA/R/rDNA.R index 691dd188..b1d62c30 100644 --- a/rDNA/rDNA/R/rDNA.R +++ b/rDNA/rDNA/R/rDNA.R @@ -539,6 +539,47 @@ dna_printDetails <- function() { .jcall(dnaEnvironment[["dna"]]$headlessDna, "V", "printDatabaseDetails") } +#' Get a reference to the headless Java class for R (API) +#' +#' Get a reference to the headless Java class for R (API). +#' +#' This function returns a Java object reference to the instance of the +#' \code{Dna/HeadlessDna} class in the DNA JAR file that is held in the rDNA +#' package environment and used by the functions in the package to exchange data +#' with the Java application. You can use the \pkg{rJava} package to access the +#' available functions in this class directly. API access requires detailed +#' knowledge of the DNA JAR classes and functions and is recommended for +#' developers and advanced users only. +#' +#' @return A Java object reference to the \code{Dna/HeadlessDna} class. 
+#' +#' @author Philip Leifeld +#' +#' @examples +#' \dontrun{ +#' library("rJava") # load rJava package to use functions in the Java API +#' dna_init() +#' dna_sample() +#' dna_openDatabase(coderId = 1, +#' coderPassword = "sample", +#' db_url = "sample.dna") +#' api <- dna_api() +#' +#' # use the \code{getVariables} function to retrieve variables +#' variable_references <- api$getVariables("DNA Statement") +#' +#' # iterate through variable references and print their data type +#' for (i in seq(variable_references$size()) - 1) { +#' print(variable_references$get(as.integer(i))$getDataType()) +#' } +#' } +#' +#' @family {rDNA database connections} +#' +#' @export +dna_api <- function() { + return(dnaEnvironment[["dna"]]$headlessDna) +} # Coder management-------------------------------------------------------------- @@ -1798,8 +1839,7 @@ autoplot.dna_network_twomode <- autoplot.dna_network_onemode #' #' @author Philip Leifeld #' -#' @family {rDNA barplots} -#' +#' @rdname dna_barplot #' @importFrom rJava .jarray #' @importFrom rJava .jcall #' @importFrom rJava .jevalArray @@ -1933,8 +1973,7 @@ dna_barplot <- function(statementType = "DNA Statement", #' #' @author Philip Leifeld #' -#' @family {rDNA barplots} -#' +#' @rdname dna_barplot #' @export print.dna_barplot <- function(x, trim = 30, attr = TRUE, ...) { x2 <- x @@ -2021,8 +2060,7 @@ print.dna_barplot <- function(x, trim = 30, attr = TRUE, ...) { #' #' @author Johannes B. 
Gruber, Tim Henrichsen #' -#' @family {rDNA barplots} -#' +#' @rdname dna_barplot #' @importFrom ggplot2 autoplot #' @importFrom ggplot2 ggplot #' @importFrom ggplot2 aes_string @@ -2458,6 +2496,7 @@ autoplot.dna_barplot <- function(object, #' #' @author Philip Leifeld, Tim Henrichsen #' +#' @rdname dna_backbone #' @importFrom rJava .jarray #' @importFrom rJava .jcall #' @importFrom rJava .jnull @@ -2911,6 +2950,142 @@ autoplot.dna_backbone <- function(object, ..., ma = 500) { } } +#' Evaluate the spectral loss for an arbitrary set of entities +#' +#' Compute the backbone loss for any set of entities, for example concepts. +#' +#' This function computes the spectral loss for an arbitrary backbone and its +#' complement, the redundant set, specified by the user. For example, the user +#' can evaluate how much structure would be lost if the second mode was composed +#' only of the concepts provided to this function. This can be used to compare +#' how useful different codebook models are. The penalty parameter \code{p} +#' applies a penalty factor to the spectral loss. The default value of \code{0} +#' switches off the penalty. +#' +#' @param backboneEntities A vector of character values to be included in the +#' backbone. The function will compute the spectral loss between the full +#' network and the network composed only of those entities on the second mode +#' that are contained in this vector. +#' @param p The penalty parameter. The default value of \code{0} means no +#' penalty for backbone size is applied. +#' @inheritParams dna_backbone +#' @return A vector with two numeric values: the backbone and redundant loss. 
+#' +#' @examples +#' \dontrun{ +#' dna_init() +#' dna_sample() +#' dna_openDatabase("sample.dna", coderId = 1, coderPassword = "sample") +#' +#' dna_evaluateBackboneSolution( +#' c("There should be legislation to regulate emissions.", +#' "Emissions legislation should regulate CO2.") +#' ) +#' } +#' +#' @author Philip Leifeld +#' +#' @rdname dna_backbone +#' @importFrom rJava .jarray +#' @importFrom rJava .jcall +#' @importFrom rJava .jnull +#' @export +dna_evaluateBackboneSolution <- function(backboneEntities, + p = 0, + statementType = "DNA Statement", + variable1 = "organization", + variable1Document = FALSE, + variable2 = "concept", + variable2Document = FALSE, + qualifier = "agreement", + qualifierDocument = FALSE, + qualifierAggregation = "subtract", + normalization = "average", + duplicates = "document", + start.date = "01.01.1900", + stop.date = "31.12.2099", + start.time = "00:00:00", + stop.time = "23:59:59", + excludeValues = list(), + excludeAuthors = character(), + excludeSources = character(), + excludeSections = character(), + excludeTypes = character(), + invertValues = FALSE, + invertAuthors = FALSE, + invertSources = FALSE, + invertSections = FALSE, + invertTypes = FALSE) { + + # wrap the vectors of exclude values for document variables into Java arrays + excludeAuthors <- .jarray(excludeAuthors) + excludeSources <- .jarray(excludeSources) + excludeSections <- .jarray(excludeSections) + excludeTypes <- .jarray(excludeTypes) + + # compile exclude variables and values vectors + dat <- matrix("", nrow = length(unlist(excludeValues)), ncol = 2) + count <- 0 + if (length(excludeValues) > 0) { + for (i in 1:length(excludeValues)) { + if (length(excludeValues[[i]]) > 0) { + for (j in 1:length(excludeValues[[i]])) { + count <- count + 1 + dat[count, 1] <- names(excludeValues)[i] + dat[count, 2] <- excludeValues[[i]][j] + } + } + } + var <- dat[, 1] + val <- dat[, 2] + } else { + var <- character() + val <- character() + } + var <- .jarray(var) # array of 
variable names of each excluded value + val <- .jarray(val) # array of values to be excluded + + # encode R NULL as Java null value if necessary + if (is.null(qualifier) || is.na(qualifier)) { + qualifier <- .jnull(class = "java/lang/String") + } + + # call the Java rEvaluateBackboneSolution method to compute results + result <- .jcall(dnaEnvironment[["dna"]]$headlessDna, + "[D", + "rEvaluateBackboneSolution", + .jarray(backboneEntities), + as.integer(p), + statementType, + variable1, + variable1Document, + variable2, + variable2Document, + qualifier, + qualifierDocument, + qualifierAggregation, + normalization, + duplicates, + start.date, + stop.date, + start.time, + stop.time, + var, + val, + excludeAuthors, + excludeSources, + excludeSections, + excludeTypes, + invertValues, + invertAuthors, + invertSources, + invertSections, + invertTypes + ) + names(result) <- c("backbone loss", "redundant loss") + return(result) +} + # Clustering ------------------------------------------------------------------- @@ -3103,6 +3278,7 @@ autoplot.dna_backbone <- function(object, ..., ma = 500) { #' mc3$max_mod # maximal modularity and method per time point #' } #' +#' @rdname dna_multiclust #' @importFrom stats as.dist cor hclust cutree kmeans #' @export dna_multiclust <- function(statementType = "DNA Statement", @@ -4078,6 +4254,7 @@ dna_multiclust <- function(statementType = "DNA Statement", #' @param ... Further options (currently not used). 
#' #' @author Philip Leifeld +#' #' @rdname dna_multiclust #' @importFrom utils head #' @export diff --git a/rDNA/rDNA/man/autoplot.dna_barplot.Rd b/rDNA/rDNA/man/autoplot.dna_barplot.Rd deleted file mode 100644 index cab27a97..00000000 --- a/rDNA/rDNA/man/autoplot.dna_barplot.Rd +++ /dev/null @@ -1,106 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rDNA.R -\name{autoplot.dna_barplot} -\alias{autoplot.dna_barplot} -\title{Plot \code{dna_barplot} object.} -\usage{ -\method{autoplot}{dna_barplot}( - object, - ..., - lab.pos = "Agreement", - lab.neg = "Disagreement", - lab = TRUE, - colors = FALSE, - fontSize = 12, - barWidth = 0.6, - axisWidth = 1.5, - truncate = 40, - exclude.min = NULL -) -} -\arguments{ -\item{object}{A \code{dna_barplot} object.} - -\item{...}{Additional arguments; currently not in use.} - -\item{lab.pos, lab.neg}{Names for (dis-)agreement labels.} - -\item{lab}{Should (dis-)agreement labels and title be displayed?} - -\item{colors}{If \code{TRUE}, the \code{Colors} column in the -\code{dna_barplot} object will be used to fill the bars. Also accepts -character objects matching one of the attribute variables of the -\code{dna_barplot} object.} - -\item{fontSize}{Text size in pt.} - -\item{barWidth}{Thickness of the bars. Bars will touch when set to \code{1}. -When set to \code{0.5}, space between two bars is the same as thickness of -bars.} - -\item{axisWidth}{Thickness of the x-axis which separates agreement from -disagreement.} - -\item{truncate}{Sets the number of characters to which axis labels should be -truncated.} - -\item{exclude.min}{Reduces the plot to entities with a minimum frequency of -statements.} -} -\description{ -Plot a barplot generated from \code{\link{dna_barplot}}. -} -\details{ -This function plots \code{dna_barplot} objects generated by the -\code{\link{dna_barplot}} function. 
It plots agreement and disagreement with -DNA statements for different entities such as \code{"concept"}, -\code{"organization"}, or \code{"person"}. Colors can be modified before -plotting (see examples). -} -\examples{ -\dontrun{ -dna_init() -dna_sample() - -dna_openDatabase("sample.dna", coderId = 1, coderPassword = "sample") - -# compute barplot data -b <- dna_barplot(statementType = "DNA Statement", - variable = "concept", - qualifier = "agreement") - -# plot barplot with ggplot2 -library("ggplot2") -autoplot(b) - -# use entity colours (here: colors of organizations as an illustration) -b <- dna_barplot(statementType = "DNA Statement", - variable = "organization", - qualifier = "agreement") -autoplot(b, colors = TRUE) - -# edit the colors before plotting -b$Color[b$Type == "NGO"] <- "red" # change NGO color to red -b$Color[b$Type == "Government"] <- "blue" # change government color to blue -autoplot(b, colors = TRUE) - -# use an attribute, such as type, to color the bars -autoplot(b, colors = "Type") + - scale_colour_manual(values = "black") - -# replace colors for the three possible actor types with custom colors -autoplot(b, colors = "Type") + - scale_fill_manual(values = c("red", "blue", "green")) + - scale_colour_manual(values = "black") -} - -} -\seealso{ -Other {rDNA barplots}: -\code{\link{dna_barplot}()}, -\code{\link{print.dna_barplot}()} -} -\author{ -Johannes B. Gruber, Tim Henrichsen -} -\concept{{rDNA barplots}} diff --git a/rDNA/rDNA/man/dna_api.Rd b/rDNA/rDNA/man/dna_api.Rd new file mode 100644 index 00000000..26830bf1 --- /dev/null +++ b/rDNA/rDNA/man/dna_api.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rDNA.R +\name{dna_api} +\alias{dna_api} +\title{Get a reference to the headless Java class for R (API)} +\usage{ +dna_api() +} +\value{ +A Java object reference to the \code{Dna/HeadlessDna} class. +} +\description{ +Get a reference to the headless Java class for R (API). 
+} +\details{ +This function returns a Java object reference to the instance of the +\code{Dna/HeadlessDna} class in the DNA JAR file that is held in the rDNA +package environment and used by the functions in the package to exchange data +with the Java application. You can use the \pkg{rJava} package to access the +available functions in this class directly. API access requires detailed +knowledge of the DNA JAR classes and functions and is recommended for +developers and advanced users only. +} +\examples{ +\dontrun{ +library("rJava") # load rJava package to use functions in the Java API +dna_init() +dna_sample() +dna_openDatabase(coderId = 1, + coderPassword = "sample", + db_url = "sample.dna") +api <- dna_api() + +# use the \code{getVariables} function to retrieve variables +variable_references <- api$getVariables("DNA Statement") + +# iterate through variable references and print their data type +for (i in seq(variable_references$size()) - 1) { + print(variable_references$get(as.integer(i))$getDataType()) +} +} + +} +\seealso{ +Other {rDNA database connections}: +\code{\link{dna_closeDatabase}()}, +\code{\link{dna_openConnectionProfile}()}, +\code{\link{dna_openDatabase}()}, +\code{\link{dna_printDetails}()}, +\code{\link{dna_saveConnectionProfile}()} +} +\author{ +Philip Leifeld +} +\concept{{rDNA database connections}} diff --git a/rDNA/rDNA/man/dna_backbone.Rd b/rDNA/rDNA/man/dna_backbone.Rd index eec68367..128d460c 100644 --- a/rDNA/rDNA/man/dna_backbone.Rd +++ b/rDNA/rDNA/man/dna_backbone.Rd @@ -5,6 +5,7 @@ \alias{print.dna_backbone} \alias{plot.dna_backbone} \alias{autoplot.dna_backbone} +\alias{dna_evaluateBackboneSolution} \title{Compute and retrieve the backbone and redundant set} \usage{ dna_backbone( @@ -45,6 +46,35 @@ dna_backbone( \method{plot}{dna_backbone}(x, ma = 500, ...) 
\method{autoplot}{dna_backbone}(object, ..., ma = 500) + +dna_evaluateBackboneSolution( + backboneEntities, + p = 0, + statementType = "DNA Statement", + variable1 = "organization", + variable1Document = FALSE, + variable2 = "concept", + variable2Document = FALSE, + qualifier = "agreement", + qualifierDocument = FALSE, + qualifierAggregation = "subtract", + normalization = "average", + duplicates = "document", + start.date = "01.01.1900", + stop.date = "31.12.2099", + start.time = "00:00:00", + stop.time = "23:59:59", + excludeValues = list(), + excludeAuthors = character(), + excludeSources = character(), + excludeSections = character(), + excludeTypes = character(), + invertValues = FALSE, + invertAuthors = FALSE, + invertSources = FALSE, + invertSections = FALSE, + invertTypes = FALSE +) } \arguments{ \item{method}{The backbone algorithm used to compute the results. Several @@ -275,14 +305,35 @@ an asterisk (\code{*}).} \item{ma}{Number of iterations to compute moving average.} \item{object}{A \code{"dna_backbone"} object.} + +\item{backboneEntities}{A vector of character values to be included in the +backbone. The function will compute the spectral loss between the full +network and the network composed only of those entities on the second mode +that are contained in this vector.} + +\item{p}{The penalty parameter. The default value of \code{0} means no +penalty for backbone size is applied.} +} +\value{ +A vector with two numeric values: the backbone and redundant loss. } \description{ Compute and retrieve the backbone and redundant set of a discourse network. + +Compute the backbone loss for any set of entities, for example concepts. } \details{ This function applies a simulated annealing algorithm to the discourse network to partition the set of second-mode entities (e.g., concepts) into a backbone set and a complementary redundant set. 
+ +This function computes the spectral loss for an arbitrary backbone and its +complement, the redundant set, specified by the user. For example, the user +can evaluate how much structure would be lost if the second mode was composed +only of the concepts provided to this function. This can be used to compare +how useful different codebook models are. The penalty parameter \code{p} +applies a penalty factor to the spectral loss. The default value of \code{0} +switches off the penalty. } \examples{ \dontrun{ @@ -358,7 +409,20 @@ plot(b) autoplot(b) } +\dontrun{ +dna_init() +dna_sample() +dna_openDatabase("sample.dna", coderId = 1, coderPassword = "sample") + +dna_evaluateBackboneSolution( + c("There should be legislation to regulate emissions.", + "Emissions legislation should regulate CO2.") +) +} + } \author{ Philip Leifeld, Tim Henrichsen + +Philip Leifeld } diff --git a/rDNA/rDNA/man/dna_barplot.Rd b/rDNA/rDNA/man/dna_barplot.Rd index 5fcf03d9..06dce2f6 100644 --- a/rDNA/rDNA/man/dna_barplot.Rd +++ b/rDNA/rDNA/man/dna_barplot.Rd @@ -2,6 +2,8 @@ % Please edit documentation in R/rDNA.R \name{dna_barplot} \alias{dna_barplot} +\alias{print.dna_barplot} +\alias{autoplot.dna_barplot} \title{Generate the data necessary for creating a barplot for a variable} \usage{ dna_barplot( @@ -24,6 +26,22 @@ dna_barplot( invertSections = FALSE, invertTypes = FALSE ) + +\method{print}{dna_barplot}(x, trim = 30, attr = TRUE, ...) 
+ +\method{autoplot}{dna_barplot}( + object, + ..., + lab.pos = "Agreement", + lab.neg = "Disagreement", + lab = TRUE, + colors = FALSE, + fontSize = 12, + barWidth = 0.6, + axisWidth = 1.5, + truncate = 40, + exclude.min = NULL +) } \arguments{ \item{statementType}{The name of the statement type in which the variable @@ -114,14 +132,64 @@ by the \code{excludeTypes} argument should be excluded from network construction (\code{invertTypes = FALSE}) or if they should be the only values that should be included during network construction (\code{invertTypes = TRUE}).} + +\item{x}{A \code{dna_barplot} object, as returned by the +\code{\link{dna_barplot}} function.} + +\item{trim}{Number of maximum characters to display in entity labels. +Entities with more characters are truncated, and the last character is +replaced by an asterisk (\code{*}).} + +\item{attr}{Display attributes, such as the name of the variable and the +levels of the qualifier variable if available.} + +\item{...}{Additional arguments; currently not in use.} + +\item{object}{A \code{dna_barplot} object.} + +\item{lab.pos, lab.neg}{Names for (dis-)agreement labels.} + +\item{lab}{Should (dis-)agreement labels and title be displayed?} + +\item{colors}{If \code{TRUE}, the \code{Colors} column in the +\code{dna_barplot} object will be used to fill the bars. Also accepts +character objects matching one of the attribute variables of the +\code{dna_barplot} object.} + +\item{fontSize}{Text size in pt.} + +\item{barWidth}{Thickness of the bars. Bars will touch when set to \code{1}. 
+When set to \code{0.5}, the space between two bars is the same as the thickness +of the bars.} + +\item{axisWidth}{Thickness of the x-axis which separates agreement from +disagreement.} + +\item{truncate}{Sets the number of characters to which axis labels should be +truncated.} + +\item{exclude.min}{Reduces the plot to entities with a minimum frequency of +statements.} } \description{ Generate the data necessary for creating a barplot for a variable. + +Show details of a \code{dna_barplot} object. + +Plot a barplot generated from \code{\link{dna_barplot}}. } \details{ Create a \code{dna_barplot} object, which contains a data frame with entity value frequencies grouped by the levels of a qualifier variable. The qualifier variable is optional. + +Print the data frame returned by the \code{\link{dna_barplot}} function. + +This function plots \code{dna_barplot} objects generated by the +\code{\link{dna_barplot}} function. It plots agreement and disagreement with +DNA statements for different entities such as \code{"concept"}, +\code{"organization"}, or \code{"person"}. Colors can be modified before +plotting (see examples). 
} \examples{ \dontrun{ @@ -136,13 +204,45 @@ b <- dna_barplot(statementType = "DNA Statement", b } +\dontrun{ +dna_init() +dna_sample() + +dna_openDatabase("sample.dna", coderId = 1, coderPassword = "sample") + +# compute barplot data +b <- dna_barplot(statementType = "DNA Statement", + variable = "concept", + qualifier = "agreement") + +# plot barplot with ggplot2 +library("ggplot2") +autoplot(b) + +# use entity colors (here: colors of organizations as an illustration) +b <- dna_barplot(statementType = "DNA Statement", + variable = "organization", + qualifier = "agreement") +autoplot(b, colors = TRUE) + +# edit the colors before plotting +b$Color[b$Type == "NGO"] <- "red" # change NGO color to red +b$Color[b$Type == "Government"] <- "blue" # change government color to blue +autoplot(b, colors = TRUE) + +# use an attribute, such as type, to color the bars +autoplot(b, colors = "Type") + + scale_colour_manual(values = "black") + +# replace colors for the three possible actor types with custom colors +autoplot(b, colors = "Type") + + scale_fill_manual(values = c("red", "blue", "green")) + + scale_colour_manual(values = "black") } -\seealso{ -Other {rDNA barplots}: -\code{\link{autoplot.dna_barplot}()}, -\code{\link{print.dna_barplot}()} + } \author{ Philip Leifeld + +Johannes B. 
Gruber, Tim Henrichsen } -\concept{{rDNA barplots}} diff --git a/rDNA/rDNA/man/dna_closeDatabase.Rd b/rDNA/rDNA/man/dna_closeDatabase.Rd index aef6faa9..a9488a7c 100644 --- a/rDNA/rDNA/man/dna_closeDatabase.Rd +++ b/rDNA/rDNA/man/dna_closeDatabase.Rd @@ -26,6 +26,7 @@ dna_closeDatabase() } \seealso{ Other {rDNA database connections}: +\code{\link{dna_api}()}, \code{\link{dna_openConnectionProfile}()}, \code{\link{dna_openDatabase}()}, \code{\link{dna_printDetails}()}, diff --git a/rDNA/rDNA/man/dna_openConnectionProfile.Rd b/rDNA/rDNA/man/dna_openConnectionProfile.Rd index 322a50a6..8cd02116 100644 --- a/rDNA/rDNA/man/dna_openConnectionProfile.Rd +++ b/rDNA/rDNA/man/dna_openConnectionProfile.Rd @@ -46,6 +46,7 @@ dna_openConnectionProfile(file = "my profile.dnc", coderPassword = "sample") } \seealso{ Other {rDNA database connections}: +\code{\link{dna_api}()}, \code{\link{dna_closeDatabase}()}, \code{\link{dna_openDatabase}()}, \code{\link{dna_printDetails}()}, diff --git a/rDNA/rDNA/man/dna_openDatabase.Rd b/rDNA/rDNA/man/dna_openDatabase.Rd index 34f38c0f..a235d245 100644 --- a/rDNA/rDNA/man/dna_openDatabase.Rd +++ b/rDNA/rDNA/man/dna_openDatabase.Rd @@ -70,6 +70,7 @@ dna_openDatabase(coderId = 1, \code{\link{dna_queryCoders}} Other {rDNA database connections}: +\code{\link{dna_api}()}, \code{\link{dna_closeDatabase}()}, \code{\link{dna_openConnectionProfile}()}, \code{\link{dna_printDetails}()}, diff --git a/rDNA/rDNA/man/dna_printDetails.Rd b/rDNA/rDNA/man/dna_printDetails.Rd index 94c6b977..a2021f9e 100644 --- a/rDNA/rDNA/man/dna_printDetails.Rd +++ b/rDNA/rDNA/man/dna_printDetails.Rd @@ -27,6 +27,7 @@ dna_printDetails() } \seealso{ Other {rDNA database connections}: +\code{\link{dna_api}()}, \code{\link{dna_closeDatabase}()}, \code{\link{dna_openConnectionProfile}()}, \code{\link{dna_openDatabase}()}, diff --git a/rDNA/rDNA/man/dna_saveConnectionProfile.Rd b/rDNA/rDNA/man/dna_saveConnectionProfile.Rd index ab0b4968..88b0aec7 100644 --- 
a/rDNA/rDNA/man/dna_saveConnectionProfile.Rd +++ b/rDNA/rDNA/man/dna_saveConnectionProfile.Rd @@ -44,6 +44,7 @@ dna_saveConnectionProfile(file = "my profile.dnc", coderPassword = "sample") } \seealso{ Other {rDNA database connections}: +\code{\link{dna_api}()}, \code{\link{dna_closeDatabase}()}, \code{\link{dna_openConnectionProfile}()}, \code{\link{dna_openDatabase}()}, diff --git a/rDNA/rDNA/man/print.dna_barplot.Rd b/rDNA/rDNA/man/print.dna_barplot.Rd deleted file mode 100644 index a5d33c72..00000000 --- a/rDNA/rDNA/man/print.dna_barplot.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rDNA.R -\name{print.dna_barplot} -\alias{print.dna_barplot} -\title{Print a \code{dna_barplot} object} -\usage{ -\method{print}{dna_barplot}(x, trim = 30, attr = TRUE, ...) -} -\arguments{ -\item{x}{A \code{dna_barplot} object, as returned by the -\code{\link{dna_barplot}} function.} - -\item{trim}{Number of maximum characters to display in entity labels. -Entities with more characters are truncated, and the last character is -replaced by an asterisk (\code{*}).} - -\item{attr}{Display attributes, such as the name of the variable and the -levels of the qualifier variable if available.} - -\item{...}{Additional arguments. Currently not in use.} -} -\description{ -Show details of a \code{dna_barplot} object. -} -\details{ -Print the data frame returned by the \code{\link{dna_barplot}} function. 
-} -\seealso{ -Other {rDNA barplots}: -\code{\link{autoplot.dna_barplot}()}, -\code{\link{dna_barplot}()} -} -\author{ -Philip Leifeld -} -\concept{{rDNA barplots}} diff --git a/rDNA/rDNA/tests/testthat/test-backbone.R b/rDNA/rDNA/tests/testthat/test-backbone.R index f5835017..3bb485ca 100644 --- a/rDNA/rDNA/tests/testthat/test-backbone.R +++ b/rDNA/rDNA/tests/testthat/test-backbone.R @@ -227,4 +227,17 @@ test_that("Autoplot method works for nested backbones", { expect_equal(class(p), c("ggraph", "gg", "ggplot")) dna_closeDatabase() unlink(samp) +}) + + +test_that("Evaluate backbone solution works", { + samp <- dna_sample() + dna_openDatabase(samp, coderId = 1, coderPassword = "sample") + b <- dna_evaluateBackboneSolution( + c("There should be legislation to regulate emissions.", + "Emissions legislation should regulate CO2.") + ) + expect_length(b, 2) + dna_closeDatabase() + unlink(samp) }) \ No newline at end of file