'all' command should run only the selected tasks if schema is PICA #265
pkiraly committed May 5, 2023
1 parent 2ebef70 commit 2e697a9
Showing 3 changed files with 26 additions and 34 deletions.
common-script: 12 additions & 19 deletions
@@ -243,8 +243,8 @@ do_version_link() {
   fi
 }
 
-do_sqlite() {
-  run sqlite
+do_validate_sqlite() {
+  run "validate sqlite"
   php scripts/sqlite/normalize-issue-details.php ${OUTPUT_DIR} &> ${PREFIX}/sqlite.log
 
   untrace
@@ -465,15 +465,6 @@ do_all_solr() {
   do_index
 }
 
-do_all() {
-  if [ "${SCHEMA:-}" == "PICA" ]; then
-    do_all_analyses_for_pica
-  else
-    do_all_analyses
-  fi
-  do_all_solr
-}
-
 # ---- usage and execution of proccessing steps ----
 
 help() {
@@ -499,12 +490,11 @@ commands:
   record-patterns       record patterns
   prepare-solr          prepare indexing
   index                 indexing with Solr
-  sqlite                import tables to SQLite
+  validate-sqlite       import result of validation to SQLite
   completeness-sqlite   import groupped output of completeness to SQLite
   export-schema-files   export schema files
   shacl4bib             run SHACL-like validation
   all-analyses          run all analytical tasks (or those set via ANALYSES)
-  all-analyses-pica     run all analytical tasks for PICA records (or those set via PICA_ANALYSES)
   all-solr              run all indexing tasks
   all                   run all tasks (analyses and indexing)
   config                show configuration
@@ -546,9 +536,13 @@ else
 fi
 
 # which tasks to run on `all-analyses`
-ALL_ANALYSES=validate,sqlite,completeness,completeness_sqlite,classifications,authorities,tt_completeness,shelf_ready_completeness,serial_score,functional_analysis,pareto,marc_history
+if [ "${SCHEMA:-}" == "PICA" ]; then
+  ALL_ANALYSES=validate,validate_sqlite,completeness,completeness_sqlite,classifications,authorities,marc_history
+else
+  ALL_ANALYSES=validate,validate_sqlite,completeness,completeness_sqlite,classifications,authorities,tt_completeness,shelf_ready_completeness,serial_score,functional_analysis,pareto,marc_history
+fi
 ANALYSES=${ANALYSES:-$ALL_ANALYSES}
-PICA_ANALYSES=validate,sqlite,completeness,completeness_sqlite,classifications,authorities,marc_history
+PICA_ANALYSES=validate,validate_sqlite,completeness,completeness_sqlite,classifications,authorities,marc_history
 PICA=${PICA:-$PICA_ANALYSES}
 
 # check directories for processing commands
@@ -571,7 +565,7 @@ if [[ ! "${1:-help}" =~ ^(help|config|export-schema-files)$ ]]; then
 fi
 
 case "${1:-help}" in
-  validate) do_validate ; do_sqlite ;;
+  validate) do_validate ; do_validate_sqlite ;;
   prepare-solr) do_prepare_solr ;;
   index) do_index ;;
   completeness) do_completeness ; do_completeness_sqlite ;;
@@ -588,13 +582,12 @@ case "${1:-help}" in
   marc-history) do_marc_history ;;
   record-patterns) do_record_patterns ;;
   completeness-sqlite) do_completeness_sqlite ;;
-  sqlite) do_sqlite ;;
+  validate-sqlite) do_validate_sqlite ;;
   mysql) do_mysql ;;
   export-schema-files) do_export_schema_files ;;
   all-analyses) do_all_analyses ;;
-  all-analyses-pica) do_all_analyses_for_pica ;;
   all-solr) do_all_solr ;;
-  all) do_all ;;
+  all) do_all_analyses ; do_all_solr ;;
   version-link) do_version_link ;;
   config) config ;;
   help) help ;;
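
With this change the schema decides what `all` and `all-analyses` expand to: for PICA the task list stops at marc_history, otherwise the MARC-only analyses (tt_completeness, shelf_ready_completeness, serial_score, functional_analysis, pareto) are included as well, and `all` now simply chains do_all_analyses with do_all_solr. A minimal usage sketch, assuming a catalogue-specific wrapper that sets the configuration and forwards its first argument to common-script; the wrapper name is a placeholder, not part of this commit:

  # ./my-catalogue.sh is a hypothetical wrapper around common-script
  SCHEMA=PICA ./my-catalogue.sh all   # runs only the PICA analyses, then the Solr indexing tasks
  ./my-catalogue.sh all               # any other schema also runs the MARC-specific analyses
  ANALYSES=validate,validate_sqlite ./my-catalogue.sh all-analyses   # an explicit ANALYSES list still overrides the default
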
scripts/sqlite/calculate-aggregated-numbers.groupped.sh: 14 additions & 10 deletions
@@ -6,9 +6,13 @@
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 
+log() {
+  echo "$(date +'%F %T')> $1"
+}
+
 OUTPUT_DIR=$1
 
-echo "OUTPUT_DIR: ${OUTPUT_DIR}"
+log "OUTPUT_DIR: ${OUTPUT_DIR}"
 
 if [[ -f $(pwd)/solr-functions ]]; then
   . ./solr-functions
@@ -18,10 +22,10 @@ fi
 
 SOLR_CORE=validation
 
-echo "create Solr core"
+log "create Solr core"
 
 CORE_EXISTS=$(check_core $SOLR_CORE)
-echo "$SOLR_CORE exists: $CORE_EXISTS"
+log "$SOLR_CORE exists: $CORE_EXISTS"
 if [[ $CORE_EXISTS != 1 ]]; then
   echo "Create Solr core '$SOLR_CORE'"
   create_core $SOLR_CORE
@@ -30,7 +34,7 @@ else
   purge_core $SOLR_CORE
 fi
 
-echo "populate Solr core"
+log "populate Solr core"
 
 php scripts/sqlite/validation-result-indexer.php ${OUTPUT_DIR} $SOLR_CORE
 
@@ -41,11 +45,11 @@ optimize_core $SOLR_CORE
 # ${OUTPUT_DIR}/issue-groupped-categories.csv
 # ${OUTPUT_DIR}/issue-groupped-paths.csv
 
-echo "calculate numbers"
+log "calculate numbers"
 
 Rscript scripts/sqlite/qa_catalogue.groupping.R ${OUTPUT_DIR} $SOLR_CORE
 
-echo "create database structure"
+log "create database structure"
 
 sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
 CREATE TABLE issue_groupped_types(
@@ -71,7 +75,7 @@ CREATE TABLE issue_groupped_paths(
 );
 EOF
 
-echo "import issue_groupped_types"
+log "import issue_groupped_types"
 tail -n +2 ${OUTPUT_DIR}/issue-groupped-types.csv > ${OUTPUT_DIR}/issue-groupped-types-noheader.csv
 sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
 .mode csv
@@ -80,7 +84,7 @@ sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
 EOF
 rm ${OUTPUT_DIR}/issue-groupped-types-noheader.csv
 
-echo "import issue_groupped_categories"
+log "import issue_groupped_categories"
 tail -n +2 ${OUTPUT_DIR}/issue-groupped-categories.csv > ${OUTPUT_DIR}/issue-groupped-categories-noheader.csv
 sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
 .mode csv
@@ -89,7 +93,7 @@ EOF
 
 rm ${OUTPUT_DIR}/issue-groupped-categories-noheader.csv
 
-echo "import issue_groupped_paths"
+log "import issue_groupped_paths"
 tail -n +2 ${OUTPUT_DIR}/issue-groupped-paths.csv > ${OUTPUT_DIR}/issue-groupped-paths-noheader.csv
 sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
 .mode csv
@@ -98,7 +102,7 @@ EOF
 
 rm ${OUTPUT_DIR}/issue-groupped-paths-noheader.csv
 
-echo "index sqlite tables"
+log "index sqlite tables"
 sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite << EOF
 CREATE INDEX IF NOT EXISTS "types_groupId" ON issue_groupped_types ("groupId");
 CREATE INDEX IF NOT EXISTS "types_typeId" ON issue_groupped_types ("typeId");
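
The repeated echo calls in this script are replaced by a small log helper that prefixes every progress message with a timestamp. A minimal sketch of its behaviour, using the function added above (the sample timestamp is illustrative):

  log() {
    echo "$(date +'%F %T')> $1"
  }
  log "create Solr core"
  # prints something like:
  # 2023-05-05 14:02:37> create Solr core
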
scripts/sqlite/qa_catalogue.groupping.R: 0 additions & 5 deletions
@@ -37,12 +37,10 @@ summary <- summary %>%
   mutate(
     groupId = ifelse(groupId == 'all', 0, groupId),
     groupId = as.integer(groupId))
-head(summary)
 gc()
 
 groupIds <- summary %>% select(groupId) %>% distinct() %>%
   unlist(use.names = FALSE)
-groupIds
 
 typesDF <- NULL
 pathsDF <- NULL
@@ -116,9 +114,6 @@ for (i in 1:len) {
   }
 
 }
-typesDF
-pathsDF
-categoriesDF
 
 file <- sprintf('%s/%s', OUTPUT_DIR, 'issue-groupped-types.csv')
 print(file)
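
The lines removed from qa_catalogue.groupping.R (head(summary), groupIds, typesDF, pathsDF, categoriesDF) were bare top-level expressions; under Rscript such expressions auto-print their full value, so dropping them only silences debug output on stdout and leaves the generated issue-groupped-*.csv files unchanged.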
