From 3beb4a74f650945584d3112f129a0dabbe465d4e Mon Sep 17 00:00:00 2001 From: Erhun Giray TUNCAY <48091473+giraygi@users.noreply.github.com> Date: Tue, 8 Oct 2024 14:32:18 +0200 Subject: [PATCH 1/3] split output files based on ontology for #60 --- .../json2solr/src/main/java/JSON2Solr.java | 78 ++++++++++--------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/dataload/json2solr/src/main/java/JSON2Solr.java b/dataload/json2solr/src/main/java/JSON2Solr.java index cad0e0e57..9940e6a7c 100644 --- a/dataload/json2solr/src/main/java/JSON2Solr.java +++ b/dataload/json2solr/src/main/java/JSON2Solr.java @@ -39,24 +39,15 @@ public static void main(String[] args) throws IOException { String inputFilePath = cmd.getOptionValue("input"); String outPath = cmd.getOptionValue("outDir"); - PrintStream ontologiesWriter = null; - PrintStream classesWriter = null; - PrintStream propertiesWriter = null; - PrintStream individualsWriter = null; - PrintStream autocompleteWriter = null; - - - String ontologiesOutName = outPath + "/ontologies.jsonl"; - String classesOutName = outPath + "/classes.jsonl"; - String propertiesOutName = outPath + "/properties.jsonl"; - String individualsOutName = outPath + "/individuals.jsonl"; - String autocompleteOutName = outPath + "/autocomplete.jsonl"; + File file = new File(outPath); + try { + file.mkdirs(); + file.createNewFile(); + } catch (IOException ioe) { + ioe.printStackTrace(); + } - ontologiesWriter = new PrintStream(ontologiesOutName); - classesWriter = new PrintStream(classesOutName); - propertiesWriter = new PrintStream(propertiesOutName); - individualsWriter = new PrintStream(individualsOutName); - autocompleteWriter = new PrintStream(autocompleteOutName); + Map writers = new HashMap<>(); JsonReader reader = new JsonReader(new InputStreamReader(new FileInputStream(inputFilePath))); @@ -98,10 +89,9 @@ public static void main(String[] args) throws IOException { flattenedClass.put("id", entityId); flattenProperties(_class, 
flattenedClass); + writeEntity("classes",ontologyId,flattenedClass,outPath,writers); - classesWriter.println(gson.toJson(flattenedClass)); - - writeAutocompleteEntries(ontologyId, entityId, flattenedClass, autocompleteWriter); + writeAutocompleteEntries(ontologyId, entityId, flattenedClass, outPath, writers); } reader.endArray(); @@ -123,9 +113,9 @@ public static void main(String[] args) throws IOException { flattenProperties(property, flattenedProperty); - propertiesWriter.println(gson.toJson(flattenedProperty)); + writeEntity("properties",ontologyId,flattenedProperty,outPath,writers); - writeAutocompleteEntries(ontologyId, entityId, flattenedProperty, autocompleteWriter); + writeAutocompleteEntries(ontologyId, entityId, flattenedProperty,outPath,writers); } reader.endArray(); @@ -147,9 +137,9 @@ public static void main(String[] args) throws IOException { flattenProperties(individual, flattenedIndividual); - individualsWriter.println(gson.toJson(flattenedIndividual)); + writeEntity("individuals",ontologyId,flattenedIndividual,outPath,writers); - writeAutocompleteEntries(ontologyId, entityId, flattenedIndividual, autocompleteWriter); + writeAutocompleteEntries(ontologyId, entityId, flattenedIndividual,outPath,writers); } reader.endArray(); @@ -176,7 +166,7 @@ public static void main(String[] args) throws IOException { flattenProperties(ontology, flattenedOntology); - ontologiesWriter.println(gson.toJson(flattenedOntology)); + writeEntity("ontologies",ontologyId,flattenedOntology,outPath,writers); reader.endObject(); // ontology } @@ -194,6 +184,24 @@ public static void main(String[] args) throws IOException { reader.close(); } + static private void writeEntity(String type, String ontologyId, Map flattenedEntity, String outPath, Map writers) throws FileNotFoundException { + if(writers.containsKey(ontologyId+"_"+type)) + writers.get(ontologyId+"_"+type).println(gson.toJson(flattenedEntity)); + else { + writers.put(ontologyId+"_"+type,new 
PrintStream(outPath+"/"+ontologyId+"_"+type+".jsonl")); + writers.get(ontologyId+"_"+type).println(gson.toJson(flattenedEntity)); + } + } + + static private void writeAutocomplete(String ontologyId, Map flattenedEntity, String outPath, Map writers) throws FileNotFoundException { + if(writers.containsKey(ontologyId+"_autocomplete")) + writers.get(ontologyId+"_autocomplete").println(gson.toJson(flattenedEntity, Map.class)); + else { + writers.put(ontologyId+"_autocomplete",new PrintStream(outPath+"/"+ontologyId+"_autocomplete.jsonl")); + writers.get(ontologyId+"_autocomplete").println(gson.toJson(flattenedEntity, Map.class)); + } + } + static private void flattenProperties(Map properties, Map flattened) { for (String k : properties.keySet()) { @@ -233,24 +241,24 @@ static private void flattenProperties(Map properties, Map flattenedEntity, PrintStream autocompleteWriter) { + static void writeAutocompleteEntries(String ontologyId, String entityId, Map flattenedEntity, String outPath, Map writers) throws FileNotFoundException { Object labels = flattenedEntity.get("label"); if(labels instanceof List) { for(Object label : (List) labels) { - autocompleteWriter.println( gson.toJson(makeAutocompleteEntry(ontologyId, entityId, (String)label), Map.class) ); + writeAutocomplete(ontologyId,makeAutocompleteEntry(ontologyId, entityId, (String)label),outPath,writers); } } else if(labels instanceof String) { - autocompleteWriter.println( gson.toJson(makeAutocompleteEntry(ontologyId, entityId, (String)labels), Map.class) ); + writeAutocomplete(ontologyId,makeAutocompleteEntry(ontologyId, entityId, (String)labels),outPath,writers); } Object synonyms = flattenedEntity.get("synonym"); if(synonyms instanceof List) { for(Object label : (List) synonyms) { - autocompleteWriter.println( gson.toJson(makeAutocompleteEntry(ontologyId, entityId, (String)label), Map.class) ); + writeAutocomplete(ontologyId,makeAutocompleteEntry(ontologyId, entityId, (String)label),outPath,writers); } } else 
if(synonyms instanceof String) { - autocompleteWriter.println( gson.toJson(makeAutocompleteEntry(ontologyId, entityId, (String)synonyms), Map.class) ); + writeAutocomplete(ontologyId,makeAutocompleteEntry(ontologyId, entityId, (String)synonyms),outPath,writers); } } From 0198cdde0b7eda01632046f828a7e71237f1048d Mon Sep 17 00:00:00 2001 From: Erhun Giray TUNCAY <48091473+giraygi@users.noreply.github.com> Date: Tue, 8 Oct 2024 18:10:12 +0200 Subject: [PATCH 2/3] closed all PrintStreams by the end of execution for #60 --- dataload/json2solr/src/main/java/JSON2Solr.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dataload/json2solr/src/main/java/JSON2Solr.java b/dataload/json2solr/src/main/java/JSON2Solr.java index 9940e6a7c..723b2dc1a 100644 --- a/dataload/json2solr/src/main/java/JSON2Solr.java +++ b/dataload/json2solr/src/main/java/JSON2Solr.java @@ -182,6 +182,8 @@ public static void main(String[] args) throws IOException { reader.endObject(); reader.close(); + for (PrintStream printStream : writers.values()) + printStream.close(); } static private void writeEntity(String type, String ontologyId, Map flattenedEntity, String outPath, Map writers) throws FileNotFoundException { From 2f37563492637d7bf7b2ca45962c0b07ea05ae06 Mon Sep 17 00:00:00 2001 From: Erhun Giray TUNCAY <48091473+giraygi@users.noreply.github.com> Date: Wed, 9 Oct 2024 14:45:58 +0200 Subject: [PATCH 3/3] updated load into solr script based on the json2solr module outputs for #60 --- dataload/load_into_solr.sh | 39 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/dataload/load_into_solr.sh b/dataload/load_into_solr.sh index aae049a28..86e7d65fb 100755 --- a/dataload/load_into_solr.sh +++ b/dataload/load_into_solr.sh @@ -8,33 +8,24 @@ fi $1/bin/solr start -force -Djetty.host=127.0.0.1 sleep 10 -wget --method POST --no-proxy -O - --server-response --content-on-error=on --header="Content-Type: application/json" --body-file $2/ontologies.jsonl \ 
- http://127.0.0.1:8983/solr/ols4_entities/update/json/docs?commit=true - -wget --method POST --no-proxy -O - --server-response --content-on-error=on --header="Content-Type: application/json" --body-file $2/classes.jsonl \ - http://127.0.0.1:8983/solr/ols4_entities/update/json/docs?commit=true - -wget --method POST --no-proxy -O - --server-response --content-on-error=on --header="Content-Type: application/json" --body-file $2/properties.jsonl \ - http://127.0.0.1:8983/solr/ols4_entities/update/json/docs?commit=true - -wget --method POST --no-proxy -O - --server-response --content-on-error=on --header="Content-Type: application/json" --body-file $2/individuals.jsonl \ - http://127.0.0.1:8983/solr/ols4_entities/update/json/docs?commit=true - -wget --method POST --no-proxy -O - --server-response --content-on-error=on --header="Content-Type: application/json" --body-file $2/autocomplete.jsonl \ - http://127.0.0.1:8983/solr/ols4_autocomplete/update/json/docs?commit=true - +FILES=$2/*_*.jsonl +for f in $FILES +do + echo "$f" + if [[ $f == *_ontologies.jsonl ]] || [[ $f == *_classes.jsonl ]] || [[ $f == *_properties.jsonl ]] || [[ $f == *_individuals.jsonl ]]; then + echo 'entity' + wget --method POST --no-proxy -O - --server-response --content-on-error=on --header="Content-Type: application/json" --body-file $f http://127.0.0.1:8983/solr/ols4_entities/update/json/docs?commit=true + elif [[ $f == *_autocomplete.jsonl ]]; then + echo 'autocomplete' + wget --method POST --no-proxy -O - --server-response --content-on-error=on --header="Content-Type: application/json" --body-file $f http://127.0.0.1:8983/solr/ols4_autocomplete/update/json/docs?commit=true + fi +done sleep 5 - +echo 'update entities' wget --no-proxy http://127.0.0.1:8983/solr/ols4_entities/update?commit=true - sleep 5 - +echo 'update autocomplete' wget --no-proxy http://127.0.0.1:8983/solr/ols4_autocomplete/update?commit=true - -sleep 5 +echo 'loading solr finished' $1/bin/solr stop - - - -