From 5e3f31f201a5c55662f14b992074225c2d8e893e Mon Sep 17 00:00:00 2001 From: HannesOberreiter Date: Sat, 16 Mar 2024 16:14:18 +0100 Subject: [PATCH] fix: :bug: more safety checks before insert (#18) --- scripts/import/import.go | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/scripts/import/import.go b/scripts/import/import.go index cd14b90..719be28 100644 --- a/scripts/import/import.go +++ b/scripts/import/import.go @@ -50,12 +50,14 @@ func main() { moveToObservations() updateLastFetchStatus() clearImport() + clearObservations() conn.Close() } // Clear import table after and before import func clearImport() { + slog.Info("Clearing import table") _, err := conn.ExecContext(context.Background(), "DELETE FROM import") if err != nil { slog.Error("Failed to clear import table", err) @@ -63,6 +65,16 @@ func clearImport() { } } +// Clear observations which have no taxon in taxa table +func clearObservations() { + slog.Info("Clearing observations table") + _, err := conn.ExecContext(context.Background(), "DELETE FROM observations WHERE TaxonID NOT IN (SELECT TaxonID FROM taxa)") + if err != nil { + slog.Error("Failed to clear observations table", err) + log.Fatal(err) + } +} + // Import gbif "simple" export zip into import table func importZIP(filePath string) { slog.Info("Importing zip file", "filePath", filePath) @@ -131,6 +143,14 @@ func importZIP(filePath string) { continue } + if data.TaxonID == "" || data.TaxonID == " " || data.TaxonID == "\\N" { + continue + } + + if data.ObservationID == "" || data.ObservationID == " " || data.ObservationID == "\\N" { + continue + } + cleanDate := gbif.CleanDate(data.ObservationDateOriginal) insertString := fmt.Sprintf("('%s', '%s', '%s', '%s', '%s')", safeQuotes(data.ObservationID), safeQuotes(data.TaxonID), safeQuotes(data.CountryCode), safeQuotes(data.ObservationDateOriginal), safeQuotes(cleanDate)) @@ -138,8 +158,8 @@ func importZIP(filePath string) { tempArray = append(tempArray, insertString) count++ - if len(tempArray)%20000 == 0 { - slog.Info("Inserting batch records", "count", len(tempArray)) + if len(tempArray)%100_000 == 0 { + slog.Info("Inserting batch records", "count", len(tempArray), "total", count) insert(&tempArray, "import") } } @@ -149,7 +169,7 @@ func importZIP(filePath string) { } if len(tempArray) > 0 { - slog.Info("Inserting last batch records", "count", len(tempArray)) + slog.Info("Inserting last batch records", "count", len(tempArray), "total", count) insert(&tempArray, "import") } } @@ -214,14 +234,14 @@ func moveToObservations() { tempArray = append(tempArray, insertString) count++ - if len(tempArray)%20000 == 0 { - slog.Info("Move batch records", "count", len(tempArray)) + if len(tempArray)%100000 == 0 { + slog.Info("Move batch records", "count", len(tempArray), "total", count) insert(&tempArray, "observations") } } if len(tempArray) > 0 { - slog.Info("Moving last batch records", "count", len(tempArray)) + slog.Info("Moving last batch records", "count", len(tempArray), "total", count) insert(&tempArray, "observations") }