Skip to content

Commit

Permalink
fix: 🐛 more safety checks before insert (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
HannesOberreiter committed Mar 16, 2024
1 parent 4edc468 commit 5e3f31f
Showing 1 changed file with 26 additions and 6 deletions.
32 changes: 26 additions & 6 deletions scripts/import/import.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -50,19 +50,31 @@ func main() {
moveToObservations()
updateLastFetchStatus()
clearImport()
clearObservations()

conn.Close()
}

// clearImport deletes every row from the import table using the shared conn.
// It is meant to run both before and after an import so no stale rows remain.
// On failure the error is logged and the process exits via log.Fatal.
func clearImport() {
	slog.Info("Clearing import table")
	if _, err := conn.ExecContext(context.Background(), "DELETE FROM import"); err != nil {
		// slog takes alternating key/value pairs; a bare err would be logged under !BADKEY.
		slog.Error("Failed to clear import table", "error", err)
		log.Fatal(err)
	}
}

// clearObservations deletes rows from the observations table whose TaxonID
// does not appear in the taxa table, using the shared conn.
// On failure the error is logged and the process exits via log.Fatal.
//
// NOTE(review): in standard SQL, NOT IN matches nothing when the subquery
// yields a NULL; confirm taxa.TaxonID can never be NULL.
func clearObservations() {
	slog.Info("Clearing observations table")
	const query = "DELETE FROM observations WHERE TaxonID NOT IN (SELECT TaxonID FROM taxa)"
	if _, err := conn.ExecContext(context.Background(), query); err != nil {
		// slog takes alternating key/value pairs; a bare err would be logged under !BADKEY.
		slog.Error("Failed to clear observations table", "error", err)
		log.Fatal(err)
	}
}

// Import gbif "simple" export zip into import table
func importZIP(filePath string) {
slog.Info("Importing zip file", "filePath", filePath)
Expand Down Expand Up @@ -131,15 +143,23 @@ func importZIP(filePath string) {
continue
}

if data.TaxonID == "" || data.TaxonID == " " || data.TaxonID == "\\N" {
continue
}

if data.ObservationID == "" || data.ObservationID == " " || data.ObservationID == "\\N" {
continue
}

cleanDate := gbif.CleanDate(data.ObservationDateOriginal)

insertString := fmt.Sprintf("('%s', '%s', '%s', '%s', '%s')", safeQuotes(data.ObservationID), safeQuotes(data.TaxonID), safeQuotes(data.CountryCode), safeQuotes(data.ObservationDateOriginal), safeQuotes(cleanDate))

tempArray = append(tempArray, insertString)
count++

if len(tempArray)%20000 == 0 {
slog.Info("Inserting batch records", "count", len(tempArray))
if len(tempArray)%100_000 == 0 {
slog.Info("Inserting batch records", "count", len(tempArray), "total", count)
insert(&tempArray, "import")
}
}
Expand All @@ -149,7 +169,7 @@ func importZIP(filePath string) {
}

if len(tempArray) > 0 {
slog.Info("Inserting last batch records", "count", len(tempArray))
slog.Info("Inserting last batch records", "count", len(tempArray), "total", count)
insert(&tempArray, "import")
}
}
Expand Down Expand Up @@ -214,14 +234,14 @@ func moveToObservations() {
tempArray = append(tempArray, insertString)
count++

if len(tempArray)%20000 == 0 {
slog.Info("Move batch records", "count", len(tempArray))
if len(tempArray)%100000 == 0 {
slog.Info("Move batch records", "count", len(tempArray), "total", count)
insert(&tempArray, "observations")
}
}

if len(tempArray) > 0 {
slog.Info("Moving last batch records", "count", len(tempArray))
slog.Info("Moving last batch records", "count", len(tempArray), "total", count)
insert(&tempArray, "observations")
}

Expand Down

0 comments on commit 5e3f31f

Please sign in to comment.