Skip to content

Commit

Permalink
Improve beacon processing to get unique link labels (RPB-156)
Browse files Browse the repository at this point in the history
Try in this order: name, message, description, institution, domain
  • Loading branch information
fsteeg committed Apr 24, 2024
1 parent b74b45f commit 92e0896
Show file tree
Hide file tree
Showing 24 changed files with 40 additions and 27 deletions.
4 changes: 2 additions & 2 deletions conf/output/test-output-rppd-lobid-1.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
}, {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/11855476X",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
}, {
"id" : "https://www.lagis-hessen.de/pnd/11855476X",
Expand All @@ -21,7 +21,7 @@
}, {
"id" : "https://www.deutsche-biographie.de/pnd11855476X.html#ndbcontent",
"collection" : {
"name" : "Historische Kommission bei der Bayerischen Akademie der Wissenschaften und Bayerische Staatsbibliothek"
"name" : "Biographien der NDB"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-10.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/122507487",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-11.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/1051147387",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-12.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
}, {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/123205670",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-14.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "https://www.deutsche-biographie.de/pnd118649558.html#adbcontent",
"collection" : {
"name" : "Historische Kommission bei der Bayerischen Akademie der Wissenschaften und Bayerische Staatsbibliothek"
"name" : "Biographien der ADB"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
4 changes: 2 additions & 2 deletions conf/output/test-output-rppd-lobid-17.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "https://www.deutsche-biographie.de/pnd119280957.html#adbcontent",
"collection" : {
"name" : "Historische Kommission bei der Bayerischen Akademie der Wissenschaften und Bayerische Staatsbibliothek"
"name" : "Biographien der ADB"
}
}, {
"id" : "https://persondata.toolforge.org/redirect/gnd/commons/119280957",
Expand All @@ -16,7 +16,7 @@
}, {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/119280957",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
}, {
"id" : "http://www.tripota.uni-trier.de/beacon.php?ID=119280957",
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-18.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/120260948",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
4 changes: 2 additions & 2 deletions conf/output/test-output-rppd-lobid-20.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/11698211X",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
}, {
"id" : "http://opac.regesta-imperii.de/lang_de/suche.php?tags=11698211X",
"collection" : {
"name" : "Akademieprojekt Regesta Imperii (Quellen zur Reichsgeschichte) - Akademie der Wissenschaften und der Literatur Mainz"
"name" : "REGESTA IMPERII RI OPAC GND"
}
}, {
"id" : "http://www.tripota.uni-trier.de/beacon.php?ID=11698211X",
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-21.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/105121548X",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-22.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/1051215498",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-23.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/105121551X",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-24.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/126790086",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-26.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/137243324",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-27.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
}, {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/1051215536",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
}, {
"id" : "http://swb.bsz-bw.de/DB=2.114/CMD?ACT=SRCHA&IKT=2011&TRM=gnd:1051215536&REC=2",
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-32.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/117006084",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
}, {
"id" : "http://www.tripota.uni-trier.de/beacon.php?ID=117006084",
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-33.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
}, {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/117021652",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-36.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/1051215609",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-37.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/1028922108",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-39.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/117269476",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-41.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/1120816653",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-48.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/121384462",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-5.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/120526433",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
} ],
"type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ],
Expand Down
2 changes: 1 addition & 1 deletion conf/output/test-output-rppd-lobid-7.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"sameAs" : [ {
"id" : "http://persondata.toolforge.org/redirect/gnd/de/116324899",
"collection" : {
"name" : "Deutschsprachige Wikipedia"
"name" : "Wikipedia-Personenartikel"
}
}, {
"id" : "http://www.leo-bw.de/web/guest/detail/-/Detail/details/PERSON/wlbblb_personen/116324899/person",
Expand Down
15 changes: 14 additions & 1 deletion conf/rppd-beacon-to-tsv.flux
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,30 @@
default ENCODING = "UTF-8";
IN
| open-http(encoding=ENCODING)
| read-beacon(metadataFilter="name|institution")
| read-beacon(metadataFilter=".*")
| fix("
# temporary workaround until https://www.historische-kommission-muenchen-editionen.de/beacond/bsb_personen.php?beacon is fixed:
replace_all('seeAlso.url', 'https://personenlexika.digitale-sammlungen.dehttps://personenlexika.digitale-sammlungen.de', 'https://personenlexika.digitale-sammlungen.de')

vacuum() # remove empty fields

# for the name label we try, in this order: name, message, description, institution, domain (host part of the URL):
unless exists('seeAlso.name')
copy_field('seeAlso.message', 'seeAlso.name')
end
unless exists('seeAlso.name')
copy_field('seeAlso.description', 'seeAlso.name')
end
unless exists('seeAlso.name')
copy_field('seeAlso.institution', 'seeAlso.name')
end
unless exists('seeAlso.name')
copy_field(seeAlso.url, seeAlso.name)
replace_all(seeAlso.name, 'https?://(?:www\\\\.)?([^/]+).*', '$1')
end

replace_all(seeAlso.name, ' +', ' ')

retain('seeAlso.url', 'seeAlso.name')
")
| encode-csv(includeRecordId="true", includeHeader="true", noQuotes="true", separator="\t")
Expand Down

0 comments on commit 92e0896

Please sign in to comment.