From d1be353d17c9e7026b6530833715c04e0b88cc62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Sun, 24 Dec 2023 19:27:00 +0100 Subject: [PATCH 1/8] GeoNetwork 4.x harvester --- .../harvest/harvester/geonet40/Aligner.java | 904 ++++++++++++++++++ .../geonet40/GeoNetworkApiClient.java | 213 +++++ .../harvester/geonet40/Geonet40Harvester.java | 94 ++ .../harvester/geonet40/GeonetParams.java | 213 +++++ .../harvest/harvester/geonet40/Group.java | 140 +++ .../harvest/harvester/geonet40/Harvester.java | 284 ++++++ .../harvest/harvester/geonet40/Search.java | 156 +++ .../resources/config-spring-geonetwork.xml | 5 + .../js/admin/HarvestSettingsController.js | 38 + .../resources/catalog/locales/en-admin.json | 4 +- .../admin/harvest/type/geonetwork40.html | 251 +++++ .../admin/harvest/type/geonetwork40.js | 110 +++ .../xsl/xml/harvesting/geonetwork40.xsl | 77 ++ 13 files changed, 2488 insertions(+), 1 deletion(-) create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Aligner.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Geonet40Harvester.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeonetParams.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Group.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Harvester.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java create mode 100644 web-ui/src/main/resources/catalog/templates/admin/harvest/type/geonetwork40.html create mode 100644 web-ui/src/main/resources/catalog/templates/admin/harvest/type/geonetwork40.js create mode 100644 web/src/main/webapp/xsl/xml/harvesting/geonetwork40.xsl diff --git 
a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Aligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Aligner.java new file mode 100644 index 00000000000..87b15015f10 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Aligner.java @@ -0,0 +1,904 @@ +//============================================================================= +//=== Copyright (C) 2001-2023 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.geonet40; + +import jeeves.server.ServiceConfig; +import jeeves.server.context.ServiceContext; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang.StringUtils; +import org.fao.geonet.GeonetContext; +import org.fao.geonet.Logger; +import org.fao.geonet.MetadataResourceDatabaseMigration; +import org.fao.geonet.api.records.attachments.Store; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.domain.*; +import org.fao.geonet.domain.userfeedback.RatingsSetting; +import org.fao.geonet.exceptions.NoSchemaMatchesException; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.UpdateDatestamp; +import org.fao.geonet.kernel.datamanager.IMetadataManager; +import org.fao.geonet.kernel.datamanager.IMetadataUtils; +import org.fao.geonet.kernel.harvest.BaseAligner; +import org.fao.geonet.kernel.harvest.harvester.*; +import org.fao.geonet.kernel.mef.*; +import org.fao.geonet.kernel.search.IndexingMode; +import org.fao.geonet.kernel.setting.SettingManager; +import org.fao.geonet.kernel.setting.Settings; +import org.fao.geonet.repository.GroupRepository; +import org.fao.geonet.repository.MetadataRepository; +import org.fao.geonet.schema.iso19139.ISO19139SchemaPlugin; +import org.fao.geonet.utils.Log; +import org.fao.geonet.utils.Xml; +import org.jdom.Element; +import org.jdom.JDOMException; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.atomic.AtomicBoolean; + + +public class Aligner extends BaseAligner { + + private Logger log; + + private ServiceContext context; + private DataManager dataMan; + private IMetadataManager metadataManager; + private HarvestResult result; + private CategoryMapper localCateg; + private 
GroupMapper localGroups; + private UUIDMapper localUuids; + private String processName; + private String preferredSchema; + private Map processParams = new HashMap<>(); + private MetadataRepository metadataRepository; + private Map> hmRemoteGroups = new HashMap<>(); + private SettingManager settingManager; + + private GeoNetworkApiClient geoNetworkApiClient; + + public Aligner(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, + GeonetParams params, List groups) { + super(cancelMonitor); + this.log = log; + this.context = context; + this.params = params; + + GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME); + dataMan = gc.getBean(DataManager.class); + metadataManager = gc.getBean(IMetadataManager.class); + metadataRepository = gc.getBean(MetadataRepository.class); + settingManager = gc.getBean(SettingManager.class); + geoNetworkApiClient = gc.getBean(GeoNetworkApiClient.class); + + result = new HarvestResult(); + + //--- save remote categories and groups into hashmaps for a fast access + if (groups != null) { + setupLocalGroup(groups, hmRemoteGroups); + } + } + + //-------------------------------------------------------------------------- + + private void setupLocalGroup(List list, Map> hmEntity) { + + for (org.fao.geonet.domain.Group group : list) { + String name = group.getName(); + + Map hm = new HashMap<>(); + hmEntity.put(name, hm); + hm.putAll(group.getLabelTranslations()); + } + } + + public HarvestResult align(SortedSet records, List errors) throws Exception { + log.info("Start of alignment for : " + params.getName()); + + //----------------------------------------------------------------------- + //--- retrieve all local categories and groups + //--- retrieve harvested uuids for given harvesting node + + localCateg = new CategoryMapper(context); + localGroups = new GroupMapper(context); + localUuids = new UUIDMapper(context.getBean(IMetadataUtils.class), params.getUuid()); + + Pair> filter = 
HarvesterUtil.parseXSLFilter(params.xslfilter); + processName = filter.one(); + processParams = filter.two(); + + //----------------------------------------------------------------------- + //--- remove old metadata + + for (String uuid : localUuids.getUUIDs()) { + if (cancelMonitor.get()) { + return this.result; + } + + try { + if (!exists(records, uuid)) { + String id = localUuids.getID(uuid); + + if (log.isDebugEnabled()) log.debug(" - Removing old metadata with id:" + id); + metadataManager.deleteMetadata(context, id); + + result.locallyRemoved++; + } + } catch (Throwable t) { + log.error("Couldn't remove metadata with uuid " + uuid); + log.error(t); + result.unchangedMetadata++; + } + } + //--- insert/update new metadata + // Load preferred schema and set to iso19139 by default + preferredSchema = context.getBean(ServiceConfig.class).getMandatoryValue("preferredSchema"); + if (preferredSchema == null) { + preferredSchema = "iso19139"; + } + + for (RecordInfo ri : records) { + if (cancelMonitor.get()) { + return this.result; + } + + try { + + result.totalMetadata++; + + // Mef full format provides ISO19139 records in both the profile + // and ISO19139 so we could be able to import them as far as + // ISO19139 schema is installed by default. + if (!dataMan.existsSchema(ri.schema) && !ri.schema.startsWith("iso19139.")) { + if (log.isDebugEnabled()) + log.debug(" - Metadata skipped due to unknown schema. 
uuid:" + ri.uuid + + ", schema:" + ri.schema); + result.unknownSchema++; + } else { + String id = dataMan.getMetadataId(ri.uuid); + + // look up value of localrating/enable + SettingManager settingManager = context.getBean(SettingManager.class); + String localRating = settingManager.getValue(Settings.SYSTEM_LOCALRATING_ENABLE); + + if (id == null) { + //record doesn't exist (so it doesn't belong to this harvester) + log.debug("Adding record with uuid " + ri.uuid); + addMetadata(ri, localRating.equals(RatingsSetting.BASIC), ri.uuid); + } else if (localUuids.getID(ri.uuid) == null) { + //record doesn't belong to this harvester but exists + result.datasetUuidExist++; + + switch (params.getOverrideUuid()) { + case OVERRIDE: + updateMetadata(ri, + id, + localRating.equals(RatingsSetting.BASIC), + params.useChangeDateForUpdate(), + localUuids.getChangeDate(ri.uuid), true); + log.info("Overriding record with uuid " + ri.uuid); + result.updatedMetadata++; + + if (params.isIfRecordExistAppendPrivileges()) { + addPrivileges(id, params.getPrivileges(), localGroups, context); + result.privilegesAppendedOnExistingRecord++; + } + break; + case RANDOM: + log.info("Generating random uuid for remote record with uuid " + ri.uuid); + addMetadata(ri, localRating.equals(RatingsSetting.BASIC), UUID.randomUUID().toString()); + break; + case SKIP: + log.debug("Skipping record with uuid " + ri.uuid); + result.uuidSkipped++; + default: + break; + } + } else { + //record exists and belongs to this harvester + log.debug("Updating record with uuid " + ri.uuid); + updateMetadata(ri, id, + localRating.equals(RatingsSetting.BASIC), + params.useChangeDateForUpdate(), + localUuids.getChangeDate(ri.uuid), false); + + if (params.isIfRecordExistAppendPrivileges()) { + addPrivileges(id, params.getPrivileges(), localGroups, context); + result.privilegesAppendedOnExistingRecord++; + } + } + + } + } catch (Throwable t) { + log.error("Couldn't insert or update metadata with uuid " + ri.uuid); + 
log.error(t); + result.unchangedMetadata++; + } + } + + dataMan.forceIndexChanges(); + + log.info("End of alignment for : " + params.getName()); + + return result; + } + + private Element extractValidMetadataForImport(DirectoryStream files, Element info) throws IOException, JDOMException { + Element metadataValidForImport; + final String finalPreferredSchema = preferredSchema; + + String infoSchema = "_none_"; + if (info != null && info.getContentSize() != 0) { + Element general = info.getChild("general"); + if (general != null && general.getContentSize() != 0) { + if (general.getChildText("schema") != null) { + infoSchema = general.getChildText("schema"); + } + } + } + + Path lastUnknownMetadataFolderName = null; + + if (Log.isDebugEnabled(Geonet.MEF)) + Log.debug(Geonet.MEF, "Multiple metadata files"); + + Map> mdFiles = + new HashMap<>(); + for (Path file : files) { + if (Files.isRegularFile(file)) { + Element metadata = Xml.loadFile(file); + try { + Path parent = file.getParent(); + Path parent2 = parent.getParent(); + String metadataSchema = dataMan.autodetectSchema(metadata, null); + // If local node doesn't know metadata + // schema try to load next xml file. + if (metadataSchema == null) { + continue; + } + + String currFile = "Found metadata file " + parent2.relativize(file); + mdFiles.put(metadataSchema, Pair.read(currFile, metadata)); + + } catch (NoSchemaMatchesException e) { + // Important folder name to identify metadata should be ../../ + Path parent = file.getParent(); + if (parent != null) { + Path parent2 = parent.getParent(); + if (parent2 != null) { + lastUnknownMetadataFolderName = parent2.relativize(parent); + } + } + log.debug("No schema match for " + lastUnknownMetadataFolderName + file.getFileName() + "."); + } catch (NullPointerException e) { + log.error("Check the schema directory"); + log.error(e); + } + } + } + + if (mdFiles.size() == 0) { + log.debug("No valid metadata file found" + + ((lastUnknownMetadataFolderName == null) ? 
+ "" : + (" in " + lastUnknownMetadataFolderName) + ) + "."); + return null; + } + + // 1st: Select metadata with schema in info file + Pair mdInform = mdFiles.get(infoSchema); + if (mdInform != null) { + log.debug(mdInform.one() + + " with info.xml schema (" + infoSchema + ")."); + metadataValidForImport = mdInform.two(); + return metadataValidForImport; + } + // 2nd: Select metadata with preferredSchema + mdInform = mdFiles.get(finalPreferredSchema); + if (mdInform != null) { + log.debug(mdInform.one() + + " with preferred schema (" + finalPreferredSchema + ")."); + metadataValidForImport = mdInform.two(); + return metadataValidForImport; + } + + // Lastly: Select the first metadata in the map + String metadataSchema = (String) mdFiles.keySet().toArray()[0]; + mdInform = mdFiles.get(metadataSchema); + log.debug(mdInform.one() + + " with known schema (" + metadataSchema + ")."); + metadataValidForImport = mdInform.two(); + + return metadataValidForImport; + } + + private void addMetadata(final RecordInfo ri, final boolean localRating, String uuid) throws Exception { + final String[] id = {null}; + final Element[] md = {null}; + + //--- import metadata from MEF file + + Path mefFile = null; + + try { + mefFile = geoNetworkApiClient.retrieveMEF( params.host + "/" + params.getNode(), ri.uuid); + String fileType = "mef"; + MEFLib.Version version = MEFLib.getMEFVersion(mefFile); + if (version != null && version.equals(MEFLib.Version.V2)) { + fileType = "mef2"; + } + + IVisitor visitor = fileType.equals("mef2") ? 
new MEF2Visitor() : new MEFVisitor(); + + MEFLib.visit(mefFile, visitor, new IMEFVisitor() { + public void handleMetadata(Element mdata, int index) throws Exception { + md[index] = mdata; + } + + //-------------------------------------------------------------------- + + public void handleMetadataFiles(DirectoryStream files, Element info, int index) throws Exception { + // Import valid metadata + Element metadataValidForImport = extractValidMetadataForImport(files, info); + + if (metadataValidForImport != null) { + handleMetadata(metadataValidForImport, index); + } + } + + //-------------------------------------------------------------------- + + public void handleInfo(Element info, int index) throws Exception { + + final Element metadata = md[index]; + String schema = dataMan.autodetectSchema(metadata, null); + if (info != null && info.getContentSize() != 0) { + Element general = info.getChild("general"); + if (general != null && general.getContentSize() != 0) { + Element schemaInfo = general.getChild("schema"); + if (schemaInfo != null) { + schemaInfo.setText(schema); + } + } + } + if (info != null) { + id[index] = addMetadata(ri, md[index], info, localRating, uuid); + } + } + + //-------------------------------------------------------------------- + + public void handlePublicFile(String file, String changeDate, InputStream is, int index) throws Exception { + handleFile(file, changeDate, is, index, MetadataResourceVisibility.PUBLIC); + } + + private void handleFile(String file, String changeDate, InputStream is, int index, MetadataResourceVisibility visibility) throws Exception { + if (id[index] == null) return; + if (log.isDebugEnabled()) + log.debug(" - Adding remote " + visibility + " file with name: " + file); + final Store store = context.getBean("resourceStore", Store.class); + final IMetadataUtils metadataUtils = context.getBean(IMetadataUtils.class); + final String metadataUuid = metadataUtils.getMetadataUuid(id[index]); + store.putResource(context, 
metadataUuid, file, is, new ISODate(changeDate).toDate(), visibility, true); + } + + public void handleFeatureCat(Element md, int index) + throws Exception { + // Feature Catalog not managed for harvesting + } + + public void handlePrivateFile(String file, String changeDate, + InputStream is, int index) throws Exception { + if (params.mefFormatFull) { + handleFile(file, changeDate, is, index, MetadataResourceVisibility.PRIVATE); + } + } + }); + } catch (Exception e) { + //--- we ignore the exception here. Maybe the metadata has been removed just now + if (log.isDebugEnabled()) + log.debug(" - Skipped unretrievable metadata (maybe has been removed) with uuid:" + ri.uuid); + result.unretrievable++; + log.error(e); + } finally { + if (mefFile != null) { + FileUtils.deleteQuietly(mefFile.toFile()); + } + } + } + + private String addMetadata(RecordInfo ri, Element md, Element info, boolean localRating, String uuid) throws Exception { + Element general = info.getChild("general"); + + String createDate = general.getChildText("createDate"); + String changeDate = general.getChildText("changeDate"); + String isTemplate = general.getChildText("isTemplate"); + String siteId = general.getChildText("siteId"); + String popularity = general.getChildText("popularity"); + String schema = general.getChildText("schema"); + + if ("true".equals(isTemplate)) isTemplate = "y"; + else isTemplate = "n"; + + if (log.isDebugEnabled()) log.debug(" - Adding metadata with remote uuid:" + ri.uuid); + + try { + Integer groupIdVal = null; + if (StringUtils.isNotEmpty(params.getOwnerIdGroup())) { + groupIdVal = getGroupOwner(); + } + + params.getValidate().validate(dataMan, context, md, groupIdVal); + } catch (Exception e) { + log.info("Ignoring invalid metadata uuid: " + uuid); + result.doesNotValidate++; + return null; + } + + if (params.mefFormatFull && ri.schema.startsWith(ISO19139SchemaPlugin.IDENTIFIER)) { + // In GeoNetwork 3.x, links to resources changed: + // * thumbnails contains full URL 
instead of file name only + // * API mode change old URL structure. + MetadataResourceDatabaseMigration.updateMetadataResourcesLink(md, null, settingManager); + } + + if (!params.xslfilter.equals("")) { + md = HarvesterUtil.processMetadata(dataMan.getSchema(schema), + md, processName, processParams); + } + + // insert metadata + // If MEF format is full, private file links needs to be updated + boolean ufo = params.mefFormatFull; + AbstractMetadata metadata = new Metadata(); + metadata.setUuid(uuid); + metadata.getDataInfo(). + setSchemaId(schema). + setRoot(md.getQualifiedName()). + setType(MetadataType.lookup(isTemplate)). + setCreateDate(new ISODate(createDate)). + setChangeDate(new ISODate(changeDate)); + metadata.getSourceInfo(). + setSourceId(siteId). + setOwner(getOwner()); + metadata.getHarvestInfo(). + setHarvested(true). + setUuid(params.getUuid()); + + try { + metadata.getSourceInfo().setGroupOwner(Integer.valueOf(params.getOwnerIdGroup())); + } catch (NumberFormatException e) { + } + + addCategories(metadata, params.getCategories(), localCateg, context, null, false); + + metadata = metadataManager.insertMetadata(context, metadata, md, IndexingMode.none, ufo, UpdateDatestamp.NO, false, false); + + String id = String.valueOf(metadata.getId()); + + if (!localRating) { + String rating = general.getChildText("rating"); + if (rating != null) { + metadata.getDataInfo().setRating(Integer.valueOf(rating)); + } + } + + if (popularity != null) { + metadata.getDataInfo().setPopularity(Integer.valueOf(popularity)); + } + + + if (params.createRemoteCategory) { + Element categs = info.getChild("categories"); + if (categs != null) { + Importer.addCategoriesToMetadata(metadata, categs, context); + } + } + if (((ArrayList) params.getGroupCopyPolicy()).isEmpty()) { + addPrivileges(id, params.getPrivileges(), localGroups, context); + } else { + addPrivilegesFromGroupPolicy(id, info.getChild("privileges")); + } + context.getBean(IMetadataManager.class).save(metadata); + + 
dataMan.indexMetadata(id, Math.random() < 0.01); + result.addedMetadata++; + + return id; + } + + private void addPrivilegesFromGroupPolicy(String id, Element privil) throws Exception { + Map> groupOper = buildPrivileges(privil); + + for (Group remoteGroup : params.getGroupCopyPolicy()) { + //--- get operations allowed to remote group + Set oper = groupOper.get(remoteGroup.name); + + //--- if we don't find any match, maybe the remote group has been removed + + if (oper == null) + log.info(" - Remote group has been removed or no privileges exist : " + remoteGroup.name); + else { + String localGrpId = localGroups.getID(remoteGroup.name); + + if (localGrpId == null) { + //--- group does not exist locally + + if (remoteGroup.policy == Group.CopyPolicy.CREATE_AND_COPY) { + if (log.isDebugEnabled()) + log.debug(" - Creating local group : " + remoteGroup.name); + localGrpId = createGroup(remoteGroup.name); + + if (localGrpId == null) + log.info(" - Specified group was not found remotely : " + remoteGroup.name); + else { + if (log.isDebugEnabled()) + log.debug(" - Setting privileges for group : " + remoteGroup.name); + addOperations(id, localGrpId, oper); + } + } + } else { + //--- group exists locally + + if (remoteGroup.policy == Group.CopyPolicy.COPY_TO_INTRANET) { + if (log.isDebugEnabled()) + log.debug(" - Setting privileges for 'intranet' group"); + addOperations(id, "0", oper); + } else { + if (log.isDebugEnabled()) + log.debug(" - Setting privileges for group : " + remoteGroup.name); + addOperations(id, localGrpId, oper); + } + } + } + } + } + + private Map> buildPrivileges(Element privil) { + Map> map = new HashMap<>(); + + for (Object o : privil.getChildren("group")) { + Element group = (Element) o; + String name = group.getAttributeValue("name"); + + Set set = new HashSet<>(); + map.put(name, set); + + for (Object op : group.getChildren("operation")) { + Element oper = (Element) op; + name = oper.getAttributeValue("name"); + set.add(name); + } + } + + return 
map; + } + + private void addOperations(String id, String groupId, Set oper) throws Exception { + for (String opName : oper) { + int opId = dataMan.getAccessManager().getPrivilegeId(opName); + + //--- allow only: view, download, dynamic, featured + if (opId == 0 || opId == 1 || opId == 5 || opId == 6) { + if (log.isDebugEnabled()) log.debug(" --> " + opName); + dataMan.setOperation(context, id, groupId, opId + ""); + } else { + if (log.isDebugEnabled()) log.debug(" --> " + opName + " (skipped)"); + } + } + } + + private String createGroup(String name) throws Exception { + Map hm = hmRemoteGroups.get(name); + + if (hm == null) + return null; + + org.fao.geonet.domain.Group group = new org.fao.geonet.domain.Group() + .setName(name); + group.getLabelTranslations().putAll(hm); + + group = context.getBean(GroupRepository.class).save(group); + + int id = group.getId(); + localGroups.add(name, id + ""); + + return id + ""; + } + + /** + * Updates the record on the database. The force parameter allows you to force an update even + * if the date is not more updated, to make sure transformation and attributes assigned by the + * harvester are applied. Also, it changes the ownership of the record so it is assigned to the + * new harvester that last updated it. + * + * @param ri + * @param id + * @param localRating + * @param useChangeDate + * @param localChangeDate + * @param force + * @throws Exception + */ + private void updateMetadata(final RecordInfo ri, final String id, final boolean localRating, + final boolean useChangeDate, String localChangeDate, Boolean force) throws Exception { + final Element[] md = {null}; + final Element[] publicFiles = {null}; + final Element[] privateFiles = {null}; + + if (localUuids.getID(ri.uuid) == null && !force) { + if (log.isDebugEnabled()) + log.debug(" - Skipped metadata managed by another harvesting node. 
uuid:" + ri.uuid + ", name:" + params.getName()); + } else { + if (force || !useChangeDate || ri.isMoreRecentThan(localChangeDate)) { + Path mefFile = null; + + try { + mefFile = geoNetworkApiClient.retrieveMEF(params.host + "/" + params.getNode(), ri.uuid); + + String fileType = "mef"; + MEFLib.Version version = MEFLib.getMEFVersion(mefFile); + if (version != null && version.equals(MEFLib.Version.V2)) { + fileType = "mef2"; + } + + IVisitor visitor = fileType.equals("mef2") ? new MEF2Visitor() : new MEFVisitor(); + + MEFLib.visit(mefFile, visitor, new IMEFVisitor() { + public void handleMetadata(Element mdata, int index) throws Exception { + md[index] = mdata; + } + + //----------------------------------------------------------------- + + public void handleMetadataFiles(DirectoryStream files, Element info, int index) throws Exception { + // Import valid metadata + Element metadataValidForImport = extractValidMetadataForImport(files, info); + + if (metadataValidForImport != null) { + handleMetadata(metadataValidForImport, index); + } + } + + public void handleInfo(Element info, int index) throws Exception { + updateMetadata(ri, id, md[index], info, localRating, force); + publicFiles[index] = info.getChild("public"); + privateFiles[index] = info.getChild("private"); + } + + //----------------------------------------------------------------- + + public void handlePublicFile(String file, String changeDate, InputStream is, int index) throws Exception { + handleFile(id, file, MetadataResourceVisibility.PUBLIC, changeDate, is, publicFiles[index]); + } + + public void handleFeatureCat(Element md, int index) + throws Exception { + // Feature Catalog not managed for harvesting + } + + public void handlePrivateFile(String file, + String changeDate, InputStream is, int index) + throws Exception { + handleFile(id, file, MetadataResourceVisibility.PRIVATE, changeDate, is, privateFiles[index]); + } + + }); + } catch (Exception e) { + //--- we ignore the exception here. 
Maybe the metadata has been removed just now + result.unretrievable++; + } finally { + if (mefFile != null) { + FileUtils.deleteQuietly(mefFile.toFile()); + } + } + } else { + result.unchangedMetadata++; + } + } + } + + private void updateMetadata(RecordInfo ri, String id, Element md, + Element info, boolean localRating, boolean force) throws Exception { + String date = localUuids.getChangeDate(ri.uuid); + + + try { + Integer groupIdVal = null; + if (StringUtils.isNotEmpty(params.getOwnerIdGroup())) { + groupIdVal = getGroupOwner(); + } + + params.getValidate().validate(dataMan, context, md, groupIdVal); + } catch (Exception e) { + log.info("Ignoring invalid metadata uuid: " + ri.uuid); + result.doesNotValidate++; + return; + } + + final IMetadataManager metadataManager = context.getBean(IMetadataManager.class); + Metadata metadata; + if (!force && !ri.isMoreRecentThan(date)) { + if (log.isDebugEnabled()) + log.debug(" - XML not changed for local metadata with uuid:" + ri.uuid); + result.unchangedMetadata++; + metadata = metadataRepository.findOneById(Integer.valueOf(id)); + if (metadata == null) { + throw new NoSuchElementException("Unable to find a metadata with ID: " + id); + } + } else { + if (params.mefFormatFull && ri.schema.startsWith(ISO19139SchemaPlugin.IDENTIFIER)) { + // In GeoNetwork 3.x, links to resources changed: + // * thumbnails contains full URL instead of file name only + // * API mode change old URL structure. 
+ MetadataResourceDatabaseMigration.updateMetadataResourcesLink(md, null, settingManager); + } + + if (!params.xslfilter.equals("")) { + md = HarvesterUtil.processMetadata(dataMan.getSchema(ri.schema), + md, processName, processParams); + } + // update metadata + if (log.isDebugEnabled()) + log.debug(" - Updating local metadata with id=" + id); + + boolean validate = false; + boolean ufo = params.mefFormatFull; + boolean updateDateStamp = true; + String language = context.getLanguage(); + metadataManager.updateMetadata(context, id, md, validate, ufo, language, ri.changeDate, + updateDateStamp, IndexingMode.none); + metadata = metadataRepository.findOneById(Integer.valueOf(id)); + result.updatedMetadata++; + if (force) { + //change ownership of metadata to new harvester + metadata.getHarvestInfo().setUuid(params.getUuid()); + metadata.getSourceInfo().setSourceId(params.getUuid()); + + metadataManager.save(metadata); + } + } + + metadata.getCategories().clear(); + addCategories(metadata, params.getCategories(), localCateg, context, null, true); + metadata = metadataRepository.findOneById(Integer.valueOf(id)); + + Element general = info.getChild("general"); + + String popularity = general.getChildText("popularity"); + + if (!localRating) { + String rating = general.getChildText("rating"); + if (rating != null) { + metadata.getDataInfo().setRating(Integer.valueOf(rating)); + } + } + + if (popularity != null) { + metadata.getDataInfo().setPopularity(Integer.valueOf(popularity)); + } + + if (params.createRemoteCategory) { + Element categs = info.getChild("categories"); + if (categs != null) { + Importer.addCategoriesToMetadata(metadata, categs, context); + } + } + + if (((ArrayList) params.getGroupCopyPolicy()).isEmpty()) { + addPrivileges(id, params.getPrivileges(), localGroups, context); + } else if (info != null){ + addPrivilegesFromGroupPolicy(id, info.getChild("privileges")); + } + + metadataManager.save(metadata); + + dataMan.indexMetadata(id, Math.random() < 
0.01); + } + + private void handleFile(String id, String file, MetadataResourceVisibility visibility, String changeDate, + InputStream is, Element files) throws Exception { + if (files == null) { + if (log.isDebugEnabled()) + log.debug(" - No file found in info.xml. Cannot update file:" + file); + } else { + final Store store = context.getBean("resourceStore", Store.class); + final IMetadataUtils metadataUtils = context.getBean(IMetadataUtils.class); + final String metadataUuid = metadataUtils.getMetadataUuid(id); + removeOldFile(store, metadataUuid, files, visibility); + saveFile(store, metadataUuid, file, visibility, changeDate, is); + } + } + + private void removeOldFile(Store store, String metadataUuid, Element infoFiles, MetadataResourceVisibility visibility) throws Exception { + final List resources = store.getResources(context, metadataUuid, visibility, null, true); + for (MetadataResource resource: resources) { + if (infoFiles != null && !existsFile(resource.getId(), infoFiles)) { + if (log.isDebugEnabled()) { + log.debug(" - Removing old " + metadataUuid + " file with name=" + resource.getFilename()); + } + store.delResource(context, metadataUuid, visibility, resource.getFilename(), true); + } + } + } + + private boolean existsFile(String fileName, Element files) { + @SuppressWarnings("unchecked") + List list = files.getChildren("file"); + + for (Element elem : list) { + String name = elem.getAttributeValue("name"); + + if (fileName.equals(name)) { + return true; + } + } + + return false; + } + + private void saveFile(final Store store, String metadataUuid, String file, + MetadataResourceVisibility visibility, String changeDate, InputStream is) throws Exception { + ISODate remIsoDate = new ISODate(changeDate); + boolean saveFile; + + final MetadataResource description = store.getResourceDescription(context, metadataUuid, visibility, file, true); + if (description == null) { + saveFile = true; + } else { + ISODate locIsoDate = new 
ISODate(description.getLastModification().getTime(), false); + saveFile = (remIsoDate.timeDifferenceInSeconds(locIsoDate) > 0); + } + + if (saveFile) { + if (log.isDebugEnabled()) { + log.debug(" - Adding remote " + metadataUuid + " file with name:" + file); + } + + store.putResource(context, metadataUuid, file, is, remIsoDate.toDate(), visibility, true); + } else { + if (log.isDebugEnabled()) { + log.debug(" - Nothing to do in dir " + metadataUuid + " for file with name:" + file); + } + } + } + + /** + * Return true if the uuid is present in the remote node + */ + + private boolean exists(SortedSet records, String uuid) { + // Records is a TreeSet sorted by uuid attribute. + // Method equals of RecordInfo only checks equality using `uuid` attribute. + // TreeSet.contains can be used more efficiently instead of doing a loop over all the recordInfo elements. + RecordInfo recordToTest = new RecordInfo(uuid, null); + return records.contains(recordToTest); + + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java new file mode 100644 index 00000000000..163763e5a0a --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java @@ -0,0 +1,213 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. 
+//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.geonet40; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.io.CharStreams; +import org.apache.http.Header; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.message.BasicHeader; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.search.aggregations.Aggregation; +import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.terms.ParsedStringTerms; +import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; +import org.elasticsearch.search.aggregations.metrics.ParsedTopHits; +import org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder; +import org.elasticsearch.xcontent.ContextParser; +import org.elasticsearch.xcontent.NamedXContentRegistry; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentParser; 
+import org.elasticsearch.xcontent.json.JsonXContent; +import org.fao.geonet.domain.Group; +import org.fao.geonet.domain.Source; +import org.fao.geonet.exceptions.BadParameterEx; +import org.fao.geonet.kernel.setting.SettingManager; +import org.fao.geonet.lib.Lib; +import org.fao.geonet.utils.GeonetHttpRequestFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.client.ClientHttpResponse; +import org.springframework.stereotype.Component; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +@Component +public class GeoNetworkApiClient { + @Autowired + private GeonetHttpRequestFactory requestFactory; + + @Autowired + private SettingManager settingManager; + + /** + * Retrieves the list of sources from the GeoNetwork server and creates a Map using the source uuid as the key. + * + * @param serverUrl GeoNetwork server URL. + * @return Map of sources using the source uuid as the key. + * @throws URISyntaxException + * @throws IOException + */ + public Map retrieveSources(String serverUrl) throws URISyntaxException, IOException { + String sourcesJson = retrieveUrl(addUrlSlash(serverUrl) + "api/sources"); + + ObjectMapper objectMapper = new ObjectMapper(); + List sourceList + = objectMapper.readValue(sourcesJson, new TypeReference<>(){}); + + Map sourceMap = new HashMap<>(); + sourceList.forEach(s -> sourceMap.put(s.getUuid(), s)); + return sourceMap; + } + + + /** + * Retrieves the list of groups from the GeoNetwork server. + * + * @param serverUrl GeoNetwork server URL. + * @return List of groups. 
+ * @throws URISyntaxException + * @throws IOException + */ + public List retrieveGroups(String serverUrl) throws URISyntaxException, IOException { + String groupsJson = retrieveUrl(addUrlSlash(serverUrl) + "api/groups"); + + ObjectMapper objectMapper = new ObjectMapper(); + return objectMapper.readValue(groupsJson, new TypeReference<>(){}); + } + + /** + * Queries the GeoNetwork server and returns the results. + * + * @param serverUrl GeoNetwork server URL. + * @param query ElasticSearch query. + * @return + * @throws URISyntaxException + * @throws IOException + */ + public SearchResponse query(String serverUrl, String query) throws URISyntaxException, IOException { + final HttpClientBuilder clientBuilder = requestFactory.getDefaultHttpClientBuilder(); + Lib.net.setupProxy(settingManager, clientBuilder, new URL(addUrlSlash(serverUrl)).getHost()); + + HttpPost httpMethod = new HttpPost(createUrl(addUrlSlash(serverUrl) + "api/search/records/_search")); + final Header headerContentType = new BasicHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); + final Header header = new BasicHeader(HttpHeaders.ACCEPT, ContentType.APPLICATION_JSON.getMimeType()); + httpMethod.addHeader(headerContentType); + httpMethod.addHeader(header); + final StringEntity entity = new StringEntity(query); + httpMethod.setEntity(entity); + + try (ClientHttpResponse httpResponse = requestFactory.execute(httpMethod)){ + String jsonResponse = CharStreams.toString(new InputStreamReader(httpResponse.getBody())); + + return getSearchResponseFromJson(jsonResponse); + } + } + + + /** + * Retrieves a metadata MEF file from the GeoNetwork server. + * + * @param serverUrl GeoNetwork server URL. + * @param uuid Metadata UUID to retrieve. 
+ * @return + * @throws URISyntaxException + * @throws IOException + */ + public Path retrieveMEF(String serverUrl, String uuid) throws URISyntaxException, IOException { + Path tempFile = Files.createTempFile("temp-", ".dat"); + + String url = addUrlSlash(serverUrl) + + "/api/records/" + uuid + "/formatters/zip?withRelated=false"; + + HttpGet httpMethod = new HttpGet(createUrl(url)); + final Header header = new BasicHeader(HttpHeaders.ACCEPT, "application/x-gn-mef-2-zip"); + httpMethod.addHeader(header); + + final HttpClientBuilder clientBuilder = requestFactory.getDefaultHttpClientBuilder(); + Lib.net.setupProxy(settingManager, clientBuilder, new URL(addUrlSlash(serverUrl)).getHost()); + + try (ClientHttpResponse httpResponse = requestFactory.execute(httpMethod)){ + Files.copy(httpResponse.getBody(), tempFile, StandardCopyOption.REPLACE_EXISTING); + } + + return tempFile; + } + + + private URI createUrl(String jsonUrl) throws URISyntaxException { + return new URI(jsonUrl); + } + + private String retrieveUrl(String url) throws URISyntaxException, IOException { + if (!Lib.net.isUrlValid(url)) + throw new BadParameterEx("Invalid URL", url); + HttpGet httpMethod = new HttpGet(createUrl(url)); + final Header header = new BasicHeader(HttpHeaders.ACCEPT, ContentType.APPLICATION_JSON.toString()); + httpMethod.addHeader(header); + + final HttpClientBuilder clientBuilder = requestFactory.getDefaultHttpClientBuilder(); + Lib.net.setupProxy(settingManager, clientBuilder, new URL(url).getHost()); + + try ( ClientHttpResponse httpResponse = requestFactory.execute(httpMethod);){ + return CharStreams.toString(new InputStreamReader(httpResponse.getBody())); + } + } + + private String addUrlSlash(String url) { + return url + (!url.endsWith("/") ? 
"/" : ""); + } + + private List getDefaultNamedXContents() { + Map> map = new HashMap<>(); + map.put(TopHitsAggregationBuilder.NAME, (p, c) -> ParsedTopHits.fromXContent(p, (String) c)); + map.put(StringTerms.NAME, (p, c) -> ParsedStringTerms.fromXContent(p, (String) c)); + map.put(DateHistogramAggregationBuilder.NAME,(p, c) -> ParsedStringTerms.fromXContent(p, (String) c)); + return map.entrySet().stream() + .map(entry -> new NamedXContentRegistry.Entry(Aggregation.class, new ParseField(entry.getKey()), entry.getValue())) + .collect(Collectors.toList()); + } + + private SearchResponse getSearchResponseFromJson(String jsonResponse) throws IOException { + NamedXContentRegistry registry = new NamedXContentRegistry(getDefaultNamedXContents()); + XContentParser parser = JsonXContent.jsonXContent.createParser(registry, null, jsonResponse); + return SearchResponse.fromXContent(parser); + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Geonet40Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Geonet40Harvester.java new file mode 100644 index 00000000000..2aad71ced60 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Geonet40Harvester.java @@ -0,0 +1,94 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. 
+//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.geonet40; + +import org.fao.geonet.Logger; +import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester; +import org.fao.geonet.kernel.harvest.harvester.HarvestResult; +import org.jdom.Element; + +import java.sql.SQLException; + +public class Geonet40Harvester extends AbstractHarvester { + public static final String TYPE = "geonetwork40"; + + @Override + protected GeonetParams createParams() { + return new GeonetParams(dataMan); + } + + + @Override + protected void storeNodeExtra(GeonetParams params, String path, + String siteId, String optionsId) throws SQLException { + setParams(params); + + harvesterSettingsManager.add("id:" + siteId, "host", params.host); + harvesterSettingsManager.add("id:" + siteId, "node", params.getNode()); + harvesterSettingsManager.add("id:" + siteId, "useChangeDateForUpdate", params.useChangeDateForUpdate()); + harvesterSettingsManager.add("id:" + siteId, "createRemoteCategory", params.createRemoteCategory); + harvesterSettingsManager.add("id:" + siteId, "mefFormatFull", params.mefFormatFull); + harvesterSettingsManager.add("id:" + siteId, "xslfilter", params.xslfilter); + + //--- store search nodes + + for (Search s : params.getSearches()) { + String searchID = 
harvesterSettingsManager.add(path, "search", ""); + + harvesterSettingsManager.add("id:" + searchID, "freeText", s.freeText); + harvesterSettingsManager.add("id:" + searchID, "title", s.title); + harvesterSettingsManager.add("id:" + searchID, "abstract", s.abstrac); + harvesterSettingsManager.add("id:" + searchID, "keywords", s.keywords); + harvesterSettingsManager.add("id:" + searchID, "sourceUuid", s.sourceUuid); + } + + //--- store group mapping + + for (Group g : params.getGroupCopyPolicy()) { + String groupID = harvesterSettingsManager.add(path, "groupCopyPolicy", ""); + + harvesterSettingsManager.add("id:" + groupID, "name", g.name); + harvesterSettingsManager.add("id:" + groupID, "policy", g.policy); + } + } + + @Override + public void addHarvestInfo(Element info, String id, String uuid) { + super.addHarvestInfo(info, id, uuid); + + String small = context.getBaseUrl() + "/" + params.getNode() + + "/en/resources.get?access=public&id=" + id + "&fname="; + + String large = context.getBaseUrl() + "/" + params.getNode() + + "/en/graphover.show?access=public&id=" + id + "&fname="; + + info.addContent(new Element("smallThumbnail").setText(small)); + info.addContent(new Element("largeThumbnail").setText(large)); + } + + public void doHarvest(Logger log) throws Exception { + Harvester h = new Harvester(cancelMonitor, log, context, params); + result = h.harvest(log); + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeonetParams.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeonetParams.java new file mode 100644 index 00000000000..5e1410d15c5 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeonetParams.java @@ -0,0 +1,213 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme 
(WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.geonet40; + +import org.apache.commons.lang.StringUtils; +import org.fao.geonet.Util; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.exceptions.BadInputEx; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.harvest.harvester.AbstractParams; +import org.fao.geonet.utils.Log; +import org.jdom.Element; + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; + +public class GeonetParams extends AbstractParams { + public String host; + + public boolean createRemoteCategory; + + public boolean mefFormatFull; + + /** + * The filter is a process (see schema/process folder) which depends on the schema. It could be + * composed of parameter which will be sent to XSL transformation using the following syntax : + *
+     * anonymizer?protocol=MYLOCALNETWORK:FILEPATH&email=gis@organisation.org&thesaurus=MYORGONLYTHEASURUS
+     * 
+ */ + public String xslfilter; + + private String node; + + private Boolean useChangeDateForUpdate; + + private ArrayList alSearches = new ArrayList<>(); + + private ArrayList alCopyPolicy = new ArrayList<>(); + + public GeonetParams(DataManager dm) { + super(dm); + } + + @Override + public void create(Element node) throws BadInputEx { + super.create(node); + + Element site = node.getChild("site"); + Element policy = node.getChild("groupsCopyPolicy"); + Element searches = node.getChild("searches"); + + host = Util.getParam(site, "host", ""); + + this.setNode(Util.getParam(site, "node", "srv")); + this.setUseChangeDateForUpdate(Util.getParam(site, "useChangeDateForUpdate", false)); + + createRemoteCategory = Util.getParam(site, "createRemoteCategory", false); + mefFormatFull = Util.getParam(site, "mefFormatFull", false); + xslfilter = Util.getParam(site, "xslfilter", ""); + + addSearches(searches); + addCopyPolicy(policy); + } + + @Override + public void update(Element node) throws BadInputEx { + super.update(node); + + Element site = node.getChild("site"); + Element searches = node.getChild("searches"); + Element policy = node.getChild("groupsCopyPolicy"); + + host = Util.getParam(site, "host", host); + this.setNode(Util.getParam(site, "node", this.getNode())); + this.setUseChangeDateForUpdate(Util.getParam(site, "useChangeDateForUpdate", false)); + createRemoteCategory = Util.getParam(site, "createRemoteCategory", createRemoteCategory); + mefFormatFull = Util.getParam(site, "mefFormatFull", mefFormatFull); + xslfilter = Util.getParam(site, "xslfilter", ""); + + //--- if some search queries are given, we drop the previous ones and + //--- set these new ones + + if (searches != null) + addSearches(searches); + + if (policy != null) + addCopyPolicy(policy); + } + + public Iterable getSearches() { + return alSearches; + } + + public Iterable getGroupCopyPolicy() { + return alCopyPolicy; + } + + public String getServletPath() { + if (StringUtils.isNotEmpty(host)) { + 
try { + return new URL(host).getPath(); + } catch (MalformedURLException ex) { + Log.error(Geonet.HARVEST_MAN, ex.getMessage(), ex); + } + } + + return ""; + } + + public boolean isSearchEmpty() { + return alSearches.isEmpty(); + } + + public GeonetParams copy() { + GeonetParams copy = new GeonetParams(dm); + copyTo(copy); + + copy.host = host; + copy.node = node; + copy.useChangeDateForUpdate = useChangeDateForUpdate; + copy.createRemoteCategory = createRemoteCategory; + copy.mefFormatFull = mefFormatFull; + copy.xslfilter = xslfilter; + + for (Search s : alSearches) + copy.alSearches.add(s.copy()); + + for (Group g : alCopyPolicy) + copy.alCopyPolicy.add(g.copy()); + + return copy; + } + + private void addSearches(Element searches) throws BadInputEx { + alSearches.clear(); + + if (searches == null) + return; + + for (Object o : searches.getChildren("search")) { + Element search = (Element) o; + + alSearches.add(new Search(search)); + } + } + + private void addCopyPolicy(Element policy) throws BadInputEx { + alCopyPolicy.clear(); + + if (policy == null) + return; + + for (Object o : policy.getChildren("group")) { + Element group = (Element) o; + + alCopyPolicy.add(new Group(group)); + } + } + + public String getNode() { + if (this.node == null) { + //default node + this.setNode("srv"); + } + return node; + } + + public void setNode(String node) { + this.node = node; + } + + public boolean useChangeDateForUpdate() { + if (this.useChangeDateForUpdate == null) { + this.setUseChangeDateForUpdate(false); + } + return useChangeDateForUpdate; + } + + public void setUseChangeDateForUpdate(Boolean useChangeDateForUpdate) { + if (useChangeDateForUpdate == null) { + useChangeDateForUpdate = false; + } + this.useChangeDateForUpdate = useChangeDateForUpdate; + } + + @Override + public String getIcon() { + return null; + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Group.java 
b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Group.java new file mode 100644 index 00000000000..aa35b0294da --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Group.java @@ -0,0 +1,140 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.geonet40; + +import org.fao.geonet.exceptions.BadInputEx; +import org.fao.geonet.exceptions.BadParameterEx; +import org.fao.geonet.exceptions.MissingParameterEx; +import org.jdom.Element; + +//============================================================================= + +class Group { + //--------------------------------------------------------------------------- + //--- + //--- Constructor + //--- + //--------------------------------------------------------------------------- + + public String name; + + //--------------------------------------------------------------------------- + public CopyPolicy policy; + + //--------------------------------------------------------------------------- + //--- + //--- API methods + //--- + //--------------------------------------------------------------------------- + + Group() { + } + + //--------------------------------------------------------------------------- + + public Group(Element group) throws BadInputEx { + name = group.getAttributeValue("name"); + + if (name == null) + throw new MissingParameterEx("attribute:name", group); + + String t = group.getAttributeValue("policy"); + + if (t == null) + throw new MissingParameterEx("attribute:policy", group); + + policy = CopyPolicy.parse(t); + + if (policy == null) + throw new BadParameterEx("attribute:policy", policy); + + //--- '1' is the 'All' group + + if (policy == CopyPolicy.COPY_TO_INTRANET && !isAllGroup()) + throw new BadParameterEx("attribute:policy", policy); + + if (policy == CopyPolicy.CREATE_AND_COPY && isAllGroup()) + throw new BadParameterEx("attribute:policy", policy); + } + + //--------------------------------------------------------------------------- + //--- + //--- Variables + //--- + //--------------------------------------------------------------------------- + + public Group copy() { + Group m 
= new Group(); + + m.name = name; + m.policy = policy; + + return m; + } + + public boolean isAllGroup() { + return name.equals("all"); + } + + //--------------------------------------------------------------------------- + //--- + //--- CopyType + //--- + //--------------------------------------------------------------------------- + + public enum CopyPolicy { + COPY("copy"), + CREATE_AND_COPY("createAndCopy"), + COPY_TO_INTRANET("copyToIntranet"); + + //------------------------------------------------------------------------ + + private String policy; + + //------------------------------------------------------------------------ + + private CopyPolicy(String policy) { + this.policy = policy; + } + + //------------------------------------------------------------------------ + + public static CopyPolicy parse(String policy) { + if (policy.equals(COPY.toString())) return COPY; + if (policy.equals(CREATE_AND_COPY.toString())) return CREATE_AND_COPY; + if (policy.equals(COPY_TO_INTRANET.toString())) return COPY_TO_INTRANET; + + return null; + } + + //------------------------------------------------------------------------ + + public String toString() { + return policy; + } + } +} + +//============================================================================= + diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Harvester.java new file mode 100644 index 00000000000..0f15f5e6f83 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Harvester.java @@ -0,0 +1,284 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can 
redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.geonet40; + +import com.google.common.collect.Lists; +import jeeves.server.context.ServiceContext; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.search.SearchHit; +import org.fao.geonet.Logger; +import org.fao.geonet.domain.Source; +import org.fao.geonet.domain.SourceType; +import org.fao.geonet.exceptions.*; +import org.fao.geonet.kernel.harvest.harvester.HarvestError; +import org.fao.geonet.kernel.harvest.harvester.HarvestResult; +import org.fao.geonet.kernel.harvest.harvester.IHarvester; +import org.fao.geonet.kernel.harvest.harvester.RecordInfo; +import org.fao.geonet.kernel.setting.SettingManager; +import org.fao.geonet.lib.Lib; +import org.fao.geonet.repository.SourceRepository; +import org.fao.geonet.resources.Resources; +import org.fao.geonet.utils.GeonetHttpRequestFactory; +import org.fao.geonet.utils.Log; +import org.fao.geonet.utils.XmlRequest; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import 
java.nio.file.Path; +import java.util.*; +import java.util.concurrent.atomic.AtomicBoolean; + +class Harvester implements IHarvester { + public static final String LOGGER_NAME = "geonetwork.harvester.geonetwork40"; + + private final AtomicBoolean cancelMonitor; + private Logger log; + + private GeonetParams params; + private ServiceContext context; + + private GeoNetworkApiClient geoNetworkApiClient; + + /** + * Contains a list of accumulated errors during the executing of this harvest. + */ + private List errors = new LinkedList<>(); + + //--------------------------------------------------------------------------- + + public Harvester(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, GeonetParams params) { + this.cancelMonitor = cancelMonitor; + this.log = log; + this.context = context; + this.params = params; + } + + public HarvestResult harvest(Logger log) throws Exception { + this.log = log; + + geoNetworkApiClient = context.getBean(GeoNetworkApiClient.class); + + //--- login + if (params.isUseAccount()) { + // TODO: Implement + } + + //--- retrieve info on categories and groups + + log.info("Retrieving information from : " + params.host); + + String serverUrl = getServerUrl(); + Map sources = geoNetworkApiClient.retrieveSources(serverUrl); + + List groupList = geoNetworkApiClient.retrieveGroups(serverUrl); + + //--- perform all searches + + // Use a TreeSet because in the align phase we need to check if a given UUID is already in the set.. 
+ SortedSet records = new TreeSet<>(Comparator.comparing(RecordInfo::getUuid)); + + boolean error = false; + List searches = Lists.newArrayList(params.getSearches()); + if (params.isSearchEmpty()) { + searches.add(Search.createEmptySearch(1, 2)); + } + + int pageSize = 30; + for (Search s : searches) { + if (cancelMonitor.get()) { + return new HarvestResult(); + } + log.info(String.format("Processing search with these parameters %s", s.toString())); + int from = 0; + int to = from + (pageSize - 1); + s.setRange(from, to); + + long resultCount = Integer.MAX_VALUE; + log.info("Searching on : " + params.getName()); + + while (from < resultCount && !error) { + try { + SearchResponse searchResponse = doSearch(s); + resultCount = searchResponse.getHits().getTotalHits().value; + + records.addAll(processSearchResult(searchResponse.getHits().getHits())); + } catch (Exception t) { + error = true; + log.error("Unknown error trying to harvest"); + log.error(t.getMessage()); + log.error(t); + errors.add(new HarvestError(context, t)); + } catch (Throwable t) { + error = true; + log.fatal("Something unknown and terrible happened while harvesting"); + log.fatal(t.getMessage()); + log.error(t); + errors.add(new HarvestError(context, t)); + } + + from = from + pageSize; + to = to + pageSize; + s.setRange(from, to); + + } + } + + log.info("Total records processed from this search :" + records.size()); + + //--- align local node + HarvestResult result = new HarvestResult(); + if (!error) { + try { + Aligner aligner = new Aligner(cancelMonitor, log, context, params, groupList); + result = aligner.align(records, errors); + + updateSources(records, sources); + } catch (Exception t) { + log.error("Unknown error trying to harvest"); + log.error(t.getMessage()); + errors.add(new HarvestError(this.context, t)); + } catch (Throwable t) { + log.fatal("Something unknown and terrible happened while harvesting"); + log.fatal(t.getMessage()); + errors.add(new HarvestError(this.context, t)); + } + 
} else { + log.warning("Due to previous errors the align process has not been called"); + } + + return result; + } + + private Set processSearchResult(SearchHit[] searchHits) { + Set records = new HashSet<>(searchHits.length); + for (SearchHit md : searchHits) { + if (cancelMonitor.get()) { + return Collections.emptySet(); + } + + try { + String uuid = md.getSourceAsMap().get("uuid").toString(); + String schema = md.getSourceAsMap().get("documentStandard").toString(); + String changeDate = md.getSourceAsMap().get("dateStamp").toString(); + String source = md.getSourceAsMap().get("sourceCatalogue").toString(); + records.add(new RecordInfo(uuid, changeDate, schema, source)); + + } catch (Exception e) { + HarvestError harvestError = new HarvestError(context, e); + harvestError.setDescription("Malformed element '" + + md.toString() + "'"); + harvestError + .setHint("It seems that there was some malformed element. Check with your administrator."); + this.errors.add(harvestError); + } + + } + return records; + } + + //--------------------------------------------------------------------------- + + private SearchResponse doSearch(Search s) throws OperationAbortedEx { + try { + String queryBody = s.createElasticsearchQuery(); + return geoNetworkApiClient.query(getServerUrl(), queryBody); + } catch (Exception ex) { + Log.error(LOGGER_NAME, ex.getMessage(), ex); + HarvestError harvestError = new HarvestError(context, ex); + harvestError.setDescription("Error while searching on " + + params.getName() + ". 
"); + harvestError.setHint("Check with your administrator."); + this.errors.add(harvestError); + throw new OperationAbortedEx("Raised exception when searching", ex); + } + } + + private void updateSources(SortedSet records, + Map remoteSources) throws MalformedURLException { + log.info("Aligning source logos from for : " + params.getName()); + + //--- collect all different sources that have been harvested + + Set sources = new HashSet<>(); + + for (RecordInfo ri : records) { + sources.add(ri.source); + } + + //--- update local sources and retrieve logos (if the case) + + String siteId = context.getBean(SettingManager.class).getSiteId(); + final Resources resources = context.getBean(Resources.class); + + for (String sourceUuid : sources) { + if (!siteId.equals(sourceUuid)) { + Source source = remoteSources.get(sourceUuid); + + if (source != null) { + retrieveLogo(context, resources, params.host, sourceUuid); + } else { + String sourceName = "(unknown)"; + source = new Source(sourceUuid, sourceName, new HashMap<>(), SourceType.harvester); + resources.copyUnknownLogo(context, sourceUuid); + } + + context.getBean(SourceRepository.class).save(source); + } + } + } + + private void retrieveLogo(ServiceContext context, final Resources resources, String url, String uuid) throws MalformedURLException { + String logo = uuid + ".gif"; + String baseUrl = url; + if (!new URL(baseUrl).getPath().endsWith("/")) { + // Needed to make it work when harvesting from a GN deployed at ROOT ("/") + baseUrl += "/"; + } + XmlRequest req = context.getBean(GeonetHttpRequestFactory.class).createXmlRequest(new URL(baseUrl)); + Lib.net.setupProxy(context, req); + req.setAddress(req.getAddress() + "images/logos/" + logo); + + final Path logoDir = resources.locateLogosDir(context); + + try { + resources.createImageFromReq(context, logoDir, logo, req); + } catch (IOException e) { + context.warning("Cannot retrieve logo file from : " + url); + context.warning(" (C) Logo : " + logo); + 
context.warning(" (C) Excep : " + e.getMessage()); + + resources.copyUnknownLogo(context, uuid); + } + } + + + public List getErrors() { + return errors; + } + + private String getServerUrl() { + return params.host + (params.host.endsWith("/") ? "" : "/") + params.getNode(); + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java new file mode 100644 index 00000000000..12a0e522d29 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java @@ -0,0 +1,156 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.geonet40; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.builder.ToStringBuilder; +import org.fao.geonet.Util; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.exceptions.BadParameterEx; +import org.fao.geonet.utils.Log; +import org.jdom.Element; + +import java.util.Iterator; + +//============================================================================= + +class Search { + public int from; + public int to; + public String freeText; + public String title; + public String abstrac; + public String keywords; + public String sourceUuid; + + public Search() { + } + + public Search(Element search) throws BadParameterEx { + freeText = Util.getParam(search, "freeText", ""); + title = Util.getParam(search, "title", ""); + abstrac = Util.getParam(search, "abstract", ""); + keywords = Util.getParam(search, "keywords", ""); + + Element source = search.getChild("source"); + + sourceUuid = Util.getParam(source, "uuid", ""); + } + + public static Search createEmptySearch(int from, int to) throws BadParameterEx { + Search s = new Search(new Element("search")); + s.setRange(from, to); + return s; + } + + public Search copy() { + Search s = new Search(); + + s.freeText = freeText; + s.title = title; + s.abstrac = abstrac; + s.keywords = keywords; + s.sourceUuid = sourceUuid; + s.from = from; + s.to = to; + + return s; + } + + public String createElasticsearchQuery() { + String sourceFilter = ""; + if (StringUtils.isNotEmpty(sourceUuid)) { + sourceFilter = String.format(",{\"term\": {\"sourceCatalogue\": \"%s\"}}", sourceUuid); + } + + String freeTextFilter = ""; + if (StringUtils.isNotEmpty(freeText)) { + freeTextFilter = String.format(",{\"term\": {\"any.default\": \"%s\"}}", freeText); + } + + String titleFilter = ""; + if (StringUtils.isNotEmpty(title)) { + titleFilter = 
String.format(",{\"term\": {\"resourceTitleObject.default\": \"%s\"}}", title); + } + + String abstractFilter = ""; + if (StringUtils.isNotEmpty(abstrac)) { + abstractFilter = String.format(",{\"term\": {\"resourceAbstractObject.default\": \"%s\"}}", abstrac); + } + + String keywordFilter = ""; + if (StringUtils.isNotEmpty(keywords)) { + keywordFilter = String.format(",{\"term\": {\"tag.default\": \"%s\"}}", keywords); + } + + String queryBody = String.format("{\n" + + " \"from\": %d,\n" + + " \"size\": %d,\n" + + " \"sort\": [\"_score\"],\n" + + " \"query\": {\"bool\": {\"must\": [{\"terms\": {\"isTemplate\": [\"n\"]}}%s%s%s%s%s]}},\n" + + " \"_source\": {\"includes\": [\n" + + " \"uuid\",\n" + + " \"id\",\n" + + " \"isTemplate\",\n" + + " \"sourceCatalogue\",\n" + + " \"dateStamp\",\n" + + " \"documentStandard\"\n" + + " ]},\n" + + " \"track_total_hits\": true\n" + + "}", from, to, sourceFilter, freeTextFilter, titleFilter, abstractFilter, keywordFilter); + + + if (Log.isDebugEnabled(Geonet.HARVEST_MAN)) { + Log.debug(Geonet.HARVEST_MAN, "Search request is " + queryBody); + } + + return queryBody; + } + + private void add(Element req, String name, String value) { + if (value.length() != 0) + req.addContent(new Element(name).setText(value)); + } + + public void setRange(int from, int to) { + this.from = from; + this.to = to; + } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("from", from) + .append("to", to) + .append("freeText", freeText) + .append("title", title) + .append("abstrac", abstrac) + .append("keywords", keywords) + .append("sourceUuid", sourceUuid) + .toString(); + } +} + + + diff --git a/harvesters/src/main/resources/config-spring-geonetwork.xml b/harvesters/src/main/resources/config-spring-geonetwork.xml index 5bda0379065..e6096633f83 100644 --- a/harvesters/src/main/resources/config-spring-geonetwork.xml +++ b/harvesters/src/main/resources/config-spring-geonetwork.xml @@ -42,6 +42,8 @@ + + @@ -73,4 +75,7 @@ 
+ diff --git a/web-ui/src/main/resources/catalog/js/admin/HarvestSettingsController.js b/web-ui/src/main/resources/catalog/js/admin/HarvestSettingsController.js index dbcd3d5fc9e..00c2fb2a155 100644 --- a/web-ui/src/main/resources/catalog/js/admin/HarvestSettingsController.js +++ b/web-ui/src/main/resources/catalog/js/admin/HarvestSettingsController.js @@ -732,6 +732,44 @@ ); }; + $scope.geonetworkGetSources2 = function (url) { + $http + .get($scope.proxyUrl + encodeURIComponent(url + "/srv/api/sources/portal")) + .then( + function (response) { + $scope.geonetworkSources = []; + + angular.forEach(response.data, function (source) { + $scope.geonetworkSources.push({ + uuid: source.uuid, + name: source.name + }); + }); + + $http + .get( + $scope.proxyUrl + encodeURIComponent(url + "/srv/api/sources/harvester") + ) + .then( + function (response) { + angular.forEach(response.data, function (source) { + $scope.geonetworkSources.push({ + uuid: source.uuid, + name: source.name + }); + }); + }, + function (response) { + // TODO + } + ); + }, + function (response) { + // TODO + } + ); + }; + // OGCWxS var ogcwxsGet = function () { $scope.ogcwxsTemplates = []; diff --git a/web-ui/src/main/resources/catalog/locales/en-admin.json b/web-ui/src/main/resources/catalog/locales/en-admin.json index 1379e8db5b9..ca11ed330e1 100644 --- a/web-ui/src/main/resources/catalog/locales/en-admin.json +++ b/web-ui/src/main/resources/catalog/locales/en-admin.json @@ -322,7 +322,9 @@ "harvester-geonetwork": "GeoNetwork (from 2.1 to 3.x)", "harvester-geonetwork20": "GeoNetwork (2.0)", "harvester-geonetwork20Help": "Harvest GeoNetwork node in 2.0 or lower version. Please note that harvesting old GeoNetwork nodes is unsafe. 
The old nodes do not have unique site ids and this could cause unpredictable results (like removing nodes from other harvestings).", - "harvester-geonetworkHelp": "Harvest from another GeoNetwork catalog.", + "harvester-geonetworkHelp": "Harvest from another GeoNetwork 3.x catalog.", + "harvester-geonetwork40": "GeoNetwork (4.0)", + "harvester-geonetwork40Help": "Harvest from another GeoNetwork catalog (from 4.x)", "harvester-oaipmh": "OAI/PMH", "harvester-oaipmhHelp": "Harvest using OAI Protocol for metadata harvesting 2.0", "harvester-ogcwxs": "OGC Web Services", diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/geonetwork40.html b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/geonetwork40.html new file mode 100644 index 00000000000..5c81687a535 --- /dev/null +++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/geonetwork40.html @@ -0,0 +1,251 @@ +
+ + +
+
+
+
+ +
+ + harvesterConnectionConfiguration {{('harvester-' + + harvesterSelected['@type']) | translate}} + +
+
+ + +

geonetwork-hostHelp

+
+
+ +

geonetwork-nodeHelp

+
+
+ +
+ + harvesterFilter + + +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+ +
+ + + + +
+
+
+
+ +
+ + harvesterResponseProcessing + {{harvesterSelected['@type'] | translate}} + + +
+ +
+ +
+ +

geonetwork-mefFormatFullHelp

+
+ +
+ +

geonetwork-useChangeDateForUpdateHelp

+
+ +
+ +

geonetwork-createRemoteCategoryHelp

+
+ +
+ +
+ + +

geonetwork-xslfilterHelp

+
+ +
+ +
+

harvesterValidateHelp

+
+
+ +
+ diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/geonetwork40.js b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/geonetwork40.js new file mode 100644 index 00000000000..adcffc11a61 --- /dev/null +++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/geonetwork40.js @@ -0,0 +1,110 @@ +// This is not that much elegant and should be replaced by some kind +// of Angular module. +var gnHarvestergeonetwork40 = { + createNew : function() { + return { + "@id": "", + "@type": "geonetwork40", + "owner": [""], + "ownerGroup": [""], + "ownerUser": [""], + "site": { + "name": "", + "uuid": "", + "account": { + "use": false, + "username": "", + "password": "" + }, + "host": [], + "node": "srv", + "useChangeDateForUpdate": false, + "createRemoteCategory": false, + "mefFormatFull": false, + "xslfilter": [] + }, + "content": { + "validate": "NOVALIDATION", + "importxslt": "none" + }, + "options": { + "every": "0 0 0 ? * *", + "oneRunOnly": false, + "overrideUuid": "SKIP", + "status": "" + }, + "searches": [{ + "freeText": "", + "title": "", + "abstract": "", + "keywords": "", + "source": { + "uuid": [], + "name": [] + } + }], + "ifRecordExistAppendPrivileges": false, + "privileges": [{ + "@id": "1", + "operation": [ + {"@name": "view"}, + {"@name": "dynamic"} + ] + }], + "categories" : [{'@id': ''}], + "groupsCopyPolicy": [], + "info": { + "lastRun": [], + "running": false + } + }; + }, + buildResponse : function(h, $scope) { + var body = '' + + ' ' + h.ownerGroup[0] + '' + + ' ' + h.ownerUser[0] + '' + + ' ' + + ' ' + h.site.name + '' + + ' ' + h.site.host.replace(/&/g, '&') + '' + + ' ' + h.site.node + '' + + ' ' + h.site.useChangeDateForUpdate + '' + + ' ' + h.site.createRemoteCategory + '' + + ' ' + h.site.icon + '' + + ' ' + h.site.mefFormatFull + '' + + ' ' + + (h.site.xslfilter[0] ? 
h.site.xslfilter.replace(/&/g, '&') : '') + + '' + + ' ' + + ' ' + h.site.account.use + '' + + ' ' + h.site.account.username + '' + + ' ' + h.site.account.password + '' + + ' ' + + ' ' + + ' ' + + ' ' + + ' ' + ((h.searches[0] && h.searches[0].freeText) || '') + '' + + ' ' + ((h.searches[0] && h.searches[0].title) || '') + '' + + ' ' + ((h.searches[0] && h.searches[0]['abstract']) || '') + '' + + ' ' + ((h.searches[0] && h.searches[0].keywords) || '') + '' + + ' ' + + ' ' + ((h.searches[0] && h.searches[0].source.uuid) || '') + '' + + ' ' + + ' ' + + ' ' + + ' ' + + ' ' + + ' ' + h.options.oneRunOnly + '' + + ' ' + h.options.overrideUuid + '' + + ' ' + h.options.every + '' + + ' ' + h.options.status + '' + + ' ' + + ' ' + + ' ' + h.content.validate + '' + + ' ' + h.content.importxslt + '' + + ' ' + + $scope.buildResponseGroup(h) + + $scope.buildResponseCategory(h) + ''; + return body; + } +}; diff --git a/web/src/main/webapp/xsl/xml/harvesting/geonetwork40.xsl b/web/src/main/webapp/xsl/xml/harvesting/geonetwork40.xsl new file mode 100644 index 00000000000..919036aebdd --- /dev/null +++ b/web/src/main/webapp/xsl/xml/harvesting/geonetwork40.xsl @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <xsl:value-of select="children/title/value"/> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From d6f0bde87544aa37fbc779414f2619ddb81a8886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Mon, 25 Dec 2023 15:52:22 +0100 Subject: [PATCH 2/8] GeoNetwork 4.x harvester / add support for authentication and code cleanup --- .../harvester/geonet/GeonetParams.java | 9 +- .../harvest/harvester/geonet40/Aligner.java | 67 +++++++---- .../geonet40/GeoNetworkApiClient.java | 109 ++++++++++++++---- .../harvester/geonet40/Geonet40Harvester.java | 2 +- .../harvester/geonet40/GeonetParams.java | 2 +- .../harvest/harvester/geonet40/Group.java | 2 +- .../harvest/harvester/geonet40/Harvester.java | 28 
++++- .../harvest/harvester/geonet40/Search.java | 8 +- 8 files changed, 162 insertions(+), 65 deletions(-) diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet/GeonetParams.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet/GeonetParams.java index 4a8efadb75b..0c87749a24e 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet/GeonetParams.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet/GeonetParams.java @@ -56,14 +56,15 @@ public class GeonetParams extends AbstractParams { private Boolean useChangeDateForUpdate; - private ArrayList alSearches = new ArrayList(); + private ArrayList alSearches = new ArrayList<>(); - private ArrayList alCopyPolicy = new ArrayList(); + private ArrayList alCopyPolicy = new ArrayList<>(); public GeonetParams(DataManager dm) { super(dm); } + @Override public void create(Element node) throws BadInputEx { super.create(node); @@ -80,11 +81,11 @@ public void create(Element node) throws BadInputEx { mefFormatFull = Util.getParam(site, "mefFormatFull", false); xslfilter = Util.getParam(site, "xslfilter", ""); - //checkPort(port); addSearches(searches); addCopyPolicy(policy); } + @Override public void update(Element node) throws BadInputEx { super.update(node); @@ -99,8 +100,6 @@ public void update(Element node) throws BadInputEx { mefFormatFull = Util.getParam(site, "mefFormatFull", mefFormatFull); xslfilter = Util.getParam(site, "xslfilter", ""); - //checkPort(port); - //--- if some search queries are given, we drop the previous ones and //--- set these new ones diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Aligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Aligner.java index 87b15015f10..3a4545d3cf4 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Aligner.java +++ 
b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Aligner.java @@ -35,10 +35,10 @@ import org.fao.geonet.domain.*; import org.fao.geonet.domain.userfeedback.RatingsSetting; import org.fao.geonet.exceptions.NoSchemaMatchesException; +import org.fao.geonet.kernel.AccessManager; import org.fao.geonet.kernel.DataManager; import org.fao.geonet.kernel.UpdateDatestamp; -import org.fao.geonet.kernel.datamanager.IMetadataManager; -import org.fao.geonet.kernel.datamanager.IMetadataUtils; +import org.fao.geonet.kernel.datamanager.*; import org.fao.geonet.kernel.harvest.BaseAligner; import org.fao.geonet.kernel.harvest.harvester.*; import org.fao.geonet.kernel.mef.*; @@ -69,6 +69,10 @@ public class Aligner extends BaseAligner { private ServiceContext context; private DataManager dataMan; private IMetadataManager metadataManager; + private IMetadataIndexer metadataIndexer; + private IMetadataOperations metadataOperations; + private IMetadataUtils metadataUtils; + private IMetadataSchemaUtils metadataSchemaUtils; private HarvestResult result; private CategoryMapper localCateg; private GroupMapper localGroups; @@ -79,7 +83,7 @@ public class Aligner extends BaseAligner { private MetadataRepository metadataRepository; private Map> hmRemoteGroups = new HashMap<>(); private SettingManager settingManager; - + private AccessManager accessManager; private GeoNetworkApiClient geoNetworkApiClient; public Aligner(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, @@ -90,10 +94,14 @@ public Aligner(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, this.params = params; GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME); - dataMan = gc.getBean(DataManager.class); + metadataIndexer = gc.getBean(IMetadataIndexer.class); metadataManager = gc.getBean(IMetadataManager.class); + metadataOperations = gc.getBean(IMetadataOperations.class); + metadataUtils = gc.getBean(IMetadataUtils.class); + metadataSchemaUtils = 
gc.getBean(IMetadataSchemaUtils.class); metadataRepository = gc.getBean(MetadataRepository.class); settingManager = gc.getBean(SettingManager.class); + accessManager = gc.getBean(AccessManager.class); geoNetworkApiClient = gc.getBean(GeoNetworkApiClient.class); result = new HarvestResult(); @@ -174,16 +182,15 @@ public HarvestResult align(SortedSet records, List err // Mef full format provides ISO19139 records in both the profile // and ISO19139 so we could be able to import them as far as // ISO19139 schema is installed by default. - if (!dataMan.existsSchema(ri.schema) && !ri.schema.startsWith("iso19139.")) { + if (!metadataSchemaUtils.existsSchema(ri.schema) && !ri.schema.startsWith("iso19139.")) { if (log.isDebugEnabled()) log.debug(" - Metadata skipped due to unknown schema. uuid:" + ri.uuid + ", schema:" + ri.schema); result.unknownSchema++; } else { - String id = dataMan.getMetadataId(ri.uuid); + String id = metadataUtils.getMetadataId(ri.uuid); // look up value of localrating/enable - SettingManager settingManager = context.getBean(SettingManager.class); String localRating = settingManager.getValue(Settings.SYSTEM_LOCALRATING_ENABLE); if (id == null) { @@ -241,7 +248,7 @@ public HarvestResult align(SortedSet records, List err } } - dataMan.forceIndexChanges(); + metadataIndexer.forceIndexChanges(); log.info("End of alignment for : " + params.getName()); @@ -275,7 +282,7 @@ private Element extractValidMetadataForImport(DirectoryStream files, Eleme try { Path parent = file.getParent(); Path parent2 = parent.getParent(); - String metadataSchema = dataMan.autodetectSchema(metadata, null); + String metadataSchema = metadataSchemaUtils.autodetectSchema(metadata, null); // If local node doesn't know metadata // schema try to load next xml file. 
if (metadataSchema == null) { @@ -343,11 +350,21 @@ private void addMetadata(final RecordInfo ri, final boolean localRating, String final Element[] md = {null}; //--- import metadata from MEF file + String username; + String password; + + if (params.isUseAccount()) { + username = params.getUsername(); + password = params.getPassword(); + } else { + username = ""; + password = ""; + } Path mefFile = null; try { - mefFile = geoNetworkApiClient.retrieveMEF( params.host + "/" + params.getNode(), ri.uuid); + mefFile = geoNetworkApiClient.retrieveMEF( params.host + "/" + params.getNode(), ri.uuid, username, password); String fileType = "mef"; MEFLib.Version version = MEFLib.getMEFVersion(mefFile); if (version != null && version.equals(MEFLib.Version.V2)) { @@ -377,7 +394,7 @@ public void handleMetadataFiles(DirectoryStream files, Element info, int i public void handleInfo(Element info, int index) throws Exception { final Element metadata = md[index]; - String schema = dataMan.autodetectSchema(metadata, null); + String schema = metadataSchemaUtils.autodetectSchema(metadata, null); if (info != null && info.getContentSize() != 0) { Element general = info.getChild("general"); if (general != null && general.getContentSize() != 0) { @@ -403,7 +420,6 @@ private void handleFile(String file, String changeDate, InputStream is, int inde if (log.isDebugEnabled()) log.debug(" - Adding remote " + visibility + " file with name: " + file); final Store store = context.getBean("resourceStore", Store.class); - final IMetadataUtils metadataUtils = context.getBean(IMetadataUtils.class); final String metadataUuid = metadataUtils.getMetadataUuid(id[index]); store.putResource(context, metadataUuid, file, is, new ISODate(changeDate).toDate(), visibility, true); } @@ -469,7 +485,7 @@ private String addMetadata(RecordInfo ri, Element md, Element info, boolean loca } if (!params.xslfilter.equals("")) { - md = HarvesterUtil.processMetadata(dataMan.getSchema(schema), + md = 
HarvesterUtil.processMetadata(metadataSchemaUtils.getSchema(schema), md, processName, processParams); } @@ -527,7 +543,7 @@ private String addMetadata(RecordInfo ri, Element md, Element info, boolean loca } context.getBean(IMetadataManager.class).save(metadata); - dataMan.indexMetadata(id, Math.random() < 0.01); + metadataIndexer.indexMetadata(id, true, IndexingMode.full); result.addedMetadata++; return id; @@ -602,12 +618,12 @@ private Map> buildPrivileges(Element privil) { private void addOperations(String id, String groupId, Set oper) throws Exception { for (String opName : oper) { - int opId = dataMan.getAccessManager().getPrivilegeId(opName); + int opId = accessManager.getPrivilegeId(opName); //--- allow only: view, download, dynamic, featured if (opId == 0 || opId == 1 || opId == 5 || opId == 6) { if (log.isDebugEnabled()) log.debug(" --> " + opName); - dataMan.setOperation(context, id, groupId, opId + ""); + metadataOperations.setOperation(context, id, groupId, opId + ""); } else { if (log.isDebugEnabled()) log.debug(" --> " + opName + " (skipped)"); } @@ -660,7 +676,18 @@ private void updateMetadata(final RecordInfo ri, final String id, final boolean Path mefFile = null; try { - mefFile = geoNetworkApiClient.retrieveMEF(params.host + "/" + params.getNode(), ri.uuid); + String username; + String password; + + if (params.isUseAccount()) { + username = params.getUsername(); + password = params.getPassword(); + } else { + username = ""; + password = ""; + } + + mefFile = geoNetworkApiClient.retrieveMEF(params.host + "/" + params.getNode(), ri.uuid, username, password); String fileType = "mef"; MEFLib.Version version = MEFLib.getMEFVersion(mefFile); @@ -742,7 +769,6 @@ private void updateMetadata(RecordInfo ri, String id, Element md, return; } - final IMetadataManager metadataManager = context.getBean(IMetadataManager.class); Metadata metadata; if (!force && !ri.isMoreRecentThan(date)) { if (log.isDebugEnabled()) @@ -761,7 +787,7 @@ private void 
updateMetadata(RecordInfo ri, String id, Element md, } if (!params.xslfilter.equals("")) { - md = HarvesterUtil.processMetadata(dataMan.getSchema(ri.schema), + md = HarvesterUtil.processMetadata(metadataSchemaUtils.getSchema(ri.schema), md, processName, processParams); } // update metadata @@ -819,7 +845,7 @@ private void updateMetadata(RecordInfo ri, String id, Element md, metadataManager.save(metadata); - dataMan.indexMetadata(id, Math.random() < 0.01); + metadataIndexer.indexMetadata(id, true, IndexingMode.full); } private void handleFile(String id, String file, MetadataResourceVisibility visibility, String changeDate, @@ -829,7 +855,6 @@ private void handleFile(String id, String file, MetadataResourceVisibility visib log.debug(" - No file found in info.xml. Cannot update file:" + file); } else { final Store store = context.getBean("resourceStore", Store.class); - final IMetadataUtils metadataUtils = context.getBean(IMetadataUtils.class); final String metadataUuid = metadataUtils.getMetadataUuid(id); removeOldFile(store, metadataUuid, files, visibility); saveFile(store, metadataUuid, file, visibility, changeDate, is); diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java index 163763e5a0a..9da62f98b17 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java @@ -1,5 +1,5 @@ //============================================================================= -//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== Copyright (C) 2001-2023 Food and Agriculture Organization of the //=== United Nations (FAO-UN), United Nations World Food Programme (WFP) //=== and United Nations Environment Programme (UNEP) //=== @@ -25,13 +25,25 @@ import 
com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Function; import com.google.common.io.CharStreams; +import org.apache.commons.lang.StringUtils; import org.apache.http.Header; import org.apache.http.HttpHeaders; +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.AuthCache; +import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.entity.ContentType; import org.apache.http.entity.StringEntity; +import org.apache.http.impl.auth.BasicScheme; +import org.apache.http.impl.client.BasicAuthCache; +import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.message.BasicHeader; import org.elasticsearch.action.search.SearchResponse; @@ -46,6 +58,7 @@ import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.json.JsonXContent; +import org.fao.geonet.ApplicationContextHolder; import org.fao.geonet.domain.Group; import org.fao.geonet.domain.Source; import org.fao.geonet.exceptions.BadParameterEx; @@ -56,11 +69,11 @@ import org.springframework.http.client.ClientHttpResponse; import org.springframework.stereotype.Component; +import javax.annotation.Nullable; import java.io.IOException; import java.io.InputStreamReader; import java.net.URI; import java.net.URISyntaxException; -import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; @@ -85,8 +98,8 @@ public class GeoNetworkApiClient { * @throws URISyntaxException * @throws IOException */ - public Map 
retrieveSources(String serverUrl) throws URISyntaxException, IOException { - String sourcesJson = retrieveUrl(addUrlSlash(serverUrl) + "api/sources"); + public Map retrieveSources(String serverUrl, String user, String password) throws URISyntaxException, IOException { + String sourcesJson = retrieveUrl(addUrlSlash(serverUrl) + "api/sources", user, password); ObjectMapper objectMapper = new ObjectMapper(); List sourceList @@ -106,8 +119,8 @@ public Map retrieveSources(String serverUrl) throws URISyntaxExc * @throws URISyntaxException * @throws IOException */ - public List retrieveGroups(String serverUrl) throws URISyntaxException, IOException { - String groupsJson = retrieveUrl(addUrlSlash(serverUrl) + "api/groups"); + public List retrieveGroups(String serverUrl, String user, String password) throws URISyntaxException, IOException { + String groupsJson = retrieveUrl(addUrlSlash(serverUrl) + "api/groups", user, password); ObjectMapper objectMapper = new ObjectMapper(); return objectMapper.readValue(groupsJson, new TypeReference<>(){}); @@ -122,10 +135,7 @@ public List retrieveGroups(String serverUrl) throws URISyntaxException, I * @throws URISyntaxException * @throws IOException */ - public SearchResponse query(String serverUrl, String query) throws URISyntaxException, IOException { - final HttpClientBuilder clientBuilder = requestFactory.getDefaultHttpClientBuilder(); - Lib.net.setupProxy(settingManager, clientBuilder, new URL(addUrlSlash(serverUrl)).getHost()); - + public SearchResponse query(String serverUrl, String query, String user, String password) throws URISyntaxException, IOException { HttpPost httpMethod = new HttpPost(createUrl(addUrlSlash(serverUrl) + "api/search/records/_search")); final Header headerContentType = new BasicHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); final Header header = new BasicHeader(HttpHeaders.ACCEPT, ContentType.APPLICATION_JSON.getMimeType()); @@ -134,7 +144,7 @@ public SearchResponse 
query(String serverUrl, String query) throws URISyntaxExce final StringEntity entity = new StringEntity(query); httpMethod.setEntity(entity); - try (ClientHttpResponse httpResponse = requestFactory.execute(httpMethod)){ + try (ClientHttpResponse httpResponse = doExecute(httpMethod, user, password)) { String jsonResponse = CharStreams.toString(new InputStreamReader(httpResponse.getBody())); return getSearchResponseFromJson(jsonResponse); @@ -151,7 +161,11 @@ public SearchResponse query(String serverUrl, String query) throws URISyntaxExce * @throws URISyntaxException * @throws IOException */ - public Path retrieveMEF(String serverUrl, String uuid) throws URISyntaxException, IOException { + public Path retrieveMEF(String serverUrl, String uuid, String user, String password) throws URISyntaxException, IOException { + if (!Lib.net.isUrlValid(serverUrl)) { + throw new BadParameterEx("Invalid URL", serverUrl); + } + Path tempFile = Files.createTempFile("temp-", ".dat"); String url = addUrlSlash(serverUrl) + @@ -161,10 +175,7 @@ public Path retrieveMEF(String serverUrl, String uuid) throws URISyntaxException final Header header = new BasicHeader(HttpHeaders.ACCEPT, "application/x-gn-mef-2-zip"); httpMethod.addHeader(header); - final HttpClientBuilder clientBuilder = requestFactory.getDefaultHttpClientBuilder(); - Lib.net.setupProxy(settingManager, clientBuilder, new URL(addUrlSlash(serverUrl)).getHost()); - - try (ClientHttpResponse httpResponse = requestFactory.execute(httpMethod)){ + try (ClientHttpResponse httpResponse = doExecute(httpMethod, user, password)){ Files.copy(httpResponse.getBody(), tempFile, StandardCopyOption.REPLACE_EXISTING); } @@ -176,17 +187,16 @@ private URI createUrl(String jsonUrl) throws URISyntaxException { return new URI(jsonUrl); } - private String retrieveUrl(String url) throws URISyntaxException, IOException { - if (!Lib.net.isUrlValid(url)) - throw new BadParameterEx("Invalid URL", url); - HttpGet httpMethod = new HttpGet(createUrl(url)); + 
private String retrieveUrl(String serverUrl, String user, String password) throws URISyntaxException, IOException { + if (!Lib.net.isUrlValid(serverUrl)) { + throw new BadParameterEx("Invalid URL", serverUrl); + } + + HttpGet httpMethod = new HttpGet(createUrl(serverUrl)); final Header header = new BasicHeader(HttpHeaders.ACCEPT, ContentType.APPLICATION_JSON.toString()); httpMethod.addHeader(header); - final HttpClientBuilder clientBuilder = requestFactory.getDefaultHttpClientBuilder(); - Lib.net.setupProxy(settingManager, clientBuilder, new URL(url).getHost()); - - try ( ClientHttpResponse httpResponse = requestFactory.execute(httpMethod);){ + try ( ClientHttpResponse httpResponse = doExecute(httpMethod, user, password)){ return CharStreams.toString(new InputStreamReader(httpResponse.getBody())); } } @@ -210,4 +220,55 @@ private SearchResponse getSearchResponseFromJson(String jsonResponse) throws IOE XContentParser parser = JsonXContent.jsonXContent.createParser(registry, null, jsonResponse); return SearchResponse.fromXContent(parser); } + + + protected ClientHttpResponse doExecute(HttpUriRequest method, String username, String password) throws IOException { + final String requestHost = method.getURI().getHost(); + HttpClientContext httpClientContext = HttpClientContext.create(); + + final Function requestConfiguration = new Function<>() { + @Nullable + @Override + public Void apply(HttpClientBuilder input) { + if (StringUtils.isNotEmpty(username) && StringUtils.isNotEmpty(password)) { + HttpHost targetHost = new HttpHost( + method.getURI().getHost(), + method.getURI().getPort(), + method.getURI().getScheme()); + + final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials( + new AuthScope(targetHost.getHostName(), targetHost.getPort()), + new UsernamePasswordCredentials(username, password)); + + final RequestConfig.Builder builder = RequestConfig.custom(); + builder.setAuthenticationEnabled(true); + 
builder.setRedirectsEnabled(true); + builder.setRelativeRedirectsAllowed(true); + builder.setCircularRedirectsAllowed(true); + builder.setMaxRedirects(3); + + input.setDefaultRequestConfig(builder.build()); + + // Preemptive authentication + // Create AuthCache instance + AuthCache authCache = new BasicAuthCache(); + // Generate BASIC scheme object and add it to the local auth cache + BasicScheme basicAuth = new BasicScheme(); + authCache.put(targetHost, basicAuth); + + // Add AuthCache to the execution context + httpClientContext.setCredentialsProvider(credentialsProvider); + httpClientContext.setAuthCache(authCache); + } + + Lib.net.setupProxy(settingManager, input, requestHost); + input.useSystemProperties(); + + return null; + } + }; + + return requestFactory.execute(method, requestConfiguration, httpClientContext); + } } diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Geonet40Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Geonet40Harvester.java index 2aad71ced60..fa52c14510d 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Geonet40Harvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Geonet40Harvester.java @@ -1,5 +1,5 @@ //============================================================================= -//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== Copyright (C) 2001-2023 Food and Agriculture Organization of the //=== United Nations (FAO-UN), United Nations World Food Programme (WFP) //=== and United Nations Environment Programme (UNEP) //=== diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeonetParams.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeonetParams.java index 5e1410d15c5..6caf463c7de 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeonetParams.java 
+++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeonetParams.java @@ -1,5 +1,5 @@ //============================================================================= -//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== Copyright (C) 2001-2023 Food and Agriculture Organization of the //=== United Nations (FAO-UN), United Nations World Food Programme (WFP) //=== and United Nations Environment Programme (UNEP) //=== diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Group.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Group.java index aa35b0294da..f3090b5daf4 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Group.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Group.java @@ -1,5 +1,5 @@ //============================================================================= -//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== Copyright (C) 2001-2023 Food and Agriculture Organization of the //=== United Nations (FAO-UN), United Nations World Food Programme (WFP) //=== and United Nations Environment Programme (UNEP) //=== diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Harvester.java index 0f15f5e6f83..16fa97364ad 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Harvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Harvester.java @@ -1,5 +1,5 @@ //============================================================================= -//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== Copyright (C) 2001-2023 Food and Agriculture Organization of the //=== United Nations (FAO-UN), United Nations World Food Programme (WFP) //=== and United 
Nations Environment Programme (UNEP) //=== @@ -81,8 +81,15 @@ public HarvestResult harvest(Logger log) throws Exception { geoNetworkApiClient = context.getBean(GeoNetworkApiClient.class); //--- login + String username; + String password; + if (params.isUseAccount()) { - // TODO: Implement + username = params.getUsername(); + password = params.getPassword(); + } else { + username = ""; + password = ""; } //--- retrieve info on categories and groups @@ -90,9 +97,9 @@ public HarvestResult harvest(Logger log) throws Exception { log.info("Retrieving information from : " + params.host); String serverUrl = getServerUrl(); - Map sources = geoNetworkApiClient.retrieveSources(serverUrl); + Map sources = geoNetworkApiClient.retrieveSources(serverUrl, username, password); - List groupList = geoNetworkApiClient.retrieveGroups(serverUrl); + List groupList = geoNetworkApiClient.retrieveGroups(serverUrl, username, password); //--- perform all searches @@ -202,8 +209,19 @@ private Set processSearchResult(SearchHit[] searchHits) { private SearchResponse doSearch(Search s) throws OperationAbortedEx { try { + String username; + String password; + + if (params.isUseAccount()) { + username = params.getUsername(); + password = params.getPassword(); + } else { + username = ""; + password = ""; + } + String queryBody = s.createElasticsearchQuery(); - return geoNetworkApiClient.query(getServerUrl(), queryBody); + return geoNetworkApiClient.query(getServerUrl(), queryBody, username, password); } catch (Exception ex) { Log.error(LOGGER_NAME, ex.getMessage(), ex); HarvestError harvestError = new HarvestError(context, ex); diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java index 12a0e522d29..e7f3e2e19e2 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java +++ 
b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java @@ -1,5 +1,5 @@ //============================================================================= -//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== Copyright (C) 2001-2023 Food and Agriculture Organization of the //=== United Nations (FAO-UN), United Nations World Food Programme (WFP) //=== and United Nations Environment Programme (UNEP) //=== @@ -31,7 +31,6 @@ import org.fao.geonet.utils.Log; import org.jdom.Element; -import java.util.Iterator; //============================================================================= @@ -128,11 +127,6 @@ public String createElasticsearchQuery() { return queryBody; } - private void add(Element req, String name, String value) { - if (value.length() != 0) - req.addContent(new Element(name).setText(value)); - } - public void setRange(int from, int to) { this.from = from; this.to = to; From 31dc0c483bf2e69c35cb9b88743fea412c13b6ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Tue, 26 Dec 2023 16:08:05 +0100 Subject: [PATCH 3/8] GeoNetwork 4.x harvester / Unit tests --- .../geonet40/GeoNetworkApiClient.java | 3 +- .../geonet40/GeoNetworkApiClientTest.java | 224 ++++++++++++++++++ 2 files changed, 225 insertions(+), 2 deletions(-) create mode 100644 harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClientTest.java diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java index 9da62f98b17..87fd1160f5f 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClient.java @@ -58,7 +58,6 @@ import org.elasticsearch.xcontent.ParseField; import 
org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.json.JsonXContent; -import org.fao.geonet.ApplicationContextHolder; import org.fao.geonet.domain.Group; import org.fao.geonet.domain.Source; import org.fao.geonet.exceptions.BadParameterEx; @@ -215,7 +214,7 @@ private List getDefaultNamedXContents() { .collect(Collectors.toList()); } - private SearchResponse getSearchResponseFromJson(String jsonResponse) throws IOException { + SearchResponse getSearchResponseFromJson(String jsonResponse) throws IOException { NamedXContentRegistry registry = new NamedXContentRegistry(getDefaultNamedXContents()); XContentParser parser = JsonXContent.jsonXContent.createParser(registry, null, jsonResponse); return SearchResponse.fromXContent(parser); diff --git a/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClientTest.java b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClientTest.java new file mode 100644 index 00000000000..20b5e35fbb0 --- /dev/null +++ b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/geonet40/GeoNetworkApiClientTest.java @@ -0,0 +1,224 @@ +//============================================================================= +//=== Copyright (C) 2001-2023 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.geonet40; + +import org.elasticsearch.action.search.SearchResponse; +import org.fao.geonet.domain.Group; +import org.fao.geonet.domain.Source; +import org.fao.geonet.exceptions.BadParameterEx; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.runners.MockitoJUnitRunner; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.when; + +@RunWith(MockitoJUnitRunner.class) +public class GeoNetworkApiClientTest { + @Mock + private GeoNetworkApiClient geoNetworkApiClient; + + @Test + public void testRetrieveGroups() { + List groupList = new ArrayList<>(); + Group group1 = new Group(); + group1.setId(1); + group1.setName("group1"); + groupList.add(group1); + + Group group2 = new Group(); + group1.setId(2); + group1.setName("group2"); + groupList.add(group2); + + try { + when(geoNetworkApiClient.retrieveGroups("http://localhost:8080/geonetwork", "", "")).thenReturn(groupList); + when(geoNetworkApiClient.retrieveGroups("invalidURL", "", "")).thenThrow(BadParameterEx.class); + + List groupListRetrieved = geoNetworkApiClient.retrieveGroups("http://localhost:8080/geonetwork", "", ""); + assertEquals(groupList, groupListRetrieved); + + assertThrows(BadParameterEx.class, () -> geoNetworkApiClient.retrieveGroups("invalidURL", 
"", "")); + } catch (URISyntaxException | IOException ex) { + fail("Error retrieving groups"); + } + } + + + @Test + public void testRetrieveSources() { + Map sourceMap = new HashMap<>(); + Source source1 = new Source(); + source1.setUuid(UUID.randomUUID().toString()); + source1.setName("source1"); + sourceMap.put(source1.getUuid(), source1); + + Source source2 = new Source(); + source2.setUuid(UUID.randomUUID().toString()); + source2.setName("source2"); + sourceMap.put(source2.getUuid(), source2); + + try { + when(geoNetworkApiClient.retrieveSources("http://localhost:8080/geonetwork", "", "")).thenReturn(sourceMap); + when(geoNetworkApiClient.retrieveSources("invalidURL", "", "")).thenThrow(BadParameterEx.class); + + Map sourceMapRetrieved = geoNetworkApiClient.retrieveSources("http://localhost:8080/geonetwork", "", ""); + assertEquals(sourceMap, sourceMapRetrieved); + + assertThrows(BadParameterEx.class, () -> geoNetworkApiClient.retrieveSources("invalidURL", "", "")); + } catch (URISyntaxException | IOException ex) { + fail("Error retrieving sources"); + } + } + + @Test + public void testRetrieveMEF() { + try { + Path mefFilePath = Files.createTempFile("temp-", ".dat"); + + when(geoNetworkApiClient.retrieveMEF("http://localhost:8080/geonetwork", "aaaa", "", "")).thenReturn(mefFilePath); + when(geoNetworkApiClient.retrieveMEF("invalidURL", "aaaa", "", "")).thenThrow(BadParameterEx.class); + + Path mefFilePathRetrieved = geoNetworkApiClient.retrieveMEF("http://localhost:8080/geonetwork", "aaaa", "", ""); + assertEquals(mefFilePath, mefFilePathRetrieved); + + assertThrows(BadParameterEx.class, () -> geoNetworkApiClient.retrieveMEF("invalidURL", "aaaa", "", "")); + } catch (URISyntaxException | IOException ex) { + fail("Error retrieving sources"); + } + } + + @Test + public void testQuery() { + try { + String query = String.format("{\n" + + " \"from\": %d,\n" + + " \"size\": %d,\n" + + " \"sort\": [\"_score\"],\n" + + " \"query\": {\"bool\": {\"must\": [{\"terms\": 
{\"isTemplate\": [\"n\"]}},{\"term\": {\"sourceCatalogue\": \"%s\"}}]}},\n" + + " \"_source\": {\"includes\": [\n" + + " \"uuid\",\n" + + " \"id\",\n" + + " \"isTemplate\",\n" + + " \"sourceCatalogue\",\n" + + " \"dateStamp\",\n" + + " \"documentStandard\"\n" + + " ]},\n" + + " \"track_total_hits\": true\n" + + "}", 1, 30, "fee3d1ae-f32b-4435-865d-36af0a489e3c"); + + String queryResult = "{\n" + + " \"took\": 41,\n" + + " \"timed_out\": false,\n" + + " \"_shards\": {\n" + + " \"total\": 1,\n" + + " \"successful\": 1,\n" + + " \"skipped\": 0,\n" + + " \"failed\": 0\n" + + " },\n" + + " \"hits\": {\n" + + " \"total\": {\n" + + " \"value\": 2,\n" + + " \"relation\": \"eq\"\n" + + " },\n" + + " \"max_score\": 1,\n" + + " \"hits\": [\n" + + " {\n" + + " \"_index\": \"gn-records\",\n" + + " \"_type\": \"_doc\",\n" + + " \"_id\": \"b5576133-8a6f-4b47-a973-e2b3c80c8a75\",\n" + + " \"_score\": 1,\n" + + " \"_source\": {\n" + + " \"owner\": \"1\",\n" + + " \"groupOwner\": \"11855\",\n" + + " \"uuid\": \"b5576133-8a6f-4b47-a973-e2b3c80c8a75\",\n" + + " \"documentStandard\": \"iso19139\",\n" + + " \"sourceCatalogue\": \"fee3d1ae-f32b-4435-865d-36af0a489e3c\",\n" + + " \"dateStamp\": \"2021-05-12T09:53:28.000Z\",\n" + + " \"isTemplate\": \"n\",\n" + + " \"id\": \"151659\"\n" + + " },\n" + + " \"edit\": false,\n" + + " \"canReview\": false,\n" + + " \"owner\": false,\n" + + " \"isPublishedToAll\": true,\n" + + " \"view\": true,\n" + + " \"notify\": false,\n" + + " \"download\": true,\n" + + " \"dynamic\": true,\n" + + " \"featured\": false,\n" + + " \"selected\": false\n" + + " },\n" + + " {\n" + + " \"_index\": \"gn-records\",\n" + + " \"_type\": \"_doc\",\n" + + " \"_id\": \"5b7008b9-84db-4ae3-9e9d-4de926b00ad9\",\n" + + " \"_score\": 1,\n" + + " \"_source\": {\n" + + " \"owner\": \"1\",\n" + + " \"groupOwner\": \"11857\",\n" + + " \"uuid\": \"5b7008b9-84db-4ae3-9e9d-4de926b00ad9\",\n" + + " \"documentStandard\": \"iso19139\",\n" + + " \"sourceCatalogue\": 
\"fee3d1ae-f32b-4435-865d-36af0a489e3c\",\n" + + " \"dateStamp\": \"2021-05-12T09:52:34.000Z\",\n" + + " \"isTemplate\": \"n\",\n" + + " \"id\": \"151601\"\n" + + " },\n" + + " \"edit\": false,\n" + + " \"canReview\": false,\n" + + " \"owner\": false,\n" + + " \"isPublishedToAll\": true,\n" + + " \"view\": true,\n" + + " \"notify\": false,\n" + + " \"download\": true,\n" + + " \"dynamic\": true,\n" + + " \"featured\": false,\n" + + " \"selected\": false\n" + + " }\n" + + " ]\n" + + " }\n" + + "}"; + + SearchResponse searchResponse = geoNetworkApiClient.getSearchResponseFromJson(queryResult); + + when(geoNetworkApiClient.query("http://localhost:8080/geonetwork", query, "", "")).thenReturn(searchResponse); + when(geoNetworkApiClient.query("invalidURL", query, "", "")).thenThrow(BadParameterEx.class); + + SearchResponse searchResponseRetrieved = geoNetworkApiClient.query("http://localhost:8080/geonetwork", query, "", ""); + + assertEquals(searchResponse, searchResponseRetrieved); + + assertThrows(BadParameterEx.class, () -> geoNetworkApiClient.query("invalidURL", query, "", "")); + } catch (URISyntaxException | IOException ex) { + fail("Error retrieving sources"); + } + } +} From 4ac70906d82a07144672697bd87c19064b3626a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Tue, 2 Jan 2024 11:25:41 +0100 Subject: [PATCH 4/8] Improve GeoNetwork harvesters documentation pages --- .../harvesting/harvesting-geonetwork-20.md | 27 +++++++++++++ .../harvesting/harvesting-geonetwork-3x.md | 39 +++++++++++++++++++ .../harvesting/harvesting-geonetwork-4x.md | 39 +++++++++++++++++++ .../harvesting/harvesting-geonetwork.md | 3 -- .../docs/user-guide/harvesting/index.md | 4 +- docs/manual/mkdocs.yml | 4 +- 6 files changed, 111 insertions(+), 5 deletions(-) create mode 100644 docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-20.md create mode 100644 docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-3x.md create mode 100644 
docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-4x.md delete mode 100644 docs/manual/docs/user-guide/harvesting/harvesting-geonetwork.md diff --git a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-20.md b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-20.md new file mode 100644 index 00000000000..280d91e77b6 --- /dev/null +++ b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-20.md @@ -0,0 +1,27 @@ +# GeoNetwork 2.0 Harvester {#gn2_harvester} + +GeoNetwork 2.1 introduced a new powerful harvesting engine which is not compatible with GeoNetwork version 2.0 based catalogues. Old 2.0 servers can still harvest from 2.1 servers but harvesting metadata from a v2.0 server requires this harvesting type. Due to the fact that GeoNetwork 2.0 was released more than 5 years ago, this harvesting type is deprecated. + +## Adding a GeoNetwork 2.0 Harvester + +Configuration options: + +- **Identification** - Options describing the remote site. + - *Name* - This is a short description of the remote site. It will be shown in the harvesting main page as the name for this instance of the harvester. + - *Group* - Group that owns the harvested metadata. + - *User* - User that owns the harvested metadata. +- **Schedule** - Schedule configuration to execute the harvester. +- **Configure connection to GeoNetwork**: + - *Catalog URL* - The URL of the GeoNetwork server from which metadata will be harvested. + - *Search filter* - This allows you to select metadata records for harvest based on certain criteria: + - *Full text* + - *Title* + - *Abstract* + - *Keyword* + - *Site id* - Identifier of the source to filter the metadata to harvest. + +- **Configure response processing** + - *Remote authentication* + - *Validate records before import* + +- **Privileges** - Assign privileges to harvested metadata. 
diff --git a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-3x.md b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-3x.md new file mode 100644 index 00000000000..bafc1375217 --- /dev/null +++ b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-3x.md @@ -0,0 +1,39 @@ +# GeoNetwork 2.1-3.x Harvester {#gn3_harvester} + +GeoNetwork 2.1 introduced a new powerful harvesting engine which is not compatible with GeoNetwork version 2.0 +based catalogues. To harvest GeoNetwork servers based on versions 2.1 or 3.x requires this harvesting type. + +## Adding a GeoNetwork 2.1-3.x Harvester + +Configuration options: + +- **Identification** - Options describing the remote site. + - *Name* - This is a short description of the remote site. It will be shown in the harvesting main page as the name for this instance of the harvester. + - *Group* - Group that owns the harvested metadata. + - *User* - User that owns the harvested metadata. +- **Schedule** - Schedule configuration to execute the harvester. +- **Configure connection to GeoNetwork**: + - *Catalog URL* - The URL of the GeoNetwork server from which metadata will be harvested. + - *Node name* - GeoNetwork node name to harvest, by default `srv`. + - *Search filter* - This allows you to select metadata records for harvest based on certain criteria: + - *Full text* + - *Title* + - *Abstract* + - *Keyword* + - *Custom criteria* - Allows to define whatever criteria are supported by the remote node and not available in the predefined filters (eg. `similarity` set to `1` for non fuzzy search). You may specify multiple criteria separated by `;` (eg. `_schema;siteId` with values `iso19139;7fc45be3-9aba-4198-920c-b8737112d522`). + - *Catalog* - Allows to select a source to filter the metadata to harvest. 
+ +- **Configure response processing** + - *Action on UUID collision* - Allows to configure the action when a harvester finds the same uuid on a record collected by another method (another harvester, importer, dashboard editor,...). + - skipped (default) + - overridden + - generate a new UUID + - *Remote authentication* - User credentials to retrieve non-public metadata. + - *Use full MEF format* + - *Use change date for comparison* + - *Set category if it exists locally* + - *Category for harvested records* + - *XSL filter name to apply* + - *Validate records before import* + +- **Privileges** - Assign privileges to harvested metadata. diff --git a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-4x.md b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-4x.md new file mode 100644 index 00000000000..0e2b737d9b7 --- /dev/null +++ b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-4x.md @@ -0,0 +1,39 @@ +# GeoNetwork 4.x Harvester {#gn4_harvester} + +GeoNetwork 4.x changed the search engine to Elasticsearch, which is not compatible with previous versions. To harvest +a catalogue based on GeoNetwork 4.x requires this harvesting type. + +## Adding a GeoNetwork 4.x Harvester + +Configuration options: + +- **Identification** - Options describing the remote site. + - *Name* - This is a short description of the remote site. It will be shown in the harvesting main page as the name for this instance of the harvester. + - *Group* - Group that owns the harvested metadata. + - *User* - User that owns the harvested metadata. +- **Schedule** - Schedule configuration to execute the harvester. +- **Configure connection to GeoNetwork**: + - *Catalog URL* - The URL of the GeoNetwork server from which metadata will be harvested. + - *Node name* - GeoNetwork node name to harvest, by default `srv`.
+ - *Search filter* - This allows you to select metadata records for harvest based on certain criteria: + - *Full text* + - *Title* + - *Abstract* + - *Keyword* + - *Catalog* - Allows to select a source to filter the metadata to harvest. + +- **Configure response processing** + - *Action on UUID collision* - Allows to configure the action when a harvester finds the same uuid on a record collected by another method (another harvester, importer, dashboard editor,...). + - skipped (default) + - overridden + - generate a new UUID + - *Remote authentication* + - *Use full MEF format* + - *Use change date for comparison* + - *Set category if it exists locally* + - *Category for harvested records* + - *XSL filter name to apply* + - *Validate records before import* + +- **Privileges** - Assign privileges to harvested metadata. + diff --git a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork.md b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork.md deleted file mode 100644 index b3bbff7fb44..00000000000 --- a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork.md +++ /dev/null @@ -1,3 +0,0 @@ -# GeoNetwork 2.0 Harvester {#gn2_harvester} - -GeoNetwork 2.1 introduced a new powerful harvesting engine which is not compatible with GeoNetwork version 2.0 based catalogues. Old 2.0 servers can still harvest from 2.1 servers but harvesting metadata from a v2.0 server requires this harvesting type. Due to the fact that GeoNetwork 2.0 was released more than 5 years ago, this harvesting type is deprecated.
diff --git a/docs/manual/docs/user-guide/harvesting/index.md b/docs/manual/docs/user-guide/harvesting/index.md index 46f52f782c5..01936a2e1b1 100644 --- a/docs/manual/docs/user-guide/harvesting/index.md +++ b/docs/manual/docs/user-guide/harvesting/index.md @@ -6,7 +6,9 @@ Harvesting is the process of ingesting metadata from remote sources and storing The following sources can be harvested: -- [GeoNetwork 2.0 Harvester](harvesting-geonetwork.md) +- [GeoNetwork 4.x Harvester](harvesting-geonetwork-4x.md) +- [GeoNetwork 2.1-3.x Harvester](harvesting-geonetwork-3x.md) +- [GeoNetwork 2.0 Harvester](harvesting-geonetwork-20.md) - [Harvesting CSW services](harvesting-csw.md) - [Harvesting OGC Services](harvesting-ogcwxs.md) - [Simple URL harvesting (opendata)](harvesting-simpleurl.md) diff --git a/docs/manual/mkdocs.yml b/docs/manual/mkdocs.yml index f418b887e91..eccd1e160f4 100644 --- a/docs/manual/mkdocs.yml +++ b/docs/manual/mkdocs.yml @@ -271,7 +271,9 @@ nav: - user-guide/harvesting/index.md - user-guide/harvesting/harvesting-csw.md - user-guide/harvesting/harvesting-filesystem.md - - user-guide/harvesting/harvesting-geonetwork.md + - user-guide/harvesting/harvesting-geonetwork-4x.md + - user-guide/harvesting/harvesting-geonetwork-3x.md + - user-guide/harvesting/harvesting-geonetwork-20.md - user-guide/harvesting/harvesting-geoportal.md - user-guide/harvesting/harvesting-oaipmh.md - user-guide/harvesting/harvesting-ogcwxs.md From fa1be8ea0157e0608dd734dfbb1876d140928777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Tue, 2 Jan 2024 15:21:28 +0100 Subject: [PATCH 5/8] GeoNetwork 4.x harvester / improve filter queries --- .../geonet/kernel/harvest/harvester/geonet40/Search.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java index e7f3e2e19e2..d1f89082734 
100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/geonet40/Search.java @@ -85,17 +85,17 @@ public String createElasticsearchQuery() { String freeTextFilter = ""; if (StringUtils.isNotEmpty(freeText)) { - freeTextFilter = String.format(",{\"term\": {\"any.default\": \"%s\"}}", freeText); + freeTextFilter = String.format(",{\"query_string\": {\"query\": \"(any.\\\\*:(%s) OR any.common:(%s))\", \"default_operator\": \"AND\"}}", freeText, freeText); } String titleFilter = ""; if (StringUtils.isNotEmpty(title)) { - titleFilter = String.format(",{\"term\": {\"resourceTitleObject.default\": \"%s\"}}", title); + titleFilter = String.format(",{\"query_string\": {\"query\": \"(resourceTitleObject.\\\\*:(%s))\", \"default_operator\": \"AND\"}}", title); } String abstractFilter = ""; if (StringUtils.isNotEmpty(abstrac)) { - abstractFilter = String.format(",{\"term\": {\"resourceAbstractObject.default\": \"%s\"}}", abstrac); + abstractFilter = String.format(",{\"query_string\": {\"query\": \"(resourceAbstractObject.\\\\*:(%s))\", \"default_operator\": \"AND\"}}", abstrac); } String keywordFilter = ""; From 84cfd7b645ad1a979fa58ec1457a2dfc8a574bf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Thu, 5 Sep 2024 14:32:10 +0200 Subject: [PATCH 6/8] Update docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-20.md @jodygarnett I don't think I've ever used GeoNetwork 2.0 since I started working on GeoNetwork, it seems like a very old version. We can probably get rid of that harvester, I doubt anyone uses that version. Anyway your change looks fine. 
Co-authored-by: Jody Garnett --- .../harvesting/harvesting-geonetwork-20.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-20.md b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-20.md index 280d91e77b6..083b20b525b 100644 --- a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-20.md +++ b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-20.md @@ -1,6 +1,16 @@ # GeoNetwork 2.0 Harvester {#gn2_harvester} -GeoNetwork 2.1 introduced a new powerful harvesting engine which is not compatible with GeoNetwork version 2.0 based catalogues. Old 2.0 servers can still harvest from 2.1 servers but harvesting metadata from a v2.0 server requires this harvesting type. Due to the fact that GeoNetwork 2.0 was released more than 5 years ago, this harvesting type is deprecated. +This harvester is required for working with GeoNetwork 2.0. GeoNetwork 2.1 introduced a new powerful harvesting engine which is not compatible with the previous GeoNetwork version 2.0 based catalogues. + +This harvesting type is deprecated as GeoNetwork 2.0 has reached end-of-life. 
+ + +| Harvester | Harvest from | +| ------------------------ | ------------------------------------ | +| GeoNetwork 2.0 Harvester | GeoNetwork 2.0 Catalogue | +| GeoNetwork 3.x Harvester | GeoNetwork 2.1 and greater | +| GeoNetwork 3.x Harvester | GeoNetwork 3.x series | +| GeoNetwork 4.x Harvester | GeoNetwork 4.x series | ## Adding a GeoNetwork 2.0 Harvester From 0c82fd36e3bda822ca4edd46401098dffcb6b0c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Thu, 5 Sep 2024 14:32:29 +0200 Subject: [PATCH 7/8] Update docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-3x.md Co-authored-by: Jody Garnett --- .../docs/user-guide/harvesting/harvesting-geonetwork-3x.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-3x.md b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-3x.md index bafc1375217..63b73c4aa6f 100644 --- a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-3x.md +++ b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-3x.md @@ -3,6 +3,12 @@ GeoNetwork 2.1 introduced a new powerful harvesting engine which is not compatible with GeoNetwork version 2.0 based catalogues. To harvest GeoNetwork servers based on versions 2.1 or 3.x requires this harvesting type.
+| Harvester | Harvest from | +| ------------------------ | ------------------------------------ | +| GeoNetwork 2.0 Harvester | GeoNetwork 2.0 Catalogue | +| GeoNetwork 3.x Harvester | GeoNetwork 2.1 and greater | +| GeoNetwork 3.x Harvester | GeoNetwork 3.x series | +| GeoNetwork 4.x Harvester | GeoNetwork 4.x series | ## Adding a GeoNetwork 2.1-3.x Harvester Configuration options: From f5c229fa77ccff002a1ded5ce2fb6747aa3ddf59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Thu, 5 Sep 2024 14:32:39 +0200 Subject: [PATCH 8/8] Update docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-4x.md Co-authored-by: Jody Garnett --- .../docs/user-guide/harvesting/harvesting-geonetwork-4x.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-4x.md b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-4x.md index 0e2b737d9b7..d0e3ddc0804 100644 --- a/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-4x.md +++ b/docs/manual/docs/user-guide/harvesting/harvesting-geonetwork-4x.md @@ -3,6 +3,12 @@ GeoNetwork 4.x changed the search engine to Elasticsearch, that is not compatible with previous versions. To harvest a catalogue based on GeoNetwork 4.x requires this harvesting type. +| Harvester | Harvest from | +| ------------------------ | ------------------------------------ | +| GeoNetwork 2.0 Harvester | GeoNetwork 2.0 Catalogue | +| GeoNetwork 3.x Harvester | GeoNetwork 2.1 and greater | +| GeoNetwork 3.x Harvester | GeoNetwork 3.x series | +| GeoNetwork 4.x Harvester | GeoNetwork 4.x series | ## Adding a GeoNetwork 4.x Harvester Configuration options: