From 2bad0f4399c55583ab3d1222dbaf9c7b95526793 Mon Sep 17 00:00:00 2001 From: Francois Prunayre Date: Mon, 14 Oct 2024 15:25:19 +0200 Subject: [PATCH] Harvester / Remove records by harvester UUID When a harvester contains a lot of records, removing them takes a while and can even cause heap space errors. Try to improve performance by using delete-by-query statements (instead of looping over each record), e.g. for 1500 records: * Select > Delete all = 2min * Harvester > Remove records = 700ms This bypasses events, but maybe that is fine for harvested records? Maybe there is a better JPA alternative for this kind of query? --- .../geonet/repository/MetadataRepository.java | 86 ++++++++++++++++--- .../kernel/harvest/HarvestManagerImpl.java | 12 ++- .../harvest/harvester/AbstractHarvester.java | 22 +++-- 3 files changed, 99 insertions(+), 21 deletions(-) diff --git a/domain/src/main/java/org/fao/geonet/repository/MetadataRepository.java b/domain/src/main/java/org/fao/geonet/repository/MetadataRepository.java index 3a8fefb4955..6f055b1e7cb 100644 --- a/domain/src/main/java/org/fao/geonet/repository/MetadataRepository.java +++ b/domain/src/main/java/org/fao/geonet/repository/MetadataRepository.java @@ -24,20 +24,17 @@ package org.fao.geonet.repository; import java.util.List; - import javax.annotation.Nonnull; import javax.annotation.Nullable; - import org.fao.geonet.domain.Metadata; import org.springframework.data.jpa.repository.JpaSpecificationExecutor; import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -import org.springframework.transaction.annotation.Transactional; /** * Data Access object for the {@link Metadata} entities. - * + *

* The use of this class is discouraged, you should use IMetadataUtils or IMetadataManager instead. * * @author Jesse @@ -60,12 +57,12 @@ public interface MetadataRepository extends GeonetRepository, /** * Find all metadata by the metadata's uuid. - * - * @param uuid the uuid of the metadata to find - * @return a list of metadata. - */ - @Nullable - List findAllByUuid(@Nonnull String uuid); + * + * @param uuid the uuid of the metadata to find + * @return a list of metadata. + */ + @Nullable + List findAllByUuid(@Nonnull String uuid); /** * Find all metadata harvested by the identified harvester. @@ -76,7 +73,76 @@ public interface MetadataRepository extends GeonetRepository, @Nonnull List findAllByHarvestInfo_Uuid(@Nonnull String uuid); + int countByHarvestInfo_Uuid(@Nonnull String uuid); + + @Query(value = "SELECT distinct(source) FROM metadata WHERE harvestuuid = :harvesterUuid", + nativeQuery = true) + List findDistinctSourcesByHarvestInfo__uuid(@Param("harvesterUuid") String harvesterUuid); + + + @Query(value = "DELETE FROM operationallowed WHERE metadataid IN (SELECT id FROM metadata WHERE harvestuuid = :harvesterUuid)", + nativeQuery = true) + @Modifying + void deleteAllOperationAllowedByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + + @Query(value = "DELETE FROM metadatarating WHERE metadataid IN (SELECT id FROM metadata WHERE harvestuuid = :harvesterUuid)", + nativeQuery = true) + @Modifying + void deleteAllMetadataRatingByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + + @Query(value = "DELETE FROM validation WHERE metadataid IN (SELECT id FROM metadata WHERE harvestuuid = :harvesterUuid)", + nativeQuery = true) + @Modifying + void deleteAllValidationByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + + @Query(value = "DELETE FROM usersavedselections WHERE metadatauuid IN (SELECT uuid FROM metadata WHERE harvestuuid = :harvesterUuid)", + nativeQuery = true) + @Modifying + void 
deleteAllUsersavedselectionsByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + + @Query(value = "DELETE FROM metadatafiledownloads WHERE metadataid IN (SELECT id FROM metadata WHERE harvestuuid = :harvesterUuid)", + nativeQuery = true) + @Modifying + void deleteAllMetadatafiledownloadsByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + + @Query(value = "DELETE FROM metadatafileuploads WHERE metadataid IN (SELECT id FROM metadata WHERE harvestuuid = :harvesterUuid)", + nativeQuery = true) + @Modifying + void deleteAllMetadatafileuploadsByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + + @Query(value = "DELETE FROM metadatastatus WHERE metadataid IN (SELECT id FROM metadata WHERE harvestuuid = :harvesterUuid)", + nativeQuery = true) + @Modifying + void deleteAllMetadatastatusByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + + @Query(value = "DELETE FROM metadatalink WHERE metadataid IN (SELECT id FROM metadata WHERE harvestuuid = :harvesterUuid)", + nativeQuery = true) + @Modifying + void deleteAllMetadatalinkByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + + @Query(value = "DELETE FROM metadatacateg WHERE metadataid IN (SELECT id FROM metadata WHERE harvestuuid = :harvesterUuid)", + nativeQuery = true) + @Modifying + void deleteAllMetadatacategByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + + @Query(value = "DELETE FROM metadata WHERE harvestuuid = :harvesterUuid", + nativeQuery = true) + @Modifying + void deleteAllMetadataByHarvesterUuid(@Param("harvesterUuid") String harvesterUuid); + default void deleteAllByHarvesterUuid(String harvesterUuid) { + deleteAllOperationAllowedByHarvesterUuid(harvesterUuid); + deleteAllMetadataRatingByHarvesterUuid(harvesterUuid); + deleteAllValidationByHarvesterUuid(harvesterUuid); + deleteAllUsersavedselectionsByHarvesterUuid(harvesterUuid); + deleteAllMetadatafiledownloadsByHarvesterUuid(harvesterUuid); + 
deleteAllMetadatafiledownloadsByHarvesterUuid(harvesterUuid); + deleteAllMetadatafileuploadsByHarvesterUuid(harvesterUuid); + deleteAllMetadatastatusByHarvesterUuid(harvesterUuid); + deleteAllMetadatalinkByHarvesterUuid(harvesterUuid); + deleteAllMetadatacategByHarvesterUuid(harvesterUuid); + deleteAllMetadataByHarvesterUuid(harvesterUuid); + } @Query(value = "SELECT replace(data, :search, :replace) FROM metadata m " + "WHERE uuid = :uuid", diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/HarvestManagerImpl.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/HarvestManagerImpl.java index df69be69a33..35122c0d496 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/HarvestManagerImpl.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/HarvestManagerImpl.java @@ -41,12 +41,15 @@ import org.fao.geonet.kernel.AccessManager; import org.fao.geonet.kernel.DataManager; import org.fao.geonet.kernel.HarvestInfoProvider; +import org.fao.geonet.kernel.datamanager.IMetadataManager; import org.fao.geonet.kernel.harvest.Common.OperResult; import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester; import org.fao.geonet.kernel.harvest.harvester.AbstractParams; import org.fao.geonet.kernel.harvest.harvester.HarversterJobListener; +import org.fao.geonet.kernel.search.EsSearchManager; import org.fao.geonet.kernel.setting.HarvesterSettingsManager; import org.fao.geonet.repository.HarvestHistoryRepository; +import org.fao.geonet.repository.MetadataRepository; import org.fao.geonet.repository.specification.MetadataSpecs; import org.fao.geonet.utils.Log; import org.fao.geonet.utils.Xml; @@ -83,6 +86,8 @@ public class HarvestManagerImpl implements HarvestInfoProvider, HarvestManager { private ServiceContext context; private boolean readOnly; private ConfigurableApplicationContext applicationContext; + protected MetadataRepository metadataRepository; + protected EsSearchManager searchManager; private Map hmHarvesters = new 
HashMap<>(); private Map hmHarvestLookup = new HashMap<>(); @@ -108,6 +113,8 @@ public ConfigurableApplicationContext getApplicationContext() { public void init(ServiceContext context, boolean isReadOnly) throws Exception { this.context = context; this.dataMan = context.getBean(DataManager.class); + this.metadataRepository = context.getBean(MetadataRepository.class); + this.searchManager = context.getBean(EsSearchManager.class); this.settingMan = context.getBean(HarvesterSettingsManager.class); this.translationPackBuilder = context.getBean(TranslationPackBuilder.class); @@ -689,9 +696,10 @@ public synchronized OperResult clearBatch(String id) throws Exception { long elapsedTime = System.currentTimeMillis(); String harvesterUUID = ah.getParams().getUuid(); + int numberOfRecordsRemoved = metadataRepository.countByHarvestInfo_Uuid(harvesterUUID); + metadataRepository.deleteAllByHarvesterUuid(harvesterUUID); + searchManager.delete(String.format("+harvesterUuid:\"%s\"", harvesterUUID)); - final Specification specification = (Specification) MetadataSpecs.hasHarvesterUuid(harvesterUUID); - int numberOfRecordsRemoved = dataMan.batchDeleteMetadataAndUpdateIndex(specification); ah.emptyResult(); elapsedTime = (System.currentTimeMillis() - elapsedTime) / 1000; diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java index 2398aa96c10..1a3670133c3 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java @@ -23,6 +23,7 @@ package org.fao.geonet.kernel.harvest.harvester; +import com.google.common.io.Files; import jeeves.server.UserSession; import jeeves.server.context.ServiceContext; import org.apache.commons.lang.StringUtils; @@ -49,11 +50,13 @@ import org.fao.geonet.kernel.datamanager.IMetadataUtils; import 
org.fao.geonet.kernel.harvest.Common.OperResult; import org.fao.geonet.kernel.harvest.Common.Status; +import org.fao.geonet.kernel.search.EsSearchManager; import org.fao.geonet.kernel.setting.HarvesterSettingsManager; import org.fao.geonet.kernel.setting.SettingManager; import org.fao.geonet.kernel.setting.Settings; import org.fao.geonet.repository.GroupRepository; import org.fao.geonet.repository.HarvestHistoryRepository; +import org.fao.geonet.repository.MetadataRepository; import org.fao.geonet.repository.SortUtils; import org.fao.geonet.repository.SourceRepository; import org.fao.geonet.repository.UserRepository; @@ -81,6 +84,7 @@ import java.io.File; import java.io.IOException; import java.net.UnknownHostException; +import java.nio.file.FileSystem; import java.sql.SQLException; import java.time.OffsetDateTime; import java.time.ZoneOffset; @@ -128,6 +132,8 @@ public abstract class AbstractHarvester ownedByHarvester = Specification.where(MetadataSpecs.hasHarvesterUuid(getParams().getUuid())); - Set sources = new HashSet<>(); - for (Integer metadataId : metadataRepository.findAllIdsBy(ownedByHarvester)) { - sources.add(metadataUtils.findOne(metadataId).getSourceInfo().getSourceId()); - metadataManager.deleteMetadata(context, "" + metadataId); - } + List sources = metadataRepository.findDistinctSourcesByHarvestInfo__uuid(getParams().getUuid()); + metadataRepository.deleteAllByHarvesterUuid(getParams().getUuid()); + searchManager.delete(String.format("+harvesterUuid:\"%s\"", getParams().getUuid())); // Remove all sources related to the harvestUuid if they are not linked to any record anymore for (String sourceUuid : sources) { Long ownedBySource = - metadataRepository.count(Specification.where(MetadataSpecs.hasSource(sourceUuid))); + metadataUtils.count(Specification.where(MetadataSpecs.hasSource(sourceUuid))); if (ownedBySource == 0 && !sourceUuid.equals(params.getUuid()) && sourceRepository.existsById(sourceUuid)) {