Skip to content

Commit

Permalink
Harvester / Simple URL / Fix multiple URL alignment
Browse files Browse the repository at this point in the history
Clean up the records to remove only once all URLs have been processed. There is no need to preserve the Element; alignment only requires the list of UUIDs.
  • Loading branch information
fxprunayre committed Oct 16, 2024
1 parent 7b9361a commit 5ef4f51
Showing 1 changed file with 4 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ public HarvestResult harvest(Logger log) throws Exception {
String[] urlList = params.url.split("\n");
boolean error = false;
Aligner aligner = new Aligner(cancelMonitor, context, params, log);
Set<String> listOfUuids = new HashSet<>();

for (String url : urlList) {
log.debug("Loading URL: " + url);
Expand Down Expand Up @@ -151,7 +152,6 @@ public HarvestResult harvest(Logger log) throws Exception {
params.numberOfRecordPath, e.getMessage()));
}
}
Map<String, Element> allUuids = new HashMap<>();
try {
List<String> listOfUrlForPages = buildListOfUrl(params, numberOfRecordsToHarvest);
for (int i = 0; i < listOfUrlForPages.size(); i++) {
Expand All @@ -166,7 +166,6 @@ public HarvestResult harvest(Logger log) throws Exception {
if (StringUtils.isNotEmpty(params.loopElement)
|| type == SimpleUrlResourceType.RDFXML) {
Map<String, Element> uuids = new HashMap<>();

try {
if (type == SimpleUrlResourceType.XML) {
collectRecordsFromXml(xmlObj, uuids, aligner);
Expand All @@ -176,15 +175,14 @@ public HarvestResult harvest(Logger log) throws Exception {
collectRecordsFromJson(jsonObj, uuids, aligner);
}
aligner.align(uuids, errors);
allUuids.putAll(uuids);
listOfUuids.addAll(uuids.keySet());
} catch (Exception e) {
errors.add(new HarvestError(this.context, e));
log.error(String.format("Failed to collect record in response at path %s. Error is: %s",
params.loopElement, e.getMessage()));
}
}
}
aligner.cleanupRemovedRecords(allUuids.keySet());
} catch (Exception t) {
error = true;
log.error("Unknown error trying to harvest");
Expand All @@ -198,11 +196,12 @@ public HarvestResult harvest(Logger log) throws Exception {
errors.add(new HarvestError(context, t));
}

log.info("Total records processed in all searches :" + allUuids.size());
log.info("Total records processed in all searches :" + listOfUuids.size());
if (error) {
log.warning("Due to previous errors the align process has not been called");
}
}
aligner.cleanupRemovedRecords(listOfUuids);
return aligner.getResult();
}

Expand Down

0 comments on commit 5ef4f51

Please sign in to comment.