Skip to content

Commit

Permalink
EA-37017 implement pre processors
Browse files Browse the repository at this point in the history
  • Loading branch information
SrishtiSingh-eu committed Feb 1, 2024
1 parent 0759a13 commit b528d8b
Show file tree
Hide file tree
Showing 8 changed files with 185 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<maven.compiler.release>${java.version}</maven.compiler.release>
<api-commons.version>0.3.22</api-commons.version>
<api-commons.version>0.3.23-SNAPSHOT</api-commons.version>
<jettison.version>1.3</jettison.version>
<spring-boot.version>2.5.14</spring-boot.version>
<springdoc.version>1.6.14</springdoc.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ public class TranslationObj {
private String translation;
private String cacheKey;
private boolean availableInCache;
private boolean isTranslated;

public String getText() {
return text;
Expand Down Expand Up @@ -49,4 +50,11 @@ public void setAvailableInCache(boolean cached) {
this.availableInCache = cached;
}

/**
 * Returns the current value of the {@code isTranslated} flag.
 *
 * <p>NOTE(review): in this change the flag is set to {@code true} when the object is built and
 * reset to {@code false} by the pre-processor for non-eligible text, so it appears to mean
 * "should be sent for translation" rather than "has been translated" — confirm.
 *
 * @return the stored flag
 */
public boolean isTranslated() {
  return this.isTranslated;
}

/**
 * Stores a new value for the {@code isTranslated} flag.
 *
 * @param translated the value to store
 */
public void setIsTranslated(boolean translated) {
  this.isTranslated = translated;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import javax.validation.constraints.NotNull;

import eu.europeana.api.translation.web.service.LangDetectionPreProcessor;
import eu.europeana.api.translation.web.service.TranslationPreProcessor;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.BeansException;
Expand Down Expand Up @@ -51,6 +54,8 @@ public class TranslationServiceProvider {
ApplicationContext applicationContext;

TranslationServicesConfiguration translationServicesConfig;
LanguageDetectionService languageDetectionPreProcessor;
TranslationService translationServicePreProcessor;
Map<String, LanguageDetectionService> langDetectServices = new ConcurrentHashMap<>();
Map<String, TranslationService> translationServices = new ConcurrentHashMap<>();
Map<String, TranslationService> langMappings4TranslateServices = new ConcurrentHashMap<>();
Expand Down Expand Up @@ -107,6 +112,13 @@ public void initTranslationServicesConfiguration()
// init translation services
readServiceConfigurations();
validateAndInitServices();
// init preprocessor
initPreProcessor();
}

/**
 * Creates the translation and language-detection pre-processors and keeps them in the
 * corresponding fields so they can be handed out through the getters.
 */
private void initPreProcessor() {
  this.translationServicePreProcessor = new TranslationPreProcessor();
  this.languageDetectionPreProcessor = new LangDetectionPreProcessor();
}

/**
Expand Down Expand Up @@ -370,6 +382,11 @@ public Map<String, TranslationService> getLangMappings4TranslateServices() {
return langMappings4TranslateServices;
}

/**
 * Returns the language-detection pre-processor held by this provider.
 *
 * @return the pre-processor instance; {@code null} until {@code initPreProcessor()} has run
 */
public LanguageDetectionService getLanguageDetectionPreProcessor() {
  return this.languageDetectionPreProcessor;
}


/**
 * Returns the translation pre-processor held by this provider.
 *
 * @return the pre-processor instance; {@code null} until {@code initPreProcessor()} has run
 */
public TranslationService getTranslationServicePreProcessor() {
  return this.translationServicePreProcessor;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package eu.europeana.api.translation.web.service;

import eu.europeana.api.translation.service.LanguageDetectionService;
import eu.europeana.api.translation.service.exception.LanguageDetectionException;
import eu.europeana.api.translation.web.utils.PreProcessorUtils;

import java.util.List;

/**
 * Pre-processing step exposed through the {@link LanguageDetectionService} interface.
 *
 * <p>It does not detect languages: {@link #detectLang(List, String)} only narrows the input texts
 * down to the ones {@link PreProcessorUtils#filterEligibleValues(List)} accepts. All other
 * interface methods are inert stubs.
 */
public class LangDetectionPreProcessor implements LanguageDetectionService {

  /** Always {@code false}; this pre-processor does not declare support for any language. */
  @Override
  public boolean isSupported(String srcLang) {
    return false;
  }

  /** Always {@code null}; no service id is kept. */
  @Override
  public String getServiceId() {
    return null;
  }

  /** Ignores the given id. */
  @Override
  public void setServiceId(String serviceId) {
    // intentionally empty: no service id is stored
  }

  /**
   * Filters the given texts down to the eligible ones.
   *
   * @param texts texts from the request
   * @param langHint not used by this implementation
   * @return the eligible texts themselves (not language codes), in their original order
   * @throws LanguageDetectionException declared by the interface; not thrown here
   */
  @Override
  public List<String> detectLang(List<String> texts, String langHint) throws LanguageDetectionException {
    final List<String> eligibleTexts = PreProcessorUtils.filterEligibleValues(texts);
    return eligibleTexts;
  }

  /** Nothing to release. */
  @Override
  public void close() {
    // intentionally empty: no resources are held
  }

  /** Always {@code null}; no external endpoint is involved. */
  @Override
  public String getExternalServiceEndPoint() {
    return null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import static eu.europeana.api.translation.web.I18nErrorMessageKeys.ERROR_INVALID_PARAM_VALUE;
import static eu.europeana.api.translation.web.I18nErrorMessageKeys.ERROR_UNSUPPORTED_LANG;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import javax.annotation.PreDestroy;
Expand Down Expand Up @@ -33,17 +35,20 @@ public LangDetectResponse detectLang(LangDetectRequest langDetectRequest)
LanguageDetectionService fallback = getFallbackService(langDetectRequest);
List<String> langs = null;
String serviceId = null;
List<String> eligibleValues = new ArrayList<>();
try {
// preprocess the values
eligibleValues = translationServiceProvider.getLanguageDetectionPreProcessor().detectLang(langDetectRequest.getText(), langDetectRequest.getLang());
langs =
langDetectService.detectLang(langDetectRequest.getText(), langDetectRequest.getLang());
langDetectService.detectLang(eligibleValues, langDetectRequest.getLang());
serviceId = langDetectService.getServiceId();
} catch (LanguageDetectionException originalError) {
// check if fallback is available
if (fallback == null) {
throwApiException(originalError);
} else {
try {
langs = fallback.detectLang(langDetectRequest.getText(), langDetectRequest.getLang());
langs = fallback.detectLang(eligibleValues, langDetectRequest.getLang());
serviceId = fallback.getServiceId();
} catch (LanguageDetectionException e) {
if (logger.isDebugEnabled()) {
Expand All @@ -53,10 +58,32 @@ public LangDetectResponse detectLang(LangDetectRequest langDetectRequest)
}
}
}
return new LangDetectResponse(accumulateLanguages(langDetectRequest.getText(), langs, eligibleValues), serviceId);
}

return new LangDetectResponse(langs, serviceId);
/**
 * Builds the response language list: exactly one entry per request text, holding the detected
 * language for eligible texts and {@code null} for texts that were not sent for detection.
 *
 * <p>The eligible values are produced by an order-preserving filter over {@code texts}, so a
 * single pass with a cursor into {@code eligibleValues} is enough. This also handles duplicate
 * texts correctly. The previous nested loop appended a {@code null} for every non-matching text
 * on every outer iteration, which produced a list longer than {@code texts} and dropped trailing
 * non-eligible texts.
 *
 * <p>Assumes {@code langDetected.get(i)} is the language of {@code eligibleValues.get(i)}, as the
 * original code did — confirm against the detection services.
 *
 * @param texts texts sent in the request
 * @param langDetected languages detected by the service; may be {@code null} or shorter than
 *     {@code eligibleValues}, in which case the unmatched texts get {@code null}
 * @param eligibleValues texts sent for language detection (a subsequence of {@code texts})
 * @return a list of the same size as {@code texts}
 */
private List<String> accumulateLanguages(List<String> texts, List<String> langDetected, List<String> eligibleValues) {
  List<String> languages = new ArrayList<>(texts.size());
  // number of eligible values for which a detected language is actually available
  int available = (langDetected == null || eligibleValues == null)
      ? 0
      : Math.min(langDetected.size(), eligibleValues.size());
  int next = 0;
  for (String text : texts) {
    if (next < available && eligibleValues.get(next).equals(text)) {
      languages.add(langDetected.get(next));
      next++;
    } else {
      // text was filtered out by the pre-processor (or no language is available for it)
      languages.add(null);
    }
  }
  return languages;
}


private LanguageDetectionService getFallbackService(LangDetectRequest langDetectRequest)
throws ParamValidationException {
// only if indicated in request
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package eu.europeana.api.translation.web.service;

import eu.europeana.api.translation.definitions.model.TranslationObj;
import eu.europeana.api.translation.service.TranslationService;
import eu.europeana.api.translation.service.exception.TranslationException;
import eu.europeana.api.translation.web.utils.PreProcessorUtils;

import java.util.List;

/**
 * Pre-processing step exposed through the {@link TranslationService} interface.
 *
 * <p>It performs no translation: {@link #translate(List)} only delegates to
 * {@link PreProcessorUtils#processForEligibleValues(List)}, which flags objects whose text is not
 * eligible. All other interface methods are inert stubs.
 */
public class TranslationPreProcessor implements TranslationService {

  /** Always {@code null}; no service id is kept. */
  @Override
  public String getServiceId() {
    return null;
  }

  /** Ignores the given id. */
  @Override
  public void setServiceId(String serviceId) {
    // intentionally empty: no service id is stored
  }

  /** Always {@code false}; this pre-processor does not declare support for any language pair. */
  @Override
  public boolean isSupported(String srcLang, String trgLang) {
    return false;
  }

  /**
   * Marks the non-eligible objects in the given list; the list itself is not modified.
   *
   * @param translationStrings objects built from the translation request
   * @throws TranslationException declared by the interface; not thrown here
   */
  @Override
  public void translate(List<TranslationObj> translationStrings) throws TranslationException {
    final List<TranslationObj> candidates = translationStrings;
    PreProcessorUtils.processForEligibleValues(candidates);
  }

  /** Nothing to release. */
  @Override
  public void close() {
    // intentionally empty: no resources are held
  }

  /** Always {@code null}; no external endpoint is involved. */
  @Override
  public String getExternalServiceEndPoint() {
    return null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ public TranslationWebService(TranslationServiceProvider translationServiceProvid
public TranslationResponse translate(TranslationRequest translationRequest)
throws EuropeanaI18nApiException {
List<TranslationObj> translObjs = buildTranslationObjectList(translationRequest);

// pre processing for translation
try {
translationServiceProvider.getTranslationServicePreProcessor().translate(translObjs);
} catch (TranslationException e) {
e.printStackTrace();
}
// get the configured translation services
LanguagePair languagePair =
new LanguagePair(translationRequest.getSource(), translationRequest.getTarget());
Expand All @@ -59,7 +64,7 @@ public TranslationResponse translate(TranslationRequest translationRequest)
String serviceId = null;
for (TranslationService cachedTranslationService : cachedTranslationServices) {
try {
cachedTranslationService.translate(translObjs);
cachedTranslationService.translate(translObjs.stream().filter(to -> to.isTranslated()).collect(Collectors.toList()));
// call this method after the translate() method, because the serviceId changes depending if
// there is sth in the cache
serviceId = cachedTranslationService.getServiceId();
Expand Down Expand Up @@ -131,6 +136,7 @@ private List<TranslationObj> buildTranslationObjectList(TranslationRequest trans
newTranslObj.setSourceLang(translationRequest.getSource());
newTranslObj.setTargetLang(translationRequest.getTarget());
newTranslObj.setText(inputText);
newTranslObj.setIsTranslated(true);
translObjs.add(newTranslObj);
}
return translObjs;
Expand All @@ -153,7 +159,6 @@ private TranslationService selectTranslationService(TranslationRequest translati
// if none selected pick the default
final String defaultServiceId = translationServiceProvider.getTranslationServicesConfig()
.getTranslationConfig().getDefaultServiceId();

return getTranslationService(defaultServiceId, languagePair);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package eu.europeana.api.translation.web.utils;

import eu.europeana.api.translation.definitions.model.TranslationObj;

import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Static helpers that decide which text values are eligible for translation or language
 * detection.
 */
public class PreProcessorUtils {

  /**
   * Any value that has at least 2 consecutive unicode letters. The condition takes into account
   * that there can be words with only 2 letters that retain sufficient meaning and are therefore
   * reasonable to be translated, especially when looking at languages other than English
   * (see article - https://www.grammarly.com/blog/the-shortest-words-in-the-english-language/).
   */
  private static final String PATTERN = "\\p{IsAlphabetic}{2,}";
  private static final Pattern ELIGIBLE_VALUE_PATTERN = Pattern.compile(PATTERN);

  /**
   * Tells whether a single value is eligible.
   *
   * @param value text to check; {@code null} is treated as not eligible (a matcher cannot be
   *     created for {@code null} input)
   * @return {@code true} if the value contains at least 2 consecutive unicode letters
   */
  private static boolean isEligible(String value) {
    return value != null && ELIGIBLE_VALUE_PATTERN.matcher(value).find();
  }

  /**
   * Checks every object's text for eligibility.
   * Eligible value: any value that has at least 2 consecutive unicode letters.
   * If the text is not eligible (or is {@code null}), {@code isTranslated} is set to false, which
   * means that text/value will not be translated. Eligible objects are left untouched.
   *
   * @param translationObjs objects to check; modified in place
   */
  public static void processForEligibleValues(List<TranslationObj> translationObjs) {
    for (TranslationObj obj : translationObjs) {
      if (!isEligible(obj.getText())) {
        obj.setIsTranslated(false);
      }
    }
  }

  /**
   * Returns the eligible values of the given list, in their original order.
   *
   * @param texts values to filter; {@code null} elements are dropped as not eligible
   * @return a new list containing only the values with at least 2 consecutive unicode letters
   */
  public static List<String> filterEligibleValues(List<String> texts) {
    return texts.stream().filter(PreProcessorUtils::isEligible).collect(Collectors.toList());
  }

}

0 comments on commit b528d8b

Please sign in to comment.