-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0759a13
commit b528d8b
Showing
8 changed files
with
185 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
40 changes: 40 additions & 0 deletions
40
...web/src/main/java/eu/europeana/api/translation/web/service/LangDetectionPreProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
package eu.europeana.api.translation.web.service; | ||
|
||
import eu.europeana.api.translation.service.LanguageDetectionService; | ||
import eu.europeana.api.translation.service.exception.LanguageDetectionException; | ||
import eu.europeana.api.translation.web.utils.PreProcessorUtils; | ||
|
||
import java.util.List; | ||
|
||
public class LangDetectionPreProcessor implements LanguageDetectionService { | ||
|
||
@Override | ||
public boolean isSupported(String srcLang) { | ||
return false; | ||
} | ||
|
||
@Override | ||
public String getServiceId() { | ||
return null; | ||
} | ||
|
||
@Override | ||
public void setServiceId(String serviceId) { | ||
|
||
} | ||
|
||
@Override | ||
public List<String> detectLang(List<String> texts, String langHint) throws LanguageDetectionException { | ||
return PreProcessorUtils.filterEligibleValues(texts); | ||
} | ||
|
||
@Override | ||
public void close() { | ||
|
||
} | ||
|
||
@Override | ||
public String getExternalServiceEndPoint() { | ||
return null; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
...n-web/src/main/java/eu/europeana/api/translation/web/service/TranslationPreProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package eu.europeana.api.translation.web.service; | ||
|
||
import eu.europeana.api.translation.definitions.model.TranslationObj; | ||
import eu.europeana.api.translation.service.TranslationService; | ||
import eu.europeana.api.translation.service.exception.TranslationException; | ||
import eu.europeana.api.translation.web.utils.PreProcessorUtils; | ||
|
||
import java.util.List; | ||
|
||
public class TranslationPreProcessor implements TranslationService { | ||
|
||
@Override | ||
public String getServiceId() { | ||
return null; | ||
} | ||
|
||
@Override | ||
public void setServiceId(String serviceId) { | ||
// leave empty | ||
} | ||
|
||
@Override | ||
public boolean isSupported(String srcLang, String trgLang) { | ||
return false; | ||
} | ||
|
||
@Override | ||
public void translate(List<TranslationObj> translationStrings) throws TranslationException { | ||
PreProcessorUtils.processForEligibleValues(translationStrings); | ||
} | ||
|
||
@Override | ||
public void close() { | ||
|
||
} | ||
|
||
@Override | ||
public String getExternalServiceEndPoint() { | ||
return null; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
translation-web/src/main/java/eu/europeana/api/translation/web/utils/PreProcessorUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package eu.europeana.api.translation.web.utils; | ||
|
||
import eu.europeana.api.translation.definitions.model.TranslationObj; | ||
|
||
import java.util.List; | ||
import java.util.regex.Pattern; | ||
import java.util.stream.Collectors; | ||
|
||
public class PreProcessorUtils { | ||
|
||
/** | ||
* Any value that has at least 2 unicode consecutive letters. The condition considered the | ||
* fact that there can be words with only 2 letters that retain sufficient meaning and are therefore reasonable to be translated, | ||
* especially when looking at languages other than English (see article - https://www.grammarly.com/blog/the-shortest-words-in-the-english-language/). | ||
*/ | ||
private static final String PATTERN = "\\p{IsAlphabetic}{2,}"; | ||
private static final Pattern isAlphabetic = Pattern.compile(PATTERN); | ||
|
||
|
||
/** | ||
* Check if the text present is an eligible value. | ||
* Eligible Value : Any value that has at least 2 unicode consecutive letters. | ||
* If value is not eligible, set isTranslated as false, which means we will not translate that text/value | ||
* @param translationObjs | ||
* @return | ||
*/ | ||
public static void processForEligibleValues(List<TranslationObj> translationObjs) { | ||
for (TranslationObj obj : translationObjs) { | ||
if (!isAlphabetic.matcher(obj.getText()).find()) { | ||
obj.setIsTranslated(false); | ||
} | ||
} | ||
} | ||
|
||
public static List<String> filterEligibleValues(List<String> texts) { | ||
return texts.stream().filter(value -> isAlphabetic.matcher(value).find()).collect(Collectors.toList()); | ||
} | ||
|
||
} |