From 44f117f76b6696ff1bbca9a5becde941b7fbc200 Mon Sep 17 00:00:00 2001 From: Srdjan Stevanetic Date: Mon, 27 Nov 2023 14:23:18 +0100 Subject: [PATCH 01/25] apache tika first impl --- pom.xml | 3 +- translation-service-apache-tika/pom.xml | 33 ++++++ .../ApacheTikaLangDetectService.java | 105 ++++++++++++++++++ .../PangeanicTranslationService.java | 2 +- 4 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 translation-service-apache-tika/pom.xml create mode 100644 translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java diff --git a/pom.xml b/pom.xml index 25de0f24..9978d7fb 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,8 @@ translation-service-pangeanic translation-web translation-tests - + + translation-service-apache-tika diff --git a/translation-service-apache-tika/pom.xml b/translation-service-apache-tika/pom.xml new file mode 100644 index 00000000..ccb94694 --- /dev/null +++ b/translation-service-apache-tika/pom.xml @@ -0,0 +1,33 @@ + + 4.0.0 + + eu.europeana.api + translation-api + 0.0.1-SNAPSHOT + + translation-service-apache-tika + translation-service-apache-tika + The Java APIs for the Apache Tika language detection services (part of Translation API) + + + ${basedir}/../${aggregate.report.xml} + + + + + eu.europeana.api + translation-service-common + 0.0.1-SNAPSHOT + + + org.apache.tika + tika-core + 2.9.1 + + + org.apache.tika + tika-langdetect-optimaize + 2.9.1 + + + \ No newline at end of file diff --git a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java new file mode 100644 index 00000000..e460b1ba --- /dev/null +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java @@ -0,0 +1,105 @@ +package 
eu.europeana.translation.service.apachetika; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.tika.langdetect.optimaize.OptimaizeLangDetector; +import org.apache.tika.language.detect.LanguageDetector; +import org.apache.tika.language.detect.LanguageResult; +import eu.europeana.api.translation.service.LanguageDetectionService; +import eu.europeana.api.translation.service.exception.LanguageDetectionException; + +public class ApacheTikaLangDetectService implements LanguageDetectionService { + + protected static final Logger LOG = LogManager.getLogger(ApacheTikaLangDetectService.class); + private LanguageDetector detector; + private String serviceId; + + private Set supportedLanguages = Set.of("af", "an", "ar", "ast", "be", "br", "ca", "bg", + "bn", "cs", "cy", "da", "de", "el", "es", "et", "eu", "fa", "fi", "fr", "ga", "gl", "gu", "he", "hi", + "hr", "ht", "hu", "id", "is", "it", "ja", "km", "kn", "ko", "lt", "lv", "mk", "ml", "mr", "ms", "mt", + "ne", "nl", "no", "oc", "pa", "pl", "pt", "ro", "ru", "sk", "sl", "so", "sq", "sr", "sv", "sw", "ta", "te", "th", "tl", + "tr", "uk", "ur", "vi", "wa", "yi", "zh-cn", "zh-tw"); + + public ApacheTikaLangDetectService() { + this.detector = new OptimaizeLangDetector().loadModels(); + } + + @Override + public boolean isSupported(String srcLang) { + return supportedLanguages.contains(srcLang); + } + + @Override + public List detectLang(List texts, String langHint) throws LanguageDetectionException { + if (texts.isEmpty()) { + return new ArrayList<>(); + } + + List detectedLangs = new ArrayList(); + for(String text : texts) { + + //returns all tika languages sorted by score + List tikaLanguages = this.detector.detectAll(text); + if(tikaLanguages.isEmpty()) { + detectedLangs.add(null); + continue; + } + + //if langHint is null, return the first detected 
language (has the highest confidence) + if(StringUtils.isBlank(langHint)) { + detectedLangs.add(tikaLanguages.get(0).getLanguage()); + continue; + } + + /* + * in case lang hint is not null, check if it myabe exists among the langs with the highest confidence, + * and if so return that as a detected lang + */ + String detectedLang=tikaLanguages.get(0).getLanguage(); + if(detectedLang.equals(langHint)) { + detectedLangs.add(langHint); + continue; + } + float confidence=tikaLanguages.get(0).getRawScore(); + for(int i=1;i toTranslatePerLa private List getObjectsWithSourceLanguage(List translationObjs, String sourceLanguage) { return translationObjs.stream() - .filter(to -> sourceLanguage.equals(to.getSourceLang())).toList(); + .filter(to -> sourceLanguage.equals(to.getSourceLang())).collect(Collectors.toList()); } From 174d3d71d1323d2fda60f4b9609b039101d29bf6 Mon Sep 17 00:00:00 2001 From: SrdjanStevanetic Date: Tue, 28 Nov 2023 10:21:39 +0100 Subject: [PATCH 02/25] apache-tika lang detect bean config and integration test --- .../ApacheTikaLangDetectService.java | 12 ++++------ .../DummyApacheTikaLangDetectService.java | 23 +++++++++++++++++++ .../tests/IntegrationTestUtils.java | 1 + .../tests/web/LangDetectionRestIT.java | 22 +++++++++++++++++- .../content/lang_detection_apache_tika.json | 8 +++++++ translation-web/pom.xml | 6 +++++ .../api/translation/config/BeanNames.java | 2 +- .../config/TranslationApiAutoconfig.java | 11 +++++++++ .../src/main/resources/translation.properties | 3 --- .../translation_service_configuration.json | 6 ++++- 10 files changed, 80 insertions(+), 14 deletions(-) create mode 100644 translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/DummyApacheTikaLangDetectService.java create mode 100644 translation-tests/src/integration-test/resources/content/lang_detection_apache_tika.json diff --git 
a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java index e460b1ba..a368b3d2 100644 --- a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java @@ -19,7 +19,7 @@ public class ApacheTikaLangDetectService implements LanguageDetectionService { private String serviceId; private Set supportedLanguages = Set.of("af", "an", "ar", "ast", "be", "br", "ca", "bg", - "bn", "cs", "cy", "da", "de", "el", "es", "et", "eu", "fa", "fi", "fr", "ga", "gl", "gu", "he", "hi", + "bn", "cs", "cy", "da", "de", "el", "en", "es", "et", "eu", "fa", "fi", "fr", "ga", "gl", "gu", "he", "hi", "hr", "ht", "hu", "id", "is", "it", "ja", "km", "kn", "ko", "lt", "lv", "mk", "ml", "mr", "ms", "mt", "ne", "nl", "no", "oc", "pa", "pl", "pt", "ro", "ru", "sk", "sl", "so", "sq", "sr", "sv", "sw", "ta", "te", "th", "tl", "tr", "uk", "ur", "vi", "wa", "yi", "zh-cn", "zh-tw"); @@ -41,14 +41,12 @@ public List detectLang(List texts, String langHint) throws Langu List detectedLangs = new ArrayList(); for(String text : texts) { - //returns all tika languages sorted by score List tikaLanguages = this.detector.detectAll(text); if(tikaLanguages.isEmpty()) { detectedLangs.add(null); continue; } - //if langHint is null, return the first detected language (has the highest confidence) if(StringUtils.isBlank(langHint)) { detectedLangs.add(tikaLanguages.get(0).getLanguage()); @@ -57,17 +55,17 @@ public List detectLang(List texts, String langHint) throws Langu /* * in case lang hint is not null, check if it myabe exists among the langs with the highest confidence, - * and if so return that as a detected lang + * and if so return the 
langHint as a detected lang, if not return the first one */ String detectedLang=tikaLanguages.get(0).getLanguage(); - if(detectedLang.equals(langHint)) { + if(langHint.equals(detectedLang)) { detectedLangs.add(langHint); continue; } float confidence=tikaLanguages.get(0).getRawScore(); for(int i=1;i detectLang(List texts, String langHint) throws Langu } detectedLangs.add(detectedLang); } - return detectedLangs; - } @Override diff --git a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/DummyApacheTikaLangDetectService.java b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/DummyApacheTikaLangDetectService.java new file mode 100644 index 00000000..578658e6 --- /dev/null +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/DummyApacheTikaLangDetectService.java @@ -0,0 +1,23 @@ +package eu.europeana.translation.service.apachetika; + +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.lang3.StringUtils; +import eu.europeana.api.translation.service.exception.LanguageDetectionException; + +public class DummyApacheTikaLangDetectService extends ApacheTikaLangDetectService { + + public DummyApacheTikaLangDetectService() { + super(); + } + + @Override + public List detectLang(List texts, String langHint) throws LanguageDetectionException { + String value = StringUtils.isNotBlank(langHint) ? 
langHint : "en"; + ArrayList ret = new ArrayList<>(); + for (int i = 0; i < texts.size(); i++) { + ret.add(value); + } + return ret; + } +} diff --git a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/IntegrationTestUtils.java b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/IntegrationTestUtils.java index 682b4f13..0f8373b3 100644 --- a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/IntegrationTestUtils.java +++ b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/IntegrationTestUtils.java @@ -11,6 +11,7 @@ public abstract class IntegrationTestUtils { public static final String BASE_URL_DETECT = "/detect"; public static final String LANG_DETECT_REQUEST = "/content/lang_detection_request.json"; + public static final String LANG_DETECT_APACHE_TIKA = "/content/lang_detection_apache_tika.json"; public static final String LANG_DETECT_PANGEANIC_REQUEST = "/content/pangeanic/detect/lang_detection_pangeanic_request.json"; public static final String LANG_DETECT_PANGEANIC_RESPONSE = "/content/pangeanic/detect/lang_detection_pangeanic_response.json"; diff --git a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java index 6185f7c2..fc471871 100644 --- a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java +++ b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java @@ -68,7 +68,27 @@ void langDetection() throws Exception { String serviceFieldValue = json.getString(TranslationAppConstants.SERVICE); assertNotNull(serviceFieldValue); } - + + @Test + void langDetectionApacheTika() throws Exception { + String requestJson = getJsonStringInput(LANG_DETECT_APACHE_TIKA); + String result = mockMvc 
+ .perform( + post(BASE_URL_DETECT) + .header(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE) + .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE) + .content(requestJson)) + .andExpect(status().isOk()) + .andReturn().getResponse().getContentAsString(); + + assertNotNull(result); + JSONObject json = new JSONObject(result); + List langs = Collections.singletonList(json.getString(TranslationAppConstants.LANGS)); + assertTrue(langs.size()>0); + String serviceFieldValue = json.getString(TranslationAppConstants.SERVICE); + assertNotNull(serviceFieldValue); + } + @Test void langDetectionGoogle() throws Exception { String requestJson = getJsonStringInput(LANG_DETECT_REQUEST_3); diff --git a/translation-tests/src/integration-test/resources/content/lang_detection_apache_tika.json b/translation-tests/src/integration-test/resources/content/lang_detection_apache_tika.json new file mode 100644 index 00000000..e7aace47 --- /dev/null +++ b/translation-tests/src/integration-test/resources/content/lang_detection_apache_tika.json @@ -0,0 +1,8 @@ +{ + "text": [ + "Ovo je tekst na srpskom jeziku", + "Das ist ein deutscher Text", + "This is an English text" + ], + "service": "APACHE-TIKA" +} \ No newline at end of file diff --git a/translation-web/pom.xml b/translation-web/pom.xml index 3c3541d6..1cd957ab 100644 --- a/translation-web/pom.xml +++ b/translation-web/pom.xml @@ -71,6 +71,12 @@ 0.0.1-SNAPSHOT + + eu.europeana.api + translation-service-apache-tika + 0.0.1-SNAPSHOT + + eu.europeana.api.commons commons-web diff --git a/translation-web/src/main/java/eu/europeana/api/translation/config/BeanNames.java b/translation-web/src/main/java/eu/europeana/api/translation/config/BeanNames.java index 4d7cfdb8..7a2168ba 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/config/BeanNames.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/config/BeanNames.java @@ -2,7 +2,7 @@ public interface BeanNames { - + String 
BEAN_APACHE_TIKA_LANG_DETECT_SERVICE = "apacheTikaLangDetectService"; String BEAN_PANGEANIC_LANG_DETECT_SERVICE = "pangeanicLangDetectService"; String BEAN_PANGEANIC_TRANSLATION_SERVICE = "pangeanicTranslationService"; String BEAN_GOOGLE_TRANSLATION_CLIENT_WRAPPER = "googleTranslationClientWrapper"; diff --git a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java index 04251553..d87f62aa 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java @@ -45,6 +45,8 @@ import eu.europeana.api.translation.service.pangeanic.PangeanicLangDetectService; import eu.europeana.api.translation.service.pangeanic.PangeanicTranslationService; import eu.europeana.api.translation.web.service.RedisCacheService; +import eu.europeana.translation.service.apachetika.ApacheTikaLangDetectService; +import eu.europeana.translation.service.apachetika.DummyApacheTikaLangDetectService; import io.lettuce.core.ClientOptions; import io.lettuce.core.SslOptions; @@ -99,6 +101,15 @@ public GoogleTranslationServiceClientWrapper getGoogleTranslationServiceClientWr translationConfig.getGoogleTranslateProjectId(), translationConfig.useGoogleHttpClient()); } + @Bean(BeanNames.BEAN_APACHE_TIKA_LANG_DETECT_SERVICE) + public ApacheTikaLangDetectService getApacheTikaLangDetectService() { + if (useDummyServices) { + return new DummyApacheTikaLangDetectService(); + } else { + return new ApacheTikaLangDetectService(); + } + } + @Bean(BeanNames.BEAN_PANGEANIC_LANG_DETECT_SERVICE) public PangeanicLangDetectService getPangeanicLangDetectService() { if (useDummyServices) { diff --git a/translation-web/src/main/resources/translation.properties b/translation-web/src/main/resources/translation.properties index df759cd4..b5ba568b 
100644 --- a/translation-web/src/main/resources/translation.properties +++ b/translation-web/src/main/resources/translation.properties @@ -14,6 +14,3 @@ auth.write.enabled=true # preffer http over the grpc protocol translation.google.usehttpclient=false - -# For using dummy services -#use.dummy.services=true \ No newline at end of file diff --git a/translation-web/src/main/resources/translation_service_configuration.json b/translation-web/src/main/resources/translation_service_configuration.json index dfa609f2..d7811800 100644 --- a/translation-web/src/main/resources/translation_service_configuration.json +++ b/translation-web/src/main/resources/translation_service_configuration.json @@ -11,7 +11,11 @@ { "id": "GOOGLE" , "classname": "eu.europeana.api.translation.service.google.GoogleLangDetectService" - } + }, + { + "id": "APACHE-TIKA" + , "classname": "eu.europeana.translation.service.apachetika.ApacheTikaLangDetectService" + } ], "default": "PANGEANIC" }, From 18d687d86e183c5cea60338eb2061a2eb35b76ac Mon Sep 17 00:00:00 2001 From: SrdjanStevanetic Date: Tue, 28 Nov 2023 11:52:55 +0100 Subject: [PATCH 03/25] added lang hint for apache-tika (comment out) --- .../apachetika/ApacheTikaLangDetectService.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java index a368b3d2..8c47601f 100644 --- a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java @@ -38,6 +38,20 @@ public List detectLang(List texts, String langHint) throws Langu if (texts.isEmpty()) { return new ArrayList<>(); } + + /* + * this code can be used 
for testing the lang hint, but the setPriors map cannot be sent empty or null + */ +// try { +// Map languageProbabilities = new HashMap(); +// if(! StringUtils.isBlank(langHint)) { +// languageProbabilities.put(langHint, (float) 1.0); +// } +// this.detector.setPriors(languageProbabilities); +// } catch (IOException e) { +// throw new LanguageDetectionException( +// "Invalid setting of the language hint for the Apache-Tika service!", -1, e); +// } List detectedLangs = new ArrayList(); for(String text : texts) { From 80f85a32c5ed648dd3f295cade88e46aa05dc709 Mon Sep 17 00:00:00 2001 From: SrdjanStevanetic Date: Tue, 28 Nov 2023 13:31:15 +0100 Subject: [PATCH 04/25] small change to test --- .../service/apachetika/ApacheTikaLangDetectService.java | 1 + 1 file changed, 1 insertion(+) diff --git a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java index 8c47601f..35ef8041 100644 --- a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java @@ -77,6 +77,7 @@ public List detectLang(List texts, String langHint) throws Langu continue; } float confidence=tikaLanguages.get(0).getRawScore(); + for(int i=1;i Date: Tue, 28 Nov 2023 14:08:47 +0100 Subject: [PATCH 05/25] update key generation #EA-3607 --- .../translation/tests/web/TestEncoding.java | 23 +++++++++++++++++++ .../web/service/RedisCacheService.java | 8 +++++-- 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TestEncoding.java diff --git 
a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TestEncoding.java b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TestEncoding.java new file mode 100644 index 00000000..1c0f62a2 --- /dev/null +++ b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TestEncoding.java @@ -0,0 +1,23 @@ +package eu.europeana.api.translation.tests.web; + +import java.util.Base64; +import com.google.common.primitives.Ints; + +public class TestEncoding { + + public static void main(String args[]) { + + // to use inputText.hashCode() in the implementation + int hascode = 1234567890; + + System.out.println(Base64.getEncoder().encodeToString(Ints.toByteArray(hascode)).trim()); + + System.out.println( + new String(Base64.getEncoder().withoutPadding().encode(Ints.toByteArray(hascode)))); + + StringBuilder builder = (new StringBuilder()).append("de").append("en"); + byte[] hash = Base64.getEncoder().withoutPadding().encode(Ints.toByteArray(hascode)); + builder.append(new String(hash)); + System.out.println("Redis Key: " + builder.toString()); + } +} diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java index cd6043fb..b88ee46d 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java @@ -1,6 +1,7 @@ package eu.europeana.api.translation.web.service; import java.util.ArrayList; +import java.util.Base64; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -8,6 +9,7 @@ import org.apache.logging.log4j.Logger; import org.springframework.data.redis.connection.RedisConnectionFactory; import org.springframework.data.redis.core.RedisTemplate; +import com.google.common.primitives.Ints; 
import eu.europeana.api.translation.definitions.model.TranslationObj; import eu.europeana.api.translation.model.CachedTranslation; import io.micrometer.core.instrument.util.StringUtils; @@ -136,8 +138,10 @@ public void deleteAll() { * @return generated redis key */ public String generateRedisKey(String inputText, String sourceLang, String targetLang) { - String key = inputText + sourceLang + targetLang; - return String.valueOf(key.hashCode()); + StringBuilder builder = (new StringBuilder()).append(sourceLang).append(targetLang); + byte[] hash = Base64.getEncoder().withoutPadding().encode(Ints.toByteArray(inputText.hashCode())); + builder.append(new String(hash)); + return builder.toString(); } } From ee4142cf48eb708ed8a7f6d2ba22a844ce8a81d3 Mon Sep 17 00:00:00 2001 From: GordeaS Date: Tue, 28 Nov 2023 16:59:27 +0100 Subject: [PATCH 06/25] do not send translation request when language detection fails #EA-3625 --- .../pangeanic/PangeanicTranslationService.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java b/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java index 9171bf60..3b729d6b 100644 --- a/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java +++ b/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java @@ -105,8 +105,8 @@ public void translate(List translationObjs) throws TranslationEx return; } - // if source language is available for the first item it must be available for all if (translationObjs.get(0).getSourceLang() == null) { + // if the source language was not provided in the request, language detection needs to be called detectLanguages(translationObjs); } @@ -128,11 +128,6 @@ private void 
computeTranslations(List translationObjs) //the request has only one target language String targetLang = translationObjs.get(0).getTargetLang(); - //when language detection is used, some texts might still have no language (i.e. bellow threshold) - if(sourceLanguages.contains(null)) { - - } - for (String sourceLanguage : sourceLanguages) { if(sourceLanguages.size() == 1) { //not needed to iterate if all are in the same language, it will be only one translation request for all objects @@ -148,6 +143,12 @@ private void computeTranslations(List translationObjs) private void translateAndAccumulateResults(List toTranslatePerLanguage, String sourceLanguage, String targetLang) throws JSONException, TranslationException { + + if(sourceLanguage == null) { + //language not provided and not detected, skip translation request + return; + } + // send the translation request List translTexts = toTranslatePerLanguage.stream().map(to -> to.getText()).collect(Collectors.toList()); HttpPost translateRequest = PangeanicTranslationUtils.createTranslateRequest( From db28c7f689cb3d83a37e28b5e0b3caaa049be70b Mon Sep 17 00:00:00 2001 From: GordeaS Date: Tue, 28 Nov 2023 17:27:18 +0100 Subject: [PATCH 07/25] updating java version and caching in github actions #EA-3625 --- .github/workflows/build_test_analyse.yml | 26 ++++++++++++++---------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build_test_analyse.yml b/.github/workflows/build_test_analyse.yml index fdcf9ceb..2ef16a51 100644 --- a/.github/workflows/build_test_analyse.yml +++ b/.github/workflows/build_test_analyse.yml @@ -6,22 +6,26 @@ jobs: name: Build runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: # Shallow clones should be disabled for a better relevancy of analysis fetch-depth: 0 - - name: Set up JDK 11 - uses: actions/setup-java@v1 + - name: Set up JDK 17 + uses: actions/setup-java@v3 with: - java-version: 11 - - name: Cache Maven packages - 
uses: actions/cache@v1 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 + distribution: 'temurin' #should use the same as in the docker file + java-version: 17 + cache: 'maven' + #cache-dependency-path: 'sub-project/pom.xml' # optional +# SG: likley not needed as integrated in the V3 of setupjava action +# - name: Cache Maven packages +# uses: actions/cache@v1 +# with: +# path: ~/.m2 +# key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} +# restore-keys: ${{ runner.os }}-m2 - name: Cache SonarCloud packages - uses: actions/cache@v1 + uses: actions/cache@v3 with: path: ~/.sonar/cache key: ${{ runner.os }}-sonar From 0027f932462a9b018221660abccb44b8e3ad9750 Mon Sep 17 00:00:00 2001 From: gsergiu <4517853+gsergiu@users.noreply.github.com> Date: Tue, 28 Nov 2023 17:31:01 +0100 Subject: [PATCH 08/25] Update build_test_analyse.yml --- .github/workflows/build_test_analyse.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_test_analyse.yml b/.github/workflows/build_test_analyse.yml index 2ef16a51..4a4868c6 100644 --- a/.github/workflows/build_test_analyse.yml +++ b/.github/workflows/build_test_analyse.yml @@ -17,7 +17,7 @@ jobs: java-version: 17 cache: 'maven' #cache-dependency-path: 'sub-project/pom.xml' # optional -# SG: likley not needed as integrated in the V3 of setupjava action +# SG: likely not needed as integrated in the V3 of setupjava action # - name: Cache Maven packages # uses: actions/cache@v1 # with: From ff99ace19da4e12e8070f970767605124cc3a75f Mon Sep 17 00:00:00 2001 From: GordeaS Date: Tue, 28 Nov 2023 17:59:47 +0100 Subject: [PATCH 09/25] fix broken alignement --- .github/workflows/build_test_analyse.yml | 13 ++++++------- pom.xml | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_test_analyse.yml b/.github/workflows/build_test_analyse.yml index 2ef16a51..99b7991b 100644 --- 
a/.github/workflows/build_test_analyse.yml +++ b/.github/workflows/build_test_analyse.yml @@ -17,13 +17,12 @@ jobs: java-version: 17 cache: 'maven' #cache-dependency-path: 'sub-project/pom.xml' # optional -# SG: likley not needed as integrated in the V3 of setupjava action -# - name: Cache Maven packages -# uses: actions/cache@v1 -# with: -# path: ~/.m2 -# key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} -# restore-keys: ${{ runner.os }}-m2 + #- name: Cache Maven packages # SG: likely not needed as integrated in the V3 of setupjava action + # uses: actions/cache@v1 + # with: + # path: ~/.m2 + # key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + # restore-keys: ${{ runner.os }}-m2 - name: Cache SonarCloud packages uses: actions/cache@v3 with: diff --git a/pom.xml b/pom.xml index 25de0f24..e94b7be0 100644 --- a/pom.xml +++ b/pom.xml @@ -48,6 +48,7 @@ 17 ${java.version} ${java.version} + ${java.version} 0.3.22-SNAPSHOT 1.3 From 569a5f762c36312329b83fb561cef3502ff9e16a Mon Sep 17 00:00:00 2001 From: GordeaS Date: Tue, 28 Nov 2023 18:08:31 +0100 Subject: [PATCH 10/25] test --- .github/workflows/build_test_analyse_17.yml | 32 +++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/workflows/build_test_analyse_17.yml diff --git a/.github/workflows/build_test_analyse_17.yml b/.github/workflows/build_test_analyse_17.yml new file mode 100644 index 00000000..333903a7 --- /dev/null +++ b/.github/workflows/build_test_analyse_17.yml @@ -0,0 +1,32 @@ +name: Build, Run Tests and Sonar Analysis +on: push + +jobs: + build: + name: Build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + # Shallow clones should be disabled for a better relevancy of analysis + fetch-depth: 0 + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: 'temurin' #should use the same as in the docker file + java-version: 17 + cache: 'maven' + #cache-dependency-path: 'sub-project/pom.xml' # optional + - name: Cache SonarCloud 
packages + uses: actions/cache@v3 + with: + path: ~/.sonar/cache + key: ${{ runner.os }}-sonar + restore-keys: ${{ runner.os }}-sonar + - name: Build, run tests and analyse + run: mvn -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Pcoverage -Dsonar.projectKey=europeana_translation-api + env: + # Needed to get some information about the pull request, if any + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # SonarCloud access token should be generated from https://sonarcloud.io/account/security/ + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} From b4cdba14ef77c3dfabea0026bd41a9b1587c842f Mon Sep 17 00:00:00 2001 From: gsergiu <4517853+gsergiu@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:10:40 +0100 Subject: [PATCH 11/25] Update build_test_analyse.yml --- .github/workflows/build_test_analyse.yml | 25 +++++++++++------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build_test_analyse.yml b/.github/workflows/build_test_analyse.yml index 99b7991b..fdcf9ceb 100644 --- a/.github/workflows/build_test_analyse.yml +++ b/.github/workflows/build_test_analyse.yml @@ -6,25 +6,22 @@ jobs: name: Build runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v2 with: # Shallow clones should be disabled for a better relevancy of analysis fetch-depth: 0 - - name: Set up JDK 17 - uses: actions/setup-java@v3 + - name: Set up JDK 11 + uses: actions/setup-java@v1 with: - distribution: 'temurin' #should use the same as in the docker file - java-version: 17 - cache: 'maven' - #cache-dependency-path: 'sub-project/pom.xml' # optional - #- name: Cache Maven packages # SG: likely not needed as integrated in the V3 of setupjava action - # uses: actions/cache@v1 - # with: - # path: ~/.m2 - # key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - # restore-keys: ${{ runner.os }}-m2 + java-version: 11 + - name: Cache Maven packages + uses: actions/cache@v1 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ 
hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 - name: Cache SonarCloud packages - uses: actions/cache@v3 + uses: actions/cache@v1 with: path: ~/.sonar/cache key: ${{ runner.os }}-sonar From 9db7ed4bde75ca022d94980b628280515003107d Mon Sep 17 00:00:00 2001 From: GordeaS Date: Tue, 28 Nov 2023 18:32:58 +0100 Subject: [PATCH 12/25] test --- .github/workflows/build_test_analyse.yml | 73 ++++++++++++------------ pom.xml | 12 ++++ 2 files changed, 47 insertions(+), 38 deletions(-) diff --git a/.github/workflows/build_test_analyse.yml b/.github/workflows/build_test_analyse.yml index 99b7991b..cb7a1230 100644 --- a/.github/workflows/build_test_analyse.yml +++ b/.github/workflows/build_test_analyse.yml @@ -1,38 +1,35 @@ -name: Build, Run Tests and Sonar Analysis -on: push - -jobs: - build: - name: Build - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - # Shallow clones should be disabled for a better relevancy of analysis - fetch-depth: 0 - - name: Set up JDK 17 - uses: actions/setup-java@v3 - with: - distribution: 'temurin' #should use the same as in the docker file - java-version: 17 - cache: 'maven' - #cache-dependency-path: 'sub-project/pom.xml' # optional - #- name: Cache Maven packages # SG: likely not needed as integrated in the V3 of setupjava action - # uses: actions/cache@v1 - # with: - # path: ~/.m2 - # key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - # restore-keys: ${{ runner.os }}-m2 - - name: Cache SonarCloud packages - uses: actions/cache@v3 - with: - path: ~/.sonar/cache - key: ${{ runner.os }}-sonar - restore-keys: ${{ runner.os }}-sonar - - name: Build, run tests and analyse - run: mvn -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Pcoverage -Dsonar.projectKey=europeana_translation-api - env: - # Needed to get some information about the pull request, if any - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # SonarCloud access token should be generated from 
https://sonarcloud.io/account/security/ - SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} +name: Build, Run Tests and Sonar Analysis +on: push + +jobs: + build: + name: Build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + # Shallow clones should be disabled for a better relevancy of analysis + fetch-depth: 0 + - name: Set up JDK 17 + uses: actions/setup-java@v1 + with: + java-version: 17 + - name: Cache Maven packages + uses: actions/cache@v1 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 + - name: Cache SonarCloud packages + uses: actions/cache@v1 + with: + path: ~/.sonar/cache + key: ${{ runner.os }}-sonar + restore-keys: ${{ runner.os }}-sonar + - name: Build, run tests and analyse + run: mvn -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Pcoverage -Dsonar.projectKey=europeana_translation-api + env: + # Needed to get some information about the pull request, if any + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # SonarCloud access token should be generated from https://sonarcloud.io/account/security/ + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} \ No newline at end of file diff --git a/pom.xml b/pom.xml index e94b7be0..f8afb338 100644 --- a/pom.xml +++ b/pom.xml @@ -273,6 +273,18 @@ + + true + org.apache.maven.plugins + maven-compiler-plugin + + + ${version.java} + ${version.java} + + org.apache.maven.plugins maven-surefire-plugin From ef52ad3429f50fbccc0614a89032fec7ed0d7a0f Mon Sep 17 00:00:00 2001 From: GordeaS Date: Wed, 29 Nov 2023 10:07:47 +0100 Subject: [PATCH 13/25] updating the JDK to 17 and actions version to 3 #EA-3625 --- .github/workflows/build_test_analyse.yml | 15 ++++------ .github/workflows/build_test_analyse_17.yml | 32 --------------------- 2 files changed, 6 insertions(+), 41 deletions(-) delete mode 100644 .github/workflows/build_test_analyse_17.yml diff --git a/.github/workflows/build_test_analyse.yml 
b/.github/workflows/build_test_analyse.yml index cb7a1230..a548094b 100644 --- a/.github/workflows/build_test_analyse.yml +++ b/.github/workflows/build_test_analyse.yml @@ -6,22 +6,19 @@ jobs: name: Build runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: # Shallow clones should be disabled for a better relevancy of analysis fetch-depth: 0 - name: Set up JDK 17 - uses: actions/setup-java@v1 + uses: actions/setup-java@v3 with: + distribution: 'temurin' #should use the same as in the docker file java-version: 17 - - name: Cache Maven packages - uses: actions/cache@v1 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 + cache: 'maven' + cache-dependency-path: 'sub-project/pom.xml' # optional - name: Cache SonarCloud packages - uses: actions/cache@v1 + uses: actions/cache@v3 with: path: ~/.sonar/cache key: ${{ runner.os }}-sonar diff --git a/.github/workflows/build_test_analyse_17.yml b/.github/workflows/build_test_analyse_17.yml deleted file mode 100644 index 333903a7..00000000 --- a/.github/workflows/build_test_analyse_17.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Build, Run Tests and Sonar Analysis -on: push - -jobs: - build: - name: Build - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - # Shallow clones should be disabled for a better relevancy of analysis - fetch-depth: 0 - - name: Set up JDK 17 - uses: actions/setup-java@v3 - with: - distribution: 'temurin' #should use the same as in the docker file - java-version: 17 - cache: 'maven' - #cache-dependency-path: 'sub-project/pom.xml' # optional - - name: Cache SonarCloud packages - uses: actions/cache@v3 - with: - path: ~/.sonar/cache - key: ${{ runner.os }}-sonar - restore-keys: ${{ runner.os }}-sonar - - name: Build, run tests and analyse - run: mvn -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Pcoverage -Dsonar.projectKey=europeana_translation-api - env: - # 
Needed to get some information about the pull request, if any - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # SonarCloud access token should be generated from https://sonarcloud.io/account/security/ - SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} From b583bc042164326818078ed377113c84fe7de1e6 Mon Sep 17 00:00:00 2001 From: SrdjanStevanetic Date: Wed, 29 Nov 2023 11:05:18 +0100 Subject: [PATCH 14/25] improved sonar bugs --- .../service/apachetika/ApacheTikaLangDetectService.java | 2 +- .../service/pangeanic/PangeanicTranslationService.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java index 35ef8041..cbb7fb99 100644 --- a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java @@ -79,7 +79,7 @@ public List detectLang(List texts, String langHint) throws Langu float confidence=tikaLanguages.get(0).getRawScore(); for(int i=1;i=confidence) { if(langHint.equals(tikaLanguages.get(i).getLanguage())) { detectedLang=langHint; break; diff --git a/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java b/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java index 001e16e9..a5c37e74 100644 --- a/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java +++ b/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java @@ -185,8 +185,7 @@ private void 
detectLanguages(List translationObjs) throws Transl // verify language detection response if (detectedLanguages == null || detectedLanguages.contains(null) || detectedLanguages.size() != translationObjs.size()) { throw new TranslationException( - "The translation cannot be performed. Detected languaged are incomplete. Expected " - + translationObjs.size() + " but received: " + detectedLanguages.size()); + "The translation cannot be performed. A list of detected languages is null or contains nulls."); } if (LOG.isDebugEnabled()) { From 7a5724010ed15d45d74cf7c5f6fc625e44135d36 Mon Sep 17 00:00:00 2001 From: GordeaS Date: Wed, 29 Nov 2023 11:18:31 +0100 Subject: [PATCH 15/25] add error code verification to error tests #EA-3567 --- .../europeana/api/translation/tests/web/LangDetectionRestIT.java | 1 + .../europeana/api/translation/tests/web/TranslationRestIT.java | 1 + 2 files changed, 2 insertions(+) diff --git a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java index 6185f7c2..1dd263a7 100644 --- a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java +++ b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java @@ -118,6 +118,7 @@ void langDetectionMissingTextParam() throws Exception { JSONObject obj = new JSONObject(response); Assertions.assertEquals(obj.get("success"), false); Assertions.assertEquals(obj.get("status"), HttpStatus.BAD_REQUEST.value()); + Assertions.assertEquals(obj.get("code"), "mandatory_param_empty"); Assertions.assertTrue(obj.has("error")); Assertions.assertTrue(obj.has("message")); Assertions.assertTrue(obj.has("timestamp")); diff --git a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TranslationRestIT.java 
b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TranslationRestIT.java index 608e5685..c2ecce34 100644 --- a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TranslationRestIT.java +++ b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TranslationRestIT.java @@ -245,6 +245,7 @@ void translateErrorNoTarget() throws Exception { JSONObject obj = new JSONObject(response); Assertions.assertEquals(obj.get("success"), false); Assertions.assertEquals(obj.get("status"), HttpStatus.BAD_REQUEST.value()); + Assertions.assertEquals(obj.get("code"), "mandatory_param_empty"); Assertions.assertTrue(obj.has("error")); Assertions.assertTrue(obj.has("message")); Assertions.assertTrue(obj.has("timestamp")); From cea3a6ae157208383dba1f4f9a9fd7d99f169946 Mon Sep 17 00:00:00 2001 From: SrdjanStevanetic Date: Wed, 29 Nov 2023 14:24:07 +0100 Subject: [PATCH 16/25] apache-tika language detect small code improvements --- pom.xml | 1 + translation-service-apache-tika/pom.xml | 4 +- .../ApacheTikaLangDetectService.java | 57 +++++++++++-------- .../tests/web/LangDetectionRestIT.java | 7 ++- 4 files changed, 40 insertions(+), 29 deletions(-) diff --git a/pom.xml b/pom.xml index 9978d7fb..7372ca68 100644 --- a/pom.xml +++ b/pom.xml @@ -75,6 +75,7 @@ **/model/**/* translation-tests/target/site/jacoco-aggregate/jacoco.xml ${aggregate.report.xml} + 2.9.1 diff --git a/translation-service-apache-tika/pom.xml b/translation-service-apache-tika/pom.xml index ccb94694..3a14eb40 100644 --- a/translation-service-apache-tika/pom.xml +++ b/translation-service-apache-tika/pom.xml @@ -22,12 +22,12 @@ org.apache.tika tika-core - 2.9.1 + ${apache.tika.version} org.apache.tika tika-langdetect-optimaize - 2.9.1 + ${apache.tika.version} \ No newline at end of file diff --git a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java 
b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java index cbb7fb99..27cbeb47 100644 --- a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java @@ -1,6 +1,7 @@ package eu.europeana.translation.service.apachetika; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Set; import org.apache.commons.lang3.StringUtils; @@ -36,11 +37,13 @@ public boolean isSupported(String srcLang) { @Override public List detectLang(List texts, String langHint) throws LanguageDetectionException { if (texts.isEmpty()) { - return new ArrayList<>(); + return Collections.emptyList(); } /* - * this code can be used for testing the lang hint, but the setPriors map cannot be sent empty or null + * this code can be used for testing the lang hint, but the setPriors map cannot be sent empty or null, + * and once it is set, it stays for the next call as well, so to empty it, the detector would probably + * need to be recreated */ // try { // Map languageProbabilities = new HashMap(); @@ -54,9 +57,10 @@ public List detectLang(List texts, String langHint) throws Langu // } List detectedLangs = new ArrayList(); + List tikaLanguages=null; for(String text : texts) { //returns all tika languages sorted by score - List tikaLanguages = this.detector.detectAll(text); + tikaLanguages = this.detector.detectAll(text); if(tikaLanguages.isEmpty()) { detectedLangs.add(null); continue; @@ -67,31 +71,36 @@ public List detectLang(List texts, String langHint) throws Langu continue; } - /* - * in case lang hint is not null, check if it myabe exists among the langs with the highest confidence, - * and if so return the langHint as a detected lang, if not return the first one - */ - String 
detectedLang=tikaLanguages.get(0).getLanguage(); - if(langHint.equals(detectedLang)) { - detectedLangs.add(langHint); - continue; - } - float confidence=tikaLanguages.get(0).getRawScore(); + detectedLangs.add(getDetectedLangByHint(tikaLanguages, langHint)); - for(int i=1;i=confidence) { - if(langHint.equals(tikaLanguages.get(i).getLanguage())) { - detectedLang=langHint; - break; - } - } - else { + } + return detectedLangs; + } + + /* + * In case lang hint is not null, check if it myabe exists among the langs with the highest confidence, + * and if so return the langHint as a detected lang, if not return the first one. + * The lang hint param cannot be null. + */ + private String getDetectedLangByHint(List tikaLanguages, String langHint) { + String detectedLang=tikaLanguages.get(0).getLanguage(); + if(langHint.equals(detectedLang)) { + return langHint; + } + + float confidence=tikaLanguages.get(0).getRawScore(); + for(int i=1;i=confidence) { + if(langHint.equals(tikaLanguages.get(i).getLanguage())) { + detectedLang=langHint; break; - } + } } - detectedLangs.add(detectedLang); + else { + break; + } } - return detectedLangs; + return detectedLang; } @Override diff --git a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java index fc471871..f62c962b 100644 --- a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java +++ b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.util.Collections; import java.util.List; +import org.codehaus.jettison.json.JSONArray; import org.codehaus.jettison.json.JSONObject; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -83,10 +84,10 @@ void langDetectionApacheTika() throws 
Exception { assertNotNull(result); JSONObject json = new JSONObject(result); - List langs = Collections.singletonList(json.getString(TranslationAppConstants.LANGS)); - assertTrue(langs.size()>0); + JSONArray langs = json.getJSONArray(TranslationAppConstants.LANGS); + assertTrue(langs.length()==3 && "hr".equals(langs.getString(0)) && "de".equals(langs.getString(1)) && "en".equals(langs.getString(2))); String serviceFieldValue = json.getString(TranslationAppConstants.SERVICE); - assertNotNull(serviceFieldValue); + assertTrue("APACHE-TIKA".equals(serviceFieldValue)); } @Test From 192b5a4fc5432f72c20fc7094d53cdd2e1013dfd Mon Sep 17 00:00:00 2001 From: gsergiu <4517853+gsergiu@users.noreply.github.com> Date: Wed, 29 Nov 2023 14:56:35 +0100 Subject: [PATCH 17/25] Update PangeanicTranslationService.java removed the collectors indirection --- .../service/pangeanic/PangeanicTranslationService.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java b/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java index a5c37e74..3b87ffce 100644 --- a/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java +++ b/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java @@ -3,7 +3,6 @@ import java.io.IOException; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpStatus; import org.apache.http.client.ClientProtocolException; @@ -122,7 +121,7 @@ private void computeTranslations(List translationObjs) throws JSONException, TranslationException { //collect source languages, they might be multiple - Set sourceLanguages = 
translationObjs.stream().map(to -> to.getSourceLang()).collect(Collectors.toSet()); + Set sourceLanguages = translationObjs.stream().map(to -> to.getSourceLang()).toSet(); List toTranslatePerLanguage; //the request has only one target language @@ -150,7 +149,7 @@ private void translateAndAccumulateResults(List toTranslatePerLa } // send the translation request - List translTexts = toTranslatePerLanguage.stream().map(to -> to.getText()).collect(Collectors.toList()); + List translTexts = toTranslatePerLanguage.stream().map(to -> to.getText()).toList(); HttpPost translateRequest = PangeanicTranslationUtils.createTranslateRequest( getExternalServiceEndPoint(), translTexts, targetLang, sourceLanguage, ""); @@ -161,7 +160,7 @@ private void translateAndAccumulateResults(List toTranslatePerLa private List getObjectsWithSourceLanguage(List translationObjs, String sourceLanguage) { return translationObjs.stream() - .filter(to -> sourceLanguage.equals(to.getSourceLang())).collect(Collectors.toList()); + .filter(to -> sourceLanguage.equals(to.getSourceLang())).toList(); } @@ -173,7 +172,7 @@ private void detectLanguages(List translationObjs) throws Transl // detect languages List texts = - translationObjs.stream().map(to -> to.getText()).collect(Collectors.toList()); + translationObjs.stream().map(to -> to.getText()).toList(); List detectedLanguages = null; try { detectedLanguages = langDetectService.detectLang(texts, null); From f0757324700f12239d61153f2ff58445b8e9be3a Mon Sep 17 00:00:00 2001 From: SrdjanStevanetic Date: Wed, 29 Nov 2023 15:03:51 +0100 Subject: [PATCH 18/25] small code improvement --- .../service/pangeanic/PangeanicTranslationService.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java b/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java index 
3b87ffce..b0c52725 100644 --- a/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java +++ b/translation-service-pangeanic/src/main/java/eu/europeana/api/translation/service/pangeanic/PangeanicTranslationService.java @@ -1,6 +1,7 @@ package eu.europeana.api.translation.service.pangeanic; import java.io.IOException; +import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.commons.lang3.StringUtils; @@ -121,7 +122,7 @@ private void computeTranslations(List translationObjs) throws JSONException, TranslationException { //collect source languages, they might be multiple - Set sourceLanguages = translationObjs.stream().map(to -> to.getSourceLang()).toSet(); + Set sourceLanguages = new HashSet<>(translationObjs.stream().map(to -> to.getSourceLang()).toList()); List toTranslatePerLanguage; //the request has only one target language From 1c2773221491efa99d97ad1df15bd530f59b8b94 Mon Sep 17 00:00:00 2001 From: SrdjanStevanetic Date: Wed, 29 Nov 2023 17:05:34 +0100 Subject: [PATCH 19/25] small refactoring of the apache tika lang detect --- .../ApacheTikaLangDetectService.java | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java index 27cbeb47..9742f378 100644 --- a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java @@ -61,33 +61,30 @@ public List detectLang(List texts, String langHint) throws Langu for(String text : texts) { //returns all tika languages sorted by score tikaLanguages = 
this.detector.detectAll(text); - if(tikaLanguages.isEmpty()) { - detectedLangs.add(null); - continue; - } - //if langHint is null, return the first detected language (has the highest confidence) - if(StringUtils.isBlank(langHint)) { - detectedLangs.add(tikaLanguages.get(0).getLanguage()); - continue; - } - detectedLangs.add(getDetectedLangByHint(tikaLanguages, langHint)); + detectedLangs.add(chooseDetectedLang(tikaLanguages, langHint)); } return detectedLangs; } - /* + /** * In case lang hint is not null, check if it myabe exists among the langs with the highest confidence, * and if so return the langHint as a detected lang, if not return the first one. - * The lang hint param cannot be null. */ - private String getDetectedLangByHint(List tikaLanguages, String langHint) { + private String chooseDetectedLang(List tikaLanguages, String langHint) { + if(tikaLanguages.isEmpty()) { + return null; + } + //if langHint is null, return the first detected language (has the highest confidence) + if(StringUtils.isBlank(langHint)) { + return tikaLanguages.get(0).getLanguage(); + } + String detectedLang=tikaLanguages.get(0).getLanguage(); if(langHint.equals(detectedLang)) { return langHint; } - float confidence=tikaLanguages.get(0).getRawScore(); for(int i=1;i=confidence) { From 720a6c7e876101769850c90059709cb9ae6fb7b2 Mon Sep 17 00:00:00 2001 From: SrdjanStevanetic Date: Wed, 29 Nov 2023 17:16:51 +0100 Subject: [PATCH 20/25] improved code smells --- .../ApacheTikaLangDetectService.java | 21 ++----------------- .../tests/web/LangDetectionRestIT.java | 3 ++- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java index 9742f378..3b6fb1ce 100644 --- 
a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java @@ -39,24 +39,8 @@ public List detectLang(List texts, String langHint) throws Langu if (texts.isEmpty()) { return Collections.emptyList(); } - - /* - * this code can be used for testing the lang hint, but the setPriors map cannot be sent empty or null, - * and once it is set, it stays for the next call as well, so to empty it, the detector would probably - * need to be recreated - */ -// try { -// Map languageProbabilities = new HashMap(); -// if(! StringUtils.isBlank(langHint)) { -// languageProbabilities.put(langHint, (float) 1.0); -// } -// this.detector.setPriors(languageProbabilities); -// } catch (IOException e) { -// throw new LanguageDetectionException( -// "Invalid setting of the language hint for the Apache-Tika service!", -1, e); -// } - List detectedLangs = new ArrayList(); + List detectedLangs = new ArrayList<>(); List tikaLanguages=null; for(String text : texts) { //returns all tika languages sorted by score @@ -92,8 +76,7 @@ private String chooseDetectedLang(List tikaLanguages, String lan detectedLang=langHint; break; } - } - else { + } else { break; } } diff --git a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java index 0cfbcd15..850fb7d2 100644 --- a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java +++ b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/LangDetectionRestIT.java @@ -1,5 +1,6 @@ package eu.europeana.api.translation.tests.web; +import static org.junit.jupiter.api.Assertions.assertEquals; import static 
org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; @@ -87,7 +88,7 @@ void langDetectionApacheTika() throws Exception { JSONArray langs = json.getJSONArray(TranslationAppConstants.LANGS); assertTrue(langs.length()==3 && "hr".equals(langs.getString(0)) && "de".equals(langs.getString(1)) && "en".equals(langs.getString(2))); String serviceFieldValue = json.getString(TranslationAppConstants.SERVICE); - assertTrue("APACHE-TIKA".equals(serviceFieldValue)); + assertEquals("APACHE-TIKA", serviceFieldValue); } @Test From 8a13b02ad40b079a39cd6a366c896c431cd4c637 Mon Sep 17 00:00:00 2001 From: SrdjanStevanetic Date: Wed, 29 Nov 2023 17:45:33 +0100 Subject: [PATCH 21/25] improved sonar bugs and smells --- .../definitions/exceptions/InvalidLanguageException.java | 4 ++++ .../api/translation/definitions/language/Language.java | 4 ++-- .../service/apachetika/ApacheTikaLangDetectService.java | 2 +- .../api/translation/web/service/RedisCacheService.java | 3 ++- .../api/translation/web/service/TranslationWebService.java | 2 +- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/exceptions/InvalidLanguageException.java b/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/exceptions/InvalidLanguageException.java index 144cc607..d04d18ff 100644 --- a/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/exceptions/InvalidLanguageException.java +++ b/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/exceptions/InvalidLanguageException.java @@ -10,4 +10,8 @@ public class InvalidLanguageException extends Exception{ public InvalidLanguageException(String message) { super(message); } + + public InvalidLanguageException(String message, Throwable cause) { + super(message, cause); + } 
} diff --git a/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/Language.java b/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/Language.java index f135f600..1e754405 100644 --- a/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/Language.java +++ b/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/Language.java @@ -45,7 +45,7 @@ public static Language validateSingle(String languageAbbrevation) throws Invalid try { result = Language.valueOf(languageAbbrevation.trim().toUpperCase(Locale.ROOT)); } catch (IllegalArgumentException e) { - throw new InvalidLanguageException("Language value '" + languageAbbrevation + "' is not valid"); + throw new InvalidLanguageException("Language value '" + languageAbbrevation + "' is not valid", e); } return result; } @@ -62,8 +62,8 @@ public static List validateMultiple(String languageAbbrevations) throw throw new InvalidLanguageException("Empty language value"); } - List result = new ArrayList<>(); String[] languages = languageAbbrevations.split(SEPARATOR); + List result = new ArrayList<>(languages.length); for (String language: languages) { result.add(validateSingle(language)); } diff --git a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java index 3b6fb1ce..14dd70bd 100644 --- a/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java +++ b/translation-service-apache-tika/src/main/java/eu/europeana/translation/service/apachetika/ApacheTikaLangDetectService.java @@ -40,7 +40,7 @@ public List detectLang(List texts, String langHint) throws Langu return Collections.emptyList(); } - List detectedLangs = new ArrayList<>(); + 
List detectedLangs = new ArrayList<>(texts.size()); List tikaLanguages=null; for(String text : texts) { //returns all tika languages sorted by score diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java index b88ee46d..73906d03 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java @@ -53,8 +53,9 @@ public void fillWithCachedTranslations(List translationObjs) { if (redisResponse == null || redisResponse.size() != cacheableTranslations.size()) { // ensure that the response size corresponds to request size // this should not happen, but better use defensive programming + int redisSize=redisResponse==null ? 0 : redisResponse.size(); logger.warn("Redis response size {} doesn't match the request size{}, for keys: {}", - redisResponse.size(), cacheableTranslations.size(), cacheKeys); + redisSize, cacheableTranslations.size(), cacheKeys); return; } diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/service/TranslationWebService.java b/translation-web/src/main/java/eu/europeana/api/translation/web/service/TranslationWebService.java index d4ac76b1..2f4c18af 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/service/TranslationWebService.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/service/TranslationWebService.java @@ -125,7 +125,7 @@ CachedTranslationService instantiateCachedTranslationService(boolean useCaching, private List buildTranslationObjectList(TranslationRequest translationRequest) { // create a list of objects to be translated - List translObjs = new ArrayList(); + List translObjs = new ArrayList(translationRequest.getText().size()); for (String inputText : translationRequest.getText()) { 
TranslationObj newTranslObj = new TranslationObj(); newTranslObj.setSourceLang(translationRequest.getSource()); From 738beba9d7e0057be1bc038fb039ab447996b28c Mon Sep 17 00:00:00 2001 From: GordeaS Date: Thu, 30 Nov 2023 09:41:40 +0100 Subject: [PATCH 22/25] fix for isCacheable condition #EA-3484 --- .../api/translation/web/service/RedisCacheService.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java index 73906d03..ac376f7d 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java @@ -83,9 +83,15 @@ private void updateFromCachedTranslation(TranslationObj translationObj, } } + /** + * verifies is the source language and translations are available in the object + * NOTE: currently we rely that the calling methods are verifying the availability of the target language and original text + * @param translationObj the translation object to verify if it should be cached + * @return true is source language and translation are available + */ private boolean isCacheable(TranslationObj translationObj) { return translationObj.getSourceLang() != null - && StringUtils.isNotEmpty(translationObj.getText()); + && StringUtils.isNotEmpty(translationObj.getTranslation()); } /** From c6523c803c1395d9f4d1ea3a9f3317e0c7bb5402 Mon Sep 17 00:00:00 2001 From: GordeaS Date: Thu, 30 Nov 2023 11:30:15 +0100 Subject: [PATCH 23/25] fisx for isCacheable amd added hasTranslation() #EA-3484 --- .../tests/web/TranslationRestIT.java | 4 +++- .../web/service/CachedTranslationService.java | 2 +- .../web/service/RedisCacheService.java | 20 +++++++++++++++---- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git 
a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TranslationRestIT.java b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TranslationRestIT.java index c2ecce34..c14ede40 100644 --- a/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TranslationRestIT.java +++ b/translation-tests/src/integration-test/java/eu/europeana/api/translation/tests/web/TranslationRestIT.java @@ -181,7 +181,9 @@ void translationWithCaching() throws Exception { //check that there are data in the cache redisCacheService.fillWithCachedTranslations(translObjs); - assertTrue(translObjs.stream().filter(el -> el.getIsCached()).collect(Collectors.toList()).size()==2); + final List cachedTranslations = translObjs.stream().filter(el -> el.getIsCached()).toList(); + //check if all are availble in the cache + assertTrue(cachedTranslations.size() == translObjs.size()); String cachedResult = mockMvc .perform( diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/service/CachedTranslationService.java b/translation-web/src/main/java/eu/europeana/api/translation/web/service/CachedTranslationService.java index b583350c..14517d37 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/service/CachedTranslationService.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/service/CachedTranslationService.java @@ -48,7 +48,7 @@ public void translate(List translationObjs) throws TranslationEx } List toTranslate = translationObjs.stream().filter( - t -> t.getTranslation() == null).collect(Collectors.toList()); + t -> t.getTranslation() == null).toList(); if(toTranslate.isEmpty()) { //all entries retrieved from cache, processing complete diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java index 
ac376f7d..712270f4 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java @@ -84,16 +84,28 @@ private void updateFromCachedTranslation(TranslationObj translationObj, } /** - * verifies is the source language and translations are available in the object + * verifies if the source language and text are available in the object + * This method is used both for verifying the cacheability for retrieval and for storage * NOTE: currently we rely that the calling methods are verifying the availability of the target language and original text * @param translationObj the translation object to verify if it should be cached - * @return true is source language and translation are available + * @see #hasTranslation(TranslationObj) the additional check of the translation availability, applied only when storing + * @return true if source language and text are available */ private boolean isCacheable(TranslationObj translationObj) { return translationObj.getSourceLang() != null - && StringUtils.isNotEmpty(translationObj.getTranslation()); + && StringUtils.isNotEmpty(translationObj.getText()); } + /** + * This method indicates if the object has the target language and the translation available + * @param translationObj object to verify + * @return true if both the target language and the translation are available + */ + private boolean hasTranslation(TranslationObj translationObj) { + return translationObj.getTargetLang() != null + && StringUtils.isNotEmpty(translationObj.getTranslation()); + } + /** * Method to store translations into the cache.
Only objects that are not marked as existing in the cache and fullfiling the {@link #isCacheable(TranslationObj)} criteria will be written into the cache * @param translationObjs the translations to be written into the cache @@ -102,7 +114,7 @@ public void store(List translationObjs) { Map valueMap = new HashMap<>(); String key; for (TranslationObj translObj : translationObjs) { - if (isCacheable(translObj) && !translObj.getIsCached()) { + if (isCacheable(translObj) && hasTranslation(translObj) && !translObj.getIsCached()) { // String key = translObj.getCacheKey(); key = generateRedisKey(translObj.getText(), translObj.getSourceLang(), translObj.getTargetLang()); From 812b3eb1e867431beb05fc45694db7451548ab96 Mon Sep 17 00:00:00 2001 From: GordeaS Date: Thu, 30 Nov 2023 17:04:55 +0100 Subject: [PATCH 24/25] addressing code quality issues, remove warnings which are false positives #EA-3626 --- .../definitions/language/LanguagePair.java | 86 ++++++++++--------- .../google/GoogleTranslationService.java | 3 +- .../config/TranslationApiAutoconfig.java | 1 + .../translation/config/TranslationConfig.java | 2 +- .../api/translation/config/WebMvcConfig.java | 1 + .../serialization/JsonRedisSerializer.java | 2 +- .../web/TranslationErrorController.java | 3 +- .../web/service/RedisCacheService.java | 3 +- 8 files changed, 55 insertions(+), 46 deletions(-) diff --git a/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/LanguagePair.java b/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/LanguagePair.java index 7bd91e89..d73b8876 100644 --- a/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/LanguagePair.java +++ b/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/LanguagePair.java @@ -5,61 +5,65 @@ import eu.europeana.api.translation.definitions.vocabulary.TranslationAppConstants; /** - * Class to hold the Language pair values 
supported by the Translation services - * Mostly for the future Translation API, when we have more than one translation service + * Class to hold the Language pair values supported by the Translation services Mostly for the + * future Translation API, when we have more than one translation service * * @author Hugo * @since 5 Apr 2023 */ public class LanguagePair implements Comparable { - private String srcLang; - private String targetLang; + private String srcLang; + private String targetLang; - public LanguagePair(String srcLang, @NotNull String targetLang) { - this.srcLang = srcLang; - this.targetLang = targetLang; + public LanguagePair(String srcLang, @NotNull String targetLang) { + this.srcLang = srcLang; + this.targetLang = targetLang; + } + + public String getSrcLang() { + return srcLang; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; } - public String getSrcLang() { - return srcLang; + if (!(this.getClass() == obj.getClass())) { + return false; } - @Override - public boolean equals(Object obj) { - if (!(obj instanceof LanguagePair)) { - return false; - } + LanguagePair pair = (LanguagePair) obj; + return StringUtils.equals(targetLang, pair.targetLang) + && StringUtils.equals(srcLang, pair.srcLang); + } - LanguagePair pair = (LanguagePair) obj; - return StringUtils.equals(targetLang, pair.targetLang) - && StringUtils.equals(srcLang, pair.srcLang); + @Override + public int compareTo(LanguagePair pair) { + int ret = targetLang.compareTo(pair.targetLang); + if (ret == 0) { + ret = StringUtils.compare(srcLang, pair.srcLang); } + return ret; + } - @Override - public int compareTo(LanguagePair pair) { - int ret = targetLang.compareTo(pair.targetLang); - if(ret == 0) { - ret = StringUtils.compare(srcLang, pair.srcLang); - } - return ret; - } - - @Override - public String toString() { - return generateKey(srcLang, targetLang); - } + @Override + public String toString() { + return generateKey(srcLang, targetLang); + } - 
public static String generateKey(String srcLang, String targetLang) { - return srcLang + TranslationAppConstants.LANG_DELIMITER + targetLang; - } - - @Override - public int hashCode() { - return srcLang==null ? targetLang.hashCode() : srcLang.hashCode() + targetLang.hashCode(); - } + public static String generateKey(String srcLang, String targetLang) { + return srcLang + TranslationAppConstants.LANG_DELIMITER + targetLang; + } - public String getTargetLang() { - return targetLang; - } + @Override + public int hashCode() { + return srcLang == null ? targetLang.hashCode() : srcLang.hashCode() + targetLang.hashCode(); + } + + public String getTargetLang() { + return targetLang; + } } diff --git a/translation-service-google/src/main/java/eu/europeana/api/translation/service/google/GoogleTranslationService.java b/translation-service-google/src/main/java/eu/europeana/api/translation/service/google/GoogleTranslationService.java index 8ae0666a..600f15ef 100644 --- a/translation-service-google/src/main/java/eu/europeana/api/translation/service/google/GoogleTranslationService.java +++ b/translation-service-google/src/main/java/eu/europeana/api/translation/service/google/GoogleTranslationService.java @@ -7,6 +7,7 @@ import com.google.cloud.translate.v3.TranslateTextRequest.Builder; import com.google.cloud.translate.v3.TranslateTextResponse; import com.google.cloud.translate.v3.Translation; +import com.google.cloud.translate.v3.TranslationOrBuilder; import eu.europeana.api.translation.definitions.model.TranslationObj; import eu.europeana.api.translation.service.AbstractTranslationService; import eu.europeana.api.translation.service.exception.TranslationException; @@ -71,7 +72,7 @@ public void translate(List translationObjs) throws TranslationEx } - private void updateFromTranslation( TranslationObj translationObj, Translation translation) { + private void updateFromTranslation( TranslationObj translationObj, TranslationOrBuilder translation) { 
if(translationObj.getSourceLang()==null) { translationObj.setSourceLang(translation.getDetectedLanguageCode()); } diff --git a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java index d87f62aa..5fda80ea 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java @@ -175,6 +175,7 @@ private LettuceConnectionFactory getRedisConnectionFactory() { // if redis secure protocol is used (rediss vs. redis) boolean sslEnabled = translationConfig.getRedisConnectionUrl().startsWith("rediss"); if (sslEnabled) { + @SuppressWarnings("external_findsecbugs:PATH_TRAVERSAL_IN") // the trustore path is not user input but application config final File truststore = new File(FilenameUtils.normalize(translationConfig.getTruststorePath())); SslOptions sslOptions = SslOptions.builder().jdkSslProvider() .truststore(truststore, diff --git a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationConfig.java b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationConfig.java index 114c043d..d321005f 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationConfig.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationConfig.java @@ -19,7 +19,7 @@ @PropertySource(value = "translation.user.properties", ignoreResourceNotFound = true)}) public class TranslationConfig{ - private final Logger LOG = LogManager.getLogger(TranslationConfig.class); + private static final Logger LOG = LogManager.getLogger(TranslationConfig.class); /** Matches spring.profiles.active property in test/resource application.properties file */ public static final String ACTIVE_TEST_PROFILE = "test"; diff --git 
a/translation-web/src/main/java/eu/europeana/api/translation/config/WebMvcConfig.java b/translation-web/src/main/java/eu/europeana/api/translation/config/WebMvcConfig.java index 017d27fe..ced49a62 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/config/WebMvcConfig.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/config/WebMvcConfig.java @@ -19,6 +19,7 @@ public class WebMvcConfig implements WebMvcConfigurer { /** Setup CORS for all GET, HEAD and OPTIONS, requests. */ @Override + @SuppressWarnings("external_findsecbugs:PERMISSIVE_CORS") //the API is public public void addCorsMappings(CorsRegistry registry) { registry .addMapping("/**") diff --git a/translation-web/src/main/java/eu/europeana/api/translation/serialization/JsonRedisSerializer.java b/translation-web/src/main/java/eu/europeana/api/translation/serialization/JsonRedisSerializer.java index 74ef586a..a67ecadf 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/serialization/JsonRedisSerializer.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/serialization/JsonRedisSerializer.java @@ -39,7 +39,7 @@ public CachedTranslation deserialize(byte[] bytes) throws SerializationException try { return om.readValue(bytes, CachedTranslation.class); } catch (IOException e) { - throw new SerializationException(e.getMessage(), e); + throw new SerializationException("Cannot deserialize redis response: " + e.getMessage()); } } diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/TranslationErrorController.java b/translation-web/src/main/java/eu/europeana/api/translation/web/TranslationErrorController.java index 596ce47b..216dfc71 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/TranslationErrorController.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/TranslationErrorController.java @@ -6,6 +6,7 @@ import org.springframework.boot.web.error.ErrorAttributeOptions; 
import org.springframework.boot.web.servlet.error.ErrorAttributes; import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.bind.annotation.RestController; import eu.europeana.api.commons.web.http.HttpHeaders; @@ -21,7 +22,7 @@ public TranslationErrorController(ErrorAttributes errorAttributes) { } - @RequestMapping(value = "/error", produces = {HttpHeaders.CONTENT_TYPE_JSON_UTF8, HttpHeaders.CONTENT_TYPE_JSONLD}) + @RequestMapping(value = "/error", produces = {HttpHeaders.CONTENT_TYPE_JSON_UTF8, HttpHeaders.CONTENT_TYPE_JSONLD}, method = {RequestMethod.GET, RequestMethod.POST}) @ResponseBody public Map error(final HttpServletRequest request) { return this.getErrorAttributes(request, ErrorAttributeOptions.defaults()); diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java index 712270f4..2f4bcb98 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java @@ -1,5 +1,6 @@ package eu.europeana.api.translation.web.service; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Base64; import java.util.HashMap; @@ -159,7 +160,7 @@ public void deleteAll() { public String generateRedisKey(String inputText, String sourceLang, String targetLang) { StringBuilder builder = (new StringBuilder()).append(sourceLang).append(targetLang); byte[] hash = Base64.getEncoder().withoutPadding().encode(Ints.toByteArray(inputText.hashCode())); - builder.append(new String(hash)); + builder.append(new String(hash, StandardCharsets.UTF_8)); return builder.toString(); } From 
350992626e6904e61b2db611d1b5f2272a90cc72 Mon Sep 17 00:00:00 2001 From: GordeaS Date: Thu, 30 Nov 2023 17:04:55 +0100 Subject: [PATCH 25/25] addressing code issues #EA-3626 --- .../definitions/language/LanguagePair.java | 86 ++++++++++--------- .../google/GoogleTranslationService.java | 3 +- .../config/TranslationApiAutoconfig.java | 1 + .../translation/config/TranslationConfig.java | 2 +- .../api/translation/config/WebMvcConfig.java | 1 + .../serialization/JsonRedisSerializer.java | 3 +- .../web/TranslationErrorController.java | 4 +- .../web/service/RedisCacheService.java | 3 +- 8 files changed, 56 insertions(+), 47 deletions(-) diff --git a/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/LanguagePair.java b/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/LanguagePair.java index 7bd91e89..d73b8876 100644 --- a/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/LanguagePair.java +++ b/translation-definitions/src/main/java/eu/europeana/api/translation/definitions/language/LanguagePair.java @@ -5,61 +5,65 @@ import eu.europeana.api.translation.definitions.vocabulary.TranslationAppConstants; /** - * Class to hold the Language pair values supported by the Translation services - * Mostly for the future Translation API, when we have more than one translation service + * Class to hold the Language pair values supported by the Translation services Mostly for the + * future Translation API, when we have more than one translation service * * @author Hugo * @since 5 Apr 2023 */ public class LanguagePair implements Comparable { - private String srcLang; - private String targetLang; + private String srcLang; + private String targetLang; - public LanguagePair(String srcLang, @NotNull String targetLang) { - this.srcLang = srcLang; - this.targetLang = targetLang; + public LanguagePair(String srcLang, @NotNull String targetLang) { + this.srcLang = srcLang; + 
this.targetLang = targetLang; + } + + public String getSrcLang() { + return srcLang; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; } - public String getSrcLang() { - return srcLang; + if (!(this.getClass() == obj.getClass())) { + return false; } - @Override - public boolean equals(Object obj) { - if (!(obj instanceof LanguagePair)) { - return false; - } + LanguagePair pair = (LanguagePair) obj; + return StringUtils.equals(targetLang, pair.targetLang) + && StringUtils.equals(srcLang, pair.srcLang); + } - LanguagePair pair = (LanguagePair) obj; - return StringUtils.equals(targetLang, pair.targetLang) - && StringUtils.equals(srcLang, pair.srcLang); + @Override + public int compareTo(LanguagePair pair) { + int ret = targetLang.compareTo(pair.targetLang); + if (ret == 0) { + ret = StringUtils.compare(srcLang, pair.srcLang); } + return ret; + } - @Override - public int compareTo(LanguagePair pair) { - int ret = targetLang.compareTo(pair.targetLang); - if(ret == 0) { - ret = StringUtils.compare(srcLang, pair.srcLang); - } - return ret; - } - - @Override - public String toString() { - return generateKey(srcLang, targetLang); - } + @Override + public String toString() { + return generateKey(srcLang, targetLang); + } - public static String generateKey(String srcLang, String targetLang) { - return srcLang + TranslationAppConstants.LANG_DELIMITER + targetLang; - } - - @Override - public int hashCode() { - return srcLang==null ? targetLang.hashCode() : srcLang.hashCode() + targetLang.hashCode(); - } + public static String generateKey(String srcLang, String targetLang) { + return srcLang + TranslationAppConstants.LANG_DELIMITER + targetLang; + } - public String getTargetLang() { - return targetLang; - } + @Override + public int hashCode() { + return srcLang == null ? 
targetLang.hashCode() : srcLang.hashCode() + targetLang.hashCode(); + } + + public String getTargetLang() { + return targetLang; + } } diff --git a/translation-service-google/src/main/java/eu/europeana/api/translation/service/google/GoogleTranslationService.java b/translation-service-google/src/main/java/eu/europeana/api/translation/service/google/GoogleTranslationService.java index 8ae0666a..600f15ef 100644 --- a/translation-service-google/src/main/java/eu/europeana/api/translation/service/google/GoogleTranslationService.java +++ b/translation-service-google/src/main/java/eu/europeana/api/translation/service/google/GoogleTranslationService.java @@ -7,6 +7,7 @@ import com.google.cloud.translate.v3.TranslateTextRequest.Builder; import com.google.cloud.translate.v3.TranslateTextResponse; import com.google.cloud.translate.v3.Translation; +import com.google.cloud.translate.v3.TranslationOrBuilder; import eu.europeana.api.translation.definitions.model.TranslationObj; import eu.europeana.api.translation.service.AbstractTranslationService; import eu.europeana.api.translation.service.exception.TranslationException; @@ -71,7 +72,7 @@ public void translate(List translationObjs) throws TranslationEx } - private void updateFromTranslation( TranslationObj translationObj, Translation translation) { + private void updateFromTranslation( TranslationObj translationObj, TranslationOrBuilder translation) { if(translationObj.getSourceLang()==null) { translationObj.setSourceLang(translation.getDetectedLanguageCode()); } diff --git a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java index d87f62aa..e31eac7a 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationApiAutoconfig.java @@ -168,6 +168,7 @@ public 
TranslationServiceProvider getTranslationServiceProvider() { * bean creation. Otherwise all these methods would need to be called manually which is not the * best solution. */ + @SuppressWarnings({"external_findsecbugs:PATH_TRAVERSAL_IN", "findsecbugs:PATH_TRAVERSAL_IN"}) // the truststore path is not user input but application config private LettuceConnectionFactory getRedisConnectionFactory() { // in case of integration tests, we do not need the SSL certificate LettuceClientConfiguration.LettuceClientConfigurationBuilder lettuceClientConfigurationBuilder = diff --git a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationConfig.java b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationConfig.java index 114c043d..d321005f 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationConfig.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/config/TranslationConfig.java @@ -19,7 +19,7 @@ @PropertySource(value = "translation.user.properties", ignoreResourceNotFound = true)}) public class TranslationConfig{ - private final Logger LOG = LogManager.getLogger(TranslationConfig.class); + private static final Logger LOG = LogManager.getLogger(TranslationConfig.class); /** Matches spring.profiles.active property in test/resource application.properties file */ public static final String ACTIVE_TEST_PROFILE = "test"; diff --git a/translation-web/src/main/java/eu/europeana/api/translation/config/WebMvcConfig.java b/translation-web/src/main/java/eu/europeana/api/translation/config/WebMvcConfig.java index 017d27fe..3d41ca96 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/config/WebMvcConfig.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/config/WebMvcConfig.java @@ -19,6 +19,7 @@ public class WebMvcConfig implements WebMvcConfigurer { /** Setup CORS for all GET, HEAD and OPTIONS, requests.
*/ @Override + @SuppressWarnings({"external_findsecbugs:PERMISSIVE_CORS", "findsecbugs:PERMISSIVE_CORS", "java:S5122"}) //the API is public public void addCorsMappings(CorsRegistry registry) { registry .addMapping("/**") diff --git a/translation-web/src/main/java/eu/europeana/api/translation/serialization/JsonRedisSerializer.java b/translation-web/src/main/java/eu/europeana/api/translation/serialization/JsonRedisSerializer.java index 74ef586a..86d1c310 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/serialization/JsonRedisSerializer.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/serialization/JsonRedisSerializer.java @@ -32,6 +32,7 @@ public byte[] serialize(Object t) throws SerializationException { } @Override + @SuppressWarnings("external_fbcontrib:LEST_LOST_EXCEPTION_STACK_TRACE") // cannot change external interface public CachedTranslation deserialize(byte[] bytes) throws SerializationException { if(bytes == null){ return null; @@ -39,7 +40,7 @@ public CachedTranslation deserialize(byte[] bytes) throws SerializationException try { return om.readValue(bytes, CachedTranslation.class); } catch (IOException e) { - throw new SerializationException(e.getMessage(), e); + throw new SerializationException("Cannot deserialize redis response: " + e.getMessage(), e); } } diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/TranslationErrorController.java b/translation-web/src/main/java/eu/europeana/api/translation/web/TranslationErrorController.java index 596ce47b..a49e0e35 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/TranslationErrorController.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/TranslationErrorController.java @@ -5,7 +5,7 @@ import org.springframework.boot.autoconfigure.web.servlet.error.AbstractErrorController; import org.springframework.boot.web.error.ErrorAttributeOptions; import
org.springframework.boot.web.servlet.error.ErrorAttributes; -import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.bind.annotation.RestController; import eu.europeana.api.commons.web.http.HttpHeaders; @@ -21,7 +21,7 @@ public TranslationErrorController(ErrorAttributes errorAttributes) { } - @RequestMapping(value = "/error", produces = {HttpHeaders.CONTENT_TYPE_JSON_UTF8, HttpHeaders.CONTENT_TYPE_JSONLD}) + @GetMapping(value = "/error", produces = {HttpHeaders.CONTENT_TYPE_JSON_UTF8, HttpHeaders.CONTENT_TYPE_JSONLD}) @ResponseBody public Map error(final HttpServletRequest request) { return this.getErrorAttributes(request, ErrorAttributeOptions.defaults()); diff --git a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java index 712270f4..2f4bcb98 100644 --- a/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java +++ b/translation-web/src/main/java/eu/europeana/api/translation/web/service/RedisCacheService.java @@ -1,5 +1,6 @@ package eu.europeana.api.translation.web.service; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Base64; import java.util.HashMap; @@ -159,7 +160,7 @@ public void deleteAll() { public String generateRedisKey(String inputText, String sourceLang, String targetLang) { StringBuilder builder = (new StringBuilder()).append(sourceLang).append(targetLang); byte[] hash = Base64.getEncoder().withoutPadding().encode(Ints.toByteArray(inputText.hashCode())); - builder.append(new String(hash)); + builder.append(new String(hash, StandardCharsets.UTF_8)); return builder.toString(); }