Skip to content

Commit

Permalink
Merge pull request #2305 from IFRCGo/feature/fix-log-dump-and-tranlsa…
Browse files Browse the repository at this point in the history
…tion-error

Fix translation limit errors at 50000 chars
  • Loading branch information
szabozoltan69 authored Nov 12, 2024
2 parents 96f724f + 56f5b6e commit 892cf33
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 1 deletion.
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ RUN perl -pi -e 's/ is not -1 / != 1 /' ${AZUREROOT}blob/baseblobservice.py
RUN perl -pi -e "s/ is '' / == '' /" ${AZUREROOT}common/_connection.py
RUN perl -pi -e "s/ is '' / == '' /" ${AZUREROOT}_connection.py

# Silence the repeated "Queue is full. Dropping telemetry." warnings that opencensus dumps into the log (added 2024-11-11):
ENV OPENCENSUSINIT=/usr/local/lib/python3.11/site-packages/opencensus/common/schedule/__init__.py
RUN perl -pi -e "s/logger.warning.*/pass/" ${OPENCENSUSINIT} 2>/dev/null

COPY main/nginx.conf /etc/nginx/sites-available/
RUN \
ln -s /etc/nginx/sites-available/nginx.conf /etc/nginx/sites-enabled; \
Expand Down
38 changes: 37 additions & 1 deletion lang/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,43 @@ def __init__(self):
def is_text_html(cls, text):
return bool(BeautifulSoup(text, "html.parser").find())

@classmethod
def find_last_slashtable(cls, text, limit):
    """Find where to cut ``text`` right after its last ``</table>`` within ``limit``.

    Only closing tags lying entirely inside the first ``limit`` characters
    are considered, so the part of ``text`` before the returned index fits
    within ``limit``.

    Args:
        text: Markup string to search.
        limit: Maximum allowed length of the leading part of ``text``.

    Returns:
        The index one past the end of the last complete ``</table>`` tag
        found in ``text[:limit]``, or ``-1`` when there is no such tag.
    """
    tag = "</table>"
    # A bounded rfind yields the same index as text[:limit].rfind(tag) but
    # does not allocate a throw-away copy of the (potentially huge) prefix.
    truncate_here = text.rfind(tag, 0, limit)
    if truncate_here != -1:
        # Move past the tag so that the closing tag stays in the leading part.
        truncate_here += len(tag)
    return truncate_here

@classmethod
def find_last_slashp(cls, text, limit):
    """Find where to cut ``text`` right after its last ``</p>`` within ``limit``.

    Only closing tags lying entirely inside the first ``limit`` characters
    are considered, so the part of ``text`` before the returned index fits
    within ``limit``.

    Args:
        text: Markup string to search.
        limit: Maximum allowed length of the leading part of ``text``.

    Returns:
        The index one past the end of the last complete ``</p>`` tag found
        in ``text[:limit]``, or ``-1`` when there is no such tag.
    """
    tag = "</p>"
    # A bounded rfind yields the same index as text[:limit].rfind(tag) but
    # does not allocate a throw-away copy of the (potentially huge) prefix.
    truncate_here = text.rfind(tag, 0, limit)
    if truncate_here != -1:
        # Move past the tag so that the closing tag stays in the leading part.
        truncate_here += len(tag)
    return truncate_here

def translate_text(self, text, dest_language, source_language=None):
if settings.TESTING:
# NOTE: Mocking for test purpose
return self._fake_translation(text, dest_language, source_language)

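# A dirty workaround to handle oversized HTML+CSS texts, usually tables:
# only the leading part that fits into settings.AZURE_TRANSL_LIMIT is sent for
# translation (cut after the last </table>, else after the last </p>, else at
# the limit itself); the remainder is kept in textTail and appended untranslated.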
textTail = ""
if len(text) > settings.AZURE_TRANSL_LIMIT:
truncate_here = self.find_last_slashtable(text, settings.AZURE_TRANSL_LIMIT)
if truncate_here != -1:
textTail = text[truncate_here:]
text = text[:truncate_here]
else:
truncate_here = self.find_last_slashp(text, settings.AZURE_TRANSL_LIMIT)
if truncate_here != -1:
textTail = text[truncate_here:]
text = text[:truncate_here]
else:
textTail = text[settings.AZURE_TRANSL_LIMIT :]
text = text[: settings.AZURE_TRANSL_LIMIT]

payload = {
"text": text,
"from": source_language,
Expand All @@ -96,7 +129,10 @@ def translate_text(self, text, dest_language, source_language=None):
headers=self.headers,
json=payload,
)
return response.json()[0]["translations"][0]["text"]

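# Deliberately not checking "== 200" – it would break tests where requests.post() returns a MagicMock.
# NOTE: when the status code is 500, nothing is returned here, so the caller receives None.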
if response.status_code != 500:
return response.json()[0]["translations"][0]["text"] + textTail


def get_translator_class():
Expand Down
2 changes: 2 additions & 0 deletions main/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,3 +702,5 @@ def decode_base64(env_key, fallback_env_key):

# Need to load this to overwrite modeltranslation module
import main.translation # noqa: F401 E402

# Maximum number of characters of a text that translate_text() in
# lang/translation.py sends for translation in one request; longer texts are
# cut at or before this length. Presumably kept just below the 50000-character
# limit of the translation service - TODO confirm.
AZURE_TRANSL_LIMIT = 49990

0 comments on commit 892cf33

Please sign in to comment.