From 5d0516e5ee8de5bd4369e8760b4a5c77259853db Mon Sep 17 00:00:00 2001 From: garikkh <77762367+garikkh@users.noreply.github.com> Date: Tue, 5 Nov 2024 23:36:50 -0800 Subject: [PATCH] feat(format-po-gettext): respect Plural-Forms header (#2070) --- packages/format-po-gettext/package.json | 1 + .../src/plural-samples.test.ts | 187 ++++++++++++++++++ .../format-po-gettext/src/plural-samples.ts | 107 ++++++++++ .../format-po-gettext/src/po-gettext.test.ts | 75 +++++++ packages/format-po-gettext/src/po-gettext.ts | 79 +++++++- yarn.lock | 8 + 6 files changed, 450 insertions(+), 7 deletions(-) create mode 100644 packages/format-po-gettext/src/plural-samples.test.ts create mode 100644 packages/format-po-gettext/src/plural-samples.ts diff --git a/packages/format-po-gettext/package.json b/packages/format-po-gettext/package.json index 234c026c2..8ea2cf9eb 100644 --- a/packages/format-po-gettext/package.json +++ b/packages/format-po-gettext/package.json @@ -45,6 +45,7 @@ "@lingui/format-po": "4.13.0", "@lingui/message-utils": "4.13.0", "@messageformat/parser": "^5.0.0", + "cldr-core": "^45.0.0", "node-gettext": "^3.0.0", "plurals-cldr": "^2.0.1", "pofile": "^1.1.4" diff --git a/packages/format-po-gettext/src/plural-samples.test.ts b/packages/format-po-gettext/src/plural-samples.test.ts new file mode 100644 index 000000000..45c5c2632 --- /dev/null +++ b/packages/format-po-gettext/src/plural-samples.test.ts @@ -0,0 +1,187 @@ +import { + createLocaleTest, + createSamples, + fillRange, + renameKeys, +} from "./plural-samples" + +describe("Plural samples generation util", () => { + test.each([ + [{ "pluralRule-count-zero": null }, { zero: null }], + [{ "pluralRule-count-one": null }, { one: null }], + [{ "pluralRule-count-two": null }, { two: null }], + [{ "pluralRule-count-few": null }, { few: null }], + [{ "pluralRule-count-many": null }, { many: null }], + [{ "pluralRule-count-other": null }, { other: null }], + ])("renameKeys", (original, expected) => { + expect(renameKeys(original)).toEqual(expected) + }) + + test("renameKeys multiple", () => { + const original = { + "pluralRule-count-zero": + "n = 0 @integer 0 @decimal 0.0, 0.00, 0.000, 0.0000", + "pluralRule-count-one": + "n = 1 @integer 1 @decimal 1.0, 1.00, 1.000, 1.0000", + "pluralRule-count-two": + "n = 2 @integer 2 @decimal 2.0, 2.00, 2.000, 2.0000", + "pluralRule-count-few": + "n % 100 = 3..10 @integer 3~10, 103~110, 1003, … @decimal 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 103.0, 1003.0, …", + "pluralRule-count-many": + "n % 100 = 11..99 @integer 11~26, 111, 1011, … @decimal 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 111.0, 1011.0, …", + "pluralRule-count-other": + " @integer 100~102, 200~202, 300~302, 400~402, 500~502, 600, 1000, 10000, 100000, 1000000, … @decimal 0.1~0.9, 1.1~1.7, 10.1, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …", + } + expect(renameKeys(original)).toEqual({ + zero: "n = 0 @integer 0 @decimal 0.0, 0.00, 0.000, 0.0000", + one: "n = 1 @integer 1 @decimal 1.0, 1.00, 1.000, 1.0000", + two: "n = 2 @integer 2 @decimal 2.0, 2.00, 2.000, 2.0000", + few: "n % 100 = 3..10 @integer 3~10, 103~110, 1003, … @decimal 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 103.0, 1003.0, …", + many: "n % 100 = 11..99 @integer 11~26, 111, 1011, … @decimal 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 111.0, 1011.0, …", + other: + " @integer 100~102, 200~202, 300~302, 400~402, 500~502, 600, 1000, 10000, 100000, 1000000, … @decimal 0.1~0.9, 1.1~1.7, 10.1, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …", + }) + }) + + test.each([ + ["0~1", [0, 1]], + ["2~19", [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]], + ["100~102", [100, 101, 102]], + ])("fillRange - integer ranges", (range, values) => { + expect(fillRange(range)).toEqual(values) + }) + + test.each([ + ["0.0~1.0", [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]], + // partials + [ + "0.4~1.6", + [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6], + ], + ["0.04~0.09", [0.04, 0.05, 0.06, 0.07, 0.08, 0.09]], + [ + "0.04~0.29", + [ + 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, + 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, + 0.28, 0.29, + ], + ], + ])("fillRange - decimal ranges", (range, values) => { + expect(fillRange(range)).toEqual(values) + }) + + test("createSamples - single values", () => { + expect(createSamples("0")).toEqual([0]) + expect(createSamples("0, 1, 2")).toEqual([0, 1, 2]) + expect(createSamples("0, 1.0, 2.0")).toEqual([0, 1, 2]) + }) + + test("createSamples - integer ranges", () => { + expect(createSamples("0~1")).toEqual([0, 1]) + expect(createSamples("0~2")).toEqual([0, 1, 2]) + expect(createSamples("0~10")).toEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + expect(createSamples("2~17, 100, 1000, 10000, 100000, 1000000")).toEqual([ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 100, 1000, 10000, + 100000, 1000000, + ]) + }) + + test("createSamples - mixed src", () => { + expect(createSamples("0.1~0.9")).toEqual([ + 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, + ]) + // with ... + expect( + createSamples("0, 2~16, 100, 1000, 10000, 100000, 1000000, …") + ).toEqual([ + 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 100, 1000, 10000, + 100000, 1000000, + ]) + // mixed with integer ranges + expect( + createSamples("0.1~0.9, 1.1~1.7, 10.0, 100.0, 1000.0, 10000.0, 100000.0") + ).toEqual([ + 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, + 1.7, 10, 100, 1000, 10000, 100000, + ]) + // trailing comma + expect( + createSamples("0.1~0.9, 1.1~1.7, 10.0, 100.0, 1000.0, 10000.0, 100000.0,") + ).toEqual([ + 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, + 1.7, 10, 100, 1000, 10000, 100000, + ]) + }) + + test("Run on ruleset", () => { + // ruleset for cs + const ruleset = { + "pluralRule-count-one": "i = 1 and v = 0 @integer 1", + "pluralRule-count-few": "i = 2..4 and v = 0 @integer 2~4", + "pluralRule-count-many": + "v != 0 @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …", + "pluralRule-count-other": + " @integer 0, 5~19, 100, 1000, 10000, 100000, 1000000, …", + } + expect(createLocaleTest(ruleset)).toMatchInlineSnapshot(` + { + pluralRule-count-few: [ + 2, + 3, + 4, + ], + pluralRule-count-many: [ + 0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1, + 1.1, + 1.2, + 1.3, + 1.4, + 1.5, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + ], + pluralRule-count-one: [ + 1, + ], + pluralRule-count-other: [ + 0, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 100, + 1000, + 10000, + 100000, + 1000000, + ], + } + `) + }) +}) diff --git a/packages/format-po-gettext/src/plural-samples.ts b/packages/format-po-gettext/src/plural-samples.ts new file mode 100644 index 000000000..b790e624c --- /dev/null +++ b/packages/format-po-gettext/src/plural-samples.ts @@ -0,0 +1,107 @@ +import cardinals from "cldr-core/supplemental/plurals.json" + +/* +This script is heavily influenced by one that is used to generate plural samples +found here: https://github.com/nodeca/plurals-cldr/blob/master/support/generate.js + +Ordinals were removed, and the original script supported strings and numbers, +but for the use case of lingui-gettext formatter, we only want numbers. +*/ + +type PluralForm = "zero" | "one" | "two" | "few" | "many" | "other" +type FormattedRuleset = Record + +// Strip key prefixes to get clear names: zero / one / two / few / many / other +// pluralRule-count-other -> other +export function renameKeys(rules: Record): FormattedRuleset { + const result = {} + Object.keys(rules).forEach((k) => { + const newKey = k.match(/[^-]+$/)[0] + result[newKey] = rules[k] + }) + return result as FormattedRuleset +} + +// Create array of sample values for single range +// 5~16, 0.04~0.09. Both string & integer forms (when possible) +export function fillRange(value: string): number[] { + let [start, end] = value.split("~") + + const decimals = (start.split(".")[1] || "").length + // for example 0.1~0.9 has 10 values, need to add that many to list + // 0.004~0.009 has 100 values + let mult = Math.pow(10, decimals) + + const startNum = Number(start) + const endNum = Number(end) + + let range = Array(Math.ceil(endNum * mult - startNum * mult + 1)) + .fill(0) + .map((v, idx) => (idx + startNum * mult) / mult) + + let last = range[range.length - 1] + + // Number defined in the range should be the last one, i.e. 5~16 should have 16 + if (endNum !== last) { + throw new Error(`Range create error for ${value}: last value is ${last}`) + } + + return range.map((v) => Number(v)) +} + +// Create array of test values for @integer or @decimal +export function createSamples(src: string): number[] { + let result: number[] = [] + + src + .replace(/…/, "") + .trim() + .replace(/,$/, "") + .split(",") + .map(function (val) { + return val.trim() + }) + .forEach((val) => { + if (val.indexOf("~") !== -1) { + result = result.concat(fillRange(val)) + } else { + result.push(Number(val)) + } + }) + + return result +} + +// Create fixtures for single locale rules +export function createLocaleTest(rules) { + let result = {} + + Object.keys(rules).forEach((form) => { + let samples = rules[form].split(/@integer|@decimal/).slice(1) + + result[form] = [] + samples.forEach((sample) => { + result[form] = result[form].concat(createSamples(sample)) + }) + }) + + return result +} + +export function getCldrPluralSamples(): Record< + string, + Record +> { + const pluralRules = {} + + // Parse plural rules + Object.entries(cardinals.supplemental["plurals-type-cardinal"]).forEach( + ([loc, ruleset]) => { + let rules = renameKeys(ruleset) + + pluralRules[loc.toLowerCase()] = createLocaleTest(rules) + } + ) + + return pluralRules +} diff --git a/packages/format-po-gettext/src/po-gettext.test.ts b/packages/format-po-gettext/src/po-gettext.test.ts index 564203350..aa251ff22 100644 --- a/packages/format-po-gettext/src/po-gettext.test.ts +++ b/packages/format-po-gettext/src/po-gettext.test.ts @@ -231,6 +231,81 @@ msgstr[2] "# dní" expect(catalog).toMatchSnapshot() }) + test("should use respect Plural-Forms header", () => { + const po = ` +msgid "" +msgstr "" +"Language: fr\\n" +"Plural-Forms: nplurals=3; plural=(n == 0 || n == 1) ? 0 : n != 0 && n % 1000000 == 0 ? 1 : 2;\\n" + +#. js-lingui:icu=%7B0%2C+plural%2C+one+%7B%7Bcount%7D+day%7D+other+%7B%7Bcount%7D+days%7D%7D&pluralize_on=0 +msgid "{count} day" +msgid_plural "{count} days" +msgstr[0] "{count} jour" +msgstr[1] "{count} jours" +msgstr[2] "{count} jours" + ` + + const parsed = format.parse(po, defaultParseCtx) + + // Note that the last case must be `other` (the 4th CLDR case name) instead of `many` (the 3rd CLDR case name). + expect(parsed).toMatchInlineSnapshot(` + { + ZETJEQ: { + comments: [], + context: null, + extra: { + flags: [], + translatorComments: [], + }, + message: {0, plural, one {{count} day} other {{count} days}}, + obsolete: false, + origin: [], + translation: {0, plural, one {{count} jour} many {{count} jours} other {{count} jours}}, + }, + } + `) + }) + + it("should correctly handle skipped form", () => { + // in this test Plural-Forms header defines 4 forms via `nplurals=4` + // but expression never returns 2 form, only [0, 1, 3] + const po = ` +msgid "" +msgstr "" +"Language: cs\n" +"Plural-Forms: nplurals=4; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 3;\n" + +#. js-lingui:icu=%7Bcount%2C+plural%2C+one+%7B%7Bcount%7D+day%7D+few+%7B%7Bcount%7D+days%7D+many+%7B%7Bcount%7D+days%7D+other+%7B%7Bcount%7D+days%7D%7D&pluralize_on=# +msgid "# day" +msgid_plural "# days" +msgstr[0] "# den" +msgstr[1] "# dny" +msgstr[2] "# dne" +msgstr[3] "# dní" + ` + + const parsed = format.parse(po, defaultParseCtx) + + // Note that the last case must be `other` (the 4th CLDR case name) instead of `many` (the 3rd CLDR case name). + expect(parsed).toMatchInlineSnapshot(` + { + GMnlGy: { + comments: [], + context: null, + extra: { + flags: [], + translatorComments: [], + }, + message: {count, plural, one {{count} day} few {{count} days} many {{count} days} other {{count} days}}, + obsolete: false, + origin: [], + translation: {#, plural, one {# den} few {# dny} other {# dní}}, + }, + } + `) + }) + describe("using custom prefix", () => { it("parses plurals correctly", () => { const defaultProfile = fs diff --git a/packages/format-po-gettext/src/po-gettext.ts b/packages/format-po-gettext/src/po-gettext.ts index 63c1fb447..32f4d56f7 100644 --- a/packages/format-po-gettext/src/po-gettext.ts +++ b/packages/format-po-gettext/src/po-gettext.ts @@ -8,6 +8,7 @@ import type { CatalogFormatter, CatalogType, MessageType } from "@lingui/conf" import { generateMessageId } from "@lingui/message-utils/generateMessageId" import { formatter as poFormatter } from "@lingui/format-po" import type { PoFormatterOptions } from "@lingui/format-po" +import { getCldrPluralSamples } from "./plural-samples" type POItem = InstanceType @@ -16,6 +17,8 @@ export type PoGettextFormatterOptions = PoFormatterOptions & { customICUPrefix?: string } +const cldrSamples = getCldrPluralSamples() + // Attempts to turn a single tokenized ICU plural case back into a string. function stringifyICUCase(icuCase: SelectCase): string { return icuCase.tokens @@ -140,6 +143,10 @@ function serializePlurals( return item } +type GettextPluralsInfo = { + nplurals: number + pluralsFunc: (n: number) => number +} /** * Returns ICU case labels in the order that gettext lists localized messages, e.g. 0,1,2 => `["one", "two", "other"]`. * @@ -150,14 +157,67 @@ function serializePlurals( * This approach is heavily influenced by * https://github.com/LLK/po2icu/blob/9eb97f81f72b2fee02b77f1424702e019647e9b9/lib/po2icu.js#L148. */ -const getPluralCases = (lang: string): string[] | undefined => { +const getPluralCases = ( + lang: string, + pluralFormsHeader: string +): string[] | undefined => { + let gettextPluralsInfo: GettextPluralsInfo + + if (pluralFormsHeader) { + gettextPluralsInfo = parsePluralFormsFn(pluralFormsHeader) + } + // If users uses locale with underscore or slash, es-ES, es_ES, gettextplural is "es" not es-ES. const [correctLang] = lang.split(/[-_]/g) - const gettextPluralsInfo = gettextPlurals[correctLang] - return gettextPluralsInfo?.examples.map((pluralCase: any) => - pluralsCldr(correctLang, pluralCase.sample) - ) + if (!gettextPluralsInfo) { + gettextPluralsInfo = gettextPlurals[correctLang] + } + + if (!gettextPluralsInfo) { + if (lang !== "pseudo") { + console.warn( + `No plural rules found for language "${lang}". Please add a Plural-Forms header.` + ) + } + return undefined + } + + const cases: string[] = [...Array(pluralsCldr.forms(correctLang).length)] + + for (let form of pluralsCldr.forms(correctLang)) { + const samples = cldrSamples[correctLang][form] + // both need to cast to Number - funcs test with `===` and may return boolean + const pluralForm = Number( + gettextPluralsInfo.pluralsFunc(Number(samples[0])) + ) + + cases[pluralForm] = form + } + + return cases +} + +function parsePluralFormsFn(pluralFormsHeader: string): GettextPluralsInfo { + const [npluralsExpr, expr] = pluralFormsHeader.split(";") + + try { + const nplurals = new Function(npluralsExpr + "; return nplurals;")() + const pluralsFunc = new Function( + "n", + expr + "; return plural;" + ) as GettextPluralsInfo["pluralsFunc"] + + return { + nplurals, + pluralsFunc, + } + } catch (e) { + console.warn( + `Plural-Forms header has incorrect value: ${pluralFormsHeader}` + ) + return undefined + } } const convertPluralsToICU = ( @@ -225,7 +285,9 @@ const convertPluralsToICU = ( // Map each msgstr to a " {}" entry, joined by one space. const pluralClauses = item.msgstr - .map((str, index) => pluralForms[index] + " {" + str + "}") + .map((str, index) => + pluralForms[index] ? pluralForms[index] + " {" + str + "}" : "" + ) .join(" ") // Find out placeholder name from item's message context, defaulting to "count". @@ -262,7 +324,10 @@ export function formatter( // .po plurals are numbered 0-N and need to be mapped to ICU plural classes ("one", "few", "many"...). Different // languages can have different plural classes (some start with "zero", some with "one"), so read that data from CLDR. // `pluralForms` may be `null` if lang is not found. As long as no plural is used, don't report an error. - let pluralForms = getPluralCases(po.headers.Language) + let pluralForms = getPluralCases( + po.headers.Language, + po.headers["Plural-Forms"] + ) po.items.forEach((item) => { convertPluralsToICU( diff --git a/yarn.lock b/yarn.lock index 876649007..1e63cbb52 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2893,6 +2893,7 @@ __metadata: "@lingui/jest-mocks": "workspace:^" "@lingui/message-utils": 4.13.0 "@messageformat/parser": ^5.0.0 + cldr-core: ^45.0.0 mockdate: ^3.0.5 node-gettext: ^3.0.0 plurals-cldr: ^2.0.1 @@ -5799,6 +5800,13 @@ __metadata: languageName: node linkType: hard +"cldr-core@npm:^45.0.0": + version: 45.0.0 + resolution: "cldr-core@npm:45.0.0" + checksum: 06e39807a90483dfb7f2ba69c04e6cf15dd8ca7558cd772a3cd23db582ae95845a169917e070844b3b25d194c412f7e8b3d953ff88006b424803912c93603290 + languageName: node + linkType: hard + "clean-stack@npm:^2.0.0": version: 2.2.0 resolution: "clean-stack@npm:2.2.0"