Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stringtable: rapify #842

Merged
merged 7 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions bin/src/modules/stringtables/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::{io::BufReader, sync::Arc};

use hemtt_stringtable::{
analyze::{lint_all, lint_check, lint_one},
rapify::convert_stringtable,
Project,
};
use hemtt_workspace::{
Expand Down Expand Up @@ -67,9 +68,12 @@ impl Module for Stringtables {
report.extend(lint_all(&stringtables, Some(ctx.config())));

for stringtable in stringtables {
report.extend(lint_one(&stringtable, Some(ctx.config())));
let codes = lint_one(&stringtable, Some(ctx.config()));
if codes.is_empty() {
convert_stringtable(&stringtable.0, &stringtable.1);
}
report.extend(codes);
}

Ok(report)
}
}
Expand Down
1 change: 1 addition & 0 deletions libs/stringtable/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ quick-xml = { version = "0.37.1", features = ["serialize"] }
serde = { workspace = true, features = ["derive"] }
toml = { workspace = true }
tracing = { workspace = true }
vfs = { workspace = true }

[dev-dependencies]
insta = { workspace = true }
16 changes: 16 additions & 0 deletions libs/stringtable/src/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ pub struct Key {
#[serde(skip_serializing_if = "Option::is_none", serialize_with = "min_escape")]
#[serde(alias = "dutch", alias = "DUTCH")]
dutch: Option<String>,
#[serde(skip_serializing_if = "Option::is_none", serialize_with = "min_escape")]
#[serde(alias = "ukrainian", alias = "UKRAINIAN")]
ukrainian: Option<String>,
#[serde(skip_serializing_if = "Option::is_none", serialize_with = "min_escape")]
#[serde(alias = "danish", alias = "DANISH")]
danish: Option<String>,
}

impl Key {
Expand Down Expand Up @@ -215,4 +221,14 @@ impl Key {
pub fn dutch(&self) -> Option<&str> {
self.dutch.as_deref()
}

/// Returns the Ukrainian translation of this key, or `None` when the key has none.
#[must_use]
pub fn ukrainian(&self) -> Option<&str> {
self.ukrainian.as_deref()
}

/// Returns the Danish translation of this key, or `None` when the key has none.
#[must_use]
pub fn danish(&self) -> Option<&str> {
self.danish.as_deref()
}
}
30 changes: 30 additions & 0 deletions libs/stringtable/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,43 @@ use serde::{Deserialize, Serialize};
pub mod analyze;
mod key;
mod package;
pub mod rapify;
mod totals;

pub use key::Key;
pub use package::Package;
pub use totals::Totals;
use tracing::error;

/// Languages in className format.
///
/// The order is significant: `rapify::get_translations` builds its array of
/// per-language accessors in this same order and indexes the per-language
/// tables by position, so any language added here must be added there at the
/// same index.
static ALL_LANGUAGES: [&str; 25] = [
"English",
"Czech",
"French",
"Spanish",
"Italian",
"Polish",
"Portuguese",
"Russian",
"German",
"Korean",
"Japanese",
"Chinese",
"Chinesesimp",
"Turkish",
"Swedish",
"Slovak",
"SerboCroatian",
"Norwegian",
"Icelandic",
"Hungarian",
"Greek",
"Finnish",
"Dutch",
"Ukrainian",
"Danish",
];

#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Project {
#[serde(rename = "@name")]
Expand Down
202 changes: 202 additions & 0 deletions libs/stringtable/src/rapify.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
use crate::{Key, Project, ALL_LANGUAGES};
use hemtt_workspace::WorkspacePath;
use tracing::trace;

/// Sections of a binarized stringtable, kept as separate byte buffers.
///
/// `rapify` fills the buffers and `convert_stringtable` writes them to the
/// output file in the order header, languages, offsets, keys, translations.
/// Integers are 4-byte little-endian values and strings are null-terminated.
#[derive(Default, Debug)]
pub struct XmlbLayout {
/// Magic number that opens the file.
header: Vec<u8>,
/// Language count followed by every language name.
languages: Vec<u8>,
/// Offset count followed by one byte offset per language.
offsets: Vec<u8>,
/// Key count followed by every key id.
keys: Vec<u8>,
/// One buffer per emitted language table (phrase count followed by the
/// phrases). May hold fewer buffers than there are languages, because
/// languages without a unique translation reuse the first table's offset.
translations: Vec<Vec<u8>>,
}

/// Phrases collected for a single language while flattening a project.
#[derive(Clone, Debug)]
struct Translation {
/// One phrase per processed key, in the order the keys were visited.
phrases: Vec<String>,
/// Set once any key supplies a translation written for this language
/// itself rather than a fallback; `rapify` also forces it for the first
/// language so that its table is always emitted.
have_unique: bool,
}

/// Converts a parsed stringtable into its binary form and replaces the XML file.
///
/// Runs [`rapify`] on `project`. When it yields a layout, a sibling of
/// `xml_path` with the `bin` extension is created and the sections are written
/// in the order header, languages, offsets, keys, translations. The original
/// XML file is removed only after every section has been written, so a failed
/// write never leaves the addon without any stringtable source.
/// When [`rapify`] returns `None` no file is created or removed.
///
/// # Panics
///
/// Panics if the `bin` path cannot be derived, the output file cannot be
/// created, any write fails, or the original XML file cannot be removed.
pub fn convert_stringtable(project: &Project, xml_path: &WorkspacePath) {
    let Some(data) = rapify(project) else {
        trace!("skpping binerization of stringtable{:?}", xml_path);
        return;
    };

    // Create stringtable.bin
    let xmlb_path = xml_path.with_extension("bin").expect("vfs error");
    let mut xmlb_file = xmlb_path.create_file().expect("vfs error");

    // Write data to virtual file, section by section
    for section in [&data.header, &data.languages, &data.offsets, &data.keys] {
        xmlb_file.write_all(section).expect("IO Error");
    }
    for translation_buffer in &data.translations {
        xmlb_file.write_all(translation_buffer).expect("IO Error");
    }

    // Remove original stringtable.xml now that its replacement is complete
    xml_path.vfs().remove_file().expect("vfs error");

    trace!(
        "binned stringtable{:?} [Unique {}]",
        xmlb_path,
        data.translations.len()
    );
}

/// Appends the UTF-8 bytes of `input` to `buffer`, followed by a single
/// terminating zero byte.
fn write_string(buffer: &mut Vec<u8>, input: &str) {
    buffer.extend_from_slice(input.as_bytes());
    buffer.push(b'\0');
}
/// Appends `input` to `buffer` as four little-endian bytes.
fn write_int(buffer: &mut Vec<u8>, input: i32) {
    let le_bytes: [u8; 4] = input.to_le_bytes();
    buffer.extend_from_slice(&le_bytes);
}

#[must_use]
/// # Panics
pub fn rapify(project: &Project) -> Option<XmlbLayout> {
let mut data: XmlbLayout = XmlbLayout::default();

// Restructure translations: flat for each language
let mut all_keys: Vec<String> = Vec::with_capacity(20);
let mut all_translations: Vec<Translation> = vec![
Translation {
phrases: Vec::with_capacity(20),
have_unique: false
};
ALL_LANGUAGES.len()
];

for package in project.packages() {
for package_inner in package.containers() {
for key in package_inner.keys() {
all_keys.push(key.id().into());
if !get_translations(key, &mut all_translations) {
return None; // stop if we can't get some kind of translation
}
}
}
for key in package.keys() {
all_keys.push(key.id().into());
if !get_translations(key, &mut all_translations) {
return None; // stop if we can't get some kind of translation
}
}
}

// Header
write_int(&mut data.header, 1_481_460_802); // aka XMLB in LE

// Languages
write_int(
&mut data.languages,
i32::try_from(ALL_LANGUAGES.len()).expect("overflow"),
);
for language in ALL_LANGUAGES {
write_string(&mut data.languages, language);
}

// Keys
write_int(
&mut data.keys,
i32::try_from(all_keys.len()).expect("overflow"),
);
for key in &all_keys {
write_string(&mut data.keys, key);
}

// Offset
let offset_size = 4 + 4 * ALL_LANGUAGES.len();
let mut rolling_offset =
data.header.len() + data.languages.len() + offset_size + data.keys.len();
write_int(
&mut data.offsets,
i32::try_from(ALL_LANGUAGES.len()).expect("overflow"),
);

all_translations[0].have_unique = true; // Always write first set (english)
let first_offset = rolling_offset;

// Languages and their offsets
for translation in all_translations {
debug_assert_eq!(translation.phrases.len(), all_keys.len());

let offset = if translation.have_unique {
// we have some unique tranlation, write and use it's offset
let offset_start = rolling_offset;
let mut translation_buffer: Vec<u8> =
Vec::with_capacity(32 * translation.phrases.len());
write_int(
&mut translation_buffer,
i32::try_from(translation.phrases.len()).expect("overflow"),
);
for phrase in &translation.phrases {
write_string(&mut translation_buffer, phrase);
}
rolling_offset += translation_buffer.len();
data.translations.push(translation_buffer);
offset_start
} else {
// no unique translations, just use first offset (english)
first_offset
};

write_int(&mut data.offsets, i32::try_from(offset).expect("overflow"));
}
debug_assert_eq!(offset_size, data.offsets.len());

Some(data)
}

/// Appends one phrase per language for `key` to the per-language tables.
///
/// `languages` is indexed in the same order as the accessor array built below,
/// which has to mirror `ALL_LANGUAGES`. For each language the phrase is chosen
/// as: the language's own translation (which also marks that table as
/// `have_unique`), otherwise `key.original()`, otherwise `key.english()`.
///
/// Returns `false` as soon as a language has none of those available; phrases
/// already pushed for earlier languages during the same call are not rolled
/// back. Returns `true` when every language received a phrase.
fn get_translations(key: &Key, languages: &mut [Translation]) -> bool {
let tranlations = [
key.english(),
key.czech(),
key.french(),
key.spanish(),
key.italian(),
key.polish(),
key.portuguese(),
key.russian(),
key.german(),
key.korean(),
key.japanese(),
key.chinese(),
key.chinesesimp(),
key.turkish(),
key.swedish(),
key.slovak(),
key.serbocroatian(),
key.norwegian(),
key.icelandic(),
key.hungarian(),
key.greek(),
key.finnish(),
key.dutch(),
key.ukrainian(),
key.danish(),
];
debug_assert_eq!(tranlations.len(), ALL_LANGUAGES.len()); // order needs to be synced // Todo: meta programing?

// `index` is the position in the accessor array above and therefore the slot in `languages`.
for (index, result) in tranlations.into_iter().enumerate() {
if let Some(native) = result {
languages[index].have_unique = true;
languages[index].phrases.push(native.into());
} else if let Some(original) = key.original() {
languages[index].phrases.push(original.into());
} else if let Some(english) = key.english() {
languages[index].phrases.push(english.into());
} else {
// If we don't have some kind of default value to use, we should just not do the conversion
return false;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BI's binarizer places an empty string here.
But for HEMTT its definitely better if it complains, but it should say where the missing string is? Probably the stringtable linter already does that?

}
}

true
}
Loading
Loading