Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(punctuator): convert digit separators #973

Merged
merged 1 commit into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 105 additions & 21 deletions src/rime/gear/punctuator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ void PunctConfig::LoadConfig(Engine* engine, bool load_symbols) {
if (load_symbols) {
symbols_ = config->GetMap("punctuator/symbols");
}
{
string configured;
if (config->GetString("punctuator/digit_separators", &configured)) {
digit_separators_ = configured;
}
}
}

an<ConfigItem> PunctConfig::GetPunctDefinition(const string key) {
Expand All @@ -49,15 +55,36 @@ Punctuator::Punctuator(const Ticket& ticket) : Processor(ticket) {
config_.LoadConfig(engine_);
}

static bool punctuation_is_translated(Context* ctx) {
static bool punctuation_is_translated(Context* ctx, const string& tag) {
Composition& comp = ctx->composition();
if (comp.empty() || !comp.back().HasTag("punct")) {
if (comp.empty() || !comp.back().HasTag(tag)) {
return false;
}
auto cand = comp.back().GetSelectedCandidate();
return cand && cand->type() == "punct";
}

// Returns true when `text` is non-empty and its last byte is an ASCII decimal
// digit; returns false for an empty string.
inline static bool ends_with_digit(const string& text) {
  if (text.empty()) {
    return false;
  }
  // Convert through unsigned char before calling isdigit(): passing a negative
  // value (any byte >= 0x80 where plain char is signed, e.g. from non-ASCII
  // text) is undefined behavior.
  return isdigit(static_cast<unsigned char>(text.back())) != 0;
}

// Recognizes patterns like 3.14 12:30 1,000 1'000.
// Reports whether the most recent commit record ends with a digit and has
// type "thru" or "raw". Returns false when the commit history is empty.
static bool is_after_number(Context* ctx) {
  const CommitHistory& history = ctx->commit_history();
  if (history.empty()) {
    return false;
  }
  const CommitRecord& last = history.back();
  // Logical && rather than bitwise &: both operands are boolean conditions,
  // and short-circuiting skips the type comparison when the text does not end
  // with a digit.
  return ends_with_digit(last.text) &&
         (last.type == "thru" || last.type == "raw");
}

// Reports whether the composition's first segment is tagged "punct_number"
// and its length equals the length of the whole input string.
static bool is_after_digit_separator(Context* ctx) {
  const auto& composition = ctx->composition();
  if (composition.empty()) {
    return false;
  }
  const auto& first = composition[0];
  if (!first.HasTag("punct_number")) {
    return false;
  }
  return first.length == ctx->input().length();
}

ProcessResult Punctuator::ProcessKeyEvent(const KeyEvent& key_event) {
if (key_event.release() || key_event.ctrl() || key_event.alt() ||
key_event.super())
Expand All @@ -72,29 +99,70 @@ ProcessResult Punctuator::ProcessKeyEvent(const KeyEvent& key_event) {
if (!use_space_ && ch == XK_space && ctx->IsComposing()) {
return kNoop;
}
if (ch == '.' || ch == ':') { // 3.14, 12:30
const CommitHistory& history(ctx->commit_history());
if (!history.empty()) {
const CommitRecord& cr(history.back());
if (cr.type == "thru" && cr.text.length() == 1 && isdigit(cr.text[0])) {
return kRejected;
}
}
if (isdigit(ch) && is_after_digit_separator(ctx)) {
ctx->PushInput(ch) && ctx->Commit();
return kAccepted;
}
if (ConvertDigitSeparator(ch)) {
return kAccepted;
}
// sync with full_shape option
config_.LoadConfig(engine_);
string punct_key(1, ch);
auto punct_definition = config_.GetPunctDefinition(punct_key);
string key(1, ch);
auto punct_definition = config_.GetPunctDefinition(key);
if (!punct_definition)
return kNoop;
DLOG(INFO) << "punct key: '" << punct_key << "'";
if (!AlternatePunct(punct_key, punct_definition)) {
ctx->PushInput(ch) && punctuation_is_translated(ctx) &&
(ConfirmUniquePunct(punct_definition) ||
AutoCommitPunct(punct_definition) || PairPunct(punct_definition));
DLOG(INFO) << "punct key: '" << key << "'";
if (AlternatePunct(key, punct_definition)) {
return kAccepted;
}
if (ReconvertDigitSeparatorAsPunct(key) || ctx->PushInput(ch)) {
if (punctuation_is_translated(ctx, "punct")) {
ConfirmUniquePunct(punct_definition) ||
AutoCommitPunct(punct_definition) || PairPunct(punct_definition);
}
}
return kAccepted;
}

bool Punctuator::ConvertDigitSeparator(char ch) {
if (!config_.is_digit_separator(ch)) {
return false;
}
Context* ctx = engine_->context();
if (ctx->composition().empty() && is_after_number(ctx)) {
DLOG(INFO) << "convert punct in number: " << ch;
ctx->PushInput(ch) && punctuation_is_translated(ctx, "punct_number") &&
ctx->composition().Forward();
return true;
}
return false;
}

bool Punctuator::ReconvertDigitSeparatorAsPunct(const string& key) {
if (!config_.has_digit_separators()) {
return false;
}
Context* ctx = engine_->context();
// repeat the same punctuation key to access the original binding
if (ctx->input() != key) {
return false;
}
Composition& comp = ctx->composition();
if (!comp.empty()) {
Segment& segment = comp[0];
if (segment.HasTag("punct_number")) {
segment.tags.erase("punct_number");
segment.tags.insert("punct");
segment.status = Segment::kVoid;
DLOG(INFO) << "reconvert punct, key = " << key;
ctx->ReopenPreviousSegment();
return true;
}
}
return false;
}

bool Punctuator::AlternatePunct(const string& key,
const an<ConfigItem>& definition) {
if (!As<ConfigList>(definition))
Expand Down Expand Up @@ -170,22 +238,29 @@ bool PunctSegmentor::Proceed(Segmentation* segmentation) {
char ch = input[k];
if (ch < 0x20 || ch >= 0x7f)
return true;
// sync with full_shape option
config_.LoadConfig(engine_);
string punct_key(1, ch);
auto punct_definition = config_.GetPunctDefinition(punct_key);
string key(1, ch);
auto punct_definition = config_.GetPunctDefinition(key);
if (!punct_definition)
return true;
{
Segment segment(k, k + 1);
DLOG(INFO) << "add a punctuation segment [" << segment.start << ", "
<< segment.end << ")";
segment.tags.insert("punct");
if (k == 0 && config_.is_digit_separator(ch) &&
is_after_number(engine_->context())) {
segment.tags.insert("punct_number");
} else {
segment.tags.insert("punct");
}
segmentation->AddSegment(segment);
}
return false; // exclusive
}

PunctTranslator::PunctTranslator(const Ticket& ticket) : Translator(ticket) {
PunctTranslator::PunctTranslator(const Ticket& ticket)
: Translator(ticket), formatter_(ticket) {
const bool load_symbols = true;
config_.LoadConfig(engine_, load_symbols);
}
Expand Down Expand Up @@ -233,8 +308,17 @@ an<Candidate> CreatePunctCandidate(const string& punct,

an<Translation> PunctTranslator::Query(const string& input,
const Segment& segment) {
if (segment.HasTag("punct_number")) {
if (!input.empty()) {
string punct = input;
formatter_.Format(&punct);
return New<UniqueTranslation>(CreatePunctCandidate(punct, segment));
}
return nullptr;
}
if (!segment.HasTag("punct"))
return nullptr;
// sync with full_shape option
config_.LoadConfig(engine_);
auto definition = config_.GetPunctDefinition(input);
if (!definition)
Expand Down
13 changes: 12 additions & 1 deletion src/rime/gear/punctuator.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <rime/processor.h>
#include <rime/segmentor.h>
#include <rime/translator.h>
#include <rime/gear/shape.h>

namespace rime {

Expand All @@ -23,10 +24,17 @@ class PunctConfig {
void LoadConfig(Engine* engine, bool load_symbols = false);
an<ConfigItem> GetPunctDefinition(const string key);

// True when the digit-separator set (digit_separators_) is non-empty.
bool has_digit_separators() const { return !digit_separators_.empty(); }
// True when `ch` occurs in the digit-separator set (digit_separators_).
bool is_digit_separator(char ch) const {
  const auto position = digit_separators_.find(ch);
  return position != string::npos;
}

protected:
string shape_;
an<ConfigMap> mapping_;
an<ConfigMap> symbols_;

string digit_separators_ = ",.:'";
};

class Punctuator : public Processor {
Expand All @@ -35,8 +43,10 @@ class Punctuator : public Processor {
virtual ProcessResult ProcessKeyEvent(const KeyEvent& key_event);

protected:
bool ConfirmUniquePunct(const an<ConfigItem>& definition);
bool ConvertDigitSeparator(char ch);
bool ReconvertDigitSeparatorAsPunct(const string& key);
bool AlternatePunct(const string& key, const an<ConfigItem>& definition);
bool ConfirmUniquePunct(const an<ConfigItem>& definition);
bool AutoCommitPunct(const an<ConfigItem>& definition);
bool PairPunct(const an<ConfigItem>& definition);

Expand Down Expand Up @@ -73,6 +83,7 @@ class PunctTranslator : public Translator {
const Segment& segment,
const an<ConfigMap>& definition);

ShapeFormatter formatter_;
PunctConfig config_;
};

Expand Down
Loading