Skip to content

Commit

Permalink
feat(punctuator): convert punctuation in numbers
Browse files Browse the repository at this point in the history
fixes #972

use ascii punctuation ,.:' after numbers.
they are auto-committed if followed by a digit.
or commit manually with space key.
double strike the key to access the original binding.

support half-shape and full-shape forms.
opt-out with `punctuator/convert_punct_in_number: false`.
  • Loading branch information
lotem committed Jan 17, 2025
1 parent 6f6f540 commit 1898701
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 22 deletions.
131 changes: 110 additions & 21 deletions src/rime/gear/punctuator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,47 @@ an<ConfigItem> PunctConfig::GetPunctDefinition(const string key) {
// Builds the punctuation processor. When the schema exposes a config, the
// boolean switches `punctuator/convert_punct_in_number` and
// `punctuator/use_space` are read into the matching members; afterwards the
// punctuation definitions are loaded for this engine.
Punctuator::Punctuator(const Ticket& ticket) : Processor(ticket) {
  if (Config* schema_config = engine_->schema()->config()) {
    schema_config->GetBool("punctuator/convert_punct_in_number",
                           &convert_punct_in_number_);
    schema_config->GetBool("punctuator/use_space", &use_space_);
  }
  config_.LoadConfig(engine_);
}

// Reports whether the last segment of the composition has been translated
// into a punctuation candidate: the composition must be non-empty, its last
// segment must carry the expected tag, and the selected candidate of that
// segment must be of type "punct".
// NOTE(review): this span comes from a diff view and lists the pre-change
// line (fixed "punct" tag) immediately followed by its replacement (tag
// passed in as a parameter), for both the signature and the tag check.
static bool punctuation_is_translated(Context* ctx) {
static bool punctuation_is_translated(Context* ctx, const string& tag) {
Composition& comp = ctx->composition();
if (comp.empty() || !comp.back().HasTag("punct")) {
if (comp.empty() || !comp.back().HasTag(tag)) {
return false;
}
// A null candidate (nothing selected) counts as "not translated".
auto cand = comp.back().GetSelectedCandidate();
return cand && cand->type() == "punct";
}

// Tells whether `ch` is one of the ASCII marks handled as a separator inside
// numbers: '.', ':', ',' or '\''.
inline static bool is_digit_separator(char ch) {
  switch (ch) {
    case '.':
    case ':':
    case ',':
    case '\'':
      return true;
    default:
      return false;
  }
}

// Returns true when `text` is non-empty and its last byte is an ASCII digit.
inline static bool ends_with_digit(const string& text) {
  if (text.empty()) {
    return false;
  }
  // isdigit() has undefined behaviour for negative arguments other than EOF.
  // The last byte of multi-byte UTF-8 text is >= 0x80, which is negative when
  // `char` is signed, so convert to unsigned char before classifying.
  return isdigit(static_cast<unsigned char>(text.back())) != 0;
}

// Recognizes patterns like 3.14 12:30 1,000 1'000.
// Returns true when the commit history is non-empty and its latest record
// ends with a digit and has type "thru" or "raw".
static bool is_after_number(Context* ctx) {
  const CommitHistory& history = ctx->commit_history();
  if (history.empty()) {
    return false;
  }
  const CommitRecord& cr = history.back();
  // Logical && rather than bitwise &: both operands are boolean conditions,
  // and the type comparison is skipped when the text does not end in a digit.
  return ends_with_digit(cr.text) && (cr.type == "thru" || cr.type == "raw");
}

// Returns true when the composition's first segment is tagged "punct_number"
// and that segment's length equals the length of the whole input string.
static bool is_after_digit_separator(Context* ctx) {
  const auto& comp = ctx->composition();
  if (comp.empty()) {
    return false;
  }
  const auto& first_segment = comp[0];
  if (!first_segment.HasTag("punct_number")) {
    return false;
  }
  return first_segment.length == ctx->input().length();
}

ProcessResult Punctuator::ProcessKeyEvent(const KeyEvent& key_event) {
if (key_event.release() || key_event.ctrl() || key_event.alt() ||
key_event.super())
Expand All @@ -72,29 +99,70 @@ ProcessResult Punctuator::ProcessKeyEvent(const KeyEvent& key_event) {
if (!use_space_ && ch == XK_space && ctx->IsComposing()) {
return kNoop;
}
if (ch == '.' || ch == ':') { // 3.14, 12:30
const CommitHistory& history(ctx->commit_history());
if (!history.empty()) {
const CommitRecord& cr(history.back());
if (cr.type == "thru" && cr.text.length() == 1 && isdigit(cr.text[0])) {
return kRejected;
}
}
if (convert_punct_in_number_ && isdigit(ch) &&
is_after_digit_separator(ctx)) {
ctx->PushInput(ch) && ctx->Commit();
return kAccepted;
}
if (ConvertPunctInNumber(ch)) {
return kAccepted;
}
// sync with full_shape option
config_.LoadConfig(engine_);
string punct_key(1, ch);
auto punct_definition = config_.GetPunctDefinition(punct_key);
string key(1, ch);
auto punct_definition = config_.GetPunctDefinition(key);
if (!punct_definition)
return kNoop;
DLOG(INFO) << "punct key: '" << punct_key << "'";
if (!AlternatePunct(punct_key, punct_definition)) {
ctx->PushInput(ch) && punctuation_is_translated(ctx) &&
(ConfirmUniquePunct(punct_definition) ||
AutoCommitPunct(punct_definition) || PairPunct(punct_definition));
DLOG(INFO) << "punct key: '" << key << "'";
if (AlternatePunct(key, punct_definition)) {
return kAccepted;
}
if (ReconvertPunct(key) || ctx->PushInput(ch)) {
if (punctuation_is_translated(ctx, "punct")) {
ConfirmUniquePunct(punct_definition) ||
AutoCommitPunct(punct_definition) || PairPunct(punct_definition);
}
}
return kAccepted;
}

// Handles a digit separator (. : , ') typed right after a number.
//
// Returns false, leaving the key to the regular punctuation handling, when
// the feature is switched off, `ch` is not a digit separator, something is
// already being composed, or the latest commit does not end with a digit.
// Otherwise pushes `ch` into the input, and, if it was translated as a
// "punct_number" segment, moves the composition forward; returns true.
bool Punctuator::ConvertPunctInNumber(char ch) {
  if (!convert_punct_in_number_ || !is_digit_separator(ch)) {
    return false;
  }
  Context* ctx = engine_->context();
  if (ctx->composition().empty() && is_after_number(ctx)) {
    // Short-circuit chain: each step runs only if the previous one succeeded.
    // The key counts as handled regardless of how far the chain got.
    ctx->PushInput(ch) && punctuation_is_translated(ctx, "punct_number") &&
        ctx->composition().Forward();
    return true;
  }
  return false;
}

// Switches a pending number-mode punctuation back to regular punctuation.
//
// Applies only when the feature is enabled, the whole current input equals
// `key`, and the first segment of the composition is tagged "punct_number".
// In that case the segment is retagged as "punct", its status is reset to
// kVoid, and the previous segment is reopened.
// Returns true if the segment was retagged, false otherwise.
bool Punctuator::ReconvertPunct(const string& key) {
  if (!convert_punct_in_number_) {
    return false;
  }
  Context* ctx = engine_->context();
  if (ctx->input() != key) {
    return false;
  }
  Composition& comp = ctx->composition();
  if (!comp.empty()) {
    Segment& segment = comp[0];
    if (segment.HasTag("punct_number")) {
      segment.tags.erase("punct_number");
      segment.tags.insert("punct");
      segment.status = Segment::kVoid;
      DLOG(INFO) << "exit number mode, key = " << key;
      ctx->ReopenPreviousSegment();
      return true;
    }
  }
  return false;
}

bool Punctuator::AlternatePunct(const string& key,
const an<ConfigItem>& definition) {
if (!As<ConfigList>(definition))
Expand Down Expand Up @@ -159,6 +227,11 @@ bool Punctuator::PairPunct(const an<ConfigItem>& definition) {
}

// Builds the punctuation segmentor. When the schema exposes a config, the
// boolean switch `punctuator/convert_punct_in_number` is read into the
// matching member; afterwards the punctuation definitions are loaded for
// this engine.
PunctSegmentor::PunctSegmentor(const Ticket& ticket) : Segmentor(ticket) {
  if (Config* schema_config = engine_->schema()->config()) {
    schema_config->GetBool("punctuator/convert_punct_in_number",
                           &convert_punct_in_number_);
  }
  config_.LoadConfig(engine_);
}

Expand All @@ -170,22 +243,29 @@ bool PunctSegmentor::Proceed(Segmentation* segmentation) {
char ch = input[k];
if (ch < 0x20 || ch >= 0x7f)
return true;
// sync with full_shape option
config_.LoadConfig(engine_);
string punct_key(1, ch);
auto punct_definition = config_.GetPunctDefinition(punct_key);
string key(1, ch);
auto punct_definition = config_.GetPunctDefinition(key);
if (!punct_definition)
return true;
{
Segment segment(k, k + 1);
DLOG(INFO) << "add a punctuation segment [" << segment.start << ", "
<< segment.end << ")";
segment.tags.insert("punct");
if (k == 0 && convert_punct_in_number_ && is_digit_separator(ch) &&
is_after_number(engine_->context())) {
segment.tags.insert("punct_number");
} else {
segment.tags.insert("punct");
}
segmentation->AddSegment(segment);
}
return false; // exclusive
}

// Constructs the punctuation translator and loads the punctuation
// definitions with symbol loading requested.
// NOTE(review): this span comes from a diff view and lists the pre-change
// constructor head immediately followed by its replacement, which also
// initializes `formatter_` from the ticket.
PunctTranslator::PunctTranslator(const Ticket& ticket) : Translator(ticket) {
PunctTranslator::PunctTranslator(const Ticket& ticket)
: Translator(ticket), formatter_(ticket) {
// Named constant so the meaning of LoadConfig's second argument is visible.
const bool load_symbols = true;
config_.LoadConfig(engine_, load_symbols);
}
Expand Down Expand Up @@ -233,8 +313,17 @@ an<Candidate> CreatePunctCandidate(const string& punct,

an<Translation> PunctTranslator::Query(const string& input,
const Segment& segment) {
if (segment.HasTag("punct_number")) {
if (input.length() == 1 && is_digit_separator(input[0])) {
string punct = input;
formatter_.Format(&punct);
return New<UniqueTranslation>(CreatePunctCandidate(punct, segment));
}
return nullptr;
}
if (!segment.HasTag("punct"))
return nullptr;
// sync with full_shape option
config_.LoadConfig(engine_);
auto definition = config_.GetPunctDefinition(input);
if (!definition)
Expand Down
8 changes: 7 additions & 1 deletion src/rime/gear/punctuator.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <rime/processor.h>
#include <rime/segmentor.h>
#include <rime/translator.h>
#include <rime/gear/shape.h>

namespace rime {

Expand All @@ -35,12 +36,15 @@ class Punctuator : public Processor {
virtual ProcessResult ProcessKeyEvent(const KeyEvent& key_event);

protected:
bool ConfirmUniquePunct(const an<ConfigItem>& definition);
bool ConvertPunctInNumber(char ch);
bool ReconvertPunct(const string& key);
bool AlternatePunct(const string& key, const an<ConfigItem>& definition);
bool ConfirmUniquePunct(const an<ConfigItem>& definition);
bool AutoCommitPunct(const an<ConfigItem>& definition);
bool PairPunct(const an<ConfigItem>& definition);

PunctConfig config_;
bool convert_punct_in_number_ = true;
bool use_space_ = false;
map<an<ConfigItem>, int> oddness_;
};
Expand All @@ -52,6 +56,7 @@ class PunctSegmentor : public Segmentor {

protected:
PunctConfig config_;
bool convert_punct_in_number_ = true;
};

class PunctTranslator : public Translator {
Expand All @@ -73,6 +78,7 @@ class PunctTranslator : public Translator {
const Segment& segment,
const an<ConfigMap>& definition);

ShapeFormatter formatter_;
PunctConfig config_;
};

Expand Down

0 comments on commit 1898701

Please sign in to comment.