From 16e34b3c9fccf0048f8ab2987ee5b66caa05435b Mon Sep 17 00:00:00 2001 From: Tino Didriksen Date: Sat, 10 Aug 2024 15:45:43 +0200 Subject: [PATCH] Fix segfault when nested rules change their context (fixes #145, fixes #146, fixes #148) --- src/GrammarApplicator.hpp | 2 ++ src/GrammarApplicator_runRules.cpp | 44 ++++++++++++++++++++++++++++-- src/inlines.hpp | 21 ++++++++++++++ src/stdafx.hpp | 1 + src/version.hpp | 2 +- 5 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/GrammarApplicator.hpp b/src/GrammarApplicator.hpp index 03f49206..9039eed7 100644 --- a/src/GrammarApplicator.hpp +++ b/src/GrammarApplicator.hpp @@ -249,6 +249,8 @@ class GrammarApplicator { Cohort* merge_with = nullptr; Rule* current_rule = nullptr; std::vector context_stack; + std::vector cohortsets; + std::vector rocits; ReadingSpec get_attach_to(); Cohort* get_mark(); diff --git a/src/GrammarApplicator_runRules.cpp b/src/GrammarApplicator_runRules.cpp index ce8b6821..fa790440 100644 --- a/src/GrammarApplicator_runRules.cpp +++ b/src/GrammarApplicator_runRules.cpp @@ -70,7 +70,38 @@ bool GrammarApplicator::updateRuleToCohorts(Cohort& c, const uint32_t& rsit) { indexSingleWindow(*current); } CohortSet& cohortset = current->rule_to_cohorts[rsit]; - cohortset.insert(&c); + std::vector csi; + for (size_t i = 0; i < cohortsets.size(); ++i) { + if (cohortsets[i] != &cohortset) { + continue; + } + csi.push_back(i); + } + if (!csi.empty()) { + auto cap = cohortset.capacity(); + std::vector ends; + std::vector> chs; + for (size_t i = 0; i < csi.size(); ++i) { + if (*rocits[csi[i]] == cohortset.end()) { + ends.push_back(rocits[csi[i]]); + } + else { + chs.push_back(std::pair(rocits[csi[i]], **rocits[csi[i]])); + } + } + cohortset.insert(&c); + for (auto it : ends) { + *it = cohortset.end(); + } + if (cap != cohortset.capacity()) { + for (auto& it : chs) { + *it.first = cohortset.find(it.second); + } + } + } + else { + cohortset.insert(&c); + } return current->valid_rules.insert(rsit); } @@ 
-333,11 +364,19 @@ bool GrammarApplicator::runSingleRule(SingleWindow& current, const Rule& rule, R } }; override_cohortset(); + cohortsets.push_back(cohortset); + rocits.push_back(nullptr); + + scope_guard popper([&]() { + cohortsets.pop_back(); + rocits.pop_back(); + }); if (debug_level > 1) { std::cerr << "DEBUG: " << cohortset->size() << "/" << current.cohorts.size() << " = " << double(cohortset->size()) / double(current.cohorts.size()) << std::endl; } - for (auto rocit = cohortset->cbegin(); rocit != cohortset->cend();) { + for (auto rocit = cohortset->cbegin(); (!cohortset->empty()) && (rocit != cohortset->cend());) { + rocits.back() = &rocit; Cohort* cohort = *rocit; ++rocit; @@ -483,6 +522,7 @@ bool GrammarApplicator::runSingleRule(SingleWindow& current, const Rule& rule, R auto reset_cohorts = [&]() { cohortset = &current.rule_to_cohorts[rule.number]; override_cohortset(); + cohortsets.back() = cohortset; if (get_apply_to().cohort->type & CT_REMOVED) { rocit = cohortset->lower_bound(current.cohorts[get_apply_to().cohort->local_number]); } diff --git a/src/inlines.hpp b/src/inlines.hpp index 3d8568d1..ea59d13c 100644 --- a/src/inlines.hpp +++ b/src/inlines.hpp @@ -777,6 +777,27 @@ class inc_dec { T* p; }; +class scope_guard { +public: + scope_guard(std::function func, bool good = true) + : func(func) + , good(good) + {} + + ~scope_guard() { + if (good) { + func(); + } + } + + void set(bool val = true) { + good = val; + } +private: + std::function func; + bool good = true; +}; + template inline T* reverse(T* head) { T* nr = nullptr; diff --git a/src/stdafx.hpp b/src/stdafx.hpp index 302a07ee..aacb0049 100644 --- a/src/stdafx.hpp +++ b/src/stdafx.hpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/src/version.hpp b/src/version.hpp index 9fbb7eda..3ffe1351 100644 --- a/src/version.hpp +++ b/src/version.hpp @@ -27,7 +27,7 @@ constexpr auto CG3_COPYRIGHT_STRING = "Copyright (C) 2007-2024 GrammarSoft ApS. 
constexpr uint32_t CG3_VERSION_MAJOR = 1; constexpr uint32_t CG3_VERSION_MINOR = 4; -constexpr uint32_t CG3_VERSION_PATCH = 15; +constexpr uint32_t CG3_VERSION_PATCH = 16; constexpr uint32_t CG3_REVISION = 13898; constexpr uint32_t CG3_FEATURE_REV = 13898; constexpr uint32_t CG3_TOO_OLD = 10373;