diff --git a/builtin-functions/kphp-light/functions.txt b/builtin-functions/kphp-light/functions.txt index 0e88a80591..0df301ad85 100644 --- a/builtin-functions/kphp-light/functions.txt +++ b/builtin-functions/kphp-light/functions.txt @@ -7,11 +7,13 @@ require_once __DIR__ . '/file.txt'; require_once __DIR__ . '/hash.txt'; require_once __DIR__ . '/job-workers.txt'; require_once __DIR__ . '/rpc.txt'; +require_once __DIR__ . '/serialize.txt'; require_once __DIR__ . '/string.txt'; require_once __DIR__ . '/server.txt'; require_once __DIR__ . '/kphp-toggles.txt'; require_once __DIR__ . '/kphp_internal.txt'; require_once __DIR__ . '/time.txt'; +require_once __DIR__ . '/vkext.txt'; /** defined in runtime-core.h **/ function likely ($x ::: bool) ::: bool; diff --git a/builtin-functions/kphp-light/unsupported/serialize.txt b/builtin-functions/kphp-light/serialize.txt similarity index 51% rename from builtin-functions/kphp-light/unsupported/serialize.txt rename to builtin-functions/kphp-light/serialize.txt index cf02a6dfd5..b4240cef02 100644 --- a/builtin-functions/kphp-light/unsupported/serialize.txt +++ b/builtin-functions/kphp-light/serialize.txt @@ -1,37 +1,11 @@ ; -/** @kphp-extern-func-info cpp_template_call can_throw */ -function instance_deserialize_safe($serialized ::: string, $to_type ::: string) ::: instance<^2>; - define('JSON_UNESCAPED_UNICODE', 1); define('JSON_FORCE_OBJECT', 16); define('JSON_PRETTY_PRINT', 128); // TODO: add actual support define('JSON_PARTIAL_OUTPUT_ON_ERROR', 512); define('JSON_PRESERVE_ZERO_FRACTION', 1024); -/** @kphp-generate-stub-class */ class JsonEncoder { const rename_policy = 'none'; const visibility_policy = 'all'; @@ -52,4 +26,3 @@ class JsonEncoder { /** @kphp-extern-func-info cpp_template_call */ static function from_json_impl(string $encoder_tag, string $json, string $class_name) ::: instance<^3>; } - diff --git a/builtin-functions/kphp-light/unsupported-functions.txt b/builtin-functions/kphp-light/unsupported-functions.txt index 0a45b2ff15..e844405f58 100644 --- a/builtin-functions/kphp-light/unsupported-functions.txt +++ b/builtin-functions/kphp-light/unsupported-functions.txt @@ -10,8 +10,7 @@ require_once __DIR__ . '/unsupported/math.txt'; require_once __DIR__ . '/unsupported/memcache.txt'; require_once __DIR__ . '/unsupported/misc.txt'; require_once __DIR__ . '/unsupported/regex.txt'; -require_once __DIR__ . '/unsupported/serialize.txt'; +require_once __DIR__ . '/unsupported/unsupported-serialize.txt'; require_once __DIR__ . '/unsupported/spl.txt'; require_once __DIR__ . '/unsupported/uberh3.txt'; -require_once __DIR__ . '/unsupported/vkext.txt'; require_once __DIR__ . '/unsupported/unsupported-server.txt'; diff --git a/builtin-functions/kphp-light/unsupported/unsupported-serialize.txt b/builtin-functions/kphp-light/unsupported/unsupported-serialize.txt new file mode 100644 index 0000000000..3d5dca1d7f --- /dev/null +++ b/builtin-functions/kphp-light/unsupported/unsupported-serialize.txt @@ -0,0 +1,27 @@ +; +/** @kphp-extern-func-info cpp_template_call can_throw */ +function instance_deserialize_safe($serialized ::: string, $to_type ::: string) ::: instance<^2>; + diff --git a/builtin-functions/kphp-light/unsupported/vkext.txt b/builtin-functions/kphp-light/unsupported/vkext.txt deleted file mode 100644 index 263fbd64c7..0000000000 --- a/builtin-functions/kphp-light/unsupported/vkext.txt +++ /dev/null @@ -1,25 +0,0 @@ -get_name() << " "; } // add vendored statically linking libs - std::vector libs = split(RUNTIME_LINK_LIBS); + std::vector libs = split(RUNTIME_LINK_LIBS, ';'); std::for_each(libs.cbegin(), libs.cend(), [&ss](const auto &lib) noexcept { ss << lib << " "; }); return ss.str(); } diff --git a/runtime-common/stdlib/stdlib.cmake b/runtime-common/stdlib/stdlib.cmake index 89b7d4b5d5..b66413b5fd 100644 --- a/runtime-common/stdlib/stdlib.cmake +++ b/runtime-common/stdlib/stdlib.cmake @@ -1,5 +1,12 @@ -prepend(STDLIB_STRING stdlib/string/ string-functions.cpp - mbstring-functions.cpp) +prepend(STDLIB_STRING stdlib/string/ json-functions.cpp + json-writer.cpp + mbstring-functions.cpp + string-functions.cpp) prepend(STDLIB_SERVER stdlib/server/ url-functions.cpp) +prepend(STDLIB_VKEXT stdlib/vkext/ vkext.cpp vkext_stats.cpp string-processing.cpp) -set(STDLIB_SRC ${STDLIB_STRING} ${STDLIB_SERVER}) +if(COMPILER_CLANG) + set_source_files_properties(${RUNTIME_COMMON_DIR}/stdlib/vkext/string-processing.cpp PROPERTIES COMPILE_FLAGS -Wno-invalid-source-encoding) +endif() + +set(STDLIB_SRC ${STDLIB_STRING} ${STDLIB_SERVER} ${STDLIB_VKEXT}) diff --git a/runtime/from-json-processor.h b/runtime-common/stdlib/string/from-json-processor.h similarity index 85% rename from runtime/from-json-processor.h rename to runtime-common/stdlib/string/from-json-processor.h index c31c630bc8..fef3c8cf83 100644 --- a/runtime/from-json-processor.h +++ b/runtime-common/stdlib/string/from-json-processor.h @@ -4,11 +4,12 @@ #pragma once -#include +#include "runtime/context/runtime-context.h" #include "runtime-common/core/runtime-core.h" -#include "runtime/json-functions.h" -#include "runtime/json-processor-utils.h" +#include "runtime-common/stdlib/string/json-functions.h" +#include "runtime-common/stdlib/string/json-processor-utils.h" +#include "runtime-common/stdlib/string/string-context.h" template class FromJsonVisitor { @@ -91,13 +92,13 @@ class FromJsonVisitor { } void do_set(JsonRawString &value, const mixed &json) noexcept { - kphp_runtime_context.static_SB.clean(); - if (!impl_::JsonEncoder{0, false, get_json_obj_magic_key()}.encode(json)) { + RuntimeContext::get().static_SB.clean(); + if (!impl_::JsonEncoder{0, false, get_json_obj_magic_key()}.encode(json, RuntimeContext::get().static_SB)) { error_.append("failed to decode @kphp-json raw_string field "); error_.append(json_path_.to_string()); return; } - value.str = kphp_runtime_context.static_SB.str(); + value.str = RuntimeContext::get().static_SB.str(); } template @@ -169,14 +170,14 @@ class_instance from_json_impl(const mixed &json, JsonPath &json_path) noexcep FromJsonVisitor visitor{json, impl_::IsJsonFlattenClass::value, json_path}; instance.get()->accept(visitor); if (visitor.has_error()) { - JsonEncoderError::msg.append(visitor.get_error()); + StringLibContext::get().last_json_processor_error.append(visitor.get_error()); return {}; } } if constexpr (impl_::HasClassWakeupMethod::value) { instance.get()->wakeup(instance); } - return JsonEncoderError::msg.empty() ? instance : class_instance{}; + return StringLibContext::get().last_json_processor_error.empty() ? instance : class_instance{}; } template @@ -196,18 +197,19 @@ void FromJsonVisitor::do_set(class_instance &klass, const mixed &json) n template ClassName f$JsonEncoder$$from_json_impl(Tag /*tag*/, const string &json_string, const string &/*class_mame*/) noexcept { - JsonEncoderError::msg = {}; + auto &msg = StringLibContext::get().last_json_processor_error; + msg = {}; auto [json, success] = json_decode(json_string, FromJsonVisitor::get_json_obj_magic_key()); if (!success) { - JsonEncoderError::msg.append(json_string.empty() ? "provided empty json string" : "failed to parse json string"); + msg.append(json_string.empty() ? "provided empty json string" : "failed to parse json string"); return {}; } if constexpr (!impl_::IsJsonFlattenClass::value) { if (!json.is_array() || json.as_array().is_vector()) { - JsonEncoderError::msg.append("root element of json string must be an object type, got "); - JsonEncoderError::msg.append(json.get_type_c_str()); + msg.append("root element of json string must be an object type, got "); + msg.append(json.get_type_c_str()); return {}; } } @@ -216,4 +218,6 @@ ClassName f$JsonEncoder$$from_json_impl(Tag /*tag*/, const string &json_string, return from_json_impl(json, json_path); } -string f$JsonEncoder$$getLastError() noexcept; +inline string f$JsonEncoder$$getLastError() noexcept { + return StringLibContext::get().last_json_processor_error; +} diff --git a/runtime-light/utils/json-functions.cpp b/runtime-common/stdlib/string/json-functions.cpp similarity index 88% rename from runtime-light/utils/json-functions.cpp rename to runtime-common/stdlib/string/json-functions.cpp index d736393bd1..57efca34ff 100644 --- a/runtime-light/utils/json-functions.cpp +++ b/runtime-common/stdlib/string/json-functions.cpp @@ -2,16 +2,16 @@ // Copyright (c) 2020 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt -#include "runtime-light/utils/json-functions.h" +#include "runtime-common/stdlib/string/json-functions.h" #include "common/algorithms/find.h" -//#include "runtime/string_functions.h" +#include "runtime-common/stdlib/string/string-functions.h" // note: json-functions.cpp is used for non-typed json implementation: for json_encode() and json_decode() // for classes, e.g. `JsonEncoder::encode(new A)`, see json-writer.cpp and from/to visitors namespace { -void json_append_one_char(unsigned int c, string_buffer & sb) noexcept { +void json_append_one_char(unsigned int c, string_buffer &sb) noexcept { sb.append_char('\\'); sb.append_char('u'); sb.append_char("0123456789abcdef"[c >> 12]); @@ -20,7 +20,7 @@ void json_append_one_char(unsigned int c, string_buffer & sb) noexcept { sb.append_char("0123456789abcdef"[c & 15]); } -bool json_append_char(unsigned int c, string_buffer & sb) noexcept { +bool json_append_char(unsigned int c, string_buffer &sb) noexcept { if (c < 0x10000) { if (0xD7FF < c && c < 0xE000) { return false; @@ -37,8 +37,7 @@ bool json_append_char(unsigned int c, string_buffer & sb) noexcept { return false; } - -bool do_json_encode_string_php(const JsonPath &json_path, const char *s, int len, int64_t options, string_buffer & sb) noexcept { +bool do_json_encode_string_php(const JsonPath &json_path, const char *s, int len, int64_t options, string_buffer &sb) noexcept { int begin_pos = sb.size(); if (options & JSON_UNESCAPED_UNICODE) { sb.reserve(2 * len + 2); @@ -176,7 +175,7 @@ string JsonPath::to_string() const { } unsigned num_parts = std::clamp(depth, 0U, static_cast(arr.size())); string result; - result.reserve_at_least((num_parts+1) * 8); + result.reserve_at_least((num_parts + 1) * 8); result.push_back('/'); for (unsigned i = 0; i < num_parts; i++) { const char *key = arr[i]; @@ -198,13 +197,12 @@ string JsonPath::to_string() const { namespace impl_ { -JsonEncoder::JsonEncoder(int64_t options, bool simple_encode, const char *json_obj_magic_key) noexcept: - options_(options), - simple_encode_(simple_encode), - json_obj_magic_key_(json_obj_magic_key) { -} +JsonEncoder::JsonEncoder(int64_t options, bool simple_encode, const char *json_obj_magic_key) noexcept + : options_(options) + , simple_encode_(simple_encode) + , json_obj_magic_key_(json_obj_magic_key) {} -bool JsonEncoder::encode(bool b, string_buffer & sb) noexcept { +bool JsonEncoder::encode(bool b, string_buffer &sb) noexcept { if (b) { sb.append("true", 4); } else { @@ -213,17 +211,17 @@ bool JsonEncoder::encode(bool b, string_buffer & sb) noexcept { return true; } -bool JsonEncoder::encode_null(string_buffer & sb) const noexcept { +bool JsonEncoder::encode_null(string_buffer &sb) const noexcept { sb.append("null", 4); return true; } -bool JsonEncoder::encode(int64_t i, string_buffer & sb) noexcept { +bool JsonEncoder::encode(int64_t i, string_buffer &sb) noexcept { sb << i; return true; } -bool JsonEncoder::encode(double d, string_buffer & sb) noexcept { +bool JsonEncoder::encode(double d, string_buffer &sb) noexcept { if (vk::any_of_equal(std::fpclassify(d), FP_INFINITE, FP_NAN)) { php_warning("%s: strange double %lf in function json_encode", json_path_.to_string().c_str(), d); if (options_ & JSON_PARTIAL_OUTPUT_ON_ERROR) { @@ -232,17 +230,16 @@ bool JsonEncoder::encode(double d, string_buffer & sb) noexcept { return false; } } else { - //todo:k2 implement f$number_format - sb << /*(simple_encode_ ? f$number_format(d, 6, string{"."}, string{}) : */ string{d}/*)*/; + sb << (simple_encode_ ? f$number_format(d, 6, string{"."}, string{}) : string{d}); } return true; } -bool JsonEncoder::encode(const string &s, string_buffer & sb) noexcept { +bool JsonEncoder::encode(const string &s, string_buffer &sb) noexcept { return do_json_encode_string_php(json_path_, s.c_str(), s.size(), options_, sb); } -bool JsonEncoder::encode(const mixed &v, string_buffer & sb) noexcept { +bool JsonEncoder::encode(const mixed &v, string_buffer &sb) noexcept { switch (v.get_type()) { case mixed::type::NUL: return encode_null(sb); @@ -278,29 +275,22 @@ bool do_json_decode(const char *s, int s_len, int &i, mixed &v, const char *json json_skip_blanks(s, i); switch (s[i]) { case 'n': - if (s[i + 1] == 'u' && - s[i + 2] == 'l' && - s[i + 3] == 'l') { + if (s[i + 1] == 'u' && s[i + 2] == 'l' && s[i + 3] == 'l') { i += 4; return true; } break; case 't': - if (s[i + 1] == 'r' && - s[i + 2] == 'u' && - s[i + 3] == 'e') { + if (s[i + 1] == 'r' && s[i + 2] == 'u' && s[i + 3] == 'e') { i += 4; - new(&v) mixed(true); + new (&v) mixed(true); return true; } break; case 'f': - if (s[i + 1] == 'a' && - s[i + 2] == 'l' && - s[i + 3] == 's' && - s[i + 4] == 'e') { + if (s[i + 1] == 'a' && s[i + 2] == 'l' && s[i + 3] == 's' && s[i + 4] == 'e') { i += 5; - new(&v) mixed(false); + new (&v) mixed(false); return true; } break; @@ -362,8 +352,7 @@ bool do_json_decode(const char *s, int s_len, int &i, mixed &v, const char *json } if (0xD7FF < num && num < 0xE000) { - if (s[i + 1] == '\\' && s[i + 2] == 'u' && - isxdigit(s[i + 3]) && isxdigit(s[i + 4]) && isxdigit(s[i + 5]) && isxdigit(s[i + 6])) { + if (s[i + 1] == '\\' && s[i + 2] == 'u' && isxdigit(s[i + 3]) && isxdigit(s[i + 4]) && isxdigit(s[i + 5]) && isxdigit(s[i + 6])) { i += 2; int u = 0; for (int t = 0; t < 4; t++) { @@ -417,7 +406,7 @@ bool do_json_decode(const char *s, int s_len, int &i, mixed &v, const char *json } value.shrink(l); - new(&v) mixed(value); + new (&v) mixed(value); i++; return true; } @@ -444,7 +433,7 @@ bool do_json_decode(const char *s, int s_len, int &i, mixed &v, const char *json i++; } - new(&v) mixed(res); + new (&v) mixed(res); return true; } case '{': { @@ -481,7 +470,7 @@ bool do_json_decode(const char *s, int s_len, int &i, mixed &v, const char *json res[string{json_obj_magic_key}] = true; } - new(&v) mixed(res); + new (&v) mixed(res); return true; } default: { @@ -493,7 +482,7 @@ bool do_json_decode(const char *s, int s_len, int &i, mixed &v, const char *json int64_t intval = 0; if (php_try_to_int(s + i, j - i, &intval)) { i = j; - new(&v) mixed(intval); + new (&v) mixed(intval); return true; } @@ -501,7 +490,7 @@ bool do_json_decode(const char *s, int s_len, int &i, mixed &v, const char *json double floatval = strtod(s + i, &end_ptr); if (end_ptr == s + j) { i = j; - new(&v) mixed(floatval); + new (&v) mixed(floatval); return true; } } diff --git a/runtime-light/utils/json-functions.h b/runtime-common/stdlib/string/json-functions.h similarity index 78% rename from runtime-light/utils/json-functions.h rename to runtime-common/stdlib/string/json-functions.h index 5ed8d3a38b..b2e56a209b 100644 --- a/runtime-light/utils/json-functions.h +++ b/runtime-common/stdlib/string/json-functions.h @@ -9,7 +9,6 @@ #include "common/mixin/not_copyable.h" #include "runtime-common/core/runtime-core.h" - constexpr int64_t JSON_UNESCAPED_UNICODE = 1; constexpr int64_t JSON_FORCE_OBJECT = 16; constexpr int64_t JSON_PRETTY_PRINT = 128; // TODO: add actual support to untyped @@ -22,7 +21,7 @@ constexpr int64_t JSON_AVAILABLE_FLAGS_TYPED = JSON_PRETTY_PRINT | JSON_PRESERVE struct JsonPath { constexpr static int MAX_DEPTH = 8; - std::array arr; + std::array arr; unsigned depth = 0; void enter(const char *key) noexcept { @@ -47,31 +46,30 @@ class JsonEncoder : vk::not_copyable { public: JsonEncoder(int64_t options, bool simple_encode, const char *json_obj_magic_key = nullptr) noexcept; - //todo:k2 change static_SB everywhere to string_buffer arg - bool encode(bool b, string_buffer & sb) noexcept; - bool encode(int64_t i, string_buffer & sb) noexcept; - bool encode(const string &s, string_buffer & sb) noexcept; - bool encode(double d, string_buffer & sb) noexcept; - bool encode(const mixed &v, string_buffer & sb) noexcept; + // todo:k2 change static_SB everywhere to string_buffer arg + bool encode(bool b, string_buffer &sb) noexcept; + bool encode(int64_t i, string_buffer &sb) noexcept; + bool encode(const string &s, string_buffer &sb) noexcept; + bool encode(double d, string_buffer &sb) noexcept; + bool encode(const mixed &v, string_buffer &sb) noexcept; template - bool encode(const array &arr, string_buffer & sb) noexcept; + bool encode(const array &arr, string_buffer &sb) noexcept; template - bool encode(const Optional &opt, string_buffer & sb) noexcept; + bool encode(const Optional &opt, string_buffer &sb) noexcept; private: - bool encode_null(string_buffer & sb) const noexcept; + bool encode_null(string_buffer &sb) const noexcept; JsonPath json_path_; const int64_t options_{0}; - //todo:k2 use simple_encode - [[maybe_unused]] const bool simple_encode_{false}; + const bool simple_encode_{false}; const char *json_obj_magic_key_{nullptr}; }; template -bool JsonEncoder::encode(const array &arr, string_buffer & sb) noexcept { +bool JsonEncoder::encode(const array &arr, string_buffer &sb) noexcept { bool is_vector = arr.is_vector(); const bool force_object = static_cast(JSON_FORCE_OBJECT & options_); if (!force_object && !is_vector && arr.is_pseudo_vector()) { @@ -142,7 +140,7 @@ bool JsonEncoder::encode(const array &arr, string_buffer & sb) noexcept { } template -bool JsonEncoder::encode(const Optional &opt, string_buffer & sb) noexcept { +bool JsonEncoder::encode(const Optional &opt, string_buffer &sb) noexcept { switch (opt.value_state()) { case OptionalState::has_value: return encode(opt.val(), sb); @@ -170,8 +168,6 @@ Optional f$json_encode(const T &v, int64_t options = 0, bool simple_enco return sb.c_str(); } -//todo:k2 implement string f$vk_json_encode_safe(const T &v, bool simple_encode = true) noexcept - template inline Optional f$vk_json_encode(const T &v) noexcept { return f$json_encode(v, 0, true); @@ -179,14 +175,3 @@ inline Optional f$vk_json_encode(const T &v) noexcept { std::pair json_decode(const string &v, const char *json_obj_magic_key = nullptr) noexcept; mixed f$json_decode(const string &v, bool assoc = false) noexcept; - -template -string f$JsonEncoder$$to_json_impl(Tag /*tag*/, const class_instance &, int64_t = 0, const array & = {}) noexcept { - php_critical_error("call to unsupported function"); -} - -template -ClassName f$JsonEncoder$$from_json_impl(Tag /*tag*/, const string &, const string & /*class_mame*/) noexcept { - php_critical_error("call to unsupported function"); -} - diff --git a/runtime/json-processor-utils.h b/runtime-common/stdlib/string/json-processor-utils.h similarity index 74% rename from runtime/json-processor-utils.h rename to runtime-common/stdlib/string/json-processor-utils.h index 015cf990d5..a27f5d1f86 100644 --- a/runtime/json-processor-utils.h +++ b/runtime-common/stdlib/string/json-processor-utils.h @@ -6,18 +6,20 @@ #include +#include "runtime-common/core/runtime-core.h" + namespace impl_ { template> -struct IsJsonFlattenClass : std::false_type{}; +struct IsJsonFlattenClass : std::false_type {}; template -struct IsJsonFlattenClass> : std::true_type{}; +struct IsJsonFlattenClass> : std::true_type {}; template> -struct HasClassWakeupMethod : std::false_type{}; +struct HasClassWakeupMethod : std::false_type {}; template -struct HasClassWakeupMethod> : std::true_type{}; +struct HasClassWakeupMethod> : std::true_type {}; } // namespace impl_ struct JsonRawString { @@ -25,7 +27,3 @@ struct JsonRawString { : str(s) {} string &str; }; - -struct JsonEncoderError { - static string msg; -}; diff --git a/runtime/json-writer.cpp b/runtime-common/stdlib/string/json-writer.cpp similarity index 74% rename from runtime/json-writer.cpp rename to runtime-common/stdlib/string/json-writer.cpp index 2e9ef7cdbc..321a12a3fd 100644 --- a/runtime/json-writer.cpp +++ b/runtime-common/stdlib/string/json-writer.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2022 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt -#include "runtime/json-writer.h" +#include "runtime-common/stdlib/string/json-writer.h" #include "runtime/array_functions.h" #include "runtime/context/runtime-context.h" @@ -57,18 +57,18 @@ static void escape_json_string(string_buffer &buffer, std::string_view s) noexce JsonWriter::JsonWriter(bool pretty_print, bool preserve_zero_fraction) noexcept : pretty_print_(pretty_print) , preserve_zero_fraction_(preserve_zero_fraction) { - kphp_runtime_context.static_SB.clean(); + RuntimeContext::get().static_SB.clean(); } JsonWriter::~JsonWriter() noexcept { - kphp_runtime_context.static_SB.clean(); + RuntimeContext::get().static_SB.clean(); } bool JsonWriter::write_bool(bool b) noexcept { if (!register_value()) { return false; } - b ? kphp_runtime_context.static_SB.append("true", 4) : kphp_runtime_context.static_SB.append("false", 5); + b ? RuntimeContext::get().static_SB.append("true", 4) : RuntimeContext::get().static_SB.append("false", 5); return true; } @@ -76,7 +76,7 @@ bool JsonWriter::write_int(int64_t i) noexcept { if (!register_value()) { return false; } - kphp_runtime_context.static_SB << i; + RuntimeContext::get().static_SB << i; return true; } @@ -88,13 +88,13 @@ bool JsonWriter::write_double(double d) noexcept { d = 0.0; } if (double_precision_) { - kphp_runtime_context.static_SB << f$round(d, double_precision_); + RuntimeContext::get().static_SB << f$round(d, double_precision_); } else { - kphp_runtime_context.static_SB << d; + RuntimeContext::get().static_SB << d; } if (preserve_zero_fraction_) { if (double dummy = 0.0; std::modf(d, &dummy) == 0.0) { - kphp_runtime_context.static_SB << ".0"; + RuntimeContext::get().static_SB << ".0"; } } return true; @@ -104,10 +104,10 @@ bool JsonWriter::write_string(const string &s) noexcept { if (!register_value()) { return false; } - kphp_runtime_context.static_SB.reserve(2 * s.size() + 2); - kphp_runtime_context.static_SB.append_char('"'); - escape_json_string(kphp_runtime_context.static_SB, {s.c_str(), s.size()}); - kphp_runtime_context.static_SB.append_char('"'); + RuntimeContext::get().static_SB.reserve(2 * s.size() + 2); + RuntimeContext::get().static_SB.append_char('"'); + escape_json_string(RuntimeContext::get().static_SB, {s.c_str(), s.size()}); + RuntimeContext::get().static_SB.append_char('"'); return true; } @@ -115,7 +115,7 @@ bool JsonWriter::write_raw_string(const string &s) noexcept { if (!register_value()) { return false; } - kphp_runtime_context.static_SB << s; + RuntimeContext::get().static_SB << s; return true; } @@ -123,7 +123,7 @@ bool JsonWriter::write_null() noexcept { if (!register_value()) { return false; } - kphp_runtime_context.static_SB.append("null", 4); + RuntimeContext::get().static_SB.append("null", 4); return true; } @@ -133,22 +133,22 @@ bool JsonWriter::write_key(std::string_view key, bool escape) noexcept { return false; } if (stack_.back().values_count) { - kphp_runtime_context.static_SB << ','; + RuntimeContext::get().static_SB << ','; } if (pretty_print_) { - kphp_runtime_context.static_SB << '\n'; + RuntimeContext::get().static_SB << '\n'; write_indent(); } - kphp_runtime_context.static_SB << '"'; + RuntimeContext::get().static_SB << '"'; if (escape) { - escape_json_string(kphp_runtime_context.static_SB, key); + escape_json_string(RuntimeContext::get().static_SB, key); } else { - kphp_runtime_context.static_SB.append(key.data(), key.size()); + RuntimeContext::get().static_SB.append(key.data(), key.size()); } - kphp_runtime_context.static_SB << '"'; - kphp_runtime_context.static_SB << ':'; + RuntimeContext::get().static_SB << '"'; + RuntimeContext::get().static_SB << ':'; if (pretty_print_) { - kphp_runtime_context.static_SB << ' '; + RuntimeContext::get().static_SB << ' '; } return true; } @@ -178,7 +178,7 @@ string JsonWriter::get_error() const noexcept { } string JsonWriter::get_final_json() const noexcept { - return kphp_runtime_context.static_SB.str(); + return RuntimeContext::get().static_SB.str(); } bool JsonWriter::new_level(bool is_array) noexcept { @@ -187,7 +187,7 @@ bool JsonWriter::new_level(bool is_array) noexcept { } stack_.emplace_back(NestedLevel{.in_array = is_array}); - kphp_runtime_context.static_SB << (is_array ? '[' : '{'); + RuntimeContext::get().static_SB << (is_array ? '[' : '{'); indent_ += 4; return true; } @@ -209,11 +209,11 @@ bool JsonWriter::exit_level(bool is_array) noexcept { indent_ -= 4; if (pretty_print_ && cur_level.values_count) { - kphp_runtime_context.static_SB << '\n'; + RuntimeContext::get().static_SB << '\n'; write_indent(); } - kphp_runtime_context.static_SB << (is_array ? ']' : '}'); + RuntimeContext::get().static_SB << (is_array ? ']' : '}'); return true; } @@ -229,10 +229,10 @@ bool JsonWriter::register_value() noexcept { auto &top = stack_.back(); if (top.in_array) { if (top.values_count) { - kphp_runtime_context.static_SB << ','; + RuntimeContext::get().static_SB << ','; } if (pretty_print_) { - kphp_runtime_context.static_SB << '\n'; + RuntimeContext::get().static_SB << '\n'; write_indent(); } } @@ -243,9 +243,9 @@ bool JsonWriter::register_value() noexcept { void JsonWriter::write_indent() const noexcept { if (indent_) { - kphp_runtime_context.static_SB.reserve(indent_); + RuntimeContext::get().static_SB.reserve(indent_); for (std::size_t i = 0; i < indent_; ++i) { - kphp_runtime_context.static_SB.append_char(' '); + RuntimeContext::get().static_SB.append_char(' '); } } } diff --git a/runtime/json-writer.h b/runtime-common/stdlib/string/json-writer.h similarity index 100% rename from runtime/json-writer.h rename to runtime-common/stdlib/string/json-writer.h diff --git a/runtime-common/stdlib/string/string-context.h b/runtime-common/stdlib/string/string-context.h index d655fc18fe..37553df004 100644 --- a/runtime-common/stdlib/string/string-context.h +++ b/runtime-common/stdlib/string/string-context.h @@ -45,6 +45,9 @@ class StringLibContext final : vk::not_copyable { // mb_string context bool detect_incorrect_encoding_names{}; + // from-json-processor context + string last_json_processor_error; + static StringLibContext &get() noexcept; }; diff --git a/runtime/to-json-processor.h b/runtime-common/stdlib/string/to-json-processor.h similarity index 92% rename from runtime/to-json-processor.h rename to runtime-common/stdlib/string/to-json-processor.h index 3ba5c7f333..8e24c767a3 100644 --- a/runtime/to-json-processor.h +++ b/runtime-common/stdlib/string/to-json-processor.h @@ -5,8 +5,10 @@ #pragma once #include "runtime-common/core/runtime-core.h" -#include "runtime/json-processor-utils.h" -#include "runtime/json-writer.h" +#include "runtime-common/stdlib/string/json-functions.h" +#include "runtime-common/stdlib/string/json-processor-utils.h" +#include "runtime-common/stdlib/string/json-writer.h" +#include "runtime-common/stdlib/string/string-context.h" template class ToJsonVisitor { @@ -138,7 +140,7 @@ class ToJsonVisitor { template void to_json_impl(const class_instance &klass, impl_::JsonWriter &writer, const array &more, std::size_t depth = 0) noexcept { if (depth > 64) { - JsonEncoderError::msg.assign("allowed depth=64 of json object exceeded"); + StringLibContext::get().last_json_processor_error.assign("allowed depth=64 of json object exceeded"); return; } @@ -178,13 +180,13 @@ string f$JsonEncoder$$to_json_impl(Tag /*tag*/, const class_instance &klass, if (unlikely(has_unsupported_option)) { php_warning("Wrong parameter flags = %" PRIi64 " in function JsonEncoder::encode", flags); } - - JsonEncoderError::msg = {}; + auto &error_msg = StringLibContext::get().last_json_processor_error; + error_msg = {}; impl_::JsonWriter writer{(flags & JSON_PRETTY_PRINT) > 0, (flags & JSON_PRESERVE_ZERO_FRACTION) > 0}; to_json_impl(klass, writer, more); - if (!JsonEncoderError::msg.empty()) { + if (!error_msg.empty()) { return {}; } if (!writer.is_complete()) { diff --git a/runtime-common/stdlib/vkext/string-processing.cpp b/runtime-common/stdlib/vkext/string-processing.cpp new file mode 100644 index 0000000000..bb9a471863 --- /dev/null +++ b/runtime-common/stdlib/vkext/string-processing.cpp @@ -0,0 +1,366 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2020 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#include "runtime-common/stdlib/vkext/string-processing.h" + +#include + +#include "common/unicode/utf8-utils.h" + +static int cmp_char(const void *a, const void *b) noexcept { + return (int)(*(char *)a) - (int)(*(char *)b); +} + +string sp_sort(const string &s) noexcept { + string t = s.copy_and_make_not_shared(); + qsort(t.buffer(), t.size(), sizeof(char), cmp_char); + + return t; +} + +string sp_to_upper(const string &s) noexcept { + string t{s.size(), false}; + for (size_t i = 0; i <= t.size(); i++) { + switch ((unsigned char)s[i]) { + case 'a' ... 'z': + t[i] = s[i] + 'A' - 'a'; + break; + case 0xE0 ... 0xFF: + t[i] = s[i] - 32; + break; + case 0x83: + t[i] = 0x81; + break; + case 0xA2: + t[i] = 0xA1; + break; + case 0xB8: + t[i] = 0xA8; + break; + case 0xBC: + t[i] = 0xA3; + break; + case 0xB4: + t[i] = 0xA5; + break; + case 0xB3: + case 0xBE: + t[i] = s[i] - 1; + break; + case 0x98: + case 0xA0: + case 0xAD: + t[i] = ' '; + break; + case 0x90: + case 0x9A: + case 0x9C ... 0x9F: + case 0xBA: + case 0xBF: + t[i] = s[i] - 16; + break; + default: + t[i] = s[i]; + } + } + + return t; +} + +static char to_lower(const char c) noexcept { + switch ((unsigned char)c) { + case 'A' ... 'Z': + case 0xC0 ... 0xDF: + return c + 'a' - 'A'; + case 0x81: + return 0x83; + case 0xA3: + return 0xBC; + case 0xA5: + return 0xB4; + case 0xA1: + case 0xB2: + case 0xBD: + return c + 1; + case 0x98: + case 0xA0: + case 0xAD: + return ' '; + case 0x80: + case 0x8A: + case 0x8C ... 0x8F: + case 0xA8: + case 0xAA: + case 0xAF: + return c + 16; + } + return c; +} + +string sp_to_lower(const string &s) noexcept { + string t{s.size(), false}; + for (size_t i = 0; i <= s.size(); i++) { + t[i] = to_lower(s[i]); + } + + return t; +} + +static char simplify(const char c) noexcept { + unsigned char cc = to_lower(c); + switch (cc) { + case '0' ... '9': + case 'a' ... 'z': + case 0xE0 ... 0xFF: + case 0: + return cc; + case 0x83: // CYRILLIC SMALL LETTER GJE + case 0xB4: // CYRILLIC SMALL LETTER GHE WITH UPTURN + return 0xE3; // CYRILLIC SMALL LETTER GHE + case 0xA2: // CYRILLIC SMALL LETTER SHORT U + return 0xF3; // CYRILLIC SMALL LETTER U + case 0xB8: // CYRILLIC SMALL LETTER IO + return 0xE5; // CYRILLIC SMALL LETTER IE + case 0xB3: // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + case 0xBF: // CYRILLIC SMALL LETTER YI + return 'i'; + case 0xBE: // CYRILLIC SMALL LETTER DZE + return 's'; + case 0x9A: // CYRILLIC SMALL LETTER LJE + return 0xEB; // CYRILLIC SMALL LETTER EL + case 0x9C: // CYRILLIC SMALL LETTER NJE + return 0xED; // CYRILLIC SMALL LETTER EN + case 0x9D: // CYRILLIC SMALL LETTER KJE + return 0xEA; // CYRILLIC SMALL LETTER KA + case 0x90: // CYRILLIC SMALL LETTER DJE + case 0x9E: // CYRILLIC SMALL LETTER TSHE + return 'h'; + case 0xBA: // CYRILLIC SMALL LETTER UKRAINIAN IE + return 0xFD; // CYRILLIC SMALL LETTER E + case 0xBC: // CYRILLIC SMALL LETTER JE + return 'j'; + case 0xA9: // COPYRIGHT SIGN + return 'c'; + case 0xAE: // REGISTERED SIGN + return 'r'; + case 0xB5: // MICRO SIGN + return 'm'; + } + return 0; +} + +string sp_simplify(const string &s) noexcept { + string t{s.size(), false}; + size_t nl = 0; + for (size_t i = 0; i < s.size(); i++) { + char c = simplify(s[i]); + if (c != 0) { + t[nl++] = c; + } + } + t.shrink(nl); + + return t; +} + +static char conv_letter(const char c) noexcept { + switch (c) { + case 'a': + return 'à'; + case 'b': + return 'â'; + case 'c': + return 'ñ'; + case 'd': + return 'd'; + case 'e': + return 'å'; + case 'f': + return 'f'; + case 'g': + return 'g'; + case 'h': + return 'í'; + case 'i': + return 'i'; + case 'j': + return 'j'; + case 'k': + return 'ê'; + case 'l': + return 'l'; + case 'm': + return 'ò'; + case 'n': + return 'ï'; + case 'o': + return 'î'; + case 'p': + return 'ð'; + case 'q': + return 'q'; + case 'r': + return 'r'; + case 's': + return 's'; + case 't': + return 'ò'; + case 'u': + return 'è'; + case 'v': + return 'v'; + case 'w': + return 'w'; + case 'x': + return 'õ'; + case 'y': + return 'ó'; + case 'z': + return 'z'; + case 'ì': + return 'ò'; + case 'ú': + case 'ü': + return 'â'; + case 'à' ... 'ë': + case 'í' ... 'ù': + case 'û': + case 'ý' ... 'ÿ': + return c; + case '0': + return 'î'; + case '3': + return 'ç'; + case '4': + return '÷'; + case '6': + return 'á'; + case '1' ... '2': + case '5': + case '7' ... '9': + return c; + default: + return c; + } +} + +static char next_character(const char *s, size_t *_i) noexcept { + int i = *_i; + char cur = s[i]; + if (cur == '&') { + if (s[i + 1] == 'a' && s[i + 2] == 'm' && s[i + 3] == 'p' && s[i + 4] == ';') { + i += 4; + } else if (s[i + 1] == '#') { + int r = 0, ti = i; + for (i += 2; '0' <= s[i] && s[i] <= '9'; i++) { + r = r * 10 + s[i] - '0'; + } + if (s[i] == ';') { + int c = simplify_character(r); + if (c <= 255) { + cur = c; + } else { + cur = 0; + } + } else { + i = ti; + } + } else if (s[i + 1] == 'l' && s[i + 2] == 't' && s[i + 3] == ';') { + i += 3, cur = '<'; + } else if (s[i + 1] == 'g' && s[i + 2] == 't' && s[i + 3] == ';') { + i += 3, cur = '>'; + } else if (s[i + 1] == 'q' && s[i + 2] == 'u' && s[i + 3] == 'o' && s[i + 4] == 't' && s[i + 5] == ';') { + i += 5, cur = '"'; + } + } else if (cur == '<') { + if (s[i + 1] == 'b' && s[i + 2] == 'r' && s[i + 3] == '>') { + i += 3, cur = '\n'; + } + } + *_i = i; + + return cur; +} + +string sp_full_simplify(const string &s) noexcept { + string t{s.size(), false}; + + size_t nl = 0; + for (size_t i = 0; i < s.size(); i++) { + char c = next_character(s.c_str(), &i); + c = simplify(c); + if (c != 0) { + t[nl++] = conv_letter(c); + } + } + t.shrink(nl); + + return t; +} + +string sp_deunicode(const string &s) noexcept { + string t{s.size(), false}; + + size_t nl = 0; + for (size_t i = 0; i < s.size(); i++) { + char c = next_character(s.c_str(), &i); + if (c != 0) { + t[nl++] = c; + } + } + t.shrink(nl); + + return t; +} + +string sp_remove_repeats(const string &s) noexcept { + string t{s.size(), false}; + + size_t nl = 0; + for (size_t i = 0; i < s.size(); i++) { + if (i == 0 || s[i] != s[i - 1]) { + t[nl++] = s[i]; + } + } + t.shrink(nl); + + return t; +} + +string sp_to_cyrillic(const string &s) noexcept { + string t{s.size(), false}; + + for (size_t i = 0; i <= s.size(); i++) { + char c = simplify(s[i]); + if (c == 0) { + t[i] = s[i]; + } else { + t[i] = conv_letter(c); + } + } + + return t; +} + +string sp_words_only(const string &s) noexcept { + string t{2 + s.size(), false}; + + t[0] = ' '; + size_t nl = 1; + for (size_t i = 0; i < s.size(); i++) { + char c = simplify(s[i]); + if (c != 0) { + t[nl++] = c; + } else if (t[nl - 1] != ' ') { + t[nl++] = ' '; + } + } + if (t[nl - 1] != ' ') { + t[nl++] = ' '; + } + t[nl] = 0; + t.shrink(nl); + + return t; +} diff --git a/runtime-common/stdlib/vkext/string-processing.h b/runtime-common/stdlib/vkext/string-processing.h new file mode 100644 index 0000000000..70d94596a0 --- /dev/null +++ b/runtime-common/stdlib/vkext/string-processing.h @@ -0,0 +1,36 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2020 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include "runtime-common/core/runtime-core.h" + +// Returns sorted s string. +string sp_sort(const string &s) noexcept; + +// Returns upper/lower case string for s in cp1251. +string sp_to_upper(const string &s) noexcept; +string sp_to_lower(const string &s) noexcept; + +/** + * Simplifications: look to source code to see full list of replacements. + */ + +// Returns simplified s. +// Deletes all except digits, latin and russian letters in cp1251, lowercase letters. +string sp_simplify(const string &s) noexcept; + +// Returns ultra-simplified s. +// Recognizes unicode characters encoded in cp1251 and html-entities. Remove diacritics +// from unicode characters, delete all except digits, latin and russian letters, lowercase +// letters. Unifies similar russian and english characters (i.e. ('n'|'п') --> 'п') +string sp_full_simplify(const string &s) noexcept; + +// Converts all unicode characters encoded in cp1251 and html-entities into real cp1251, +// removing diacritics if possible. If converting is impossible - removes such characters. +string sp_deunicode(const string &s) noexcept; + +string sp_remove_repeats(const string &s) noexcept; +string sp_to_cyrillic(const string &s) noexcept; +string sp_words_only(const string &s) noexcept; diff --git a/runtime-common/stdlib/vkext/vkext.cpp b/runtime-common/stdlib/vkext/vkext.cpp new file mode 100644 index 0000000000..436c93643a --- /dev/null +++ b/runtime-common/stdlib/vkext/vkext.cpp @@ -0,0 +1,524 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2020 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#include "runtime-common/stdlib/vkext/vkext.h" + +#include + +#include "common/wrappers/string_view.h" +#include "flex/flex.h" +#include "runtime-common/stdlib/vkext/string-processing.h" + +constexpr int utf8_to_win_convert_0x400[256] = + {-1, 0xa8, 0x80, 0x81, 0xaa, 0xbd, 0xb2, 0xaf, 0xa3, 0x8a, 0x8c, 0x8e, 0x8d, -1, 0xa1, 0x8f, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, + 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, + 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, + 0xfe, 0xff, -1, 0xb8, 0x90, 0x83, 0xba, 0xbe, 0xb3, 0xbf, 0xbc, 0x9a, 0x9c, 0x9e, 0x9d, -1, 0xa2, 0x9f, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xa5, 0xb4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; +constexpr int utf8_to_win_convert_0x2000[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x96, + 0x97, -1, -1, -1, 0x91, 0x92, 0x82, -1, 0x93, 0x94, 0x84, -1, 0x86, 0x87, 0x95, -1, -1, -1, 0x85, -1, + -1, -1, -1, 0xda, 0xda, -1, 0xda, -1, 0x89, -1, -1, -1, -1, -1, -1, -1, -1, 0x8b, 0x9b, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x88, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; +constexpr int utf8_to_win_convert_0xff00[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x20, -1, -1}; +constexpr int utf8_to_win_convert_0x2100[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xb9, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 0x99, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; +constexpr int utf8_to_win_convert_0x000[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0xa0, -1, -1, -1, 0xa4, -1, 0xa6, 0xa7, -1, 0xa9, -1, 0xab, 0xac, 0xad, 0xae, -1, 0xb0, 0xb1, -1, -1, + -1, 0xb5, 0xb6, 0xb7, -1, -1, -1, 0xbb, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; +constexpr int win_to_utf8_convert[256] = + {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, + 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x402, 0x403, 0x201a, 0x453, 0x201e, 0x2026, 0x2020, 0x2021, 0x20ac, 0x2030, 0x409, 0x2039, + 0x40a, 0x40c, 0x40b, 0x40f, 0x452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x0, 0x2122, 0x459, 0x203a, 0x45a, 0x45c, 0x45b, 0x45f, + 0xa0, 0x40e, 0x45e, 0x408, 0xa4, 0x490, 0xa6, 0xa7, 0x401, 0xa9, 0x404, 0xab, 0xac, 0xad, 0xae, 0x407, 0xb0, 0xb1, 0x406, 0x456, + 0x491, 0xb5, 0xb6, 0xb7, 0x451, 0x2116, 0x454, 0xbb, 0x458, 0x405, 0x455, 0x457, 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, + 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f, 0x420, 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, 0x428, 0x429, 0x42a, 0x42b, + 0x42c, 0x42d, 0x42e, 0x42f, 0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, + 0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f}; + +inline int utf8_to_win_char(int c) { + if (c < 0x80) { + return c; + } + switch (c & ~0xff) { + case 0x0400: + return utf8_to_win_convert_0x400[c & 0xff]; + case 0x2000: + return utf8_to_win_convert_0x2000[c & 0xff]; + case 0xff00: + return utf8_to_win_convert_0xff00[c & 0xff]; + case 0x2100: + return utf8_to_win_convert_0x2100[c & 0xff]; + case 0x0000: + return utf8_to_win_convert_0x000[c & 0xff]; + } + return -1; +} + +struct buffered_write { + string result; + + void write_buff(const char *s, size_t l) { + result.append(s, l); + } + + void write_buff_set_pos(size_t pos) { + result.shrink(pos); + } + + void write_buff_char_pos(char c, size_t pos) { + if (pos < result.size()) { + result[pos] = c; + return; + } + } + + void write_buff_char(char c) { + result.push_back(c); + } + + void write_buff_char_2(char c1, char c2) { + result.push_back(c1); + result.push_back(c2); + } + + void write_buff_char_3(char c1, char c2, char c3) { + result.push_back(c1); + result.push_back(c2); + result.push_back(c3); + } + + void write_buff_char_4(char c1, char c2, char c3, char c4) { + result.push_back(c1); + result.push_back(c2); + result.push_back(c3); + result.push_back(c4); + } + + void write_buff_char_5(char c1, char c2, char c3, char c4, char c5) { + result.push_back(c1); + result.push_back(c2); + result.push_back(c3); + result.push_back(c4); + result.push_back(c5); + } + + void write_buff_char_6(char c1, char c2, char c3, char c4, char c5, char c6) { + result.push_back(c1); + result.push_back(c2); + result.push_back(c3); + result.push_back(c4); + result.push_back(c5); + result.push_back(c6); + } + + void write_buff_int(int x) { + char buff[25]; + auto len = snprintf(buff, 25, "%d", x); + result.append(buff, len); + } + + int utf8_to_win(const char *s, int len, int64_t max_len, bool exit_on_error) { + int st = 0; + int acc = 0; + // if (max_len && len > 3 * max_len) { + // len = 3 * max_len; + // } + for (int i = 0; i < len; i++) { + if (max_len && result.size() >= max_len) { + break; + } + int c = static_cast(s[i]); + if (c < 0x80) { + if (st) { + if (exit_on_error) { + return -1; + } + write_buff("?1?", 3); + } + write_buff_char(static_cast(c)); + st = 0; + } else if ((c & 0xc0) == 0x80) { + if (!st) { + if (exit_on_error) { + return -1; + } + write_buff("?2?", 3); + continue; + } + acc <<= 6; + acc += c - 0x80; + if (!--st) { + if (acc < 0x80) { + if (exit_on_error) { + return -1; + } + write_buff("?3?", 3); + } else { + int d = utf8_to_win_char(acc); + if (d != -1 && d) { + write_buff_char(static_cast(d)); + } else { + write_buff_char_2('&', '#'); + write_buff_int(acc); + write_buff_char(';'); + } + } + } + } else { // if ((c & 0xc0) == 0xc0) + if (st) { + if (exit_on_error) { + return -1; + } + write_buff("?4?", 3); + } + c -= 0xc0; + st = 0; + if (c < 32) { + acc = c; + st = 1; + } else if (c < 48) { + acc = c - 32; + st = 2; + } else if (c < 56) { + acc = c - 48; + st = 3; + } else { + if (exit_on_error) { + return -1; + } + write_buff("?5?", 3); + } + } + } + if (st) { + if (exit_on_error) { + return -1; + } + write_buff("?6?", 3); + } + return 1; + } + + void write_char_utf8(int64_t c) { + if (!c) { + return; + } + if (c < 128) { + write_buff_char(static_cast(c)); + return; + } + // 2 bytes(11): 110x xxxx 10xx xxxx + if (c < 0x800) { + write_buff_char_2(static_cast(0xC0 + (c >> 6)), static_cast(0x80 + (c & 63))); + return; + } + + // 3 bytes(16): 1110 xxxx 10xx xxxx 10xx xxxx + if (c < 0x10000) { + write_buff_char_3(static_cast(0xE0 + (c >> 12)), static_cast(0x80 + ((c >> 6) & 63)), static_cast(0x80 + (c & 63))); + return; + } + + // 4 bytes(21): 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx + if (c < 0x200000) { + write_buff_char_4(static_cast(0xF0 + (c >> 18)), static_cast(0x80 + ((c >> 12) & 63)), static_cast(0x80 + ((c >> 6) & 63)), + static_cast(0x80 + (c & 63))); + return; + } + + // 5 bytes(26): 1111 10xx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx + if (c < 0x4000000) { + write_buff_char_5(static_cast(0xF8 + (c >> 24)), static_cast(0x80 + ((c >> 18) & 63)), static_cast(0x80 + ((c >> 12) & 63)), + static_cast(0x80 + ((c >> 6) & 63)), static_cast(0x80 + (c & 63))); + return; + } + + // 6 bytes(31): 1111 110x 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx + if (c < 0x80000000) { + write_buff_char_6(static_cast(0xFC + (c >> 30)), static_cast(0x80 + ((c >> 24) & 63)), static_cast(0x80 + ((c >> 18) & 63)), + static_cast(0x80 + ((c >> 12) & 63)), static_cast(0x80 + ((c >> 6) & 63)), static_cast(0x80 + (c & 63))); + return; + } + + write_buff_char_2('$', '#'); + write_buff_int(c); + write_buff_char(';'); + } + + void write_char_utf8_no_escape(int64_t c) { + if (!c) { + return; + } + if (c < 128) { + write_buff_char(static_cast(c)); + return; + } + // 2 bytes(11): 110x xxxx 10xx xxxx + if (c < 0x800) { + write_buff_char_2(static_cast(0xC0 + (c >> 6)), static_cast(0x80 + (c & 63))); + return; + } + + // 3 bytes(16): 1110 xxxx 10xx xxxx 10xx xxxx + if (c < 0x10000) { + write_buff_char_3(static_cast(0xE0 + (c >> 12)), static_cast(0x80 + ((c >> 6) & 63)), static_cast(0x80 + (c & 63))); + return; + } + + // 4 bytes(21): 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx + if (c < 0x200000) { + write_buff_char_4(static_cast(0xF0 + (c >> 18)), static_cast(0x80 + ((c >> 12) & 63)), static_cast(0x80 + ((c >> 6) & 63)), + static_cast(0x80 + (c & 63))); + return; + } + + // 5 bytes(26): 1111 10xx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx + if (c < 0x4000000) { + write_buff_char_5(static_cast(0xF8 + (c >> 24)), static_cast(0x80 + ((c >> 18) & 63)), static_cast(0x80 + ((c >> 12) & 63)), + static_cast(0x80 + ((c >> 6) & 63)), static_cast(0x80 + (c & 63))); + return; + } + + // 6 bytes(31): 1111 110x 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx + if (c < 0x80000000) { + write_buff_char_6(static_cast(0xFC + (c >> 30)), static_cast(0x80 + ((c >> 24) & 63)), static_cast(0x80 + ((c >> 18) & 63)), + static_cast(0x80 + ((c >> 12) & 63)), static_cast(0x80 + ((c >> 6) & 63)), static_cast(0x80 + (c & 63))); + return; + } + } + + void win_to_utf8(const char *s, int len, bool escape) { + int state = 0; + int save_pos = -1; + int64_t cur_num = 0; + for (int i = 0; i < len; i++) { + if (state == 0 && s[i] == '&') { + save_pos = result.size(); + cur_num = 0; + state++; + } else if (state == 1 && s[i] == '#') { + state++; + } else if (state == 2 && s[i] >= '0' && s[i] <= '9') { + if (cur_num < 0x80000000) { + cur_num = s[i] - '0' + cur_num * 10; + } + } else if (state == 2 && s[i] == ';') { + state++; + } else { + state = 0; + } + if (state == 3 && 0xd800 <= cur_num && cur_num <= 0xdfff) { + cur_num = 32; + } + if (state == 3 + && (!escape + || (cur_num >= 32 && cur_num != 33 && cur_num != 34 && cur_num != 36 && cur_num != 39 && cur_num != 60 && cur_num != 62 && cur_num != 92 + && cur_num != 8232 && cur_num != 8233 && cur_num < 0x80000000))) { + write_buff_set_pos(save_pos); + php_assert(save_pos == result.size()); + if (escape) { + write_char_utf8(cur_num); + } else { + write_char_utf8_no_escape(cur_num); + }; + } else if (state == 3 && cur_num >= 0x80000000) { + write_char_utf8(win_to_utf8_convert[static_cast(s[i])]); + write_buff_char_pos('$', save_pos); + } else { + write_char_utf8(win_to_utf8_convert[static_cast(s[i])]); + } + if (state == 3) { + state = 0; + } + } + } +}; + +constexpr char ws[256] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + +static inline bool is_html_opt_symb(char c) { + return (c == '<' || c == '>' || c == '(' || c == ')' || c == '{' || c == '}' || c == '/' || c == '"' || c == ':' || c == ',' || c == ';'); +} + +static inline bool is_space(char c) { + return ws[static_cast(c)]; +} + +static inline bool is_linebreak(char c) { + return c == '\n'; +} + +static inline bool is_pre_tag(const char *s) { + if (s[0] == '<') { + if (s[1] == 'p') { + return s[2] == 'r' && s[3] == 'e' && s[4] == '>'; + } else if (s[1] == 'c') { + return s[2] == 'o' && s[3] == 'd' && s[4] == 'e' && s[5] == '>'; + } else if (s[1] == '/') { + if (s[2] == 'p') { + return -(s[3] == 'r' && s[4] == 'e' && s[5] == '>'); + } else if (s[2] == 'c') { + return -(s[3] == 'o' && s[4] == 'd' && s[5] == 'e' && s[6] == '>'); + } + } + } + return false; +} + +string f$vk_utf8_to_win(const string &text, int64_t max_len, bool exit_on_error) noexcept { + buffered_write buffered{}; + int r = buffered.utf8_to_win(text.c_str(), text.size(), max_len, exit_on_error); + if (r >= 0) { + return buffered.result; + } else { + if (!max_len || text.size() <= static_cast(max_len)) { + return text; + } + return {text.c_str(), static_cast(max_len)}; + } +} + +string f$vk_win_to_utf8(const string &text, bool escape) noexcept { + buffered_write buffered{}; + buffered.win_to_utf8(text.c_str(), text.size(), escape); + return buffered.result; +} + +string f$vk_flex(const string &name, const string &case_name, int64_t sex, const string &type, int64_t lang_id) noexcept { + constexpr size_t BUFF_LEN = (1 << 16); + string buff{BUFF_LEN, false}; + const size_t error_msg_buf_size = 1000; + char ERROR_MSG_BUF[error_msg_buf_size] = {'\0'}; + ERROR_MSG_BUF[0] = '\0'; + vk::string_view ref = flex(vk::string_view{name.c_str(), name.size()}, vk::string_view{case_name.c_str(), case_name.size()}, sex == 1, + vk::string_view{type.c_str(), type.size()}, lang_id, buff.buffer(), ERROR_MSG_BUF, error_msg_buf_size); + if (ERROR_MSG_BUF[0] != '\0') { + php_warning("%s", ERROR_MSG_BUF); + } + buff.shrink(ref.size()); + return buff; +} + +string f$vk_whitespace_pack(const string &str, bool html_opt) noexcept { + const char *text = str.c_str(); + int level = 0; + const char *ctext = text; + const char *start = text; + + buffered_write buffered{}; + while (*text) { + if (is_space(*text) && !level) { + int linebreak = 0; + while (is_space(*text)) { + if (is_linebreak(*text)) { + linebreak = 1; + } + text++; + } + if (!html_opt || (ctext != start && !is_html_opt_symb(ctext[-1]) && *text && !is_html_opt_symb(*text))) { + buffered.write_buff_char(linebreak ? '\n' : ' '); + } + } else { + while (true) { + while ((level || !is_space(*text)) && *text) { + level += is_pre_tag(text); + if (level < 0) { + level = 1000000000; + } + text++; + } + if (!html_opt && *text && !is_space(text[1])) { + text++; + } else { + break; + } + } + buffered.write_buff(ctext, text - ctext); + } + ctext = text; + } + return buffered.result; +} + +string f$vk_sp_simplify(const string &s) noexcept { + return sp_simplify(s); +} + +string f$vk_sp_full_simplify(const string &s) noexcept { + return sp_full_simplify(s); +} + +string f$vk_sp_deunicode(const string &s) noexcept { + return sp_deunicode(s); +} + +string f$vk_sp_to_upper(const string &s) noexcept { + return sp_to_upper(s); +} + +string f$vk_sp_to_lower(const string &s) noexcept { + return sp_to_lower(s); +} + +string f$vk_sp_to_sort(const string &s) noexcept { + return sp_sort(s); +} + +string f$vk_sp_remove_repeats(const string &s) noexcept { + return sp_remove_repeats(s); +} + +string f$vk_sp_to_cyrillic(const string &s) noexcept { + return sp_to_cyrillic(s); +} + +string f$vk_sp_words_only(const string &s) noexcept { + return sp_words_only(s); +} diff --git a/runtime-common/stdlib/vkext/vkext.h b/runtime-common/stdlib/vkext/vkext.h new file mode 100644 index 0000000000..3cb569bb74 --- /dev/null +++ b/runtime-common/stdlib/vkext/vkext.h @@ -0,0 +1,37 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2020 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include "runtime-common/core/runtime-core.h" + +string f$vk_utf8_to_win(const string &text, int64_t max_len = 0, bool exit_on_error = false) noexcept; + +string f$vk_win_to_utf8(const string &text, bool escape = true) noexcept; + +string f$vk_flex(const string &name, const string &case_name, int64_t sex, const string &type, int64_t lang_id = 0) noexcept; + +string f$vk_whitespace_pack(const string &str, bool html_opt = false) noexcept; + +string f$vk_sp_simplify(const string &s) noexcept; + +string f$vk_sp_full_simplify(const string &s) noexcept; + +string f$vk_sp_deunicode(const string &s) noexcept; + +string f$vk_sp_to_upper(const string &s) noexcept; + +string f$vk_sp_to_lower(const string &s) noexcept; + +string f$vk_sp_to_sort(const string &s) noexcept; + +string f$vk_sp_remove_repeats(const string &s) noexcept; + +string f$vk_sp_to_cyrillic(const string &s) noexcept; + +string f$vk_sp_words_only(const string &s) noexcept; + +inline string f$cp1251(const string &utf8_string) noexcept { + return f$vk_utf8_to_win(utf8_string); +} diff --git a/runtime/vkext_stats.cpp b/runtime-common/stdlib/vkext/vkext_stats.cpp similarity index 67% rename from runtime/vkext_stats.cpp rename to runtime-common/stdlib/vkext/vkext_stats.cpp index 9d38dd0984..4c018c33ff 100644 --- a/runtime/vkext_stats.cpp +++ b/runtime-common/stdlib/vkext/vkext_stats.cpp @@ -2,37 +2,39 @@ // Copyright (c) 2020 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt -#include "runtime/vkext_stats.h" +#include "runtime-common/stdlib/vkext/vkext_stats.h" -#include -#include -#include +#include +#include -#define HLL_FIRST_RANK_CHAR 0x30 -#define HLL_PACK_CHAR '!' -#define HLL_PACK_CHAR_V2 '$' -#define TO_HALF_BYTE(c) ((int)(((c > '9') ? (c - 7) : c) - '0')) -#define MAX_HLL_SIZE (1 << 14) -#define HLL_BUF_SIZE (MAX_HLL_SIZE + 1000) +#include "runtime-common/core/runtime-core.h" -static char hll_buf[HLL_BUF_SIZE]; +constexpr char HLL_FIRST_RANK_CHAR = 0x30; +constexpr char HLL_PACK_CHAR = '!'; +constexpr char HLL_PACK_CHAR_V2 = '$'; +constexpr size_t MAX_HLL_SIZE = 1 << 14; +constexpr size_t HLL_BUF_SIZE = MAX_HLL_SIZE + 1000; + +static constexpr char to_half_byte(char c) noexcept { + return ((c > '9') ? (c - 7) : c) - '0'; +} ////// -// hll fuctions +// hll functions ////// -static bool is_hll_unpacked(const string &hll) { +static bool is_hll_unpacked(const string &hll) noexcept { return hll.empty() || (hll[0] != HLL_PACK_CHAR && hll[0] != HLL_PACK_CHAR_V2); } -static int get_hll_size(const string &hll) { +static int get_hll_size(const string &hll) noexcept { if (is_hll_unpacked(hll)) { return hll.size(); } return hll[0] == HLL_PACK_CHAR ? (1 << 8) : (1 << (hll[1] - '0')); } -Optional f$vk_stats_hll_merge(const array &a) { +Optional f$vk_stats_hll_merge(const array &a) noexcept { string result; char *result_buff = nullptr; int result_len = -1; @@ -61,7 +63,7 @@ Optional f$vk_stats_hll_merge(const array &a) { while (i + 2 < cur.size()) { int p; if (cur[0] == HLL_PACK_CHAR) { - p = (TO_HALF_BYTE(cur[i]) << 4) + TO_HALF_BYTE(cur[i + 1]); + p = (to_half_byte(cur[i]) << 4) + to_half_byte(cur[i + 1]); } else { p = (((int)cur[i] - 1) & 0x7f) + (((int)cur[i + 1] - 1) << 7); } @@ -78,7 +80,7 @@ Optional f$vk_stats_hll_merge(const array &a) { return result; } -static int unpack_hll(const string &hll, char *res) { +static int unpack_hll(const string &hll, char *res) noexcept { assert(!is_hll_unpacked(hll)); int m = get_hll_size(hll); int pos = 1 + (hll[0] == HLL_PACK_CHAR_V2); @@ -86,7 +88,7 @@ static int unpack_hll(const string &hll, char *res) { while (pos + 2 < hll.size()) { int p; if (hll[0] == HLL_PACK_CHAR) { - p = (TO_HALF_BYTE(hll[pos]) << 4) + TO_HALF_BYTE(hll[pos + 1]); + p = (to_half_byte(hll[pos]) << 4) + to_half_byte(hll[pos + 1]); } else { p = (((int)hll[pos] - 1) & 0x7f) + (((int)hll[pos + 1] - 1) << 7); } @@ -104,11 +106,14 @@ static int unpack_hll(const string &hll, char *res) { return m; } - -static Optional hll_count(const string &hll, int m) { +static Optional hll_count(const string &hll, int m) noexcept { double pow_2_32 = (1LL << 32); double alpha_m = 0.7213 / (1.0 + 1.079 / m); char const *s; + + RuntimeContext::get().static_SB.clean().reserve(HLL_BUF_SIZE); + char *hll_buf = RuntimeContext::get().static_SB.buffer(); + if (!is_hll_unpacked(hll)) { if (unpack_hll(hll, hll_buf) != m) { php_warning("Bad HLL string"); @@ -134,11 +139,7 @@ static Optional hll_count(const string &hll, int m) { } } else if (m == (1 << 14)) { if (e < 72000) { - double bias = 5.9119 * 1.0e-18 * (e * e * e * e) - - 1.4253 * 1.0e-12 * (e * e * e) + - 1.2940 * 1.0e-7 * (e * e) - - 5.2921 * 1.0e-3 * e + - 83.3216; + double bias = 5.9119 * 1.0e-18 * (e * e * e * e) - 1.4253 * 1.0e-12 * (e * e * e) + 1.2940 * 1.0e-7 * (e * e) - 5.2921 * 1.0e-3 * e + 83.3216; e -= e * (bias / 100.0); } } else { @@ -152,8 +153,8 @@ static Optional hll_count(const string &hll, int m) { * Do not change implementation of this hash function, because hashes may be saved in a permanent storage. * A full copy of the same function exists in vkext-stats.c in vkext. */ -static long long dl_murmur64a_hash (const void *data, size_t len) { - assert ((len & 7) == 0); +static long long dl_murmur64a_hash(const void *data, size_t len) noexcept { + assert((len & 7) == 0); unsigned long long m = 0xc6a4a7935bd1e995; int r = 47; unsigned long long h = 0xcafebabeull ^ (m * len); @@ -173,14 +174,21 @@ static long long dl_murmur64a_hash (const void *data, size_t len) { start = (const unsigned char *)data; - switch(len & 7) { - case 7: h ^= (unsigned long long)start[6] << 48; /* fallthrough */ - case 6: h ^= (unsigned long long)start[5] << 40; /* fallthrough */ - case 5: h ^= (unsigned long long)start[4] << 32; /* fallthrough */ - case 4: h ^= (unsigned long long)start[3] << 24; /* fallthrough */ - case 3: h ^= (unsigned long long)start[2] << 16; /* fallthrough */ - case 2: h ^= (unsigned long long)start[1] << 8; /* fallthrough */ - case 1: h ^= (unsigned long long)start[0]; + switch (len & 7) { + case 7: + h ^= (unsigned long long)start[6] << 48; /* fallthrough */ + case 6: + h ^= (unsigned long long)start[5] << 40; /* fallthrough */ + case 5: + h ^= (unsigned long long)start[4] << 32; /* fallthrough */ + case 4: + h ^= (unsigned long long)start[3] << 24; /* fallthrough */ + case 3: + h ^= (unsigned long long)start[2] << 16; /* fallthrough */ + case 2: + h ^= (unsigned long long)start[1] << 8; /* fallthrough */ + case 1: + h ^= (unsigned long long)start[0]; h *= m; }; @@ -190,17 +198,17 @@ static long long dl_murmur64a_hash (const void *data, size_t len) { return h; } -static void hll_add_shifted (unsigned char *hll, int hll_size, long long value) { - unsigned long long hash = dl_murmur64a_hash (&(value), sizeof (long long)); +static void hll_add_shifted(unsigned char *hll, int hll_size, long long value) noexcept { + unsigned long long hash = dl_murmur64a_hash(&(value), sizeof(long long)); unsigned int idx = hash >> (64LL - hll_size); - unsigned char rank = (hash == 0) ? 0 : (unsigned char)fmin (__builtin_ctzll (hash) + 1, 64 - hll_size); + unsigned char rank = (hash == 0) ? 0 : (unsigned char)fmin(__builtin_ctzll(hash) + 1, 64 - hll_size); rank += HLL_FIRST_RANK_CHAR; if (hll[idx] < rank) { hll[idx] = rank; } } -Optional f$vk_stats_hll_add(const string &hll, const array &a) { +Optional f$vk_stats_hll_add(const string &hll, const array &a) noexcept { if (!is_hll_unpacked(hll)) { return false; } @@ -208,6 +216,10 @@ Optional f$vk_stats_hll_add(const string &hll, const array &a) { return false; } int hll_size = __builtin_ctz(get_hll_size(hll)); + + RuntimeContext::get().static_SB.clean().reserve(HLL_BUF_SIZE); + char *hll_buf = RuntimeContext::get().static_SB.buffer(); + memcpy(hll_buf, hll.c_str(), hll.size()); for (array::const_iterator it = a.begin(); it != a.end(); ++it) { hll_add_shifted((unsigned char *)hll_buf, hll_size, it.get_value().to_int()); @@ -215,14 +227,14 @@ Optional f$vk_stats_hll_add(const string &hll, const array &a) { return string(hll_buf, hll.size()); } -Optional f$vk_stats_hll_create(const array &a, int64_t size) { +Optional f$vk_stats_hll_create(const array &a, int64_t size) noexcept { if (size != (1 << 8) && size != (1 << 14)) { return false; } return f$vk_stats_hll_add(string((string::size_type)size, (char)HLL_FIRST_RANK_CHAR), a); } -Optional f$vk_stats_hll_count(const string &hll) { +Optional f$vk_stats_hll_count(const string &hll) noexcept { int size = get_hll_size(hll); if (size == (1 << 8) || size == (1 << 14)) { return hll_count(hll, size); @@ -232,7 +244,7 @@ Optional f$vk_stats_hll_count(const string &hll) { } // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copypaste from common/statistics.c -string hll_pack(const string &s, int len) { +string hll_pack(const string &s, int len) noexcept { if (len > MAX_HLL_SIZE || len == 0 || s[0] == HLL_PACK_CHAR || s[0] == HLL_PACK_CHAR_V2) { return s; } @@ -240,7 +252,7 @@ string hll_pack(const string &s, int len) { int p = 0; buf[p++] = HLL_PACK_CHAR_V2; buf[p++] = (unsigned char)('0' + (unsigned char)(__builtin_ctz(len))); - assert (__builtin_popcount(len) == 1); + assert(__builtin_popcount(len) == 1); for (int i = 0; i < len; i++) { if (s[i] > HLL_FIRST_RANK_CHAR) { if (p + 2 >= len) { @@ -250,20 +262,20 @@ string hll_pack(const string &s, int len) { buf[p++] = (unsigned char)((i >> 7) + 1); buf[p++] = (unsigned char)s[i]; } - assert (p < HLL_BUF_SIZE); + assert(p < HLL_BUF_SIZE); } - return {(char*) buf, static_cast(p)}; + return {(char *)buf, static_cast(p)}; } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Optional f$vk_stats_hll_pack(const string &hll) { +Optional f$vk_stats_hll_pack(const string &hll) noexcept { if (!is_hll_unpacked(hll)) { return false; } return hll_pack(hll, hll.size()); } -Optional f$vk_stats_hll_unpack(const string &hll) { +Optional f$vk_stats_hll_unpack(const string &hll) noexcept { if (is_hll_unpacked(hll)) { return false; } @@ -275,6 +287,6 @@ Optional f$vk_stats_hll_unpack(const string &hll) { return string(res, m); } -bool f$vk_stats_hll_is_packed(const string &hll) { +bool f$vk_stats_hll_is_packed(const string &hll) noexcept { return !is_hll_unpacked(hll); } diff --git a/runtime-common/stdlib/vkext/vkext_stats.h b/runtime-common/stdlib/vkext/vkext_stats.h new file mode 100644 index 0000000000..cd9de1bfce --- /dev/null +++ b/runtime-common/stdlib/vkext/vkext_stats.h @@ -0,0 +1,15 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2020 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include "runtime-common/core/runtime-core.h" + +Optional f$vk_stats_hll_merge(const array &a) noexcept; +Optional f$vk_stats_hll_count(const string &hll) noexcept; +Optional f$vk_stats_hll_create(const array &a = array(), int64_t size = (1 << 8)) noexcept; +Optional f$vk_stats_hll_add(const string &hll, const array &a) noexcept; +Optional f$vk_stats_hll_pack(const string &hll) noexcept; +Optional f$vk_stats_hll_unpack(const string &hll) noexcept; +bool f$vk_stats_hll_is_packed(const string &hll) noexcept; diff --git a/runtime-light/runtime-light.cmake b/runtime-light/runtime-light.cmake index 2b00ac5a36..0faac07339 100644 --- a/runtime-light/runtime-light.cmake +++ b/runtime-light/runtime-light.cmake @@ -51,8 +51,8 @@ target_compile_options( target_link_options(runtime-light PUBLIC -stdlib=libc++ -static-libstdc++) # add statically linking libraries set_property(TARGET runtime-light PROPERTY RUNTIME_LINK_LIBS - "${ZLIB_LIB_DIR}/libz.a") - + "${ZLIB_LIB_DIR}/libz.a" + "${OBJS_DIR}/flex/libvk-flex-data.a") if(APPLE) target_link_options(runtime-light PUBLIC -undefined dynamic_lookup) else() diff --git a/runtime-light/state/component-state.cpp b/runtime-light/state/component-state.cpp index 924fd6cbd7..b97d8d3c68 100644 --- a/runtime-light/state/component-state.cpp +++ b/runtime-light/state/component-state.cpp @@ -11,8 +11,8 @@ #include "common/php-functions.h" #include "runtime-common/core/runtime-core.h" #include "runtime-common/core/utils/kphp-assert-core.h" +#include "runtime-common/stdlib/string/json-functions.h" #include "runtime-light/k2-platform/k2-api.h" -#include "runtime-light/utils/json-functions.h" void ComponentState::parse_ini_arg(std::string_view key_view, std::string_view value_view) noexcept { if (key_view.size() <= INI_ARG_PREFIX.size()) [[unlikely]] { diff --git a/runtime-light/stdlib/confdata/confdata-functions.cpp b/runtime-light/stdlib/confdata/confdata-functions.cpp index aa2666c2c6..85e7e4cc49 100644 --- a/runtime-light/stdlib/confdata/confdata-functions.cpp +++ b/runtime-light/stdlib/confdata/confdata-functions.cpp @@ -11,13 +11,13 @@ #include "runtime-common/core/runtime-core.h" #include "runtime-common/core/utils/kphp-assert-core.h" +#include "runtime-common/stdlib/string/json-functions.h" #include "runtime-light/coroutine/task.h" #include "runtime-light/state/instance-state.h" #include "runtime-light/stdlib/component/component-api.h" #include "runtime-light/tl/tl-core.h" #include "runtime-light/tl/tl-functions.h" #include "runtime-light/tl/tl-types.h" -#include "runtime-light/utils/json-functions.h" namespace { diff --git a/runtime-light/stdlib/string/json-functions.h b/runtime-light/stdlib/string/json-functions.h new file mode 100644 index 0000000000..9df7a3e45c --- /dev/null +++ b/runtime-light/stdlib/string/json-functions.h @@ -0,0 +1 @@ +// todo:k2 implement string f$vk_json_encode_safe(const T &v, bool simple_encode = true) noexcept diff --git a/runtime-light/utils/utils.cmake b/runtime-light/utils/utils.cmake index b8e422732b..25d6c58900 100644 --- a/runtime-light/utils/utils.cmake +++ b/runtime-light/utils/utils.cmake @@ -1,2 +1 @@ -prepend(RUNTIME_LIGHT_UTILS_SRC utils/ panic.cpp php_assert.cpp - json-functions.cpp) +prepend(RUNTIME_LIGHT_UTILS_SRC utils/ panic.cpp php_assert.cpp) diff --git a/runtime/from-json-processor.cpp b/runtime/from-json-processor.cpp deleted file mode 100644 index 22c45df0f4..0000000000 --- a/runtime/from-json-processor.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2022 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#include "runtime/from-json-processor.h" - -string JsonEncoderError::msg; - -string f$JsonEncoder$$getLastError() noexcept { - return JsonEncoderError::msg; -} diff --git a/runtime/json-functions.cpp b/runtime/json-functions.cpp deleted file mode 100644 index b4584092d6..0000000000 --- a/runtime/json-functions.cpp +++ /dev/null @@ -1,583 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#include "runtime/json-functions.h" - -#include "common/algorithms/find.h" -#include "runtime-common/stdlib/string/string-functions.h" - -// note: json-functions.cpp is used for non-typed json implementation: for json_encode() and json_decode() -// for classes, e.g. `JsonEncoder::encode(new A)`, see json-writer.cpp and from/to visitors -namespace { - -void json_append_one_char(unsigned int c) noexcept { - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('u'); - kphp_runtime_context.static_SB.append_char("0123456789abcdef"[c >> 12]); - kphp_runtime_context.static_SB.append_char("0123456789abcdef"[(c >> 8) & 15]); - kphp_runtime_context.static_SB.append_char("0123456789abcdef"[(c >> 4) & 15]); - kphp_runtime_context.static_SB.append_char("0123456789abcdef"[c & 15]); -} - -bool json_append_char(unsigned int c) noexcept { - if (c < 0x10000) { - if (0xD7FF < c && c < 0xE000) { - return false; - } - json_append_one_char(c); - return true; - } - if (c <= 0x10ffff) { - c -= 0x10000; - json_append_one_char(0xD800 | (c >> 10)); - json_append_one_char(0xDC00 | (c & 0x3FF)); - return true; - } - return false; -} - - -bool do_json_encode_string_php(const JsonPath &json_path, const char *s, int len, int64_t options) noexcept { - int begin_pos = kphp_runtime_context.static_SB.size(); - if (options & JSON_UNESCAPED_UNICODE) { - kphp_runtime_context.static_SB.reserve(2 * len + 2); - } else { - kphp_runtime_context.static_SB.reserve(6 * len + 2); - } - kphp_runtime_context.static_SB.append_char('"'); - - auto fire_error = [json_path, begin_pos](int pos) { - php_warning("%s: Not a valid utf-8 character at pos %d in function json_encode", json_path.to_string().c_str(), pos); - kphp_runtime_context.static_SB.set_pos(begin_pos); - kphp_runtime_context.static_SB.append("null", 4); - return false; - }; - - for (int pos = 0; pos < len; pos++) { - switch (s[pos]) { - case '"': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('"'); - break; - case '\\': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('\\'); - break; - case '/': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('/'); - break; - case '\b': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('b'); - break; - case '\f': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('f'); - break; - case '\n': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('n'); - break; - case '\r': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('r'); - break; - case '\t': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('t'); - break; - case 0 ... 7: - case 11: - case 14 ... 31: - json_append_one_char(s[pos]); - break; - case -128 ... -1: { - const int a = s[pos]; - if ((a & 0x40) == 0) { - return fire_error(pos); - } - - const int b = s[++pos]; - if ((b & 0xc0) != 0x80) { - return fire_error(pos); - } - if ((a & 0x20) == 0) { - if ((a & 0x1e) <= 0) { - return fire_error(pos); - } - if (options & JSON_UNESCAPED_UNICODE) { - kphp_runtime_context.static_SB.append_char(static_cast(a)); - kphp_runtime_context.static_SB.append_char(static_cast(b)); - } else if (!json_append_char(((a & 0x1f) << 6) | (b & 0x3f))) { - return fire_error(pos); - } - break; - } - - const int c = s[++pos]; - if ((c & 0xc0) != 0x80) { - return fire_error(pos); - } - if ((a & 0x10) == 0) { - if (((a & 0x0f) | (b & 0x20)) <= 0) { - return fire_error(pos); - } - if (options & JSON_UNESCAPED_UNICODE) { - kphp_runtime_context.static_SB.append_char(static_cast(a)); - kphp_runtime_context.static_SB.append_char(static_cast(b)); - kphp_runtime_context.static_SB.append_char(static_cast(c)); - } else if (!json_append_char(((a & 0x0f) << 12) | ((b & 0x3f) << 6) | (c & 0x3f))) { - return fire_error(pos); - } - break; - } - - const int d = s[++pos]; - if ((d & 0xc0) != 0x80) { - return fire_error(pos); - } - if ((a & 0x08) == 0) { - if (((a & 0x07) | (b & 0x30)) <= 0) { - return fire_error(pos); - } - if (options & JSON_UNESCAPED_UNICODE) { - kphp_runtime_context.static_SB.append_char(static_cast(a)); - kphp_runtime_context.static_SB.append_char(static_cast(b)); - kphp_runtime_context.static_SB.append_char(static_cast(c)); - kphp_runtime_context.static_SB.append_char(static_cast(d)); - } else if (!json_append_char(((a & 0x07) << 18) | ((b & 0x3f) << 12) | ((c & 0x3f) << 6) | (d & 0x3f))) { - return fire_error(pos); - } - break; - } - - return fire_error(pos); - } - default: - kphp_runtime_context.static_SB.append_char(s[pos]); - break; - } - } - - kphp_runtime_context.static_SB.append_char('"'); - return true; -} - -bool do_json_encode_string_vkext(const char *s, int len) noexcept { - kphp_runtime_context.static_SB.reserve(2 * len + 2); - if (kphp_runtime_context.sb_lib_context.error_flag == STRING_BUFFER_ERROR_FLAG_FAILED) { - return false; - } - - kphp_runtime_context.static_SB.append_char('"'); - - for (int pos = 0; pos < len; pos++) { - char c = s[pos]; - if (unlikely (static_cast(c) < 32u)) { - switch (c) { - case '\b': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('b'); - break; - case '\f': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('f'); - break; - case '\n': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('n'); - break; - case '\r': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('r'); - break; - case '\t': - kphp_runtime_context.static_SB.append_char('\\'); - kphp_runtime_context.static_SB.append_char('t'); - break; - } - } else { - if (c == '"' || c == '\\' || c == '/') { - kphp_runtime_context.static_SB.append_char('\\'); - } - kphp_runtime_context.static_SB.append_char(c); - } - } - - kphp_runtime_context.static_SB.append_char('"'); - - return true; -} - -} // namespace - -string JsonPath::to_string() const { - // this function is called only when error is occurred, so it's not - // very performance-sensitive - - if (depth == 0) { - return string{"/", 1}; - } - unsigned num_parts = std::clamp(depth, 0U, static_cast(arr.size())); - string result; - result.reserve_at_least((num_parts+1) * 8); - result.push_back('/'); - for (unsigned i = 0; i < num_parts; i++) { - const char *key = arr[i]; - if (key == nullptr) { - // int key indexing - result.append("[.]"); - } else { - // string key indexing - result.append("['"); - result.append(arr[i]); - result.append("']"); - } - } - if (depth >= arr.size()) { - result.append("..."); - } - return result; -} - -namespace impl_ { - -JsonEncoder::JsonEncoder(int64_t options, bool simple_encode, const char *json_obj_magic_key) noexcept: - options_(options), - simple_encode_(simple_encode), - json_obj_magic_key_(json_obj_magic_key) { -} - -bool JsonEncoder::encode(bool b) noexcept { - if (b) { - kphp_runtime_context.static_SB.append("true", 4); - } else { - kphp_runtime_context.static_SB.append("false", 5); - } - return true; -} - -bool JsonEncoder::encode_null() const noexcept { - kphp_runtime_context.static_SB.append("null", 4); - return true; -} - -bool JsonEncoder::encode(int64_t i) noexcept { - kphp_runtime_context.static_SB << i; - return true; -} - -bool JsonEncoder::encode(double d) noexcept { - if (vk::any_of_equal(std::fpclassify(d), FP_INFINITE, FP_NAN)) { - php_warning("%s: strange double %lf in function json_encode", json_path_.to_string().c_str(), d); - if (options_ & JSON_PARTIAL_OUTPUT_ON_ERROR) { - kphp_runtime_context.static_SB.append("0", 1); - } else { - return false; - } - } else { - kphp_runtime_context.static_SB << (simple_encode_ ? f$number_format(d, 6, string{"."}, string{}) : string{d}); - } - return true; -} - -bool JsonEncoder::encode(const string &s) noexcept { - return simple_encode_ ? do_json_encode_string_vkext(s.c_str(), s.size()) : do_json_encode_string_php(json_path_, s.c_str(), s.size(), options_); -} - -bool JsonEncoder::encode(const mixed &v) noexcept { - switch (v.get_type()) { - case mixed::type::NUL: - return encode_null(); - case mixed::type::BOOLEAN: - return encode(v.as_bool()); - case mixed::type::INTEGER: - return encode(v.as_int()); - case mixed::type::FLOAT: - return encode(v.as_double()); - case mixed::type::STRING: - return encode(v.as_string()); - case mixed::type::ARRAY: - return encode(v.as_array()); - case mixed::type::OBJECT: - php_warning("Objects (%s) are not supported in JsonEncoder", v.get_type_or_class_name()); - return false; - default: - __builtin_unreachable(); - } -} - -} // namespace impl_ - -namespace { - -void json_skip_blanks(const char *s, int &i) noexcept { - while (vk::any_of_equal(s[i], ' ', '\t', '\r', '\n')) { - i++; - } -} - -bool do_json_decode(const char *s, int s_len, int &i, mixed &v, const char *json_obj_magic_key) noexcept { - if (!v.is_null()) { - v.destroy(); - } - json_skip_blanks(s, i); - switch (s[i]) { - case 'n': - if (s[i + 1] == 'u' && - s[i + 2] == 'l' && - s[i + 3] == 'l') { - i += 4; - return true; - } - break; - case 't': - if (s[i + 1] == 'r' && - s[i + 2] == 'u' && - s[i + 3] == 'e') { - i += 4; - new(&v) mixed(true); - return true; - } - break; - case 'f': - if (s[i + 1] == 'a' && - s[i + 2] == 'l' && - s[i + 3] == 's' && - s[i + 4] == 'e') { - i += 5; - new(&v) mixed(false); - return true; - } - break; - case '"': { - int j = i + 1; - int slashes = 0; - while (j < s_len && s[j] != '"') { - if (s[j] == '\\') { - slashes++; - j++; - } - j++; - } - if (j < s_len) { - int len = j - i - 1 - slashes; - - string value(len, false); - - i++; - int l; - for (l = 0; l < len && i < j; l++) { - char c = s[i]; - if (c == '\\') { - i++; - switch (s[i]) { - case '"': - case '\\': - case '/': - value[l] = s[i]; - break; - case 'b': - value[l] = '\b'; - break; - case 'f': - value[l] = '\f'; - break; - case 'n': - value[l] = '\n'; - break; - case 'r': - value[l] = '\r'; - break; - case 't': - value[l] = '\t'; - break; - case 'u': - if (isxdigit(s[i + 1]) && isxdigit(s[i + 2]) && isxdigit(s[i + 3]) && isxdigit(s[i + 4])) { - int num = 0; - for (int t = 0; t < 4; t++) { - char c = s[++i]; - if ('0' <= c && c <= '9') { - num = num * 16 + c - '0'; - } else { - c |= 0x20; - if ('a' <= c && c <= 'f') { - num = num * 16 + c - 'a' + 10; - } - } - } - - if (0xD7FF < num && num < 0xE000) { - if (s[i + 1] == '\\' && s[i + 2] == 'u' && - isxdigit(s[i + 3]) && isxdigit(s[i + 4]) && isxdigit(s[i + 5]) && isxdigit(s[i + 6])) { - i += 2; - int u = 0; - for (int t = 0; t < 4; t++) { - char c = s[++i]; - if ('0' <= c && c <= '9') { - u = u * 16 + c - '0'; - } else { - c |= 0x20; - if ('a' <= c && c <= 'f') { - u = u * 16 + c - 'a' + 10; - } - } - } - - if (0xD7FF < u && u < 0xE000) { - num = (((num & 0x3FF) << 10) | (u & 0x3FF)) + 0x10000; - } else { - i -= 6; - return false; - } - } else { - return false; - } - } - - if (num < 128) { - value[l] = static_cast(num); - } else if (num < 0x800) { - value[l++] = static_cast(0xc0 + (num >> 6)); - value[l] = static_cast(0x80 + (num & 63)); - } else if (num < 0xffff) { - value[l++] = static_cast(0xe0 + (num >> 12)); - value[l++] = static_cast(0x80 + ((num >> 6) & 63)); - value[l] = static_cast(0x80 + (num & 63)); - } else { - value[l++] = static_cast(0xf0 + (num >> 18)); - value[l++] = static_cast(0x80 + ((num >> 12) & 63)); - value[l++] = static_cast(0x80 + ((num >> 6) & 63)); - value[l] = static_cast(0x80 + (num & 63)); - } - break; - } - /* fallthrough */ - default: - return false; - } - i++; - } else { - value[l] = s[i++]; - } - } - value.shrink(l); - - new(&v) mixed(value); - i++; - return true; - } - break; - } - case '[': { - array res; - i++; - json_skip_blanks(s, i); - if (s[i] != ']') { - do { - mixed value; - if (!do_json_decode(s, s_len, i, value, json_obj_magic_key)) { - return false; - } - res.push_back(value); - json_skip_blanks(s, i); - } while (s[i++] == ','); - - if (s[i - 1] != ']') { - return false; - } - } else { - i++; - } - - new(&v) mixed(res); - return true; - } - case '{': { - array res; - i++; - json_skip_blanks(s, i); - if (s[i] != '}') { - do { - mixed key; - if (!do_json_decode(s, s_len, i, key, json_obj_magic_key) || !key.is_string()) { - return false; - } - json_skip_blanks(s, i); - if (s[i++] != ':') { - return false; - } - - if (!do_json_decode(s, s_len, i, res[key], json_obj_magic_key)) { - return false; - } - json_skip_blanks(s, i); - } while (s[i++] == ','); - - if (s[i - 1] != '}') { - return false; - } - } else { - i++; - } - - // it's impossible to distinguish whether empty php array was an json array or json object; - // to overcome it we add dummy key to php array that make array::is_vector() returning false, so we have difference - if (json_obj_magic_key && res.empty()) { - res[string{json_obj_magic_key}] = true; - } - - new(&v) mixed(res); - return true; - } - default: { - int j = i; - while (s[j] == '-' || ('0' <= s[j] && s[j] <= '9') || s[j] == 'e' || s[j] == 'E' || s[j] == '+' || s[j] == '.') { - j++; - } - if (j > i) { - int64_t intval = 0; - if (php_try_to_int(s + i, j - i, &intval)) { - i = j; - new(&v) mixed(intval); - return true; - } - - char *end_ptr; - double floatval = strtod(s + i, &end_ptr); - if (end_ptr == s + j) { - i = j; - new(&v) mixed(floatval); - return true; - } - } - break; - } - } - - return false; -} - -} // namespace - -std::pair json_decode(const string &v, const char *json_obj_magic_key) noexcept { - mixed result; - int i = 0; - if (do_json_decode(v.c_str(), v.size(), i, result, json_obj_magic_key)) { - json_skip_blanks(v.c_str(), i); - if (i == static_cast(v.size())) { - bool success = true; - return {result, success}; - } - } - - return {}; -} - -mixed f$json_decode(const string &v, bool assoc) noexcept { - // TODO It was a warning before (in case if assoc is false), but then it was disabled, should we enable it again? - static_cast(assoc); - return json_decode(v).first; -} diff --git a/runtime/json-functions.h b/runtime/json-functions.h index 722c9a7f21..4fbfed5598 100644 --- a/runtime/json-functions.h +++ b/runtime/json-functions.h @@ -5,189 +5,21 @@ #pragma once #include "runtime-common/core/runtime-core.h" +#include "runtime-common/stdlib/string/json-functions.h" #include "runtime/context/runtime-context.h" #include "runtime/exception.h" -#include - -constexpr int64_t JSON_UNESCAPED_UNICODE = 1; -constexpr int64_t JSON_FORCE_OBJECT = 16; -constexpr int64_t JSON_PRETTY_PRINT = 128; // TODO: add actual support to untyped -constexpr int64_t JSON_PARTIAL_OUTPUT_ON_ERROR = 512; -constexpr int64_t JSON_PRESERVE_ZERO_FRACTION = 1024; - -constexpr int64_t JSON_AVAILABLE_OPTIONS = JSON_UNESCAPED_UNICODE | JSON_FORCE_OBJECT | JSON_PARTIAL_OUTPUT_ON_ERROR; -constexpr int64_t JSON_AVAILABLE_FLAGS_TYPED = JSON_PRETTY_PRINT | JSON_PRESERVE_ZERO_FRACTION; - -struct JsonPath { - constexpr static int MAX_DEPTH = 8; - - std::array arr; - unsigned depth = 0; - - void enter(const char *key) noexcept { - if (depth < arr.size()) { - arr[depth] = key; - } - depth++; - } - - void leave() noexcept { - depth--; - } - - string to_string() const; -}; - -namespace impl_ { -// note: this class in runtime is used for non-typed json implementation: for json_encode() and json_decode() -// for classes, e.g. `JsonEncoder::encode(new A)`, see json-writer.h and from/to visitors -// todo somewhen, unify this JsonEncoder and JsonWriter, and support JSON_PRETTY_PRINT then -class JsonEncoder : vk::not_copyable { -public: - JsonEncoder(int64_t options, bool simple_encode, const char *json_obj_magic_key = nullptr) noexcept; - - bool encode(bool b) noexcept; - bool encode(int64_t i) noexcept; - bool encode(const string &s) noexcept; - bool encode(double d) noexcept; - bool encode(const mixed &v) noexcept; - - template - bool encode(const array &arr) noexcept; - - template - bool encode(const Optional &opt) noexcept; - -private: - bool encode_null() const noexcept; - - JsonPath json_path_; - const int64_t options_{0}; - const bool simple_encode_{false}; - const char *json_obj_magic_key_{nullptr}; -}; - -template -bool JsonEncoder::encode(const array &arr) noexcept { - bool is_vector = arr.is_vector(); - const bool force_object = static_cast(JSON_FORCE_OBJECT & options_); - if (!force_object && !is_vector && arr.is_pseudo_vector()) { - if (arr.get_next_key() == arr.count()) { - is_vector = true; - } else { - php_warning("%s: Corner case in json conversion, [] could be easy transformed to {}", json_path_.to_string().c_str()); - } - } - is_vector &= !force_object; - - kphp_runtime_context.static_SB << "{["[is_vector]; - - if (is_vector) { - int i = 0; - json_path_.enter(nullptr); // similar key for all entries - for (auto p : arr) { - if (i != 0) { - kphp_runtime_context.static_SB << ','; - } - if (!encode(p.get_value())) { - if (!(options_ & JSON_PARTIAL_OUTPUT_ON_ERROR)) { - return false; - } - } - i++; - } - json_path_.leave(); - } else { - bool is_first = true; - for (auto p : arr) { - if (!is_first) { - kphp_runtime_context.static_SB << ','; - } - is_first = false; - const char *next_key = nullptr; - const auto key = p.get_key(); - if (array::is_int_key(key)) { - auto int_key = key.to_int(); - next_key = nullptr; - kphp_runtime_context.static_SB << '"' << int_key << '"'; - } else { - const string &str_key = key.as_string(); - // skip service key intended only for distinguish empty json object with empty json array - if (json_obj_magic_key_ && !strcmp(json_obj_magic_key_, str_key.c_str())) { - continue; - } - next_key = str_key.c_str(); - if (!encode(str_key)) { - if (!(options_ & JSON_PARTIAL_OUTPUT_ON_ERROR)) { - return false; - } - } - } - kphp_runtime_context.static_SB << ':'; - json_path_.enter(next_key); - if (!encode(p.get_value())) { - if (!(options_ & JSON_PARTIAL_OUTPUT_ON_ERROR)) { - return false; - } - } - json_path_.leave(); - } - } - - kphp_runtime_context.static_SB << "}]"[is_vector]; - return true; -} - -template -bool JsonEncoder::encode(const Optional &opt) noexcept { - switch (opt.value_state()) { - case OptionalState::has_value: - return encode(opt.val()); - case OptionalState::false_value: - return encode(false); - case OptionalState::null_value: - return encode_null(); - } - __builtin_unreachable(); -} - -} // namespace impl_ - -template -Optional f$json_encode(const T &v, int64_t options = 0, bool simple_encode = false) noexcept { - const bool has_unsupported_option = static_cast(options & ~JSON_AVAILABLE_OPTIONS); - if (unlikely(has_unsupported_option)) { - php_warning("Wrong parameter options = %" PRIi64 " in function json_encode", options); - return false; - } - - kphp_runtime_context.static_SB.clean(); - if (unlikely(!impl_::JsonEncoder(options, simple_encode).encode(v))) { - return false; - } - return kphp_runtime_context.static_SB.str(); -} - template string f$vk_json_encode_safe(const T &v, bool simple_encode = true) noexcept { kphp_runtime_context.static_SB.clean(); kphp_runtime_context.sb_lib_context.error_flag = STRING_BUFFER_ERROR_FLAG_ON; impl_::JsonEncoder(0, simple_encode).encode(v); - if (unlikely(kphp_runtime_context.sb_lib_context.error_flag == STRING_BUFFER_ERROR_FLAG_FAILED)) { + if (unlikely(kphp_runtime_context.sb_lib_context.error_flag == STRING_BUFFER_ERROR_FLAG_FAILED)) { kphp_runtime_context.static_SB.clean(); - kphp_runtime_context.sb_lib_context.error_flag = STRING_BUFFER_ERROR_FLAG_OFF; - THROW_EXCEPTION (new_Exception(string(__FILE__), __LINE__, string("json_encode buffer overflow", 27))); + kphp_runtime_context.sb_lib_context.error_flag = STRING_BUFFER_ERROR_FLAG_OFF; + THROW_EXCEPTION(new_Exception(string(__FILE__), __LINE__, string("json_encode buffer overflow", 27))); return {}; } - kphp_runtime_context.sb_lib_context.error_flag = STRING_BUFFER_ERROR_FLAG_OFF; + kphp_runtime_context.sb_lib_context.error_flag = STRING_BUFFER_ERROR_FLAG_OFF; return kphp_runtime_context.static_SB.str(); } - -template -inline Optional f$vk_json_encode(const T &v) noexcept { - return f$json_encode(v, 0, true); -} - -std::pair json_decode(const string &v, const char *json_obj_magic_key = nullptr) noexcept; -mixed f$json_decode(const string &v, bool assoc = false) noexcept; diff --git a/runtime/memcache.cpp b/runtime/memcache.cpp index 595b5fd1e6..0bcc030bbf 100644 --- a/runtime/memcache.cpp +++ b/runtime/memcache.cpp @@ -5,8 +5,8 @@ #include "runtime/memcache.h" #include "runtime-common/core/utils/kphp-assert-core.h" +#include "runtime-common/stdlib/string/json-functions.h" #include "runtime/array_functions.h" -#include "runtime/json-functions.h" #include "runtime/net_events.h" #include "runtime/serialize-functions.h" #include "runtime/zlib.h" diff --git a/runtime/misc.cpp b/runtime/misc.cpp index a208ea55de..7882740bde 100644 --- a/runtime/misc.cpp +++ b/runtime/misc.cpp @@ -11,15 +11,14 @@ #include #include +#include "runtime-common/stdlib/string/json-functions.h" #include "runtime/critical_section.h" #include "runtime/datetime/datetime_functions.h" #include "runtime/exception.h" #include "runtime/files.h" #include "runtime/interface.h" -#include "runtime/json-functions.h" #include "runtime/math_functions.h" #include "runtime/string_functions.h" -#include "runtime/vkext.h" #include "server/json-logger.h" #include "server/php-engine-vars.h" @@ -719,10 +718,6 @@ string f$var_export(const mixed &v, bool buffered) { return {}; } -string f$cp1251(const string &utf8_string) { - return f$vk_utf8_to_win(utf8_string); -} - void f$kphp_set_context_on_error(const array &tags, const array &extra_info, const string& env) { auto &json_logger = vk::singleton::get(); kphp_runtime_context.static_SB.clean(); diff --git a/runtime/runtime.cmake b/runtime/runtime.cmake index d2abdac47a..c4e188fc19 100644 --- a/runtime/runtime.cmake +++ b/runtime/runtime.cmake @@ -81,7 +81,6 @@ prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/ exception.cpp exec.cpp files.cpp - from-json-processor.cpp instance-cache.cpp instance-copy-processor.cpp inter-process-mutex.cpp @@ -124,8 +123,6 @@ prepend(KPHP_RUNTIME_SOURCES ${BASE_DIR}/runtime/ tcp.cpp thread-pool.cpp url.cpp - vkext.cpp - vkext_stats.cpp ffi.cpp zlib.cpp zstd.cpp) diff --git a/runtime/vkext.cpp b/runtime/vkext.cpp deleted file mode 100644 index 5ed5feac53..0000000000 --- a/runtime/vkext.cpp +++ /dev/null @@ -1,663 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#include "runtime/vkext.h" - -#include - -#include "common/string-processing.h" -#include "flex/flex.h" - -#include "runtime/misc.h" -#include "runtime/allocator.h" - -static int utf8_to_win_convert_0x400[256] = {-1, 0xa8, 0x80, 0x81, 0xaa, 0xbd, 0xb2, 0xaf, 0xa3, 0x8a, 0x8c, 0x8e, 0x8d, -1, 0xa1, 0x8f, 0xc0, 0xc1, 0xc2, 0xc3, - 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, - 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, - 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, - 0xfd, 0xfe, 0xff, -1, 0xb8, 0x90, 0x83, 0xba, 0xbe, 0xb3, 0xbf, 0xbc, 0x9a, 0x9c, 0x9e, 0x9d, -1, 0xa2, 0x9f, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xa5, 0xb4, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; -static int utf8_to_win_convert_0x2000[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x96, 0x97, -1, -1, -1, 0x91, 0x92, - 0x82, -1, 0x93, 0x94, 0x84, -1, 0x86, 0x87, 0x95, -1, -1, -1, 0x85, -1, -1, -1, -1, 0xda, 0xda, -1, 0xda, -1, - 0x89, -1, -1, -1, -1, -1, -1, -1, -1, 0x8b, 0x9b, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x88, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; -static int utf8_to_win_convert_0xff00[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 0x20, -1, -1}; -static int utf8_to_win_convert_0x2100[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xb9, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 0x99, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1}; -static int utf8_to_win_convert_0x000[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xa0, -1, -1, -1, 0xa4, -1, 0xa6, 0xa7, -1, 0xa9, -1, - 0xab, 0xac, 0xad, 0xae, -1, 0xb0, 0xb1, -1, -1, -1, 0xb5, 0xb6, 0xb7, -1, -1, -1, 0xbb, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1}; -static int win_to_utf8_convert[256] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, - 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, - 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, - 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, - 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, - 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, - 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x402, 0x403, 0x201a, 0x453, 0x201e, 0x2026, 0x2020, 0x2021, 0x20ac, 0x2030, 0x409, 0x2039, - 0x40a, 0x40c, 0x40b, 0x40f, 0x452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x0, 0x2122, 0x459, 0x203a, - 0x45a, 0x45c, 0x45b, 0x45f, 0xa0, 0x40e, 0x45e, 0x408, 0xa4, 0x490, 0xa6, 0xa7, 0x401, 0xa9, 0x404, 0xab, 0xac, 0xad, - 0xae, 0x407, 0xb0, 0xb1, 0x406, 0x456, 0x491, 0xb5, 0xb6, 0xb7, 0x451, 0x2116, 0x454, 0xbb, 0x458, 0x405, 0x455, 0x457, - 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417, 0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f, 0x420, - 0x421, 0x422, 0x423, 0x424, 0x425, 0x426, 0x427, 0x428, 0x429, 0x42a, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f, 0x430, 0x431, - 0x432, 0x433, 0x434, 0x435, 0x436, 0x437, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e, 0x43f, 0x440, 0x441, 0x442, - 0x443, 0x444, 0x445, 0x446, 0x447, 0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f}; - -inline int utf8_to_win_char(int c) { - if (c < 0x80) { - return c; - } - switch (c & ~0xff) { - case 0x0400: - return utf8_to_win_convert_0x400[c & 0xff]; - case 0x2000: - return utf8_to_win_convert_0x2000[c & 0xff]; - case 0xff00: - return utf8_to_win_convert_0xff00[c & 0xff]; - case 0x2100: - return utf8_to_win_convert_0x2100[c & 0xff]; - case 0x0000: - return utf8_to_win_convert_0x000[c & 0xff]; - } - return -1; -} - -#define BUFF_LEN (1 << 16) -static char buff[BUFF_LEN]; -char *wptr; - -char *result_buff; -int result_buff_len; -int result_buff_pos; -#define cur_buff_len (int)((wptr - buff) + result_buff_pos) - -inline void init_buff() { - wptr = buff; - result_buff = nullptr; - result_buff_len = 0; - result_buff_pos = 0; -} - -inline void free_buff() { - if (result_buff_len) { - dl::deallocate(result_buff, result_buff_len); - } -} - -inline void realloc_buff() { - if (!result_buff_len) { - result_buff_len = 2 * BUFF_LEN; - result_buff = (char *)dl::allocate(result_buff_len); - } else { - php_assert (result_buff_len < (1 << 30)); - result_buff = (char *)dl::reallocate(result_buff, 2 * result_buff_len, result_buff_len); - result_buff_len *= 2; - } -} - -inline void flush_buff() { - while (cur_buff_len > result_buff_len) { - realloc_buff(); - } - memcpy(result_buff + result_buff_pos, buff, wptr - buff); - result_buff_pos += (int)(wptr - buff); - wptr = buff; -} - -inline string finish_buff(int64_t max_len) { - int64_t len = cur_buff_len; - if (max_len && max_len < len) { - len = max_len; - } - - if (result_buff_len) { - flush_buff(); - string res(result_buff, static_cast(len)); - free_buff(); - return res; - } - - return {buff, static_cast(len)}; -} - -inline void write_buff(const char *s, int l) { - while (l > 0) { - if (unlikely (wptr == buff + BUFF_LEN)) { - flush_buff(); - } - int ll = min(l, (int)(buff + BUFF_LEN - wptr)); - memcpy(wptr, s, ll); - wptr += ll; - s += ll; - l -= ll; - } -} - -inline void write_buff_set_pos(int pos) { - if (pos > cur_buff_len) { - return; - } - if (pos >= result_buff_pos) { - wptr = (pos - result_buff_pos) + buff; - return; - } - result_buff_pos = pos; - wptr = buff; -} - -inline void write_buff_char_pos(char c, int pos) { - if (pos > cur_buff_len) { - return; - } - if (pos >= result_buff_pos) { - *((pos - result_buff_pos) + buff) = c; - return; - } - *(result_buff + pos) = c; -} - - -inline void write_buff_char(char c) { - if (unlikely (wptr == buff + BUFF_LEN)) { - flush_buff(); - } - *wptr++ = c; -} - -inline void write_buff_char_2(char c1, char c2) { - if (unlikely (wptr >= buff + BUFF_LEN - 1)) { - flush_buff(); - } - *wptr++ = c1; - *wptr++ = c2; -} - -inline void write_buff_char_3(char c1, char c2, char c3) { - if (unlikely (wptr >= buff + BUFF_LEN - 2)) { - flush_buff(); - } - *wptr++ = c1; - *wptr++ = c2; - *wptr++ = c3; -} - -inline void write_buff_char_4(char c1, char c2, char c3, char c4) { - if (unlikely (wptr >= buff + BUFF_LEN - 3)) { - flush_buff(); - } - *wptr++ = c1; - *wptr++ = c2; - *wptr++ = c3; - *wptr++ = c4; -} - -inline void write_buff_char_5(char c1, char c2, char c3, char c4, char c5) { - if (unlikely (wptr >= buff + BUFF_LEN - 4)) { - flush_buff(); - } - *wptr++ = c1; - *wptr++ = c2; - *wptr++ = c3; - *wptr++ = c4; - *wptr++ = c5; -} - -inline void write_buff_char_6(char c1, char c2, char c3, char c4, char c5, char c6) { - if (unlikely (wptr >= buff + BUFF_LEN - 5)) { - flush_buff(); - } - *wptr++ = c1; - *wptr++ = c2; - *wptr++ = c3; - *wptr++ = c4; - *wptr++ = c5; - *wptr++ = c6; -} - -inline void write_buff_int(int x) { - if (unlikely (wptr + 25 > buff + BUFF_LEN)) { - flush_buff(); - } - wptr += snprintf(wptr, 25, "%d", x); -} - - -int utf8_to_win(const char *s, int len, int64_t max_len, bool exit_on_error) { - int st = 0; - int acc = 0; -// if (max_len && len > 3 * max_len) { -// len = 3 * max_len; -// } - for (int i = 0; i < len; i++) { - if (max_len && cur_buff_len >= max_len) { - break; - } - int c = static_cast(s[i]); - if (c < 0x80) { - if (st) { - if (exit_on_error) { - return -1; - } - write_buff("?1?", 3); - } - write_buff_char(static_cast(c)); - st = 0; - } else if ((c & 0xc0) == 0x80) { - if (!st) { - if (exit_on_error) { - return -1; - } - write_buff("?2?", 3); - continue; - } - acc <<= 6; - acc += c - 0x80; - if (!--st) { - if (acc < 0x80) { - if (exit_on_error) { - return -1; - } - write_buff("?3?", 3); - } else { - int d = utf8_to_win_char(acc); - if (d != -1 && d) { - write_buff_char(static_cast(d)); - } else { - write_buff_char_2('&', '#'); - write_buff_int(acc); - write_buff_char(';'); - } - } - } - } else { // if ((c & 0xc0) == 0xc0) - if (st) { - if (exit_on_error) { - return -1; - } - write_buff("?4?", 3); - } - c -= 0xc0; - st = 0; - if (c < 32) { - acc = c; - st = 1; - } else if (c < 48) { - acc = c - 32; - st = 2; - } else if (c < 56) { - acc = c - 48; - st = 3; - } else { - if (exit_on_error) { - return -1; - } - write_buff("?5?", 3); - } - } - } - if (st) { - if (exit_on_error) { - return -1; - } - write_buff("?6?", 3); - } - return 1; -} - -void write_char_utf8(int64_t c) { - if (!c) { - return; - } - if (c < 128) { - write_buff_char((char)c); - return; - } - // 2 bytes(11): 110x xxxx 10xx xxxx - if (c < 0x800) { - write_buff_char_2((char)(0xC0 + (c >> 6)), (char)(0x80 + (c & 63))); - return; - } - - // 3 bytes(16): 1110 xxxx 10xx xxxx 10xx xxxx - if (c < 0x10000) { - write_buff_char_3((char)(0xE0 + (c >> 12)), (char)(0x80 + ((c >> 6) & 63)), (char)(0x80 + (c & 63))); - return; - } - - // 4 bytes(21): 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx - if (c < 0x200000) { - write_buff_char_4((char)(0xF0 + (c >> 18)), (char)(0x80 + ((c >> 12) & 63)), (char)(0x80 + ((c >> 6) & 63)), (char)(0x80 + (c & 63))); - return; - } - - // 5 bytes(26): 1111 10xx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx - if (c < 0x4000000) { - write_buff_char_5((char)(0xF8 + (c >> 24)), (char)(0x80 + ((c >> 18) & 63)), (char)(0x80 + ((c >> 12) & 63)), (char)(0x80 + ((c >> 6) & 63)), - (char)(0x80 + (c & 63))); - return; - } - - // 6 bytes(31): 1111 110x 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx - if (c < 0x80000000) { - write_buff_char_6((char)(0xFC + (c >> 30)), (char)(0x80 + ((c >> 24) & 63)), (char)(0x80 + ((c >> 18) & 63)), (char)(0x80 + ((c >> 12) & 63)), - (char)(0x80 + ((c >> 6) & 63)), (char)(0x80 + (c & 63))); - return; - } - - write_buff_char_2('$', '#'); - write_buff_int(c); - write_buff_char(';'); -} - -void write_char_utf8_no_escape(int64_t c) { - if (!c) { - return; - } - if (c < 128) { - write_buff_char((char)c); - return; - } - // 2 bytes(11): 110x xxxx 10xx xxxx - if (c < 0x800) { - write_buff_char_2((char)(0xC0 + (c >> 6)), (char)(0x80 + (c & 63))); - return; - } - - // 3 bytes(16): 1110 xxxx 10xx xxxx 10xx xxxx - if (c < 0x10000) { - write_buff_char_3((char)(0xE0 + (c >> 12)), (char)(0x80 + ((c >> 6) & 63)), (char)(0x80 + (c & 63))); - return; - } - - // 4 bytes(21): 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx - if (c < 0x200000) { - write_buff_char_4((char)(0xF0 + (c >> 18)), (char)(0x80 + ((c >> 12) & 63)), (char)(0x80 + ((c >> 6) & 63)), (char)(0x80 + (c & 63))); - return; - } - - // 5 bytes(26): 1111 10xx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx - if (c < 0x4000000) { - write_buff_char_5((char)(0xF8 + (c >> 24)), (char)(0x80 + ((c >> 18) & 63)), (char)(0x80 + ((c >> 12) & 63)), (char)(0x80 + ((c >> 6) & 63)), - (char)(0x80 + (c & 63))); - return; - } - - // 6 bytes(31): 1111 110x 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx - if (c < 0x80000000) { - write_buff_char_6((char)(0xFC + (c >> 30)), (char)(0x80 + ((c >> 24) & 63)), (char)(0x80 + ((c >> 18) & 63)), (char)(0x80 + ((c >> 12) & 63)), - (char)(0x80 + ((c >> 6) & 63)), (char)(0x80 + (c & 63))); - return; - } -} - -static int win_to_utf8(const char *s, int len, bool escape) { - int state = 0; - int save_pos = -1; - int64_t cur_num = 0; - for (int i = 0; i < len; i++) { - if (state == 0 && s[i] == '&') { - save_pos = cur_buff_len; - cur_num = 0; - state++; - } else if (state == 1 && s[i] == '#') { - state++; - } else if (state == 2 && s[i] >= '0' && s[i] <= '9') { - if (cur_num < 0x80000000) { - cur_num = s[i] - '0' + cur_num * 10; - } - } else if (state == 2 && s[i] == ';') { - state++; - } else { - state = 0; - } - if (state == 3 && 0xd800 <= cur_num && cur_num <= 0xdfff) { - cur_num = 32; - } - if (state == 3 && (!escape || (cur_num >= 32 && cur_num != 33 && cur_num != 34 && cur_num != 36 && cur_num != 39 && cur_num != 60 && cur_num != 62 && cur_num != 92 && cur_num != 8232 && cur_num != 8233 && cur_num < 0x80000000))) { - write_buff_set_pos(save_pos); - php_assert (save_pos == cur_buff_len); - (escape ? write_char_utf8 : write_char_utf8_no_escape)(cur_num); - } else if (state == 3 && cur_num >= 0x80000000) { - write_char_utf8(win_to_utf8_convert[(unsigned char)s[i]]); - write_buff_char_pos('$', save_pos); - } else { - write_char_utf8(win_to_utf8_convert[(unsigned char)s[i]]); - } - if (state == 3) { - state = 0; - } - } - return cur_buff_len; -} - -char ws[256] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; - -static inline bool is_html_opt_symb(char c) { - return (c == '<' || c == '>' || c == '(' || c == ')' || c == '{' || c == '}' || c == '/' || c == '"' || c == ':' || c == ',' || c == ';'); -} - -static inline bool is_space(char c) { - return ws[(unsigned char)c]; -} - -static inline bool is_linebreak(char c) { - return c == '\n'; -} - -static inline bool is_pre_tag(const char *s) { - if (s[0] == '<') { - if (s[1] == 'p') { - return s[2] == 'r' && s[3] == 'e' && s[4] == '>'; - } else if (s[1] == 'c') { - return s[2] == 'o' && s[3] == 'd' && s[4] == 'e' && s[5] == '>'; - } else if (s[1] == '/') { - if (s[2] == 'p') { - return -(s[3] == 'r' && s[4] == 'e' && s[5] == '>'); - } else if (s[2] == 'c') { - return -(s[3] == 'o' && s[4] == 'd' && s[5] == 'e' && s[6] == '>'); - } - } - } - return false; -} - -string f$vk_utf8_to_win(const string &text, int64_t max_len, bool exit_on_error) { - init_buff(); - int r = utf8_to_win(text.c_str(), text.size(), max_len, exit_on_error); - if (r >= 0) { - return finish_buff(max_len); - } else { - if (!max_len || text.size() <= static_cast(max_len)) { - return text; - } - return {text.c_str(), static_cast(max_len)}; - } -} - -string f$vk_win_to_utf8(const string &text, bool escape) { - init_buff(); - win_to_utf8(text.c_str(), text.size(), escape); - return finish_buff(0); -} - -string f$vk_flex(const string &name, const string &case_name, int64_t sex, const string &type, int64_t lang_id) { - const size_t error_msg_buf_size = 1000; - static char ERROR_MSG_BUF[error_msg_buf_size] = {'\0'}; - ERROR_MSG_BUF[0] = '\0'; - vk::string_view res = flex(vk::string_view{name.c_str(), name.size()}, vk::string_view{case_name.c_str(), case_name.size()}, sex == 1, - vk::string_view{type.c_str(), type.size()}, lang_id, buff, ERROR_MSG_BUF, error_msg_buf_size); - if (ERROR_MSG_BUF[0] != '\0') { - php_warning("%s", ERROR_MSG_BUF); - } - if (res.data() == name.c_str()) { - return name; - } - return string{res.data(), static_cast(res.size())}; -} - -string f$vk_whitespace_pack(const string &str, bool html_opt) { - const char *text = str.c_str(); - int level = 0; - const char *ctext = text; - const char *start = text; - - init_buff(); - while (*text) { - if (is_space(*text) && !level) { - int linebreak = 0; - while (is_space(*text)) { - if (is_linebreak(*text)) { - linebreak = 1; - } - text++; - } - if (!html_opt || (ctext != start && !is_html_opt_symb(ctext[-1]) && *text && !is_html_opt_symb(*text))) { - write_buff_char(linebreak ? '\n' : ' '); - } - } else { - while (true) { - while ((level || !is_space(*text)) && *text) { - level += is_pre_tag(text); - if (level < 0) { - level = 1000000000; - } - text++; - } - if (!html_opt && *text && !is_space(text[1])) { - text++; - } else { - break; - } - } - write_buff(ctext, (int)(text - ctext)); - } - ctext = text; - } - return finish_buff(0); -} - - -string f$vk_sp_simplify(const string &s) { - sp_init(); - char *t = sp_simplify(s.c_str()); - if (!t) { - return {}; - } - - return {t, (string::size_type)strlen(t)}; -} - -string f$vk_sp_full_simplify(const string &s) { - sp_init(); - char *t = sp_full_simplify(s.c_str()); - if (!t) { - return {}; - } - - return {t, (string::size_type)strlen(t)}; -} - -string f$vk_sp_deunicode(const string &s) { - sp_init(); - char *t = sp_deunicode(s.c_str()); - if (!t) { - return {}; - } - - return {t, (string::size_type)strlen(t)}; -} - -string f$vk_sp_to_upper(const string &s) { - sp_init(); - char *t = sp_to_upper(s.c_str()); - if (!t) { - return {}; - } - - return {t, (string::size_type)strlen(t)}; -} - -string f$vk_sp_to_lower(const string &s) { - sp_init(); - char *t = sp_to_lower(s.c_str()); - if (!t) { - return {}; - } - - return {t, (string::size_type)strlen(t)}; -} - -string f$vk_sp_to_sort(const string &s) { - sp_init(); - char *t = sp_sort(s.c_str()); - if (!t) { - return {}; - } - - return {t, (string::size_type)strlen(t)}; -} - -string f$vk_sp_remove_repeats(const string &s) { - sp_init(); - char *t = sp_remove_repeats(s.c_str()); - if (!t) { - return {}; - } - - return {t, (string::size_type)strlen(t)}; -} - -string f$vk_sp_to_cyrillic(const string &s) { - sp_init(); - char *t = sp_to_cyrillic(s.c_str()); - if (!t) { - return {}; - } - - return {t, (string::size_type)strlen(t)}; -} - -string f$vk_sp_words_only(const string &s) { - sp_init(); - char *t = sp_words_only(s.c_str()); - if (!t) { - return {}; - } - - return {t, (string::size_type)strlen(t)}; -} diff --git a/runtime/vkext.h b/runtime/vkext.h deleted file mode 100644 index c53a8ba6fc..0000000000 --- a/runtime/vkext.h +++ /dev/null @@ -1,33 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#pragma once - -#include "runtime-common/core/runtime-core.h" - -string f$vk_utf8_to_win(const string &text, int64_t max_len = 0, bool exit_on_error = false); - -string f$vk_win_to_utf8(const string &text, bool escape = true); - -string f$vk_flex(const string &name, const string &case_name, int64_t sex, const string &type, int64_t lang_id = 0); - -string f$vk_whitespace_pack(const string &str, bool html_opt = false); - -string f$vk_sp_simplify(const string &s); - -string f$vk_sp_full_simplify(const string &s); - -string f$vk_sp_deunicode(const string &s); - -string f$vk_sp_to_upper(const string &s); - -string f$vk_sp_to_lower(const string &s); - -string f$vk_sp_to_sort(const string &s); - -string f$vk_sp_remove_repeats(const string &s); - -string f$vk_sp_to_cyrillic(const string &s); - -string f$vk_sp_words_only(const string &s); diff --git a/runtime/vkext_stats.h b/runtime/vkext_stats.h deleted file mode 100644 index 41073c0008..0000000000 --- a/runtime/vkext_stats.h +++ /dev/null @@ -1,15 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2020 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#pragma once - -#include "runtime-common/core/runtime-core.h" - -Optional f$vk_stats_hll_merge(const array &a); -Optional f$vk_stats_hll_count(const string &hll); -Optional f$vk_stats_hll_create(const array &a = array(), int64_t size = (1 << 8)); -Optional f$vk_stats_hll_add(const string &hll, const array &a); -Optional f$vk_stats_hll_pack(const string &hll); -Optional f$vk_stats_hll_unpack(const string &hll); -bool f$vk_stats_hll_is_packed(const string &hll); diff --git a/server/php-engine.cpp b/server/php-engine.cpp index c19b6e7fb1..bfe11539b0 100644 --- a/server/php-engine.cpp +++ b/server/php-engine.cpp @@ -59,8 +59,8 @@ #include "net/net-tcp-rpc-client.h" #include "net/net-tcp-rpc-server.h" +#include "runtime-common/stdlib/string/json-functions.h" #include "runtime/interface.h" -#include "runtime/json-functions.h" #include "runtime/kphp_ml/kphp_ml_init.h" #include "runtime/profiler.h" #include "runtime/rpc.h" diff --git a/tests/cpp/runtime/flex-test.cpp b/tests/cpp/runtime/flex-test.cpp index f3e88cb55a..963ad60240 100644 --- a/tests/cpp/runtime/flex-test.cpp +++ b/tests/cpp/runtime/flex-test.cpp @@ -3,7 +3,7 @@ #include #include -#include "runtime/vkext.h" +#include "runtime-common/stdlib/vkext/vkext.h" using casing_table = std::array; diff --git a/tests/phpt/cl/178_fail_pass_class_to_hll_stats.php b/tests/phpt/cl/178_fail_pass_class_to_hll_stats.php index b411eb2c1c..8578eef218 100644 --- a/tests/phpt/cl/178_fail_pass_class_to_hll_stats.php +++ b/tests/phpt/cl/178_fail_pass_class_to_hll_stats.php @@ -1,4 +1,4 @@ -@kphp_should_fail k2_skip +@kphp_should_fail /conversion from A to mixed is forbidden/ ',"'bar'",'"baz"','&blong&', "\xc3\xa9", "\xF4\x8F\xBF\xBF", "\xF4\x90\x80\x80", "\xc3\xa9\xF4\x8F\xBF\xBF\xF4\x90\x80\x80", "\xD0\x9A", "\xE0\x90\x9A", "\xF0\x80\x90\x9A", "\xF8\x80\x80\x90\x9A", "\xFC\x80\x80\x80\x90\x9A",//character and its overlong encodings diff --git a/tests/phpt/dl/433_nl2br.php b/tests/phpt/dl/433_nl2br.php index c0bf9c958c..9b4a82db26 100644 --- a/tests/phpt/dl/433_nl2br.php +++ b/tests/phpt/dl/433_nl2br.php @@ -1,4 +1,4 @@ -@ok benchmark k2_skip +@ok benchmark <:<"><:><"><">:"<">"<:><>"<||||{}|{}|{}|[]\\[]\\[]\\[]\[\]\[\]\[\]\[]\\[\]\[]\[\]\\[]\[\\\\\\\\\\\\\\\\\\\\'); diff --git a/tests/phpt/dl/491_strpos.php b/tests/phpt/dl/491_strpos.php index 1f0ae2ff20..b535e94a53 100644 --- a/tests/phpt/dl/491_strpos.php +++ b/tests/phpt/dl/491_strpos.php @@ -1,4 +1,4 @@ -@ok k2_skip +@ok = $actual) && (($expected - $delta) <= $actual); +} + +var_dump(false === vk_stats_hll_create([], 1)); +var_dump('0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' === vk_stats_hll_create([1], 256)); +var_dump('0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' === vk_stats_hll_create([1, 1, 1], 256)); +var_dump('0000100000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' === vk_stats_hll_create(range(1, 3), 256)); +var_dump('0001230122301002380002031052112121204202311222110301201301010022205423100221204104410730005401011210130220011000001040101426000240121070030000220200002011230022010000230300110031212043601110020014=11261030114000201328111<00320333172203041043154030301021331' === vk_stats_hll_create(range(1, 253), 256)); + +var_dump(false === vk_stats_hll_count('1')); + +var_dump(eq_in_range(1, vk_stats_hll_count(vk_stats_hll_create([1])), 1E-1)); +var_dump(eq_in_range(1, vk_stats_hll_count(vk_stats_hll_create([1, 1])), 1E-1)); +var_dump(eq_in_range(2, vk_stats_hll_count(vk_stats_hll_create([1, 2])), 1E-1)); +var_dump(eq_in_range(253.77, vk_stats_hll_count(vk_stats_hll_create(range(1, 253))), 1E-1)); + +$hll1 = vk_stats_hll_create([1, 2]); +$hll2 = vk_stats_hll_create([1, 2, 3]); +var_dump(eq_in_range(3, vk_stats_hll_count(vk_stats_hll_merge([$hll1, $hll2])), 1E-1)); + +$hll3 = vk_stats_hll_create([4, 5, 6]); +var_dump(eq_in_range(6, vk_stats_hll_count(vk_stats_hll_merge([$hll2, $hll3])), 1E-1)); + +?> +--EXPECT-- +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) diff --git a/tests/phpt/vkext/02_json_encode.php b/tests/phpt/vkext/02_json_encode.php new file mode 100644 index 0000000000..0230848a35 --- /dev/null +++ b/tests/phpt/vkext/02_json_encode.php @@ -0,0 +1,90 @@ +@ok + 1, "2" => true, 4 => "4", 5 => 5.))); +var_dump(vk_json_encode(array("1" => 1, "b" => true, 4 => "c", 5 => 5.))); +var_dump(vk_json_encode(array("1" => array(1, 2), "b" => true, 4 => "c", 5 => 5.))); + +$a = 1; +$b = 1; +$c = array(1 => $b); +var_dump(vk_json_encode($c)); + +$a = array(1 => 10, 3 => 20); +$b = array(4 => 10, 6 => 20, 7 => $a); +$a[4] = $b; +var_dump(vk_json_encode($a)); + +$r = [ + 'test' => 'test', + 'array' => [] +]; + +$r2 = [ + $r, + 'test' +]; + +$r['array'][] = $r; +$r['r2'] = $r2; +$r['test3'] = $r; + +$k = vk_json_encode($r); +var_dump($k); + +// fix for php7 +var_dump(vk_json_encode(['foo' => 1, '' => 2, 0 => 3, false => 4, null => 5])); +var_dump(vk_json_encode(['foo' => 1, 0 => 3, false => 4, null => 5])); +var_dump(vk_json_encode(['foo' => 1, false => 4, null => 5])); +var_dump(vk_json_encode(['foo' => 1, null => 5])); +var_dump(vk_json_encode([null => 5])); +var_dump(vk_json_encode([5])); +var_dump(vk_json_encode(['' => 2, false => 4, null => 5])); +var_dump(vk_json_encode(['' => 2])); + +?> + +--EXPECT-- +string(5) ""123"" +string(2) """" +string(3) "" "" +string(3) ""a"" +string(1) "1" +string(4) "true" +string(8) "1.100000" +string(2) "-1" +string(7) "[1,2,3]" +string(14) "[1.000000,2,3]" +string(10) "[true,2,3]" +string(17) "[true,0.100000,3]" +string(37) "{"1":1,"2":true,"4":"4","5":5.000000}" +string(37) "{"1":1,"b":true,"4":"c","5":5.000000}" +string(41) "{"1":[1,2],"b":true,"4":"c","5":5.000000}" +string(7) "{"1":1}" +string(37) "{"4":null,"6":20,"7":{"1":10,"3":20}}" +string(62) "{"test":"test","array":[null],"r2":[null,"test"],"test3":null}" +string(20) "{"foo":1,"":5,"0":4}" +string(20) "{"foo":1,"0":4,"":5}" +string(20) "{"foo":1,"0":4,"":5}" +string(14) "{"foo":1,"":5}" +string(6) "{"":5}" +string(3) "[5]" +string(12) "{"":5,"0":4}" +string(6) "{"":2}" diff --git a/tests/phpt/vkext/03_sp_deunicode.php b/tests/phpt/vkext/03_sp_deunicode.php new file mode 100644 index 0000000000..1a1220909c --- /dev/null +++ b/tests/phpt/vkext/03_sp_deunicode.php @@ -0,0 +1,16 @@ +@ok + + +--EXPECT-- +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) diff --git a/tests/phpt/vkext/04_sp_full_simplify.php b/tests/phpt/vkext/04_sp_full_simplify.php new file mode 100644 index 0000000000..0b28ac70ef --- /dev/null +++ b/tests/phpt/vkext/04_sp_full_simplify.php @@ -0,0 +1,21 @@ +@ok +ïðèâåò') === 'âïðèâåòâ'); +var_dump(vk_sp_full_simplify('HELLO') === 'íållî'); +var_dump(vk_sp_full_simplify('?') === ''); + +?> + +--EXPECT-- +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) diff --git a/tests/phpt/vkext/05_sp_simplify.php b/tests/phpt/vkext/05_sp_simplify.php new file mode 100644 index 0000000000..47b6a489f5 --- /dev/null +++ b/tests/phpt/vkext/05_sp_simplify.php @@ -0,0 +1,25 @@ +@ok + + +--EXPECT-- +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) +bool(true) diff --git a/tests/phpt/vkext/06_sp_to_lower.php b/tests/phpt/vkext/06_sp_to_lower.php new file mode 100644 index 0000000000..fffc53d2d2 --- /dev/null +++ b/tests/phpt/vkext/06_sp_to_lower.php @@ -0,0 +1,35 @@ +@ok + + +--EXPECT-- +string(0) "" +string(1) "a" +string(1) "a" +string(3) "abc" +string(3) "abc" +string(3) "abc" +string(7) "abcdefg" +string(1) " " +string(7) "abcdefg" diff --git a/tests/phpt/vkext/07_sp_to_upper.php b/tests/phpt/vkext/07_sp_to_upper.php new file mode 100644 index 0000000000..317e74b029 --- /dev/null +++ b/tests/phpt/vkext/07_sp_to_upper.php @@ -0,0 +1,35 @@ +@ok + + +--EXPECT-- +string(0) "" +string(1) "A" +string(1) "A" +string(3) "ABC" +string(3) "ABC" +string(3) "ABC" +string(7) "ABCDEFG" +string(1) " " +string(7) "ABCDEFG" diff --git a/tests/phpt/vkext/09_utf8_to_win.php b/tests/phpt/vkext/09_utf8_to_win.php new file mode 100644 index 0000000000..568042a14e --- /dev/null +++ b/tests/phpt/vkext/09_utf8_to_win.php @@ -0,0 +1,27 @@ +@ok + + +--EXPECT-- +string(0) "" +string(1) " " +string(1) "ò" +string(4) "òåñò" +string(4) "ÒÅÑÒ" +string(4) "ÒÅÑÒ" diff --git a/tests/phpt/vkext/10_whispace_pack.php b/tests/phpt/vkext/10_whispace_pack.php new file mode 100644 index 0000000000..379bc442e5 --- /dev/null +++ b/tests/phpt/vkext/10_whispace_pack.php @@ -0,0 +1,55 @@ +@ok + + +--EXPECT-- +string(4) "test" +string(5) " test" +string(5) " test" +string(6) " ntest" +string(6) " ntest" +string(5) " +test" +string(5) " +test" +string(5) "test " +string(5) "test " +string(5) "test +" +string(5) "test " +string(5) "test +" +string(5) "test " +string(9) "test test" +string(9) "test test" +string(9) "test +test" +string(9) "test test" +string(9) "test +test" +string(9) "test test" +string(9) "test test" + diff --git a/tests/phpt/vkext/11_win_to_utf.php b/tests/phpt/vkext/11_win_to_utf.php new file mode 100644 index 0000000000..b408da175a --- /dev/null +++ b/tests/phpt/vkext/11_win_to_utf.php @@ -0,0 +1,26 @@ +@ok + + +--EXPECT-- +string(0) "" +string(1) " " +string(1) "t" +string(4) "test" +string(4) "TEST" +string(4) "TEST" diff --git a/tests/python/lib/kphp_run_once.py b/tests/python/lib/kphp_run_once.py index 025a8a9782..89ca779a77 100644 --- a/tests/python/lib/kphp_run_once.py +++ b/tests/python/lib/kphp_run_once.py @@ -138,7 +138,10 @@ def run_with_kphp_and_k2(self, runs_cnt=1, args=[]): k2_node_bin = self.k2_bin cmd = [k2_node_bin, "run-once", "--image", os.path.join(self._kphp_build_tmp_dir, "component.so"), "--runs-count={}".format(runs_cnt), "--crypto"] + args - env = {"RUST_LOG": "Warn"} + + env = os.environ.copy() + if "RUST_LOG" not in env: + env["RUST_LOG"] = "Warn" k2_runtime_proc = subprocess.Popen(cmd, cwd=self._kphp_runtime_tmp_dir, diff --git a/common/string-processing.cpp b/vkext/string-processing.cpp similarity index 99% rename from common/string-processing.cpp rename to vkext/string-processing.cpp index 8f1f102908..224e00bdd4 100644 --- a/common/string-processing.cpp +++ b/vkext/string-processing.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2020 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt -#include "common/string-processing.h" +#include "vkext/string-processing.h" #include #include diff --git a/common/string-processing.h b/vkext/string-processing.h similarity index 100% rename from common/string-processing.h rename to vkext/string-processing.h diff --git a/vkext/vkext-sp.cpp b/vkext/vkext-sp.cpp index 5d95c9addc..64c5a7dd53 100644 --- a/vkext/vkext-sp.cpp +++ b/vkext/vkext-sp.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2020 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt -#include "common/string-processing.h" +#include "vkext/string-processing.h" #include "common/unicode/unicode-utils.h" #include "vkext/vkext.h" diff --git a/vkext/vkext.cmake b/vkext/vkext.cmake index 04593b6985..afdfc39844 100644 --- a/vkext/vkext.cmake +++ b/vkext/vkext.cmake @@ -36,13 +36,13 @@ endfunction() prepend(VKEXT_COMMON_SOURCES ${COMMON_DIR}/ crc32.cpp crc32_${CMAKE_SYSTEM_PROCESSOR}.cpp - string-processing.cpp unicode/utf8-utils.cpp cpuid.cpp version-string.cpp rpc-headers.cpp) prepend(VKEXT_SOURCES ${VKEXT_DIR}/ + string-processing.cpp vkext.cpp vkext-iconv.cpp vkext-flex.cpp @@ -56,6 +56,11 @@ prepend(VKEXT_SOURCES ${VKEXT_DIR}/ vkext-stats.cpp vkext-sp.cpp) +if(COMPILER_CLANG) + set_source_files_properties(${VKEXT_DIR}/string-processing.cpp PROPERTIES COMPILE_FLAGS -Wno-invalid-source-encoding) +endif() + + foreach(PHP_VERSION IN ITEMS "" "7.4" "8.0" "8.1" "8.2" "8.3") find_program(PHP_CONFIG_EXEC${PHP_VERSION} php-config${PHP_VERSION}) set(PHP_CONFIG_EXEC ${PHP_CONFIG_EXEC${PHP_VERSION}})