From 2671d43d7e9d2d8983f505711d3f82f59b4c12bc Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 16 Dec 2024 23:50:12 +0100 Subject: [PATCH] json: Store short strings in-place. The 'struct json' contains a union and the largest element of that union is 'struct json_array', which takes 24 bytes. It means, that a lot of space in this structure remains unused whenever the type is not JSON_ARRAY. For example, the 'string' pointer used for JSON_STRING only takes 8 bytes on a 64-bit system leaving 24 - 8 = 16 bytes unused. There is also a 4-byte hole between the 'type' and the 'count'. A pretty common optimization technique for storing strings is to store short ones in place of the pointer and only allocate dynamically the larger strings that do not fit. In our case, we have even larger space of 24 bytes to work with. So, we could use all 24 bytes to store the strings (23 string bytes + '\0') and use the 4 byte unused space outside the union to store the storage type. This approach should allow us to save on memory allocation for short strings and also save on accesses to them, as the content will fit into the same cache line as the 'struct json' itself. In practice, large OVN databases tend to operate with quite large strings. For example, all the logical flow matches and actions in OVN Southbound database would not fit. However, this approach still allows to improve performance by up to 10% while serializing large OVN databases. Signed-off-by: Ilya Maximets --- include/openvswitch/json.h | 14 ++++++++++- lib/json.c | 48 ++++++++++++++++++++++++++++---------- python/ovs/db/data.py | 6 +++-- tests/test-json.c | 6 ++++- tests/test-ovsdb.c | 9 ++++--- 5 files changed, 64 insertions(+), 19 deletions(-) diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h index 55544076084..bd8e8d23546 100644 --- a/include/openvswitch/json.h +++ b/include/openvswitch/json.h @@ -63,16 +63,28 @@ struct json_array { struct json **elems; }; +/* Maximum string length that can be stored inline ('\0' is not included). */ +#define JSON_STRING_INLINE_LEN (sizeof(struct json_array) - 1) + +enum json_storage_type { + JSON_STRING_DYNAMIC = 0, /* JSON_STRING is stored via 'str_ptr'. */ + JSON_STRING_INLINE, /* JSON_STRING is stored in 'str' array. */ +}; + /* A JSON value. */ struct json { enum json_type type; + enum json_storage_type storage_type; size_t count; union { struct shash *object; /* Contains "struct json *"s. */ struct json_array array; long long int integer; double real; - char *string; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */ + union { + char str[JSON_STRING_INLINE_LEN + 1]; + char *str_ptr; + }; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */ }; }; diff --git a/lib/json.c b/lib/json.c index ce6d4284b43..dc6f6f87f07 100644 --- a/lib/json.c +++ b/lib/json.c @@ -179,21 +179,33 @@ struct json * json_string_create_nocopy(char *s) { struct json *json = json_create(JSON_STRING); - json->string = s; + json->storage_type = JSON_STRING_DYNAMIC; + json->str_ptr = s; return json; } struct json * json_string_create(const char *s) { - return json_string_create_nocopy(xstrdup(s)); + struct json *json = json_create(JSON_STRING); + size_t length = strlen(s); + + if (length <= JSON_STRING_INLINE_LEN) { + json->storage_type = JSON_STRING_INLINE; + memcpy(json->str, s, length); + json->str[length] = '\0'; + } else { + json->storage_type = JSON_STRING_DYNAMIC; + json->str_ptr = xmemdup0(s, length); + } + return json; } struct json * json_serialized_object_create(const struct json *src) { struct json *json = json_create(JSON_SERIALIZED_OBJECT); - json->string = json_to_string(src, JSSF_SORT); + json->str_ptr = json_to_string(src, JSSF_SORT); return json; } @@ -201,7 +213,7 @@ struct json * json_serialized_object_create_with_yield(const struct json *src) { struct json *json = json_create(JSON_SERIALIZED_OBJECT); - json->string = json_to_string(src, JSSF_SORT | JSSF_YIELD); + json->str_ptr = json_to_string(src, JSSF_SORT | JSSF_YIELD); return json; } @@ -346,14 +358,16 @@ const char * json_string(const struct json *json) { ovs_assert(json->type == JSON_STRING); - return json->string; + return json->storage_type == JSON_STRING_DYNAMIC + ? json->str_ptr : json->str; } const char * json_serialized_object(const struct json *json) { ovs_assert(json->type == JSON_SERIALIZED_OBJECT); - return json->string; + ovs_assert(json->storage_type == JSON_STRING_DYNAMIC); + return json->str_ptr; } struct json_array * @@ -408,8 +422,13 @@ json_destroy__(struct json *json, bool yield) break; case JSON_STRING: + if (json->storage_type == JSON_STRING_DYNAMIC) { + free(json->str_ptr); + } + break; + case JSON_SERIALIZED_OBJECT: - free(json->string); + free(json->str_ptr); break; case JSON_NULL: @@ -577,9 +596,11 @@ json_hash(const struct json *json, size_t basis) return json_hash_array(&json->array, basis); case JSON_STRING: - case JSON_SERIALIZED_OBJECT: return hash_string(json_string(json), basis); + case JSON_SERIALIZED_OBJECT: + return hash_string(json->str_ptr, basis); + case JSON_NULL: case JSON_FALSE: case JSON_TRUE: @@ -653,9 +674,11 @@ json_equal(const struct json *a, const struct json *b) return json_equal_array(&a->array, &b->array); case JSON_STRING: - case JSON_SERIALIZED_OBJECT: return !strcmp(json_string(a), json_string(b)); + case JSON_SERIALIZED_OBJECT: + return !strcmp(a->str_ptr, b->str_ptr); + case JSON_NULL: case JSON_FALSE: case JSON_TRUE: @@ -989,7 +1012,8 @@ json_string_escape(const char *in, struct ds *out) { struct json json = { .type = JSON_STRING, - .string = CONST_CAST(char *, in), + .storage_type = JSON_STRING_DYNAMIC, + .str_ptr = CONST_CAST(char *, in), }; json_to_ds(&json, 0, out); } @@ -1053,7 +1077,7 @@ struct json * json_from_serialized_object(const struct json *json) { ovs_assert(json->type == JSON_SERIALIZED_OBJECT); - return json_from_string(json_string(json)); + return json_from_string(json->str_ptr); } /* Reads the file named 'file_name', parses its contents as a JSON object or @@ -1649,7 +1673,7 @@ json_serialize(const struct json *json, struct json_serializer *s) break; case JSON_SERIALIZED_OBJECT: - ds_put_cstr(ds, json_string(json)); + ds_put_cstr(ds, json->str_ptr); break; case JSON_N_TYPES: diff --git a/python/ovs/db/data.py b/python/ovs/db/data.py index 3e9c5049f1a..30a34a0989d 100644 --- a/python/ovs/db/data.py +++ b/python/ovs/db/data.py @@ -573,7 +573,8 @@ def cDeclareDatum(self, name): % (name, n)] for key in sorted(self.values): s += [' { .type = JSON_STRING, ' - '.string = "%s", .count = 2 },' + '.storage_type = JSON_STRING_DYNAMIC, ' + '.str_ptr = "%s", .count = 2 },' % escapeCString(key.value)] s += ["};"] s += ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)] @@ -592,7 +593,8 @@ def cDeclareDatum(self, name): % (name, n)] for k, v in sorted(self.values): s += [' { .type = JSON_STRING, ' - '.string = "%s", .count = 2 },' + '.storage_type = JSON_STRING_DYNAMIC, ' + '.str_ptr = "%s", .count = 2 },' % escapeCString(v.value)] s += ["};"] s += ["static union ovsdb_atom %s_values[%d] = {" % (name, n)] diff --git a/tests/test-json.c b/tests/test-json.c index 2ba92d1191f..e7992e51061 100644 --- a/tests/test-json.c +++ b/tests/test-json.c @@ -100,11 +100,15 @@ test_json_equal(const struct json *a, const struct json *b, return; case JSON_STRING: - case JSON_SERIALIZED_OBJECT: ovs_assert(json_string(a) != json_string(b)); ovs_assert(!strcmp(json_string(a), json_string(b))); return; + case JSON_SERIALIZED_OBJECT: + ovs_assert(a->str_ptr != b->str_ptr); + ovs_assert(!strcmp(a->str_ptr, b->str_ptr)); + return; + case JSON_NULL: case JSON_FALSE: case JSON_TRUE: diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index adfb9597ece..3409386796b 100644 --- a/tests/test-ovsdb.c +++ b/tests/test-ovsdb.c @@ -2412,10 +2412,13 @@ substitute_uuids(struct json *json, const struct ovsdb_symbol_table *symtab) if (json->type == JSON_STRING) { const struct ovsdb_symbol *symbol; - symbol = ovsdb_symbol_table_get(symtab, json->string); + symbol = ovsdb_symbol_table_get(symtab, json_string(json)); if (symbol) { - free(json->string); - json->string = xasprintf(UUID_FMT, UUID_ARGS(&symbol->uuid)); + if (json->storage_type == JSON_STRING_DYNAMIC) { + free(json->str_ptr); + } + json->storage_type = JSON_STRING_DYNAMIC; + json->str_ptr = xasprintf(UUID_FMT, UUID_ARGS(&symbol->uuid)); } } else if (json->type == JSON_ARRAY) { size_t i;