-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add an arena for allocating strings.
This reduces the amount of malloc traffic significantly, speeding up parsing. For a no-op build of Chromium (Linux, Zen 2), this reduces time spent from 4.61 to 4.08 seconds. However, note that it also increases RSS from 914 to 937 MB; I haven't looked deeply into why, but it's reasonable to assume that this is related to the fact that we no longer merge small strings together (since they are now immutable). We still use some time in actually copying the string into the arena, but it seems this is cheaper than just persisting the file contents wholesale and pointing into that.
- Loading branch information
Steinar H. Gunderson
committed
Nov 25, 2024
1 parent
b40052c
commit 231553e
Showing
17 changed files
with
484 additions
and
279 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright 2024 Google Inc. All Rights Reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include "arena.h" | ||
|
||
#include <algorithm> | ||
|
||
char* Arena::AllocSlowPath(size_t num_bytes) | ||
{ | ||
size_t to_allocate = std::max(next_size_, num_bytes); | ||
|
||
blocks_.emplace_back(new char [to_allocate]); | ||
char* last_block = blocks_.back().get(); | ||
cur_ptr_ = last_block + num_bytes; | ||
cur_end_ = last_block + to_allocate; | ||
|
||
next_size_ += next_size_ / 2; | ||
|
||
return last_block; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// Copyright 2024 Google Inc. All Rights Reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#pragma once | ||
|
||
#include <stddef.h> | ||
|
||
#include <memory> | ||
#include <vector> | ||
|
||
#include "string_piece.h" | ||
|
||
// A simple bump allocator that gives very fast and tight memory allocation | ||
// for small values. It is primarily intended for StringPiece allocation, | ||
// but all values returned are 8-byte aligned, so you can allocate more | ||
// complex objects on it if you wish. | ||
// | ||
// All pointers returned by Alloc() are valid until the arena is destroyed, | ||
// at which point everything is deallocated all at once. No destructors | ||
// are run. | ||
// | ||
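// The arena allocates its first block (4 kB) on first use, and then makes each | ||
// new block 50% larger than the previous one. For requests smaller than a | ||
// block, the number of calls to malloc therefore grows only logarithmically | ||
// with the total number of bytes allocated. | ||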
|
||
struct Arena { | ||
public: | ||
char* Alloc(size_t num_bytes) { | ||
if (static_cast<size_t>(cur_end_ - cur_ptr_) >= num_bytes) { | ||
char *ret = cur_ptr_; | ||
cur_ptr_ += num_bytes; | ||
return ret; | ||
} | ||
|
||
return AllocSlowPath(num_bytes); | ||
} | ||
|
||
/// Make a new StringPiece with the same contents, that will live | ||
/// for as long as the arena does. | ||
StringPiece PersistStringPiece(StringPiece s) { | ||
char *mem = Alloc(s.len_); | ||
memcpy(mem, s.str_, s.len_); | ||
return StringPiece(mem, s.len_); | ||
} | ||
|
||
void Clear() { | ||
if (blocks_.empty()) { | ||
return; | ||
} | ||
if (blocks_.size() > 1) { | ||
blocks_.erase(blocks_.begin(), blocks_.begin() + blocks_.size() - 1); | ||
} | ||
cur_ptr_ = blocks_.back().get(); | ||
} | ||
|
||
private: | ||
char* AllocSlowPath(size_t num_bytes); | ||
|
||
std::vector<std::unique_ptr<char[]>> blocks_; | ||
char* cur_ptr_ = nullptr; | ||
char* cur_end_ = nullptr; | ||
size_t next_size_ = 4096; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// Copyright 2024 Google Inc. All Rights Reserved. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include "arena.h" | ||
|
||
#include "test.h" | ||
|
||
// Several small allocations made back to back must each get their own
// storage: after all of them have been written, every region still has to
// hold exactly what was stored in it.
TEST(ArenaTest, SimpleAlloc) {
  Arena arena;

  // Carve out all four regions first, in the same order and with the same
  // sizes as they are filled below.
  char* const a = arena.Alloc(1);
  char* const b = arena.Alloc(2);
  char* const c = arena.Alloc(8);
  char* const d = arena.Alloc(8);

  memcpy(a, "a", 1);
  memcpy(b, "bc", 2);
  memcpy(c, "defghijk", 8);
  memcpy(d, "12345678", 8);

  EXPECT_EQ("a", StringPiece(a, 1).AsString());
  EXPECT_EQ("bc", StringPiece(b, 2).AsString());
  EXPECT_EQ("defghijk", StringPiece(c, 8).AsString());
  EXPECT_EQ("12345678", StringPiece(d, 8).AsString());
}
|
||
// A request far larger than the arena's initial 4096-byte block size must
// still be served in full, and must not disturb the small allocations made
// immediately before and after it.
TEST(ArenaTest, LargeAlloc) {
  const size_t kLargeSize = 1048576;
  Arena arena;

  char* const small = arena.Alloc(1);
  memcpy(small, "a", 1);

  char* const large = arena.Alloc(kLargeSize);
  memset(large, 0x55, kLargeSize);

  char* const small2 = arena.Alloc(1);
  memcpy(small2, "b", 1);

  EXPECT_EQ("a", StringPiece(small, 1).AsString());
  EXPECT_EQ("b", StringPiece(small2, 1).AsString());

  // Every byte of the large region must still carry the fill pattern.
  for (size_t i = 0; i != kLargeSize; ++i) {
    EXPECT_EQ(0x55, large[i]);
  }
}
|
||
// PersistStringPiece() must copy the bytes into the arena: the result has to
// stay intact after the source buffer is overwritten and released.
TEST(ArenaTest, Persist) {
  Arena arena;

  char* source = strdup("some string that will go away");
  StringPiece persisted = arena.PersistStringPiece(source);

  // Scribble over the source before freeing it, so that a result which
  // merely aliased the source buffer would be detected below.
  const size_t source_len = strlen(source);
  memset(source, 0x55, source_len);
  free(source);

  EXPECT_EQ("some string that will go away", persisted.AsString());
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.