Skip to content

Commit

Permalink
Fix release mode OOM crash
Browse files Browse the repository at this point in the history
- enable LAA in release mode, not just debug
- optimize memory usage so a big world (GRM) can be loaded in release mode without LAA:
  - remove big vector copy during loadZen return
  - split batches with very high vertCount along chunk boundaries
  - new constant vertCountPerBatch tries to maintain max vert count per batch (can be higher if a single chunk's vertCount goes over vertCountPerBatch)
  • Loading branch information
Katharsas committed Sep 15, 2024
1 parent 214b779 commit f73b104
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 22 deletions.
1 change: 1 addition & 0 deletions ZenRen/ZenRen.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>ntdll.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Windows</SubSystem>
<LargeAddressAware>true</LargeAddressAware>
</Link>
<Manifest>
<EnableDpiAwareness>true</EnableDpiAwareness>
Expand Down
24 changes: 9 additions & 15 deletions ZenRen/src/assets/ZenLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ namespace assets
return statics;
}

RenderData loadZen(string& zenFilename, VDFS::FileIndex* vdf)
void loadZen(render::RenderData& out, string& zenFilename, VDFS::FileIndex* vdf)
{
const auto now = std::chrono::high_resolution_clock::now();

Expand All @@ -399,8 +399,7 @@ namespace assets

vector<InMemoryTexFile> lightmaps = loadZenLightmaps(worldMesh);

VERT_CHUNKS_BY_MAT worldMeshData;
loadWorldMesh(worldMeshData, parser.getWorldMesh());
loadWorldMesh(out.worldMesh, parser.getWorldMesh());

LOG(INFO) << "Zen parsed!";

Expand All @@ -410,7 +409,7 @@ namespace assets
bool isOutdoorLevel = world.bspTree.mode == ZenLoad::zCBspTreeData::TreeMode::Outdoor;
vector<StaticInstance> vobs;
if (loadStaticMeshes) {
vobs = loadVobs(world.rootVobs, worldMeshData, lightsStatic, isOutdoorLevel);
vobs = loadVobs(world.rootVobs, out.worldMesh, lightsStatic, isOutdoorLevel);
LOG(INFO) << "VOBs loaded!";
}
else {
Expand All @@ -421,34 +420,29 @@ namespace assets
for (auto& vob : vobs) {
auto& visualname = vob.meshName;

bool loaded = loadInstanceMesh(staticMeshData, *vdf, vob);
bool loaded = loadInstanceMesh(out.staticMeshes, *vdf, vob);
}

if (debugStaticLights) {
for (auto& light : lightsStatic) {
float scale = light.range / 10.f;
loadPointDebugVisual(staticMeshData, light.pos, { scale, scale, scale });
loadPointDebugVisual(out.staticMeshes, light.pos, { scale, scale, scale });
}
}
if (debugStaticLightRays) {
for (auto& ray : debugLightToVobRays) {
loadLineDebugVisual(staticMeshData, ray.posStart, ray.posEnd, ray.color);
loadLineDebugVisual(out.staticMeshes, ray.posStart, ray.posEnd, ray.color);
}
}

VERTEX_DATA_BY_MAT dynamicMeshData;
//VERTEX_DATA_BY_MAT dynamicMeshData;

LOG(INFO) << "Meshes loaded!";

const auto duration = std::chrono::high_resolution_clock::now() - now;
LOG(INFO) << "Loading finished in: " << duration / std::chrono::milliseconds(1) << " ms.";

return RenderData {
.isOutdoorLevel = isOutdoorLevel,
.worldMesh = worldMeshData,
.staticMeshes = staticMeshData,
.dynamicMeshes = dynamicMeshData,
.worldMeshLightmaps = lightmaps
};
out.isOutdoorLevel = isOutdoorLevel;
out.worldMeshLightmaps = lightmaps;
}
}
2 changes: 1 addition & 1 deletion ZenRen/src/assets/ZenLoader.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@

namespace assets
{
render::RenderData loadZen(std::string& zenFilename, ZenLib::VDFS::FileIndex* vdf);
void loadZen(render::RenderData& out, std::string& zenFilename, ZenLib::VDFS::FileIndex* vdf);
}

2 changes: 1 addition & 1 deletion ZenRen/src/render/Renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace render
};

struct RenderData {
bool isOutdoorLevel;
bool isOutdoorLevel = false;
VERT_CHUNKS_BY_MAT worldMesh;
VERT_CHUNKS_BY_MAT staticMeshes;
VERTEX_DATA_BY_MAT dynamicMeshes;
Expand Down
52 changes: 47 additions & 5 deletions ZenRen/src/render/pass/PassWorldLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ namespace render::pass::world
};

const TEX_INDEX texturesPerBatch = 512;
// Target maximum number of vertices per render batch; passed to splitByVertCount as maxVertCount.
// This is a soft limit: batches are only split along chunk boundaries, so a single chunk that
// has more vertices than this value still ends up in one (oversized) batch.
const uint32_t vertCountPerBatch = (20 * 1024 * 1024) / sizeof(VERTEX_OTHER);// 20 MB divided by biggest buffer element size

World world;

Expand Down Expand Up @@ -164,7 +165,7 @@ namespace render::pass::world
target.push_back(batch);
}

vector<pair<TexInfo, vector<pair<Material, const VERT_CHUNKS*>>>> groupByTexId(D3d d3d, const VERT_CHUNKS_BY_MAT& meshData, TEX_INDEX maxTexturesPerBatch)
vector<pair<TexInfo, vector<pair<Material, const VERT_CHUNKS *>>>> groupByTexId(D3d d3d, const VERT_CHUNKS_BY_MAT& meshData, TEX_INDEX maxTexturesPerBatch)
{
// load and bucket all materials so textures that are texture-array-compatible are grouped in a single bucket
unordered_map<TexInfo, vector<Material>> texBuckets;
Expand Down Expand Up @@ -245,6 +246,38 @@ namespace render::pass::world
return result;
}

// Splits the per-chunk data of one batch into several smaller batches, cutting only at chunk boundaries.
//
// Chunks are appended to the current batch in input order. A new batch is started whenever adding the
// next chunk would push the current batch's vertex count above maxVertCount. A chunk's vertex count is
// the sum of vecPos.size() over all of its materials.
//
// batchData:    per-chunk vertex data (grouped by material) of the batch to split
// maxVertCount: soft limit for the number of vertices per resulting batch
// returns:      the resulting batches in input order, each paired with its total vertex count;
//               a trailing batch whose total vertex count is zero is not emitted
vector<pair<uint32_t, vector<pair<ChunkIndex, vector<pair<Material, VEC_VERTEX_DATA>>>>>> splitByVertCount(
    const vector<pair<ChunkIndex, vector<pair<Material, VEC_VERTEX_DATA>>>>& batchData, uint32_t maxVertCount)
{
    vector<pair<uint32_t, vector<pair<ChunkIndex, vector<pair<Material, VEC_VERTEX_DATA>>>>>> result;

    vector<pair<ChunkIndex, vector<pair<Material, VEC_VERTEX_DATA>>>> currentBatch;
    uint32_t currentBatchVertCount = 0;

    for (const auto& [chunkIndex, vertDataByMat] : batchData) {

        uint32_t chunkVertCount = 0;
        for (const auto& [material, vertData] : vertDataByMat) {
            chunkVertCount += vertData.vecPos.size();
        }

        // we never split a single chunk, so if the first chunk of a batch has more than maxVertCount verts we accept that
        if (currentBatchVertCount != 0 && (currentBatchVertCount + chunkVertCount) > maxVertCount) {
            // move instead of copy: the finished batch holds a lot of vertex data and is discarded here anyway,
            // copying it would temporarily double its memory footprint
            result.emplace_back(currentBatchVertCount, std::move(currentBatch));
            currentBatch.clear();// bring the moved-from vector back into a well-defined empty state
            currentBatchVertCount = 0;
        }
        currentBatch.emplace_back(chunkIndex, vertDataByMat);
        currentBatchVertCount += chunkVertCount;
    }

    // flush the last, partially filled batch (it is not used again, so it can be moved as well)
    if (currentBatchVertCount != 0) {
        result.emplace_back(currentBatchVertCount, std::move(currentBatch));
    }

    return result;
}

pair<VEC_VERTEX_DATA_BATCH, LoadResult> flattenIntoBatch(const vector<pair<ChunkIndex, vector<pair<Material, VEC_VERTEX_DATA>>>>& batchData)
{
LoadResult result;
Expand Down Expand Up @@ -302,11 +335,20 @@ namespace render::pass::world
for (const auto& [texInfo, batchData] : batchedMeshData) {

vector<pair<ChunkIndex, vector<pair<Material, VEC_VERTEX_DATA>>>> batchDataByChunk = groupAndSortByChunkIndex(batchData);

// split current batch into multiple smaller batches along chunk boundaries if it contains too many verts to prevent OOM crashes
vector<pair<uint32_t, vector<pair<ChunkIndex, vector<pair<Material, VEC_VERTEX_DATA>>>>>> batchDataSplit =
splitByVertCount(batchDataByChunk, vertCountPerBatch);

const auto [batchDataFlat, batchLoadResult] = flattenIntoBatch(batchDataByChunk);
result += batchLoadResult;
batchDataByChunk.clear();// lots of memory that are no longer needed

loadRenderBatch(d3d, target, texInfo, batchDataFlat);
for (const auto& [vertCount, batchData] : batchDataSplit) {
const auto [batchDataFlat, batchLoadResult] = flattenIntoBatch(batchData);

assert(vertCount == batchLoadResult.verts);
result += batchLoadResult;
loadRenderBatch(d3d, target, texInfo, batchDataFlat);
}
}

return result;
Expand Down Expand Up @@ -400,7 +442,7 @@ namespace render::pass::world
}
else if (optionalVdfIndex.has_value()) {
if (::util::endsWith(level, ".zen")) {
data = assets::loadZen(level, optionalVdfIndex.value());
assets::loadZen(data, level, optionalVdfIndex.value());
levelDataFound = true;
}
else {
Expand Down

0 comments on commit f73b104

Please sign in to comment.