Skip to content

Commit

Permalink
Merge branch 'main' into metal
Browse files Browse the repository at this point in the history
  • Loading branch information
SamoZ256 authored Jan 17, 2025
2 parents 9934213 + 4ac6515 commit 800aae4
Show file tree
Hide file tree
Showing 34 changed files with 938 additions and 954 deletions.
5 changes: 4 additions & 1 deletion BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ At Step 3 in [Build Cemu using cmake and clang](#build-cemu-using-cmake-and-clan
`cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/bin/clang-15 -DCMAKE_CXX_COMPILER=/usr/bin/clang++-15 -G Ninja -DCMAKE_MAKE_PROGRAM=/usr/bin/ninja`

#### For Fedora and derivatives:
`sudo dnf install bluez-libs clang cmake cubeb-devel freeglut-devel git glm-devel gtk3-devel kernel-headers libgcrypt-devel libsecret-devel libtool libusb1-devel llvm nasm ninja-build perl-core systemd-devel zlib-devel zlib-static`
`sudo dnf install bluez-libs-devel clang cmake cubeb-devel freeglut-devel git glm-devel gtk3-devel kernel-headers libgcrypt-devel libsecret-devel libtool libusb1-devel llvm nasm ninja-build perl-core systemd-devel wayland-protocols-devel zlib-devel zlib-static`

### Build Cemu

Expand Down Expand Up @@ -120,6 +120,9 @@ This section refers to running `cmake -S...` (truncated).
* Compiling failed during rebuild after `git pull` with an error that mentions RPATH
* Add the following and try running the command again:
* `-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON`
* Environment variable `VCPKG_FORCE_SYSTEM_BINARIES` must be set.
* Execute the folowing and then try running the command again:
* `export VCPKG_FORCE_SYSTEM_BINARIES=1`
* If you are getting a random error, read the [package-name-and-platform]-out.log and [package-name-and-platform]-err.log for the actual reason to see if you might be lacking the headers from a dependency.


Expand Down
17 changes: 0 additions & 17 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,23 +134,6 @@ if (WIN32)
endif()
option(ENABLE_CUBEB "Enabled cubeb backend" ON)

# usb hid backends
if (WIN32)
option(ENABLE_NSYSHID_WINDOWS_HID "Enables the native Windows HID backend for nsyshid" ON)
endif ()
# libusb and windows hid backends shouldn't be active at the same time; otherwise we'd see all devices twice!
if (NOT ENABLE_NSYSHID_WINDOWS_HID)
option(ENABLE_NSYSHID_LIBUSB "Enables the libusb backend for nsyshid" ON)
else ()
set(ENABLE_NSYSHID_LIBUSB OFF CACHE BOOL "" FORCE)
endif ()
if (ENABLE_NSYSHID_WINDOWS_HID)
add_compile_definitions(NSYSHID_ENABLE_BACKEND_WINDOWS_HID)
endif ()
if (ENABLE_NSYSHID_LIBUSB)
add_compile_definitions(NSYSHID_ENABLE_BACKEND_LIBUSB)
endif ()

option(ENABLE_WXWIDGETS "Build with wxWidgets UI (Currently required)" ON)

set(THREADS_PREFER_PTHREAD_FLAG true)
Expand Down
21 changes: 10 additions & 11 deletions src/Cafe/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,6 @@ add_library(CemuCafe
OS/libs/nsyshid/BackendEmulated.h
OS/libs/nsyshid/BackendLibusb.cpp
OS/libs/nsyshid/BackendLibusb.h
OS/libs/nsyshid/BackendWindowsHID.cpp
OS/libs/nsyshid/BackendWindowsHID.h
OS/libs/nsyshid/Dimensions.cpp
OS/libs/nsyshid/Dimensions.h
OS/libs/nsyshid/Infinity.cpp
Expand Down Expand Up @@ -631,15 +629,16 @@ if (ENABLE_WAYLAND)
target_link_libraries(CemuCafe PUBLIC Wayland::Client)
endif()

if (ENABLE_NSYSHID_LIBUSB)
if (ENABLE_VCPKG)
find_package(PkgConfig REQUIRED)
pkg_check_modules(libusb REQUIRED IMPORTED_TARGET libusb-1.0)
target_link_libraries(CemuCafe PRIVATE PkgConfig::libusb)
else ()
find_package(libusb MODULE REQUIRED)
target_link_libraries(CemuCafe PRIVATE libusb::libusb)
endif ()
if (ENABLE_VCPKG)
if(WIN32)
set(PKG_CONFIG_EXECUTABLE "${VCPKG_INSTALLED_DIR}/x64-windows/tools/pkgconf/pkgconf.exe")
endif()
find_package(PkgConfig REQUIRED)
pkg_check_modules(libusb REQUIRED IMPORTED_TARGET libusb-1.0)
target_link_libraries(CemuCafe PRIVATE PkgConfig::libusb)
else ()
find_package(libusb MODULE REQUIRED)
target_link_libraries(CemuCafe PRIVATE libusb::libusb)
endif ()

if (ENABLE_WXWIDGETS)
Expand Down
2 changes: 1 addition & 1 deletion src/Cafe/GameProfile/GameProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ bool gameProfile_loadEnumOption(IniParser& iniParser, const char* optionName, T&
}

// test enum name
if(boost::iequals(fmt::format("{}", v), *option_value))
if(boost::iequals(fmt::format("{}", fmt::underlying(v)), *option_value))
{
option = v;
return true;
Expand Down
15 changes: 15 additions & 0 deletions src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,14 @@ class DrawPassContext

void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx);

// called whenever the GPU runs out of commands or hits a wait condition (semaphores, HLE waits)
void LatteCP_signalEnterWait()
{
// based on the assumption that games won't do a rugpull and swap out buffer data in the middle of an uninterrupted sequence of drawcalls,
// we only flush caches when the GPU goes idle or has to wait for any operation
LatteIndices_invalidateAll();
}

/*
* Read a U32 from the command buffer
* If no data is available then wait in a busy loop
Expand Down Expand Up @@ -466,6 +474,8 @@ LatteCMDPtr LatteCP_itWaitRegMem(LatteCMDPtr cmd, uint32 nWords)
const uint32 GPU7_WAIT_MEM_OP_GREATER = 6;
const uint32 GPU7_WAIT_MEM_OP_NEVER = 7;

LatteCP_signalEnterWait();

bool stalls = false;
if ((word0 & 0x10) != 0)
{
Expand Down Expand Up @@ -594,6 +604,7 @@ LatteCMDPtr LatteCP_itMemSemaphore(LatteCMDPtr cmd, uint32 nWords)
else if(SEM_SIGNAL == 7)
{
// wait
LatteCP_signalEnterWait();
size_t loopCount = 0;
while (true)
{
Expand Down Expand Up @@ -1305,11 +1316,13 @@ void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx)
}
case IT_HLE_TRIGGER_SCANBUFFER_SWAP:
{
LatteCP_signalEnterWait();
LatteCP_itHLESwapScanBuffer(cmdData, nWords);
break;
}
case IT_HLE_WAIT_FOR_FLIP:
{
LatteCP_signalEnterWait();
LatteCP_itHLEWaitForFlip(cmdData, nWords);
break;
}
Expand Down Expand Up @@ -1594,12 +1607,14 @@ void LatteCP_ProcessRingbuffer()
}
case IT_HLE_TRIGGER_SCANBUFFER_SWAP:
{
LatteCP_signalEnterWait();
LatteCP_itHLESwapScanBuffer(cmd, nWords);
timerRecheck += CP_TIMER_RECHECK / 64;
break;
}
case IT_HLE_WAIT_FOR_FLIP:
{
LatteCP_signalEnterWait();
LatteCP_itHLEWaitForFlip(cmd, nWords);
timerRecheck += CP_TIMER_RECHECK / 1;
break;
Expand Down
114 changes: 74 additions & 40 deletions src/Cafe/HW/Latte/Core/LatteIndices.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
#include "Common/cpu_features.h"

#if defined(ARCH_X86_64) && defined(__GNUC__)
Expand All @@ -9,32 +10,53 @@

struct
{
const void* lastPtr;
uint32 lastCount;
LattePrimitiveMode lastPrimitiveMode;
LatteIndexType lastIndexType;
// output
uint32 indexMin;
uint32 indexMax;
Renderer::INDEX_TYPE renderIndexType;
uint32 outputCount;
uint32 indexBufferOffset;
uint32 indexBufferIndex;
struct CacheEntry
{
// input data
const void* lastPtr;
uint32 lastCount;
LattePrimitiveMode lastPrimitiveMode;
LatteIndexType lastIndexType;
uint64 lastUsed;
// output
uint32 indexMin;
uint32 indexMax;
Renderer::INDEX_TYPE renderIndexType;
uint32 outputCount;
Renderer::IndexAllocation indexAllocation;
};
std::array<CacheEntry, 8> entry;
uint64 currentUsageCounter{0};
}LatteIndexCache{};

void LatteIndices_invalidate(const void* memPtr, uint32 size)
{
if (LatteIndexCache.lastPtr >= memPtr && (LatteIndexCache.lastPtr < ((uint8*)memPtr + size)) )
for(auto& entry : LatteIndexCache.entry)
{
LatteIndexCache.lastPtr = nullptr;
LatteIndexCache.lastCount = 0;
if (entry.lastPtr >= memPtr && (entry.lastPtr < ((uint8*)memPtr + size)) )
{
if(entry.lastPtr != nullptr)
g_renderer->indexData_releaseIndexMemory(entry.indexAllocation);
entry.lastPtr = nullptr;
entry.lastCount = 0;
}
}
}

void LatteIndices_invalidateAll()
{
LatteIndexCache.lastPtr = nullptr;
LatteIndexCache.lastCount = 0;
for(auto& entry : LatteIndexCache.entry)
{
if (entry.lastPtr != nullptr)
g_renderer->indexData_releaseIndexMemory(entry.indexAllocation);
entry.lastPtr = nullptr;
entry.lastCount = 0;
}
}

uint64 LatteIndices_GetNextUsageIndex()
{
return LatteIndexCache.currentUsageCounter++;
}

uint32 LatteIndices_calculateIndexOutputSize(LattePrimitiveMode primitiveMode, LatteIndexType indexType, uint32 count)
Expand Down Expand Up @@ -585,7 +607,7 @@ void LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, LatteIn
}
}

void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, uint32& indexBufferOffset, uint32& indexBufferIndex)
void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, Renderer::IndexAllocation& indexAllocation)
{
// what this should do:
// [x] use fast SIMD-based index decoding
Expand All @@ -595,17 +617,18 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
// [ ] better cache implementation, allow to cache across frames

// reuse from cache if data didn't change
if (LatteIndexCache.lastPtr == indexData &&
LatteIndexCache.lastCount == count &&
LatteIndexCache.lastPrimitiveMode == primitiveMode &&
LatteIndexCache.lastIndexType == indexType)
{
indexMin = LatteIndexCache.indexMin;
indexMax = LatteIndexCache.indexMax;
renderIndexType = LatteIndexCache.renderIndexType;
outputCount = LatteIndexCache.outputCount;
indexBufferOffset = LatteIndexCache.indexBufferOffset;
indexBufferIndex = LatteIndexCache.indexBufferIndex;
auto cacheEntry = std::find_if(LatteIndexCache.entry.begin(), LatteIndexCache.entry.end(), [indexData, count, primitiveMode, indexType](const auto& entry)
{
return entry.lastPtr == indexData && entry.lastCount == count && entry.lastPrimitiveMode == primitiveMode && entry.lastIndexType == indexType;
});
if (cacheEntry != LatteIndexCache.entry.end())
{
indexMin = cacheEntry->indexMin;
indexMax = cacheEntry->indexMax;
renderIndexType = cacheEntry->renderIndexType;
outputCount = cacheEntry->outputCount;
indexAllocation = cacheEntry->indexAllocation;
cacheEntry->lastUsed = LatteIndices_GetNextUsageIndex();
return;
}

Expand All @@ -629,10 +652,12 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
indexMin = 0;
indexMax = std::max(count, 1u)-1;
renderIndexType = Renderer::INDEX_TYPE::NONE;
indexAllocation = {};
return; // no indices
}
// query index buffer from renderer
void* indexOutputPtr = g_renderer->indexData_reserveIndexMemory(indexOutputSize, indexBufferOffset, indexBufferIndex);
indexAllocation = g_renderer->indexData_reserveIndexMemory(indexOutputSize);
void* indexOutputPtr = indexAllocation.mem;

// decode indices
indexMin = std::numeric_limits<uint32>::max();
Expand Down Expand Up @@ -780,16 +805,25 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
// recalculate index range but filter out primitive restart index
LatteIndices_alternativeCalculateIndexMinMax(indexData, indexType, count, indexMin, indexMax);
}
g_renderer->indexData_uploadIndexMemory(indexBufferIndex, indexBufferOffset, indexOutputSize);
g_renderer->indexData_uploadIndexMemory(indexAllocation);
performanceMonitor.cycle[performanceMonitor.cycleIndex].indexDataUploaded += indexOutputSize;
// get least recently used cache entry
auto lruEntry = std::min_element(LatteIndexCache.entry.begin(), LatteIndexCache.entry.end(), [](const auto& a, const auto& b)
{
return a.lastUsed < b.lastUsed;
});
// invalidate previous allocation
if(lruEntry->lastPtr != nullptr)
g_renderer->indexData_releaseIndexMemory(lruEntry->indexAllocation);
// update cache
LatteIndexCache.lastPtr = indexData;
LatteIndexCache.lastCount = count;
LatteIndexCache.lastPrimitiveMode = primitiveMode;
LatteIndexCache.lastIndexType = indexType;
LatteIndexCache.indexMin = indexMin;
LatteIndexCache.indexMax = indexMax;
LatteIndexCache.renderIndexType = renderIndexType;
LatteIndexCache.outputCount = outputCount;
LatteIndexCache.indexBufferOffset = indexBufferOffset;
LatteIndexCache.indexBufferIndex = indexBufferIndex;
lruEntry->lastPtr = indexData;
lruEntry->lastCount = count;
lruEntry->lastPrimitiveMode = primitiveMode;
lruEntry->lastIndexType = indexType;
lruEntry->indexMin = indexMin;
lruEntry->indexMax = indexMax;
lruEntry->renderIndexType = renderIndexType;
lruEntry->outputCount = outputCount;
lruEntry->indexAllocation = indexAllocation;
lruEntry->lastUsed = LatteIndices_GetNextUsageIndex();
}
2 changes: 1 addition & 1 deletion src/Cafe/HW/Latte/Core/LatteIndices.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

void LatteIndices_invalidate(const void* memPtr, uint32 size);
void LatteIndices_invalidateAll();
void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, uint32& indexBufferOffset, uint32& indexBufferIndex);
void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, Renderer::IndexAllocation& indexAllocation);
6 changes: 6 additions & 0 deletions src/Cafe/HW/Latte/Core/LatteOverlay.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,13 @@ void LatteOverlay_renderOverlay(ImVec2& position, ImVec2& pivot, sint32 directio
ImGui::Text("VRAM: %dMB / %dMB", g_state.vramUsage, g_state.vramTotal);

if (config.overlay.debug)
{
// general debug info
ImGui::Text("--- Debug info ---");
ImGui::Text("IndexUploadPerFrame: %dKB", (performanceMonitor.stats.indexDataUploadPerFrame+1023)/1024);
// backend specific info
g_renderer->AppendOverlayDebugInfo();
}

position.y += (ImGui::GetWindowSize().y + 10.0f) * direction;
}
Expand Down
3 changes: 1 addition & 2 deletions src/Cafe/HW/Latte/Core/LattePerformanceMonitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,14 @@ void LattePerformanceMonitor_frameEnd()
uniformBankDataUploadedPerFrame /= 1024ULL;
uint32 uniformBankCountUploadedPerFrame = (uint32)(uniformBankUploadedCount / (uint64)elapsedFrames);
uint64 indexDataUploadPerFrame = (indexDataUploaded / (uint64)elapsedFrames);
indexDataUploadPerFrame /= 1024ULL;

double fps = (double)elapsedFrames2S * 1000.0 / (double)totalElapsedTimeFPS;
uint32 shaderBindsPerFrame = shaderBindCounter / elapsedFrames;
passedCycles = passedCycles * 1000ULL / totalElapsedTime;
uint32 rlps = (uint32)((uint64)recompilerLeaveCount * 1000ULL / (uint64)totalElapsedTime);
uint32 tlps = (uint32)((uint64)threadLeaveCount * 1000ULL / (uint64)totalElapsedTime);
// set stats

performanceMonitor.stats.indexDataUploadPerFrame = indexDataUploadPerFrame;
// next counter cycle
sint32 nextCycleIndex = (performanceMonitor.cycleIndex + 1) % PERFORMANCE_MONITOR_TRACK_CYCLES;
performanceMonitor.cycle[nextCycleIndex].drawCallCounter = 0;
Expand Down
6 changes: 6 additions & 0 deletions src/Cafe/HW/Latte/Core/LattePerformanceMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ typedef struct
LattePerfStatCounter numDrawBarriersPerFrame;
LattePerfStatCounter numBeginRenderpassPerFrame;
}vk;

// calculated stats (per frame)
struct
{
uint32 indexDataUploadPerFrame;
}stats;
}performanceMonitor_t;

extern performanceMonitor_t performanceMonitor;
Expand Down
1 change: 0 additions & 1 deletion src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
#include "Cafe/GraphicPack/GraphicPack2.h"
#include "config/ActiveSettings.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
#include "gui/guiWrapper.h"
#include "Cafe/OS/libs/erreula/erreula.h"
#include "input/InputManager.h"
Expand Down
Loading

0 comments on commit 800aae4

Please sign in to comment.