diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp
index 44d11a3b7..05d169b30 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp
@@ -9,7 +9,7 @@ MetalBufferChunkedHeap::~MetalBufferChunkedHeap()
 uint32 MetalBufferChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize)
 {
 	size_t allocationSize = std::max<size_t>(m_minimumBufferAllocationSize, minimumAllocationSize);
-	MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(allocationSize, MTL::ResourceStorageModeShared);
+	MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options);
 	cemu_assert_debug(buffer);
 	cemu_assert_debug(m_chunkBuffers.size() == chunkIndex);
 	m_chunkBuffers.emplace_back(buffer);
@@ -36,7 +36,7 @@ void MetalSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeR
 	AllocatorBuffer_t newBuffer{};
 	newBuffer.writeIndex = 0;
 	newBuffer.basePtr = nullptr;
-	newBuffer.mtlBuffer = m_mtlr->GetDevice()->newBuffer(bufferAllocSize, MTL::ResourceStorageModeShared);
+	newBuffer.mtlBuffer = m_mtlr->GetDevice()->newBuffer(bufferAllocSize, m_options);
 	newBuffer.basePtr = (uint8*)newBuffer.mtlBuffer->contents();
 	newBuffer.size = bufferAllocSize;
 	newBuffer.index = (uint32)m_buffers.size();
@@ -105,16 +105,10 @@ MetalSynchronizedRingAllocator::AllocatorReservation_t MetalSynchronizedRingAllo
 
 void MetalSynchronizedRingAllocator::FlushReservation(AllocatorReservation_t& uploadReservation)
 {
-	/*
-	cemu_assert_debug(m_bufferType == VKR_BUFFER_TYPE::STAGING); // only the staging buffer isn't coherent
-	// todo - use nonCoherentAtomSize for flush size (instead of hardcoded constant)
-	VkMappedMemoryRange flushedRange{};
-	flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
-	flushedRange.memory = uploadReservation.vkMem;
-	flushedRange.offset = uploadReservation.bufferOffset;
-	flushedRange.size = uploadReservation.size;
-	vkFlushMappedMemoryRanges(m_vkr->GetLogicalDevice(), 1, &flushedRange);
-	*/
+	if (RequiresFlush())
+	{
+		uploadReservation.mtlBuffer->didModifyRange(NS::Range(uploadReservation.bufferOffset, uploadReservation.size));
+	}
 }
 
 void MetalSynchronizedRingAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
@@ -172,9 +166,6 @@ void MetalSynchronizedRingAllocator::GetStats(uint32& numBuffers, size_t& totalB
 
 /* MetalSynchronizedHeapAllocator */
 
-MetalSynchronizedHeapAllocator::MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, size_t minimumBufferAllocSize)
-	: m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, minimumBufferAllocSize) {};
-
 MetalSynchronizedHeapAllocator::AllocatorReservation* MetalSynchronizedHeapAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
 {
 	CHAddr addr = m_chunkedHeap.alloc(size, alignment);
@@ -202,17 +193,10 @@ void MetalSynchronizedHeapAllocator::FreeReservation(AllocatorReservation* uploa
 
 void MetalSynchronizedHeapAllocator::FlushReservation(AllocatorReservation* uploadReservation)
 {
-	/*
-	if (m_chunkedHeap.RequiresFlush(uploadReservation->bufferIndex))
+	if (m_chunkedHeap.RequiresFlush())
 	{
-		VkMappedMemoryRange flushedRange{};
-		flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
-		flushedRange.memory = uploadReservation->vkMem;
-		flushedRange.offset = uploadReservation->bufferOffset;
-		flushedRange.size = uploadReservation->size;
-		vkFlushMappedMemoryRanges(VulkanRenderer::GetInstance()->GetLogicalDevice(), 1, &flushedRange);
+		uploadReservation->mtlBuffer->didModifyRange(NS::Range(uploadReservation->bufferOffset, uploadReservation->size));
 	}
-	*/
 }
 
 void MetalSynchronizedHeapAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
index 1db06527b..2a62de19c 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
@@ -1,15 +1,27 @@
 #pragma once
 
 #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
+#include "Metal/MTLResource.hpp"
 #include "util/ChunkedHeap/ChunkedHeap.h"
 #include "util/helpers/MemoryPool.h"
 
 #include
 
+// Adds the write-combined CPU cache mode to buffer options whose storage mode is shared or managed.
+// MTL::ResourceStorageModeShared is 0, so the storage mode is compared after masking instead of being bit-tested.
+inline MTL::ResourceOptions GetResourceOptions(MTL::ResourceOptions options)
+{
+	const MTL::ResourceOptions storageMode = options & MTL::ResourceStorageModeMask;
+	if (storageMode == MTL::ResourceStorageModeShared || storageMode == MTL::ResourceStorageModeManaged)
+		options |= MTL::ResourceCPUCacheModeWriteCombined;
+
+	return options;
+}
+
 class MetalBufferChunkedHeap : private ChunkedHeap<>
 {
 public:
-	MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { };
+	MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { };
 	~MetalBufferChunkedHeap();
 
 	using ChunkedHeap::alloc;
@@ -30,6 +42,12 @@ class MetalBufferChunkedHeap : private ChunkedHeap<>
 		return m_chunkBuffers[index];
 	}
 
+	// True when the chunk buffers use managed storage; FlushReservation then reports CPU writes via didModifyRange
+	bool RequiresFlush() const
+	{
+		return (m_options & MTL::ResourceStorageModeMask) == MTL::ResourceStorageModeManaged;
+	}
+
 	void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
 	{
 		numBuffers = m_chunkBuffers.size();
@@ -42,15 +60,17 @@ class MetalBufferChunkedHeap : private ChunkedHeap<>
 
 	const class MetalRenderer* m_mtlr;
 
-	std::vector<MTL::Buffer*> m_chunkBuffers;
+	MTL::ResourceOptions m_options;
 	size_t m_minimumBufferAllocationSize;
+
+	std::vector<MTL::Buffer*> m_chunkBuffers;
 };
 
 // a circular ring-buffer which tracks and releases memory per command-buffer
 class MetalSynchronizedRingAllocator
 {
 public:
-	MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
+	MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
 	MetalSynchronizedRingAllocator(const MetalSynchronizedRingAllocator&) = delete; // disallow copy
 
 	struct BufferSyncPoint_t
@@ -88,6 +108,12 @@ class MetalSynchronizedRingAllocator
 	void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
 	MTL::Buffer* GetBufferByIndex(uint32 index) const;
 
+	// True when the ring buffers use managed storage; FlushReservation then reports CPU writes via didModifyRange
+	bool RequiresFlush() const
+	{
+		return (m_options & MTL::ResourceStorageModeMask) == MTL::ResourceStorageModeManaged;
+	}
+
 	void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
 
 private:
@@ -95,6 +121,8 @@ class MetalSynchronizedRingAllocator
 	void addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset);
 
 	const class MetalRenderer* m_mtlr;
+
+	MTL::ResourceOptions m_options;
 	const uint32 m_minimumBufferAllocSize;
 
 	std::vector<AllocatorBuffer_t> m_buffers;
@@ -110,7 +138,7 @@ class MetalSynchronizedHeapAllocator
 	};
 
 public:
-	MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, size_t minimumBufferAllocSize);
+	MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, options, minimumBufferAllocSize) {}
 	MetalSynchronizedHeapAllocator(const MetalSynchronizedHeapAllocator&) = delete; // disallow copy
 
 	struct AllocatorReservation
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
index 45a06139e..7b1dd53fc 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
@@ -77,6 +77,7 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
 
 	auto allocation = m_stagingAllocator.AllocateBufferMemory(size, 1);
 	memcpy(allocation.memPtr, data, size);
+	m_stagingAllocator.FlushReservation(allocation);
 
 	blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size);
 
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
index 4f0403374..4e55fa6f5 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
@@ -7,7 +7,7 @@
 class MetalMemoryManager
 {
 public:
-	MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr/*, m_mtlr->GetOptimalBufferStorageMode()*/, 32u * 1024 * 1024), m_indexAllocator(m_mtlr/*, m_mtlr->GetOptimalBufferStorageMode()*/, 4u * 1024 * 1024) {}
+	MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 32u * 1024 * 1024), m_indexAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 4u * 1024 * 1024) {}
 	~MetalMemoryManager();
 
 	MetalSynchronizedRingAllocator& GetStagingAllocator()
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 45bc967cb..61e5c94a7 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -702,6 +702,8 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s
 	// Allocate a temporary buffer
 	auto& bufferAllocator = m_memoryManager->GetStagingAllocator();
 	auto allocation = bufferAllocator.AllocateBufferMemory(compressedImageSize, 1);
 
 	// Copy the data to the temporary buffer
 	memcpy(allocation.memPtr, pixelData, compressedImageSize);
+	// Flush only after the copy, so the range is reported as modified once the data has been written
+	bufferAllocator.FlushReservation(allocation);
@@ -2092,12 +2094,11 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
 
 		size_t size = shader->uniform.uniformRangeSize;
 		auto& bufferAllocator = m_memoryManager->GetStagingAllocator();
-		auto supportBuffer = bufferAllocator.AllocateBufferMemory(size, 1);
-		memcpy(supportBuffer.memPtr, supportBufferData, size);
-		//if (!HasUnifiedMemory())
-		//	buffer->didModifyRange(NS::Range(supportBuffer.offset, size));
+		auto allocation = bufferAllocator.AllocateBufferMemory(size, 1);
+		memcpy(allocation.memPtr, supportBufferData, size);
+		bufferAllocator.FlushReservation(allocation);
 
-		SetBuffer(renderCommandEncoder, mtlShaderType, supportBuffer.mtlBuffer, supportBuffer.bufferOffset, shader->resourceMapping.uniformVarsBufferBindingPoint);
+		SetBuffer(renderCommandEncoder, mtlShaderType, allocation.mtlBuffer, allocation.bufferOffset, shader->resourceMapping.uniformVarsBufferBindingPoint);
 	}
 
 	// Uniform buffers