diff --git a/rpcsx-gpu/Cache.cpp b/rpcsx-gpu/Cache.cpp
index aca8be9..b7b0be7 100644
--- a/rpcsx-gpu/Cache.cpp
+++ b/rpcsx-gpu/Cache.cpp
@@ -137,6 +137,22 @@ static bool handleHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
   return hasInvalidations;
 }
 
+static void markHostInvalidated(bridge::BridgeHeader *bridge, int vmId,
+                                std::uint64_t address, std::uint64_t size) {
+  auto firstPage = address / bridge::kHostPageSize;
+  auto lastPage =
+      (address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;
+
+  for (auto page = firstPage; page < lastPage; ++page) {
+    std::uint8_t prevValue = 0;
+
+    while (!bridge->cachePages[vmId][page].compare_exchange_weak(
+        prevValue, prevValue | bridge::kPageInvalidated,
+        std::memory_order::relaxed)) {
+    }
+  }
+}
+
 static bool isPrimRequiresConversion(gnm::PrimitiveType primType) {
   switch (primType) {
   case gnm::PrimitiveType::PointList:
@@ -1212,16 +1228,11 @@ Cache::Buffer Cache::Tag::getBuffer(rx::AddressRange range, Access access) {
   auto &table = mParent->getTable(EntryType::HostVisibleBuffer);
 
   auto it = table.queryArea(range.beginAddress());
-  if (it == table.end() || it.endAddress() < range.endAddress()) {
-    for (auto it = table.lowerBound(range.beginAddress()); it != table.end();
-         ++it) {
-      if (!range.intersects(it.range())) {
-        break;
-      }
-
-      static_cast<CachedHostVisibleBuffer *>(it->get())->flush(*this, getScheduler(),
-                                                               it.range());
-    }
+  if (it == table.end() || !it.range().contains(range)) {
+    mParent->flushImages(*this, range);
+    mScheduler->submit();
+    mScheduler->wait();
+    mParent->flushBuffers(range);
 
     it = table.map(range.beginAddress(), range.endAddress(), nullptr, false,
                    true);
@@ -1531,13 +1542,13 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
         break;
       }
 
-      img->flush(*this, getScheduler(), it.range());
+      img->flush(*this, getScheduler(), img->addressRange);
       getScheduler().wait();
       it.get() = nullptr;
       break;
     }
 
-    img->flush(*this, getScheduler(), it.range());
+    img->flush(*this, getScheduler(), img->addressRange);
   }
 
   getScheduler().submit();
@@ -1686,21 +1697,21 @@ Cache::ImageView Cache::Tag::getImageView(const ImageKey &key, Access access) {
 }
 
 void Cache::Tag::readMemory(void *target, rx::AddressRange range) {
-  // mParent->flush(*mScheduler, address, size);
+  mParent->flush(*this, range);
   auto memoryPtr =
       RemoteMemory{mParent->mVmId}.getPointer(range.beginAddress());
   std::memcpy(target, memoryPtr, range.size());
 }
 
 void Cache::Tag::writeMemory(const void *source, rx::AddressRange range) {
-  // mParent->invalidate(*mScheduler, address, size);
+  mParent->flush(*this, range);
   auto memoryPtr =
       RemoteMemory{mParent->mVmId}.getPointer(range.beginAddress());
   std::memcpy(memoryPtr, source, range.size());
 }
 
 int Cache::Tag::compareMemory(const void *source, rx::AddressRange range) {
-  // mParent->flush(*mScheduler, address, size);
+  mParent->flush(*this, range);
   auto memoryPtr =
       RemoteMemory{mParent->mVmId}.getPointer(range.beginAddress());
   return std::memcmp(memoryPtr, source, range.size());
 }
@@ -2303,83 +2314,16 @@ void Cache::removeFrameBuffer(Scheduler &scheduler, int index) {}
 
 VkImage Cache::getFrameBuffer(Scheduler &scheduler, int index) { return {}; }
 
-static void invalidateCacheImpl(
-    Scheduler &scheduler,
-    rx::MemoryTableWithPayload<std::shared_ptr<Cache::Entry>> &table,
-    std::uint64_t beginAddress, std::uint64_t endAddress) {
-  table.unmap(beginAddress, endAddress);
-}
-
-void Cache::invalidate(Scheduler &scheduler, std::uint64_t address,
-                       std::uint64_t size) {
-  auto beginAddress = address;
-  auto endAddress = address + size;
-  rx::dieIf(beginAddress >= endAddress,
-            "wrong flush range: address %lx, size %lx", address, size);
-
-  // invalidateCacheImpl(scheduler, mBuffers, beginAddress, endAddress);
-  // invalidateCacheImpl(scheduler, mImages, beginAddress, endAddress);
-
-  // invalidateCacheImpl(scheduler, mSyncTable, beginAddress, endAddress);
+void Cache::invalidate(Tag &tag, rx::AddressRange range) {
+  flush(tag, range);
+  markHostInvalidated(mDevice->bridge, mVmId, range.beginAddress(),
+                      range.size());
 }
-
-void Cache::flush(Scheduler &scheduler, std::uint64_t address,
-                  std::uint64_t size) {
-  // auto beginAddress = address;
-  // auto endAddress = address + size;
-
-  // rx::dieIf(beginAddress >= endAddress,
-  //           "wrong flush range: address %lx, size %lx", address, size);
-
-  // auto tag = createTag(scheduler);
-  // flushCacheImpl(scheduler, tag, mBuffers, beginAddress, endAddress);
-  // flushCacheImpl(scheduler, tag, mImages, beginAddress, endAddress);
-
-  // flushCacheImpl(scheduler, tag, mSyncTable, beginAddress, endAddress);
-  // scheduler.submit();
-  // scheduler.wait();
-}
-
-static void flushImageCache(
-    Scheduler &scheduler, Cache::Tag &tag,
-    rx::MemoryTableWithPayload<std::shared_ptr<Cache::Entry>> &table,
-    rx::AddressRange range) {
-  auto beginIt = table.lowerBound(range.beginAddress());
-
-  while (beginIt != table.end()) {
-    auto cached = beginIt->get();
-    if (!cached->addressRange.intersects(range)) {
-      break;
-    }
-
-    static_cast<CachedImage *>(cached)->flush(tag, scheduler, range);
-    ++beginIt;
-  }
-}
-
-static rx::AddressRange flushHostVisibleBufferCache(
-    Cache::Tag &tag,
-    rx::MemoryTableWithPayload<std::shared_ptr<Cache::Entry>> &table,
-    rx::AddressRange range) {
-  auto beginIt = table.lowerBound(range.beginAddress());
-
-  rx::AddressRange result;
-  while (beginIt != table.end()) {
-    auto cached = beginIt->get();
-    if (!cached->addressRange.intersects(range)) {
-      break;
-    }
-    auto address = RemoteMemory{tag.getVmId()}.getPointer(
-        cached->addressRange.beginAddress());
-    static_cast<CachedHostVisibleBuffer *>(cached)->flush(address,
-                                                          cached->addressRange);
-
-    result = result.merge(cached->addressRange);
-    ++beginIt;
-  }
-
-  return result;
+void Cache::flush(Tag &tag, rx::AddressRange range) {
+  flushImages(tag, range);
+  tag.getScheduler().submit();
+  tag.getScheduler().wait();
+  flushBuffers(range);
 }
 
 void Cache::trackUpdate(EntryType type, rx::AddressRange range,
@@ -2431,9 +2375,12 @@ void Cache::trackWrite(rx::AddressRange range, TagId tagId, bool lockMemory) {
 
     auto range =
         rx::AddressRange::fromBeginSize(address, bridge::kHostPageSize);
     auto tag = mDevice->getCacheTag(vmId, sched);
-    flushImageCache(sched, tag, getTable(EntryType::Image), range);
-    auto flushedRange = flushHostVisibleBufferCache(
-        tag, getTable(EntryType::HostVisibleBuffer), range);
+
+    flushImages(tag, range);
+    sched.submit();
+    sched.wait();
+
+    auto flushedRange = flushBuffers(range);
     assert(flushedRange.isValid() && flushedRange.size() > 0);
     unlockReadWrite(mDevice->bridge, vmId, flushedRange.beginAddress(),
@@ -2442,6 +2389,48 @@ void Cache::trackWrite(rx::AddressRange range, TagId tagId, bool lockMemory) {
                     flushedRange.size());
   }};
 }
 
+rx::AddressRange Cache::flushImages(Tag &tag, rx::AddressRange range) {
+  auto &table = getTable(EntryType::Image);
+  rx::AddressRange result;
+  auto beginIt = table.lowerBound(range.beginAddress());
+
+  while (beginIt != table.end()) {
+    auto cached = beginIt->get();
+    if (!cached->addressRange.intersects(range)) {
+      break;
+    }
+
+    static_cast<CachedImage *>(cached)->flush(tag, tag.getScheduler(), range);
+    result = result.merge(cached->addressRange);
+    ++beginIt;
+  }
+
+  return result;
+}
+
+rx::AddressRange Cache::flushBuffers(rx::AddressRange range) {
+  auto &table = getTable(EntryType::HostVisibleBuffer);
+  auto beginIt = table.lowerBound(range.beginAddress());
+
+  rx::AddressRange result;
+  while (beginIt != table.end()) {
+    auto cached = beginIt->get();
+    if (!cached->addressRange.intersects(range)) {
+      break;
+    }
+
+    auto address =
+        RemoteMemory{mVmId}.getPointer(cached->addressRange.beginAddress());
+    static_cast<CachedHostVisibleBuffer *>(cached)->flush(address,
+                                                          cached->addressRange);
+
+    result = result.merge(cached->addressRange);
+    ++beginIt;
+  }
+
+  return result;
+}
+
 std::shared_ptr<Cache::Entry> Cache::getInSyncEntry(EntryType type,
                                                     rx::AddressRange range) {
   auto &table = getTable(type);
diff --git a/rpcsx-gpu/Cache.hpp b/rpcsx-gpu/Cache.hpp
index e5473d3..24fbb44 100644
--- a/rpcsx-gpu/Cache.hpp
+++ b/rpcsx-gpu/Cache.hpp
@@ -464,16 +464,17 @@ struct Cache {
                        TileMode tileMode);
   void removeFrameBuffer(Scheduler &scheduler, int index);
   VkImage getFrameBuffer(Scheduler &scheduler, int index);
 
-  void invalidate(Scheduler &scheduler, std::uint64_t address,
-                  std::uint64_t size);
-  void invalidate(Scheduler &scheduler) {
-    invalidate(scheduler, 0, ~static_cast<std::uint64_t>(0));
+  void flush(Tag &tag, rx::AddressRange range);
+  void flush(Scheduler &sched, rx::AddressRange range) {
+    auto tag = createTag(sched);
+    flush(tag, range);
   }
 
-  void flush(Scheduler &scheduler, std::uint64_t address, std::uint64_t size);
-  void flush(Scheduler &scheduler) {
-    flush(scheduler, 0, ~static_cast<std::uint64_t>(0));
+  void invalidate(Tag &tag, rx::AddressRange range);
+  void invalidate(Scheduler &sched, rx::AddressRange range) {
+    auto tag = createTag(sched);
+    invalidate(tag, range);
   }
 
   [[nodiscard]] VkPipelineLayout getGraphicsPipelineLayout() const {
@@ -501,6 +502,8 @@ struct Cache {
   }
 
   auto &getTable(EntryType type) { return mTables[static_cast<std::size_t>(type)]; }
+  rx::AddressRange flushImages(Tag &tag, rx::AddressRange range);
+  rx::AddressRange flushBuffers(rx::AddressRange range);
 
 private:
   std::shared_ptr<Entry> getInSyncEntry(EntryType type, rx::AddressRange range);
diff --git a/rpcsx-gpu/Pipe.cpp b/rpcsx-gpu/Pipe.cpp
index e9870e1..5a91d6f 100644
--- a/rpcsx-gpu/Pipe.cpp
+++ b/rpcsx-gpu/Pipe.cpp
@@ -863,13 +863,14 @@ bool GraphicsPipe::eventWriteEos(Queue &queue) {
     auto size = sizeof(std::uint32_t) * sizeDw;
     auto gds = cache.getGdsBuffer().getData();
 
-    cache.invalidate(scheduler, address, size);
+    cache.invalidate(scheduler, rx::AddressRange::fromBeginSize(address, size));
     std::memcpy(pointer, gds + gdsIndexDw * sizeof(std::uint32_t), size);
     break;
   }
 
   case 2:
     // after GDS writes confirm, store 32 bit DATA to memory as fence
-    cache.invalidate(scheduler, address, sizeof(std::uint32_t));
+    cache.invalidate(scheduler, rx::AddressRange::fromBeginSize(
+                                    address, sizeof(std::uint32_t)));
     *pointer = dataInfo;
     break;
@@ -940,7 +941,8 @@ bool GraphicsPipe::dmaData(Queue &queue) {
       auto dstAddress =
           dstAddressLo | (static_cast<std::uint64_t>(dstAddressHi) << 32);
       dst = amdgpu::RemoteMemory{queue.vmId}.getPointer(dstAddress);
-      device->caches[queue.vmId].invalidate(scheduler, dstAddress, size);
+      device->caches[queue.vmId].invalidate(
+          scheduler, rx::AddressRange::fromBeginSize(dstAddress, size));
     } else {
       dst = getMmRegister(dstAddressLo / sizeof(std::uint32_t));
     }
@@ -963,7 +965,8 @@ bool GraphicsPipe::dmaData(Queue &queue) {
      auto srcAddress =
          srcAddressLo | (static_cast<std::uint64_t>(srcAddressHi) << 32);
      src = amdgpu::RemoteMemory{queue.vmId}.getPointer(srcAddress);
-      device->caches[queue.vmId].flush(scheduler, srcAddress, size);
+      device->caches[queue.vmId].flush(
+          scheduler, rx::AddressRange::fromBeginSize(srcAddress, size));
     } else {
       src = getMmRegister(srcAddressLo / sizeof(std::uint32_t));
     }