Skip to content

Commit

Permalink
rpcsx-gpu: cache: properly flush cache on memory access
Browse files Browse the repository at this point in the history
  • Loading branch information
DHrpcs3 committed Oct 9, 2024
1 parent 88fde9c commit 8e9711e
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 105 deletions.
177 changes: 83 additions & 94 deletions rpcsx-gpu/Cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,22 @@ static bool handleHostInvalidations(bridge::BridgeHeader *bridge, int vmId,
return hasInvalidations;
}

// Sets the bridge::kPageInvalidated bit in bridge->cachePages[vmId][page] for
// every host page index from address / kHostPageSize up to (but excluding)
// the rounded-up page index of address + size.
static void markHostInvalidated(bridge::BridgeHeader *bridge, int vmId,
                                std::uint64_t address, std::uint64_t size) {
  const auto pageBegin = address / bridge::kHostPageSize;
  // Round up so a partially covered trailing page is included.
  const auto pageEnd =
      (address + size + bridge::kHostPageSize - 1) / bridge::kHostPageSize;

  for (auto pageIndex = pageBegin; pageIndex < pageEnd; ++pageIndex) {
    auto &pageFlags = bridge->cachePages[vmId][pageIndex];
    std::uint8_t observed = 0;

    // Retry until the OR-ed value is stored. Assuming std::atomic semantics,
    // a failed exchange refreshes `observed` with the current flags, so the
    // other bits are preserved on the next attempt.
    while (!pageFlags.compare_exchange_weak(
        observed, observed | bridge::kPageInvalidated,
        std::memory_order::relaxed)) {
    }
  }
}

static bool isPrimRequiresConversion(gnm::PrimitiveType primType) {
switch (primType) {
case gnm::PrimitiveType::PointList:
Expand Down Expand Up @@ -1212,16 +1228,11 @@ Cache::Buffer Cache::Tag::getBuffer(rx::AddressRange range, Access access) {
auto &table = mParent->getTable(EntryType::HostVisibleBuffer);
auto it = table.queryArea(range.beginAddress());

if (it == table.end() || it.endAddress() < range.endAddress()) {
for (auto it = table.lowerBound(range.beginAddress()); it != table.end();
++it) {
if (!range.intersects(it.range())) {
break;
}

static_cast<CachedImage *>(it->get())->flush(*this, getScheduler(),
it.range());
}
if (it == table.end() || !it.range().contains(range)) {
mParent->flushImages(*this, range);
mScheduler->submit();
mScheduler->wait();
mParent->flushBuffers(range);

it = table.map(range.beginAddress(), range.endAddress(), nullptr, false,
true);
Expand Down Expand Up @@ -1531,13 +1542,13 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
break;
}

img->flush(*this, getScheduler(), it.range());
img->flush(*this, getScheduler(), img->addressRange);
getScheduler().wait();
it.get() = nullptr;
break;
}

img->flush(*this, getScheduler(), it.range());
img->flush(*this, getScheduler(), img->addressRange);
}

getScheduler().submit();
Expand Down Expand Up @@ -1686,21 +1697,21 @@ Cache::ImageView Cache::Tag::getImageView(const ImageKey &key, Access access) {
}

// Copies range.size() bytes into `target`, reading from the pointer that
// RemoteMemory returns for range.beginAddress() in the parent cache's VM.
void Cache::Tag::readMemory(void *target, rx::AddressRange range) {
  // Flush the parent cache for this range before touching memory directly.
  mParent->flush(*this, range);

  const auto sourcePtr =
      RemoteMemory{mParent->mVmId}.getPointer(range.beginAddress());
  std::memcpy(target, sourcePtr, range.size());
}

// Copies range.size() bytes from `source` to the pointer that RemoteMemory
// returns for range.beginAddress() in the parent cache's VM.
void Cache::Tag::writeMemory(const void *source, rx::AddressRange range) {
  // Cache::invalidate flushes the range and then marks its host pages as
  // invalidated. A plain flush would leave the pages unmarked even though the
  // memcpy below changes their contents.
  mParent->invalidate(*this, range);

  auto memoryPtr =
      RemoteMemory{mParent->mVmId}.getPointer(range.beginAddress());
  std::memcpy(memoryPtr, source, range.size());
}

int Cache::Tag::compareMemory(const void *source, rx::AddressRange range) {
// mParent->flush(*mScheduler, address, size);
mParent->flush(*this, range);
auto memoryPtr =
RemoteMemory{mParent->mVmId}.getPointer(range.beginAddress());
return std::memcmp(memoryPtr, source, range.size());
Expand Down Expand Up @@ -2303,83 +2314,16 @@ void Cache::removeFrameBuffer(Scheduler &scheduler, int index) {}

VkImage Cache::getFrameBuffer(Scheduler &scheduler, int index) { return {}; }

static void invalidateCacheImpl(
Scheduler &scheduler,
rx::MemoryTableWithPayload<std::shared_ptr<Cache::Entry>> &table,
std::uint64_t beginAddress, std::uint64_t endAddress) {
table.unmap(beginAddress, endAddress);
}

void Cache::invalidate(Scheduler &scheduler, std::uint64_t address,
std::uint64_t size) {
auto beginAddress = address;
auto endAddress = address + size;

rx::dieIf(beginAddress >= endAddress,
"wrong flush range: address %lx, size %lx", address, size);

// invalidateCacheImpl(scheduler, mBuffers, beginAddress, endAddress);
// invalidateCacheImpl(scheduler, mImages, beginAddress, endAddress);

// invalidateCacheImpl(scheduler, mSyncTable, beginAddress, endAddress);
// Flushes the cache for `range`, then marks the host pages covering the
// range as invalidated in the device bridge for this cache's VM.
void Cache::invalidate(Tag &tag, rx::AddressRange range) {
  flush(tag, range);

  const auto address = range.beginAddress();
  const auto byteCount = range.size();
  markHostInvalidated(mDevice->bridge, mVmId, address, byteCount);
}

// Address/size overload of flush. Currently a no-op: every statement in the
// body is commented out, so `scheduler`, `address` and `size` are unused.
// The commented-out lines below are kept as-is.
void Cache::flush(Scheduler &scheduler, std::uint64_t address,
std::uint64_t size) {
// auto beginAddress = address;
// auto endAddress = address + size;

// rx::dieIf(beginAddress >= endAddress,
// "wrong flush range: address %lx, size %lx", address, size);

// auto tag = createTag(scheduler);
// flushCacheImpl(scheduler, tag, mBuffers, beginAddress, endAddress);
// flushCacheImpl(scheduler, tag, mImages, beginAddress, endAddress);

// flushCacheImpl(scheduler, tag, mSyncTable, beginAddress, endAddress);
// scheduler.submit();
// scheduler.wait();
}

// Walks `table` from lowerBound(range.beginAddress()) and calls
// CachedImage::flush(tag, scheduler, range) on each entry, stopping at the
// first entry whose addressRange does not intersect `range`.
static void flushImageCache(
    Scheduler &scheduler, Cache::Tag &tag,
    rx::MemoryTableWithPayload<std::shared_ptr<Cache::Entry>> &table,
    rx::AddressRange range) {
  for (auto it = table.lowerBound(range.beginAddress()); it != table.end();
       ++it) {
    auto entry = it->get();

    if (!entry->addressRange.intersects(range)) {
      return;
    }

    // Entries in this table are treated as CachedImage instances.
    static_cast<CachedImage *>(entry)->flush(tag, scheduler, range);
  }
}

static rx::AddressRange flushHostVisibleBufferCache(
Cache::Tag &tag,
rx::MemoryTableWithPayload<std::shared_ptr<Cache::Entry>> &table,
rx::AddressRange range) {
auto beginIt = table.lowerBound(range.beginAddress());

rx::AddressRange result;
while (beginIt != table.end()) {
auto cached = beginIt->get();
if (!cached->addressRange.intersects(range)) {
break;
}
auto address = RemoteMemory{tag.getVmId()}.getPointer(
cached->addressRange.beginAddress());
static_cast<CachedHostVisibleBuffer *>(cached)->flush(address,
cached->addressRange);

result = result.merge(cached->addressRange);
++beginIt;
}

return result;
// Flushes cached images overlapping `range`, submits and waits on the tag's
// scheduler, then flushes host-visible buffers overlapping `range`.
void Cache::flush(Tag &tag, rx::AddressRange range) {
  flushImages(tag, range);

  // Submit and wait between the image flush and the buffer flush.
  Scheduler &scheduler = tag.getScheduler();
  scheduler.submit();
  scheduler.wait();

  flushBuffers(range);
}

void Cache::trackUpdate(EntryType type, rx::AddressRange range,
Expand Down Expand Up @@ -2431,9 +2375,12 @@ void Cache::trackWrite(rx::AddressRange range, TagId tagId, bool lockMemory) {
auto range =
rx::AddressRange::fromBeginSize(address, bridge::kHostPageSize);
auto tag = mDevice->getCacheTag(vmId, sched);
flushImageCache(sched, tag, getTable(EntryType::Image), range);
auto flushedRange = flushHostVisibleBufferCache(
tag, getTable(EntryType::HostVisibleBuffer), range);

flushImages(tag, range);
sched.submit();
sched.wait();

auto flushedRange = flushBuffers(range);

assert(flushedRange.isValid() && flushedRange.size() > 0);
unlockReadWrite(mDevice->bridge, vmId, flushedRange.beginAddress(),
Expand All @@ -2442,6 +2389,48 @@ void Cache::trackWrite(rx::AddressRange range, TagId tagId, bool lockMemory) {
}};
}

// Flushes image entries starting at lowerBound(range.beginAddress()) until an
// entry no longer intersects `range`. Returns the merge of the address ranges
// of all flushed entries (default-constructed if none were flushed).
rx::AddressRange Cache::flushImages(Tag &tag, rx::AddressRange range) {
  auto &images = getTable(EntryType::Image);
  rx::AddressRange flushed;

  for (auto it = images.lowerBound(range.beginAddress()); it != images.end();
       ++it) {
    auto entry = it->get();

    if (!entry->addressRange.intersects(range)) {
      break;
    }

    // The flush is issued with the requested range, on the tag's scheduler.
    static_cast<CachedImage *>(entry)->flush(tag, tag.getScheduler(), range);
    flushed = flushed.merge(entry->addressRange);
  }

  return flushed;
}

// Flushes host-visible buffer entries starting at
// lowerBound(range.beginAddress()) until an entry no longer intersects
// `range`. Each entry is flushed over its own full addressRange, to the
// RemoteMemory pointer for that range's begin address. Returns the merge of
// all flushed entry ranges (default-constructed if none were flushed).
rx::AddressRange Cache::flushBuffers(rx::AddressRange range) {
  auto &buffers = getTable(EntryType::HostVisibleBuffer);
  auto it = buffers.lowerBound(range.beginAddress());

  rx::AddressRange flushed;
  for (; it != buffers.end(); ++it) {
    auto entry = it->get();

    if (!entry->addressRange.intersects(range)) {
      break;
    }

    const auto &entryRange = entry->addressRange;
    auto target = RemoteMemory{mVmId}.getPointer(entryRange.beginAddress());
    static_cast<CachedHostVisibleBuffer *>(entry)->flush(target, entryRange);

    flushed = flushed.merge(entryRange);
  }

  return flushed;
}

std::shared_ptr<Cache::Entry> Cache::getInSyncEntry(EntryType type,
rx::AddressRange range) {
auto &table = getTable(type);
Expand Down
17 changes: 10 additions & 7 deletions rpcsx-gpu/Cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,16 +464,17 @@ struct Cache {
TileMode tileMode);
void removeFrameBuffer(Scheduler &scheduler, int index);
VkImage getFrameBuffer(Scheduler &scheduler, int index);
void invalidate(Scheduler &scheduler, std::uint64_t address,
std::uint64_t size);

void invalidate(Scheduler &scheduler) {
invalidate(scheduler, 0, ~static_cast<std::uint64_t>(0));
void flush(Tag &tag, rx::AddressRange range);
// Convenience overload: creates a Tag for `sched` with createTag() and
// forwards to flush(Tag &, rx::AddressRange).
void flush(Scheduler &sched, rx::AddressRange range) {
  auto localTag = createTag(sched);
  flush(localTag, range);
}

void flush(Scheduler &scheduler, std::uint64_t address, std::uint64_t size);
void flush(Scheduler &scheduler) {
flush(scheduler, 0, ~static_cast<std::uint64_t>(0));
void invalidate(Tag &tag, rx::AddressRange range);
// Convenience overload: creates a Tag for `sched` with createTag() and
// forwards to invalidate(Tag &, rx::AddressRange).
void invalidate(Scheduler &sched, rx::AddressRange range) {
  auto localTag = createTag(sched);
  invalidate(localTag, range);
}

[[nodiscard]] VkPipelineLayout getGraphicsPipelineLayout() const {
Expand Down Expand Up @@ -501,6 +502,8 @@ struct Cache {
}

// Returns a reference to the entry of mTables selected by the integer value
// of `type`.
auto &getTable(EntryType type) {
  const auto tableIndex = static_cast<int>(type);
  return mTables[tableIndex];
}
rx::AddressRange flushImages(Tag &tag, rx::AddressRange range);
rx::AddressRange flushBuffers(rx::AddressRange range);

private:
std::shared_ptr<Entry> getInSyncEntry(EntryType type, rx::AddressRange range);
Expand Down
11 changes: 7 additions & 4 deletions rpcsx-gpu/Pipe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -863,13 +863,14 @@ bool GraphicsPipe::eventWriteEos(Queue &queue) {
auto size = sizeof(std::uint32_t) * sizeDw;

auto gds = cache.getGdsBuffer().getData();
cache.invalidate(scheduler, address, size);
cache.invalidate(scheduler, rx::AddressRange::fromBeginSize(address, size));
std::memcpy(pointer, gds + gdsIndexDw * sizeof(std::uint32_t), size);
break;
}

case 2: // after GDS writes confirm, store 32 bit DATA to memory as fence
cache.invalidate(scheduler, address, sizeof(std::uint32_t));
cache.invalidate(scheduler, rx::AddressRange::fromBeginSize(
address, sizeof(std::uint32_t)));
*pointer = dataInfo;
break;

Expand Down Expand Up @@ -940,7 +941,8 @@ bool GraphicsPipe::dmaData(Queue &queue) {
auto dstAddress =
dstAddressLo | (static_cast<std::uint64_t>(dstAddressHi) << 32);
dst = amdgpu::RemoteMemory{queue.vmId}.getPointer(dstAddress);
device->caches[queue.vmId].invalidate(scheduler, dstAddress, size);
device->caches[queue.vmId].invalidate(
scheduler, rx::AddressRange::fromBeginSize(dstAddress, size));
} else {
dst = getMmRegister(dstAddressLo / sizeof(std::uint32_t));
}
Expand All @@ -963,7 +965,8 @@ bool GraphicsPipe::dmaData(Queue &queue) {
auto srcAddress =
srcAddressLo | (static_cast<std::uint64_t>(srcAddressHi) << 32);
src = amdgpu::RemoteMemory{queue.vmId}.getPointer(srcAddress);
device->caches[queue.vmId].flush(scheduler, srcAddress, size);
device->caches[queue.vmId].flush(
scheduler, rx::AddressRange::fromBeginSize(srcAddress, size));
} else {
src = getMmRegister(srcAddressLo / sizeof(std::uint32_t));
}
Expand Down

0 comments on commit 8e9711e

Please sign in to comment.