From 9d5d6527ad126692e1e9837b7745eb4c4a49b3ad Mon Sep 17 00:00:00 2001 From: Brian Warner Date: Sun, 25 Jul 2021 00:36:21 -0700 Subject: [PATCH] fix(swingset): disable metering around GC-sensitive operations When a swingset kernel is part of a consensus machine, the visible state must be a deterministic function of userspace activity. Every member kernel must perform the same set of operations. However we are not yet confident that the timing of garbage collection will remain identical between kernels that experience different patterns of snapshot+restart. In particular, up until recently, the amount of "headroom" in the XS memory allocator was reset upon snapshot reload: the new XS engine only allocates as much RAM as the snapshot needs, whereas before the snapshot was taken, the RAM footprint could have been larger (e.g. if a large number of objects we allocated and then released), leading to more "headroom". Automatic GC is triggered by an attempt to allocate space which cannot be satisfied by this headroom, so it will happen more frequently in the post-reload engine than before the snapshot. To accommodate differences in GC timing between kernels that are otherwise operating in consensus, this commit introduces the "unmetered box": a span of execution that does not count against the meter. We take all of the liveslots operations that might be sensitive to the engine's GC behavior and put them "in" the box. This includes any code that calls `deref()` on the WeakRefs used in `slotToVal`, because the WeakRef can change state from "live" to "dead" when GC is provoked, which is sensitive to more than just userspace behavior. closes #3458 --- packages/SwingSet/docs/liveslots.md | 12 ++- packages/SwingSet/src/kernel/kernel.js | 2 + packages/SwingSet/src/kernel/liveSlots.js | 82 ++++++++++++------- .../vatManager/supervisor-nodeworker.js | 2 + .../vatManager/supervisor-subprocess-node.js | 2 + .../vatManager/supervisor-subprocess-xsnap.js | 22 +++++ packages/SwingSet/test/liveslots-helpers.js | 2 + packages/SwingSet/test/test-liveslots.js | 2 + packages/SwingSet/test/test-marshal.js | 7 +- 9 files changed, 102 insertions(+), 31 deletions(-) diff --git a/packages/SwingSet/docs/liveslots.md b/packages/SwingSet/docs/liveslots.md index d79342c139e..77199a40a38 100644 --- a/packages/SwingSet/docs/liveslots.md +++ b/packages/SwingSet/docs/liveslots.md @@ -69,7 +69,7 @@ Some useful things cannot be serialized: they will trigger an error. Uncertain: * Maps: This might actually serialize as pass-by-presence, since it has no non-function properties (in fact it has no own properties at all, they all live on `Map.prototype`, whose properties are all functions). The receiving side gets a Presence, not a Map, but invoking e.g. `E(p).get(123)` will return a promise that will be fulfilled with the results of `m.get(123)` on the sending side. -* WeakMaps: same, except the values being passed into `get()` would be coming from the deserializer, and so they might not be that useful (especially since Liveslots does not implement distributed GC yet). +* WeakMaps: same, except the values being passed into `get()` would be coming from the deserializer, and so they might not be that useful. ## How things get serialized @@ -77,3 +77,13 @@ Uncertain: * local Promises: passed as a promise * promise returned by `E(p).foo()`: passes as a promise, with pipelining enabled * Function: rejected (todo: wrap) + +## Garbage Collection vs Metering + +When a swingset kernel is part of a consensus machine, the visible state must be a deterministic function of userspace activity. Every member kernel must perform the same set of operations. + +However we are not yet confident that the timing of garbage collection will remain identical between kernels that experience different patterns of snapshot+restart. In particular, up until recently, the amount of "headroom" in the XS memory allocator was reset upon snapshot reload: the new XS engine only allocates as much RAM as the snapshot needs, whereas before the snapshot was taken, the RAM footprint could have been larger (e.g. if a large number of objects we allocated and then released), leading to more "headroom". Automatic GC is triggered by an attempt to allocate space which cannot be satisfied by this headroom, so it will happen more frequently in the post-reload engine than before the snapshot. See issues #3428, #3458, and #3577 for details. + +To accommodate differences in GC timing between kernels that are otherwise operating in consensus, liveslots uses an "unmetered box": a span of execution that does not count against the meter. We take all of the liveslots operations that might be sensitive to the engine's GC behavior and put them "in" the box. + +This includes any code that calls `deref()` on the WeakRefs used in `slotToVal`, because the WeakRef can change state from "live" to "dead" when GC is provoked, which is sensitive to more than just userspace behavior. diff --git a/packages/SwingSet/src/kernel/kernel.js b/packages/SwingSet/src/kernel/kernel.js index f94c101355c..3583d85b5e2 100644 --- a/packages/SwingSet/src/kernel/kernel.js +++ b/packages/SwingSet/src/kernel/kernel.js @@ -826,6 +826,8 @@ export default function buildKernel( FinalizationRegistry, waitUntilQuiescent, gcAndFinalize, + runWithoutMetering: thunk => thunk(), + runWithoutMeteringAsync: async thunk => thunk(), }); const vatManagerFactory = makeVatManagerFactory({ allVatPowers, diff --git a/packages/SwingSet/src/kernel/liveSlots.js b/packages/SwingSet/src/kernel/liveSlots.js index 822717d8cd4..020f82ca2e9 100644 --- a/packages/SwingSet/src/kernel/liveSlots.js +++ b/packages/SwingSet/src/kernel/liveSlots.js @@ -31,7 +31,8 @@ const DEFAULT_VIRTUAL_OBJECT_CACHE_SIZE = 3; // XXX ridiculously small value to * @param {boolean} enableVatstore * @param {*} vatPowers * @param {*} vatParameters - * @param {*} gcTools { WeakRef, FinalizationRegistry, waitUntilQuiescent, gcAndFinalize } + * @param {*} gcTools { WeakRef, FinalizationRegistry, waitUntilQuiescent, gcAndFinalize, + * runWithoutMetering, runWithoutMeteringAsync } * @param {Console} console * @returns {*} { vatGlobals, inescapableGlobalProperties, dispatch, setBuildRootObject } * @@ -53,6 +54,7 @@ function build( console, ) { const { WeakRef, FinalizationRegistry } = gcTools; + const { runWithoutMetering, runWithoutMeteringAsync } = gcTools; const enableLSDebug = false; function lsdebug(...args) { if (enableLSDebug) { @@ -461,6 +463,7 @@ function build( valToSlot.set(val, slot); slotToVal.set(slot, new WeakRef(val)); if (type === 'object') { + // Set.delete() metering seems unaffected by presence/absence deadSet.delete(slot); droppedRegistry.register(val, slot, val); } @@ -489,6 +492,12 @@ function build( } } + // The meter usage of convertSlotToVal is strongly affected by GC, because + // it only creates a new Presence if one does not already exist. Userspace + // moves from REACHABLE to UNREACHABLE, but the JS engine then moves to + // COLLECTED (and maybe FINALIZED) on its own, and we must not allow the + // latter changes to affect metering. So every call to convertSlotToVal (or + // m.unserialize) must be wrapped by runWithoutMetering(). function convertSlotToVal(slot, iface = undefined) { const { type, allocatedByVat, virtual } = parseVatSlot(slot); const wr = slotToVal.get(slot); @@ -692,7 +701,7 @@ function build( lsdebug( `ls[${forVatID}].dispatch.deliver ${target}.${method} -> ${result}`, ); - const t = convertSlotToVal(target); + const t = runWithoutMetering(() => convertSlotToVal(target)); assert(t, X`no target ${target}`); // TODO: if we acquire new decision-making authority over a promise that // we already knew about ('result' is already in slotToVal), we should no @@ -708,7 +717,7 @@ function build( method = Symbol.asyncIterator; } - const args = m.unserialize(argsdata); + const args = runWithoutMetering(() => m.unserialize(argsdata)); // If the method is missing, or is not a Function, or the method throws a // synchronous exception, we notify the caller (by rejecting the result @@ -747,12 +756,22 @@ function build( function retirePromiseID(promiseID) { lsdebug(`Retiring ${forVatID}:${promiseID}`); importedPromisesByPromiseID.delete(promiseID); - const wr = slotToVal.get(promiseID); - const p = wr && wr.deref(); - if (p) { - valToSlot.delete(p); - pendingPromises.delete(p); - } + // TODO: I *think* this could do without runWithoutMetering, but I have + // to think more about it. Usually any vpid handed to retirePromiseID() + // will already be in both slotToVal and pendingPromises. We added this + // check for some good reason, but I no longer remember why. If + // pendingPromises is guaranteed to hold onto the Promise, then slotToVal + // should always have a live weakRef for it, and there won't be any + // GC-based variance in the execution of wr.deref() or the other code + // that it enables. + runWithoutMetering(() => { + const wr = slotToVal.get(promiseID); + const p = wr && wr.deref(); + if (p) { + valToSlot.delete(p); + pendingPromises.delete(p); + } + }); slotToVal.delete(promiseID); } @@ -802,7 +821,7 @@ function build( X`unknown promiseID '${promiseID}'`, ); const pRec = importedPromisesByPromiseID.get(promiseID); - const val = m.unserialize(data); + const val = runWithoutMetering(() => m.unserialize(data)); if (rejected) { pRec.reject(val); } else { @@ -824,6 +843,7 @@ function build( const imports = finishCollectingPromiseImports(); for (const slot of imports) { if (slotToVal.get(slot)) { + // we'll only subscribe to new promises, which is within consensus syscall.subscribe(slot); } } @@ -835,15 +855,17 @@ function build( vrefs.map(vref => assert(parseVatSlot(vref).allocatedByVat)); // console.log(`-- liveslots acting upon dropExports ${vrefs.join(',')}`); for (const vref of vrefs) { - const wr = slotToVal.get(vref); - const o = wr && wr.deref(); - if (o) { - exportedRemotables.delete(o); - } - const { virtual } = parseVatSlot(vref); - if (virtual) { - vom.setExported(vref, false); - } + runWithoutMetering(() => { + const wr = slotToVal.get(vref); + const o = wr && wr.deref(); + if (o) { + exportedRemotables.delete(o); + } + const { virtual } = parseVatSlot(vref); + if (virtual) { + vom.setExported(vref, false); + } + }); } } @@ -890,7 +912,7 @@ function build( function retireExports(vrefs) { assert(Array.isArray(vrefs)); - vrefs.forEach(retireOneExport); + runWithoutMetering(() => vrefs.forEach(retireOneExport)); } function retireImports(vrefs) { @@ -898,6 +920,7 @@ function build( vrefs.map(vref => insistVatType('object', vref)); vrefs.map(vref => assert(!parseVatSlot(vref).allocatedByVat)); // console.log(`-- liveslots ignoring retireImports ${vrefs.join(',')}`); + // use runWithoutMetering() here too } // TODO: when we add notifyForward, guard against cycles @@ -1032,6 +1055,15 @@ function build( const { waitUntilQuiescent, gcAndFinalize } = gcTools; + async function finish() { + await gcAndFinalize(); + const doMore = processDeadSet(); + if (doMore) { + return finish(); + } + return undefined; + } + /** * This low-level liveslots code is responsible for deciding when userspace * is done with a crank. Userspace code can use Promises, so it can add as @@ -1060,15 +1092,7 @@ function build( // Now that userspace is idle, we can drive GC until we think we've // stopped. - async function finish() { - await gcAndFinalize(); - const doMore = processDeadSet(); - if (doMore) { - return finish(); - } - return undefined; - } - return finish(); + return runWithoutMeteringAsync(finish); } harden(dispatch); diff --git a/packages/SwingSet/src/kernel/vatManager/supervisor-nodeworker.js b/packages/SwingSet/src/kernel/vatManager/supervisor-nodeworker.js index da20a680bb9..fcf8557ddcd 100644 --- a/packages/SwingSet/src/kernel/vatManager/supervisor-nodeworker.js +++ b/packages/SwingSet/src/kernel/vatManager/supervisor-nodeworker.js @@ -75,6 +75,8 @@ parentPort.on('message', ([type, ...margs]) => { FinalizationRegistry, waitUntilQuiescent, gcAndFinalize: makeGcAndFinalize(engineGC), + runWithoutMetering: thunk => thunk(), + runWithoutMeteringAsync: async thunk => thunk(), }); const ls = makeLiveSlots( syscall, diff --git a/packages/SwingSet/src/kernel/vatManager/supervisor-subprocess-node.js b/packages/SwingSet/src/kernel/vatManager/supervisor-subprocess-node.js index 6db0e04781a..329302cebc3 100644 --- a/packages/SwingSet/src/kernel/vatManager/supervisor-subprocess-node.js +++ b/packages/SwingSet/src/kernel/vatManager/supervisor-subprocess-node.js @@ -91,6 +91,8 @@ fromParent.on('data', ([type, ...margs]) => { FinalizationRegistry, waitUntilQuiescent, gcAndFinalize: makeGcAndFinalize(engineGC), + runWithoutMetering: thunk => thunk(), + runWithoutMeteringAsync: async thunk => thunk(), }); const ls = makeLiveSlots( syscall, diff --git a/packages/SwingSet/src/kernel/vatManager/supervisor-subprocess-xsnap.js b/packages/SwingSet/src/kernel/vatManager/supervisor-subprocess-xsnap.js index 29e612174a0..455d70f0818 100644 --- a/packages/SwingSet/src/kernel/vatManager/supervisor-subprocess-xsnap.js +++ b/packages/SwingSet/src/kernel/vatManager/supervisor-subprocess-xsnap.js @@ -30,6 +30,26 @@ function workerLog(first, ...args) { workerLog(`supervisor started`); +function runWithoutMetering(thunk) { + const limit = globalThis.currentMeterLimit(); + const before = globalThis.resetMeter(0, 0); + try { + return thunk(); + } finally { + globalThis.resetMeter(limit, before); + } +} + +async function runWithoutMeteringAsync(thunk) { + const limit = globalThis.currentMeterLimit(); + const before = globalThis.resetMeter(0, 0); + try { + return await thunk(); + } finally { + globalThis.resetMeter(limit, before); + } +} + /** * Wrap byte-level protocols with tagged array codec. * @@ -180,6 +200,8 @@ function makeWorker(port) { // FIXME(mfig): Here is where GC-per-crank is silently disabled. // We need to do a better analysis of the tradeoffs. gcAndFinalize: makeGcAndFinalize(gcEveryCrank && globalThis.gc), + runWithoutMetering, + runWithoutMeteringAsync, }); const ls = makeLiveSlots( diff --git a/packages/SwingSet/test/liveslots-helpers.js b/packages/SwingSet/test/liveslots-helpers.js index d6d7b1648b0..e0bc0407469 100644 --- a/packages/SwingSet/test/liveslots-helpers.js +++ b/packages/SwingSet/test/liveslots-helpers.js @@ -46,6 +46,8 @@ export function makeDispatch( FinalizationRegistry, waitUntilQuiescent, gcAndFinalize: makeGcAndFinalize(engineGC), + runWithoutMetering: thunk => thunk(), + runWithoutMeteringAsync: async thunk => thunk(), }); const { setBuildRootObject, dispatch } = makeLiveSlots( syscall, diff --git a/packages/SwingSet/test/test-liveslots.js b/packages/SwingSet/test/test-liveslots.js index 29fcdec3664..c36602264a9 100644 --- a/packages/SwingSet/test/test-liveslots.js +++ b/packages/SwingSet/test/test-liveslots.js @@ -906,6 +906,8 @@ function makeMockGC() { getAllFRs, waitUntilQuiescent, gcAndFinalize: mockGCAndFinalize, + runWithoutMetering: thunk => thunk(), + runWithoutMeteringAsync: async thunk => thunk(), }); } diff --git a/packages/SwingSet/test/test-marshal.js b/packages/SwingSet/test/test-marshal.js index 03da6611d82..c7eb257e402 100644 --- a/packages/SwingSet/test/test-marshal.js +++ b/packages/SwingSet/test/test-marshal.js @@ -10,7 +10,12 @@ import { makeMarshaller } from '../src/kernel/liveSlots.js'; import { buildVatController } from '../src/index.js'; -const gcTools = harden({ WeakRef, FinalizationRegistry }); +const gcTools = harden({ + WeakRef, + FinalizationRegistry, + runWithoutMetering: thunk => thunk(), + runWithoutMeteringAsync: async thunk => thunk(), +}); async function prep() { const config = {};