Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix core sharing and make use of scheduling_lookahead #4724

Merged
merged 37 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
b64eb22
Zombienet test for two parachains sharing a core
tdimitrov May 30, 2024
4deb507
fixes
alindima Jun 3, 2024
c94c398
further simplify the fix
alindima Jun 3, 2024
732a74f
prospective-parachains: hack fetch_upcoming_paras
tdimitrov Jun 5, 2024
49193f3
Use claim queue in the validator side of collator protocol
tdimitrov Jun 6, 2024
00eed19
Use claim queue in the collator side of collator protocol
tdimitrov Jun 6, 2024
4ddb874
Use claim queue in `fetch_upcoming_paras` from `prospective-parachains`
tdimitrov Jun 6, 2024
8190687
add prospective parachains tests
alindima Jun 11, 2024
a2f0a25
fix collator side and collator side tests
alindima Jun 11, 2024
b979563
Merge remote-tracking branch 'origin/master' into tsv-ct-core-sharing
alindima Jun 11, 2024
c996a90
clippy
alindima Jun 11, 2024
c874cf1
it works!!
alindima Jun 12, 2024
ec70d6b
Merge remote-tracking branch 'origin/master' into tsv-ct-core-sharing
alindima Jun 12, 2024
b88f4e8
properly fix backing
alindima Jun 12, 2024
8afe82f
fix unit tests
alindima Jun 12, 2024
7050c2e
update comments
alindima Jun 13, 2024
e23187b
add proper zombienet test
alindima Jun 13, 2024
a3560a3
review comments
alindima Jun 17, 2024
2fe2420
symlink assign-core.js
alindima Jun 17, 2024
8f1d8e0
try fixing zombienet
alindima Jun 17, 2024
3469f2d
fix compilation
alindima Jun 17, 2024
889cb32
clippy again
alindima Jun 17, 2024
a4387a2
add prdoc
alindima Jun 17, 2024
8532f7d
try fixing prdoc
alindima Jun 17, 2024
7c1fc91
Merge remote-tracking branch 'origin/master' into tsv-ct-core-sharing
alindima Jun 17, 2024
e85b19f
try fixing zombienet
alindima Jun 17, 2024
4d69a3a
more zombienet
alindima Jun 17, 2024
829af4d
semver
alindima Jun 18, 2024
a6998be
use relative symlinks
alindima Jun 18, 2024
21be690
CI stuff
alindima Jun 18, 2024
2cb6bf2
small review comment
alindima Jun 18, 2024
8f860c1
add copy command to CI file
alindima Jun 18, 2024
e343989
fix gitlab yaml
alindima Jun 18, 2024
69d16cd
lower glutton compute
alindima Jun 18, 2024
4c3635b
Merge remote-tracking branch 'origin/master' into tsv-ct-core-sharing
alindima Jun 18, 2024
9eea0c6
relax zombienet constraint
alindima Jun 18, 2024
9c118cb
don't compute the validator group for an unscheduled core when using …
alindima Jun 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 39 additions & 24 deletions polkadot/node/core/prospective-parachains/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use polkadot_node_subsystem_util::{
inclusion_emulator::{Constraints, RelayChainBlockInfo},
request_session_index_for_child,
runtime::{prospective_parachains_mode, ProspectiveParachainsMode},
vstaging::fetch_claim_queue,
};
use polkadot_primitives::{
async_backing::CandidatePendingAvailability, BlockNumber, CandidateHash,
Expand Down Expand Up @@ -871,33 +872,47 @@ async fn fetch_upcoming_paras<Context>(
ctx: &mut Context,
relay_parent: Hash,
) -> JfyiErrorResult<Vec<ParaId>> {
let (tx, rx) = oneshot::channel();

// This'll have to get more sophisticated with parathreads,
// but for now we can just use the `AvailabilityCores`.
ctx.send_message(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::AvailabilityCores(tx),
))
.await;

let cores = rx.await.map_err(JfyiError::RuntimeApiRequestCanceled)??;
let mut upcoming = HashSet::new();
for core in cores {
match core {
CoreState::Occupied(occupied) => {
if let Some(next_up_on_available) = occupied.next_up_on_available {
upcoming.insert(next_up_on_available.para_id);

match fetch_claim_queue(ctx.sender(), relay_parent).await? {
Some(claim_queue) => {
// Runtime supports claim queue - use it
for (_, claims) in claim_queue.iter_all_claims() {
for claim in claims {
upcoming.insert(*claim);
alindima marked this conversation as resolved.
Show resolved Hide resolved
}
if let Some(next_up_on_time_out) = occupied.next_up_on_time_out {
upcoming.insert(next_up_on_time_out.para_id);
}
},
None => {
// fallback to availability cores - remove this branch once claim queue is released
// everywhere
let (tx, rx) = oneshot::channel();
ctx.send_message(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::AvailabilityCores(tx),
))
.await;

let cores = rx.await.map_err(JfyiError::RuntimeApiRequestCanceled)??;
for core in cores {
match core {
CoreState::Occupied(occupied) => {
// core sharing won't work optimally with this branch because the collations
// can't be prepared in advance.
if let Some(next_up_on_available) = occupied.next_up_on_available {
upcoming.insert(next_up_on_available.para_id);
}
if let Some(next_up_on_time_out) = occupied.next_up_on_time_out {
upcoming.insert(next_up_on_time_out.para_id);
}
},
CoreState::Scheduled(scheduled) => {
upcoming.insert(scheduled.para_id);
},
CoreState::Free => {},
}
},
CoreState::Scheduled(scheduled) => {
upcoming.insert(scheduled.para_id);
},
CoreState::Free => {},
}
}
},
}

Ok(upcoming.into_iter().collect())
Expand Down
30 changes: 19 additions & 11 deletions polkadot/node/network/collator-protocol/src/collator_side/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ use polkadot_node_subsystem_util::{
get_availability_cores, get_group_rotation_info, prospective_parachains_mode,
ProspectiveParachainsMode, RuntimeInfo,
},
vstaging::fetch_claim_queue,
TimeoutExt,
};
use polkadot_primitives::{
Expand Down Expand Up @@ -579,19 +580,26 @@ async fn determine_cores(
let cores = get_availability_cores(sender, relay_parent).await?;
let n_cores = cores.len();
let mut assigned_cores = Vec::new();
let maybe_claim_queue = fetch_claim_queue(sender, relay_parent).await?;

for (idx, core) in cores.iter().enumerate() {
let core_para_id = match core {
CoreState::Scheduled(scheduled) => Some(scheduled.para_id),
CoreState::Occupied(occupied) =>
if relay_parent_mode.is_enabled() {
// With async backing we don't care about the core state,
// it is only needed for figuring our validators group.
Some(occupied.candidate_descriptor.para_id)
} else {
None
},
CoreState::Free => None,
let core_para_id = match maybe_claim_queue {
Some(ref claim_queue) => {
//Runtime supports claim queue - use it
claim_queue.get_claim_for(CoreIndex::from(idx as u32), 0)
alindima marked this conversation as resolved.
Show resolved Hide resolved
},
None => match core {
CoreState::Scheduled(scheduled) => Some(scheduled.para_id),
CoreState::Occupied(occupied) =>
if relay_parent_mode.is_enabled() {
// With async backing we don't care about the core state,
// it is only needed for figuring our validators group.
occupied.next_up_on_available.as_ref().map(|c| c.para_id)
} else {
None
},
CoreState::Free => None,
},
};

if core_para_id == Some(para_id) {
Expand Down
39 changes: 28 additions & 11 deletions polkadot/node/network/collator-protocol/src/validator_side/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ use polkadot_node_subsystem_util::{
backing_implicit_view::View as ImplicitView,
reputation::{ReputationAggregator, REPUTATION_CHANGE_INTERVAL},
runtime::{prospective_parachains_mode, ProspectiveParachainsMode},
vstaging::fetch_claim_queue,
};
use polkadot_primitives::{
CandidateHash, CollatorId, CoreState, Hash, HeadData, Id as ParaId, OccupiedCoreAssumption,
Expand Down Expand Up @@ -491,23 +492,39 @@ where
.await
.map_err(Error::CancelledAvailabilityCores)??;

let para_now = match polkadot_node_subsystem_util::signing_key_and_index(&validators, keystore)
.and_then(|(_, index)| polkadot_node_subsystem_util::find_validator_group(&groups, index))
{
Some(group) => {
let core_now = rotation_info.core_for_group(group, cores.len());
let core_now = if let Some(group) =
polkadot_node_subsystem_util::signing_key_and_index(&validators, keystore).and_then(
|(_, index)| polkadot_node_subsystem_util::find_validator_group(&groups, index),
) {
rotation_info.core_for_group(group, cores.len())
} else {
gum::trace!(target: LOG_TARGET, ?relay_parent, "Not a validator");
return Ok(())
};

let para_now = match fetch_claim_queue(sender, relay_parent).await.map_err(Error::Runtime)? {
Some(claim_queue) => {
// Runtime supports claim queue - use it
//
// `relay_parent_mode` is not examined here because if the runtime supports claim queue
// then it supports async backing params too (`ASYNC_BACKING_STATE_RUNTIME_REQUIREMENT`
// < `CLAIM_QUEUE_RUNTIME_REQUIREMENT`).
//
// Implementation note: here it might be better to fetch the whole claim queue for the
// core and use it as `paras_now`. Practical tests however showed that there is no
// benefit in doing this so to avoid unnecessary complexity we just only get the first
// claim from the queue.
claim_queue.get_claim_for(core_now, 0)
alindima marked this conversation as resolved.
Show resolved Hide resolved
},
None => {
// Claim queue is not supported by the runtime - use availability cores instead.
cores.get(core_now.0 as usize).and_then(|c| match c {
CoreState::Occupied(core) if relay_parent_mode.is_enabled() => Some(core.para_id()),
CoreState::Occupied(core) if relay_parent_mode.is_enabled() =>
core.next_up_on_available.as_ref().map(|c| c.para_id),
CoreState::Scheduled(core) => Some(core.para_id),
CoreState::Occupied(_) | CoreState::Free => None,
})
},
None => {
gum::trace!(target: LOG_TARGET, ?relay_parent, "Not a validator");

return Ok(())
},
};

// This code won't work well, if at all for on-demand parachains. For on-demand we'll
Expand Down
5 changes: 5 additions & 0 deletions polkadot/node/subsystem-util/src/vstaging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ impl ClaimQueueSnapshot {
.iter()
.filter_map(move |(core_index, paras)| Some((*core_index, *paras.get(depth)?)))
}

/// Returns an iterator over the whole claim queue.
///
/// Each item is a `(core index, claims)` pair, where `claims` is a borrowed queue of the
/// para ids claimed on that core. Nothing is filtered or copied: this simply forwards to the
/// wrapped collection's `iter()`, so items are yielded in whatever order that collection
/// iterates and they borrow from `self`.
pub fn iter_all_claims(&self) -> impl Iterator<Item = (&CoreIndex, &VecDeque<ParaId>)> + '_ {
self.0.iter()
}
}

// TODO: https://github.com/paritytech/polkadot-sdk/issues/1940
Expand Down
48 changes: 48 additions & 0 deletions polkadot/zombienet_tests/smoke/0005-coretime-shared-core.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
[settings]
timeout = 1000

[relaychain.genesis.runtimeGenesis.patch.configuration.config.async_backing_params]
max_candidate_depth = 2
allowed_ancestry_len = 2

[relaychain.genesis.runtimeGenesis.patch.configuration.config.scheduler_params]
max_validators_per_core = 1
lookahead = 3 # should be set to 2 once https://github.com/paritytech/polkadot-sdk/pull/4691 is merged
num_cores = 4

[relaychain.genesis.runtimeGenesis.patch.configuration.config.approval_voting_params]
needed_approvals = 3
max_approval_coalesce_count = 5

[relaychain]
default_image = "{{ZOMBIENET_INTEGRATION_TEST_IMAGE}}"
chain = "rococo-local"
command = "polkadot"

[[relaychain.nodes]]
name = "alice"
args = ["" ]

[[relaychain.node_groups]]
name = "validator"
args = ["-lruntime=debug,parachain=trace,runtime::parachains::scheduler=trace,runtime::inclusion-inherent=trace,runtime::inclusion=trace" ]
count = 8

{% for id in range(2000,2002) %}
[[parachains]]
id = {{id}}
addToGenesis = true
cumulus_based = true
chain = "glutton-westend-local-{{id}}"
[parachains.genesis.runtimeGenesis.patch.glutton]
compute = "50000000"
storage = "2500000000"
trashDataCount = 5120

[parachains.collator]
name = "collator"
image = "{{CUMULUS_IMAGE}}"
command = "polkadot-parachain"
args = ["-lparachain=debug"]

{% endfor %}
23 changes: 23 additions & 0 deletions polkadot/zombienet_tests/smoke/0005-coretime-shared-core.zndsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Description: CT shared core test
Network: ./0005-coretime-shared-core.toml
Creds: config

# Wait for `alice`, `collator` and `collator-1` to report as up.
alice: is up
collator: is up
collator-1: is up

# configure relay chain
# Runs ./assign-core.js against alice and expects it to return 0 within 600 seconds.
alice: js-script ./assign-core.js return is 0 within 600 seconds

# NOTE(review): the assertions below are commented out and name `collator-single-core` /
# `collator-elastic`, while the live checks above use `collator` / `collator-1`. Presumably
# they were copied from another test and still need adapting - confirm.
#collator-single-core: reports block height is at least 20 within 225 seconds
#collator-elastic: reports block height is at least 20 within 225 seconds

#collator-elastic: restart after 30 seconds
#sleep 10 seconds
#collator-single-core: restart after 30 seconds

#collator-single-core: reports block height is at least 40 within 225 seconds
#collator-elastic: reports block height is at least 80 within 225 seconds


# NOTE(review): 43200 seconds is 12 hours. With no assertion after it, this presumably only
# keeps the network alive for manual inspection - confirm it is not meant to ship as is.
sleep 43200 seconds
49 changes: 49 additions & 0 deletions polkadot/zombienet_tests/smoke/assign-core-parts.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/**
 * Assign a parachain to a core.
 *
 * Expected `args`:
 *   args[0] - the parachain id,
 *   args[1] - the core,
 *   args[2] - the PartsOf57600 assigned for the parachain id.
 *
 * Connects to the node called `nodeName`, submits a sudo-wrapped
 * `coretime.assignCore` extrinsic signed by `//Alice` and waits until it is
 * finalized.
 *
 * @returns {Promise<number>} resolves to 0 once the transaction is finalized;
 *   rejects if the submission fails or the transaction reports an error.
 */
async function run(nodeName, networkInfo, args) {
  const { wsUri, userDefinedTypes } = networkInfo.nodesByName[nodeName];
  const api = await zombie.connect(wsUri, userDefinedTypes);

  const para = Number(args[0]);
  const core = Number(args[1]);
  const parts_of_57600 = Number(args[2]);

  console.log(`Assigning para ${para} to core ${core} parts_of_57600 ${parts_of_57600}`);

  await zombie.util.cryptoWaitReady();

  // Submit the transaction with the Alice account.
  const keyring = new zombie.Keyring({ type: "sr25519" });
  const alice = keyring.addFromUri("//Alice");

  // Wait for this transaction to be finalized in a block.
  //
  // The executor is intentionally not `async`: a rejection thrown inside an
  // async executor is never forwarded to `reject`, so a failed submission
  // would leave this promise pending forever instead of failing the script.
  await new Promise((resolve, reject) => {
    // Replaced by the real unsubscribe handle once the subscription exists.
    let unsub = () => {};

    api.tx.sudo
      .sudo(api.tx.coretime.assignCore(core, 0, [[{ task: para }, parts_of_57600]], null))
      .signAndSend(alice, ({ status, isError }) => {
        if (status.isInBlock) {
          console.log(
            `Transaction included at blockhash ${status.asInBlock}`,
          );
        } else if (status.isFinalized) {
          console.log(
            `Transaction finalized at blockHash ${status.asFinalized}`,
          );
          unsub();
          resolve();
        } else if (isError) {
          console.log(`Transaction error`);
          // Release the subscription on the failure path as well.
          unsub();
          reject(`Transaction error`);
        }
      })
      .then((handle) => {
        unsub = handle;
      })
      .catch(reject);
  });

  return 0;
}

module.exports = { run };
59 changes: 59 additions & 0 deletions polkadot/zombienet_tests/smoke/assign-core.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/**
 * Submit `call` wrapped in `sudo.sudo`, signed by `signer`, and wait until the
 * transaction is finalized. `label` prefixes every log and error message.
 *
 * The promise executor is intentionally not `async`: a rejection thrown inside
 * an async executor is never forwarded to `reject`, so a failed submission
 * would leave the promise pending forever instead of failing the script.
 */
function sudoAndWaitForFinalization(api, signer, call, label) {
  return new Promise((resolve, reject) => {
    // Replaced by the real unsubscribe handle once the subscription exists.
    let unsub = () => {};

    api.tx.sudo
      .sudo(call)
      .signAndSend(signer, ({ status, isError }) => {
        if (status.isInBlock) {
          console.log(
            `${label} transaction included at blockhash ${status.asInBlock}`,
          );
        } else if (status.isFinalized) {
          console.log(
            `${label} transaction finalized at blockHash ${status.asFinalized}`,
          );
          unsub();
          resolve();
        } else if (isError) {
          console.log(`${label} error`);
          // Release the subscription on the failure path as well.
          unsub();
          reject(`${label} error`);
        }
      })
      .then((handle) => {
        unsub = handle;
      })
      .catch(reject);
  });
}

/**
 * Assign parachains 2000 and 2001 to core 0.
 *
 * Runs two sudo transactions in sequence, each awaited until finalization:
 *   1. `system.killPrefix` on a hard-coded storage prefix (limit 10).
 *      NOTE(review): presumably this clears the existing core assignment
 *      state - confirm which storage item the prefix addresses.
 *   2. `coretime.assignCore` for core 0, giving tasks 2000 and 2001 a share of
 *      28800 parts each.
 *
 * @returns {Promise<number>} resolves to 0 once both transactions are
 *   finalized; rejects if either submission fails or reports an error.
 */
async function run(nodeName, networkInfo, args) {
  const { wsUri, userDefinedTypes } = networkInfo.nodesByName[nodeName];
  const api = await zombie.connect(wsUri, userDefinedTypes);

  await zombie.util.cryptoWaitReady();

  // Submit the transactions with the Alice account.
  const keyring = new zombie.Keyring({ type: "sr25519" });
  const alice = keyring.addFromUri("//Alice");

  // Wait for this transaction to be finalized in a block.
  await sudoAndWaitForFinalization(
    api,
    alice,
    api.tx.system.killPrefix("0x638595eebaa445ce03a13547bece90e704e6ac775a3245623103ffec2cb2c92f", 10),
    "killPrefix",
  );

  // Wait for this transaction to be finalized in a block.
  await sudoAndWaitForFinalization(
    api,
    alice,
    api.tx.coretime.assignCore(0, 0, [[{ task: 2000 }, 28800], [{ task: 2001 }, 28800]], null),
    "assignCore",
  );

  return 0;
}

module.exports = { run };
Loading