Skip to content

Commit

Permalink
Configure hist. cache size on startup (#6346)
Browse files Browse the repository at this point in the history
  • Loading branch information
maxtropets authored Jul 12, 2024
1 parent 91d4580 commit 17c4776
Show file tree
Hide file tree
Showing 13 changed files with 159 additions and 19 deletions.
4 changes: 4 additions & 0 deletions .daily_canary
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@
/--x-m- /--n-n---xXx--/--yY------>>>----<<<>>]]{{}}---||-/\---..
2024__
!..!

,--.
( o>
//\\
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- The `cchost` configuration file now includes an `idle_connection_timeout` option. This controls how long the node will keep idle connections (for user TLS sessions) before automatically closing them. This may be set to `null` to restore the previous behaviour, where idle connections are never closed. By default connections will be closed after 60s of idle time.
- New endpoints `GET /gov/service/javascript-modules` and `GET /gov/service/javascript-modules/{moduleName}` to retrieve the raw JS code of the currently installed app. Note that the `{moduleName}` path parameter will need to be URL-encoded to escape any `/` characters (eg - `/foo/bar.js` should become `%2Ffoo%2Fbar.js`).
- New gov API version `2024-07-01`. This is near-identical to `2023-06-01-preview`, but additionally offers the new `javascript-modules` endpoints.
- The historical cache soft limit is now a node-specific startup parameter (`historical_cache_soft_limit` in the `cchost` configuration file).

### Changed

Expand Down
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1576,6 +1576,11 @@ if(BUILD_TESTS)
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_perf.py
LABEL perf PERF_LABEL "Historical Queries"
)

add_e2e_test(
NAME historical_query_cache_test
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_cache.py
)
endif()
endif()

Expand Down
5 changes: 5 additions & 0 deletions doc/host_config_schema/cchost_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,11 @@
"type": "integer",
"description": "Cap at which node-to-node message channels will be closed, and a new channel will be created. Can be used to limit use of single cryptographic key",
"minimum": 0
},
"historical_cache_soft_limit": {
"type": "string",
"default": "512MB",
"description": "Historical queries cache soft limit (as size string)"
}
},
"required": ["enclave", "network", "command"],
Expand Down
2 changes: 2 additions & 0 deletions include/ccf/node/startup_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ struct CCFConfig
// 2**24.5 as per RFC8446 Section 5.5
size_t node_to_node_message_limit = 23'726'566;

ccf::ds::SizeString historical_cache_soft_limit = {"512MB"};

ccf::consensus::Configuration consensus = {};
ccf::NodeInfoNetwork network = {};

Expand Down
16 changes: 0 additions & 16 deletions samples/apps/logging/logging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,12 +460,6 @@ namespace loggingapp
PUBLIC_RECORDS, context, 10000, 20);
context.get_indexing_strategies().install_strategy(index_per_public_key);

// According to manual observation it's enough to start evicting old
// requests on historical perf test, but not too small to get stuck
// because of a single request being larger than the cache.
constexpr size_t cache_limit = 1024 * 1024 * 10; // MB
context.get_historical_state().set_soft_cache_limit(cache_limit);

const ccf::AuthnPolicies auth_policies = {
ccf::jwt_auth_policy,
ccf::user_cert_auth_policy,
Expand Down Expand Up @@ -1663,11 +1657,6 @@ namespace loggingapp
ccf::http::headers::CONTENT_TYPE,
ccf::http::headervalues::contenttype::JSON);
ctx.rpc_ctx->set_response_body(j_response.dump());

// ALSO: Assume this response makes it all the way to the client, and
// they're finished with it, so we can drop the retrieved state. In a
// real app this may be driven by a separate client request or an LRU
historical_cache.drop_cached_states(handle);
};
make_endpoint(
get_historical_range_path,
Expand Down Expand Up @@ -1834,11 +1823,6 @@ namespace loggingapp
ccf::http::headers::CONTENT_TYPE,
ccf::http::headervalues::contenttype::JSON);
ctx.rpc_ctx->set_response_body(j_response.dump());

// ALSO: Assume this response makes it all the way to the client, and
// they're finished with it, so we can drop the retrieved state. In a
// real app this may be driven by a separate client request or an LRU
historical_cache.drop_cached_states(handle);
};
make_endpoint(
get_historical_sparse_path,
Expand Down
3 changes: 2 additions & 1 deletion src/common/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ DECLARE_JSON_OPTIONAL_FIELDS(
ledger_signatures,
jwt,
attestation,
node_to_node_message_limit);
node_to_node_message_limit,
historical_cache_soft_limit);

DECLARE_JSON_TYPE(StartupConfig::Start);
DECLARE_JSON_REQUIRED_FIELDS(
Expand Down
3 changes: 3 additions & 0 deletions src/enclave/enclave.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,9 @@ namespace ccf

node->set_n2n_message_limit(ccf_config_.node_to_node_message_limit);

historical_state_cache->set_soft_cache_limit(
ccf_config_.historical_cache_soft_limit);

// If we haven't heard from a node for multiple elections, then cleanup
// their node-to-node channel
const auto idle_timeout =
Expand Down
2 changes: 1 addition & 1 deletion src/node/historical_queries.h
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ namespace ccf::historical
std::unordered_map<SeqNo, std::set<CompoundHandle>> store_to_requests;
std::unordered_map<ccf::SeqNo, size_t> raw_store_sizes{};

CacheSize soft_store_cache_limit{1ll * 1024 * 1024 * 512 /*512 MB*/};
CacheSize soft_store_cache_limit{std::numeric_limits<size_t>::max()};
CacheSize soft_store_cache_limit_raw =
soft_store_cache_limit / soft_to_raw_ratio;
CacheSize estimated_store_cache_size{0};
Expand Down
3 changes: 2 additions & 1 deletion tests/config.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,6 @@
"max_fragment_size": "256KB"
},
"ignore_first_sigterm": {{ ignore_first_sigterm|tojson }}{% if node_to_node_message_limit %},
"node_to_node_message_limit": {{ node_to_node_message_limit|tojson }}{% endif %}
"node_to_node_message_limit": {{ node_to_node_message_limit|tojson }}{% endif %}{% if historical_cache_soft_limit %},
"historical_cache_soft_limit": {{ historical_cache_soft_limit|tojson }}{% endif %}
}
131 changes: 131 additions & 0 deletions tests/historical_query_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache 2.0 License.
import infra.e2e_args
import infra.network
import infra.proc
import infra.commit
import http
from infra.snp import IS_SNP
import infra.jwt_issuer
import time
import infra.bencher

from loguru import logger as LOG

# Timeout in seconds: 10 when IS_SNP is true, 5 otherwise.
# NOTE(review): no use of this constant is visible in this file - confirm it
# is still needed.
DEFAULT_TIMEOUT_S = 10 if IS_SNP else 5


def format_message(idx):
    """Return the message for log entry ``idx``: a fixed verse followed by ``str(idx)``."""
    verse = """
Nodes whisper secrets,
Across vast digital realms,
Harmony in bits.
"""
    return verse + str(idx)


def submit_log_entry(primary, idx):
    """Post a public log entry with id ``idx`` to ``primary`` as ``user0``.

    Asserts that the request returned 200 OK and returns the ``(view, seqno)``
    pair taken from the response.
    """
    with primary.client("user0") as user_client:
        payload = {"id": idx, "msg": format_message(idx)}
        response = user_client.post("/app/log/public", payload, log_capture=None)
        assert response.status_code == http.HTTPStatus.OK
        return (response.view, response.seqno)


def get_and_verify_entry(client, idx, timeout=10):
    """Fetch the historical range for log entry ``idx`` and check its message.

    Polls ``/app/log/public/historical/range?id=<idx>``, following every
    ``@nextLink`` returned, until a page without ``@nextLink`` arrives. The
    first collected entry must then carry ``format_message(idx)``.

    :param client: object whose ``get(path, headers=...)`` returns a response
        exposing ``status_code`` and ``body`` (with a ``json()`` method).
    :param idx: id of the log entry to fetch.
    :param timeout: overall budget in seconds, shared by all polls and pages.
    :raises ValueError: on any status other than 200 OK or 202 Accepted.
    :raises TimeoutError: if the range is not fully retrieved within ``timeout``.
    """
    # Use the monotonic clock so the deadline cannot move when the wall clock
    # is adjusted while the test is running.
    deadline = time.monotonic() + timeout
    entries = []
    path = f"/app/log/public/historical/range?id={idx}"
    while time.monotonic() < deadline:
        r = client.get(path, headers={})
        if r.status_code == http.HTTPStatus.OK:
            j_body = r.body.json()
            entries += j_body["entries"]
            if "@nextLink" in j_body:
                path = j_body["@nextLink"]
                continue
            # No @nextLink means we've reached end of range
            assert entries, f"Historical range for id={idx} returned no entries"
            assert entries[0]["msg"] == format_message(idx)
            return
        if r.status_code == http.HTTPStatus.ACCEPTED:
            # Ignore retry-after header, retry soon
            time.sleep(0.1)
            continue
        raise ValueError(
            f"""
Unexpected status code from historical range query: {r.status_code}
{r.body}
"""
        )

    raise TimeoutError("Historical range not available")


def test_historical_query_stress_cache(network, args):
"""This test loads the historical cache good enough so it's force to
lru_shrink. We go over the range twice and make sure we're able to load new
entries after they get evicted from the cache."""

jwt_issuer = infra.jwt_issuer.JwtIssuer()
jwt_issuer.register(network)
jwt = jwt_issuer.issue_jwt()

primary, _ = network.find_primary()

start = 1
end = 100
last_seqno = None
last_view = None
for i in range(start, end + 1):
last_view, last_seqno = submit_log_entry(primary, i)

with primary.client("user0") as c:
infra.commit.wait_for_commit(c, seqno=last_seqno, view=last_view, timeout=10)

network.wait_for_all_nodes_to_commit(primary=primary)
node = network.find_node_by_role(role=infra.network.NodeRole.BACKUP, log_capture=[])

with node.client(common_headers={"authorization": f"Bearer {jwt}"}) as c:
for cycle in range(0, 2):
LOG.info(f"Polling [{start}:{end + 1}] range. Attempt=[{cycle}]")
for idx in range(start, end + 1):
get_and_verify_entry(c, idx)

return network


def run(args):
    """Start and open a network built from ``args``, then run the historical
    cache stress test against it."""
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
    ) as net:
        net.start_and_open(args)
        test_historical_query_stress_cache(net, args)


if __name__ == "__main__":
    # This test registers no extra command-line options, so the parser hook
    # is a no-op.
    args = infra.e2e_args.cli_args(add=lambda parser: None)

    args.package = "samples/apps/logging/liblogging"
    args.nodes = infra.e2e_args.max_nodes(args, f=0)
    args.initial_member_count = 1
    args.sig_ms_interval = 1000  # Set to cchost default value

    # Start the nodes with a 10KB historical cache soft limit.
    args.historical_cache_soft_limit = "10KB"

    run(args)
1 change: 1 addition & 0 deletions tests/infra/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ class Network:
"acme",
"snp_endorsements_servers",
"node_to_node_message_limit",
"historical_cache_soft_limit",
"tick_ms",
"max_msg_size_bytes",
"snp_security_policy_file",
Expand Down
2 changes: 2 additions & 0 deletions tests/infra/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ def __init__(
snp_security_policy_file=None,
snp_uvm_endorsements_file=None,
service_subject_name="CN=CCF Test Service",
historical_cache_soft_limit=None,
**kwargs,
):
"""
Expand Down Expand Up @@ -823,6 +824,7 @@ def __init__(
snp_security_policy_file=snp_security_policy_file,
snp_uvm_endorsements_file=snp_uvm_endorsements_file,
service_subject_name=service_subject_name,
historical_cache_soft_limit=historical_cache_soft_limit,
**kwargs,
)

Expand Down

0 comments on commit 17c4776

Please sign in to comment.