From 564a54d758588a721db110240c5c918b4cfc3b5a Mon Sep 17 00:00:00 2001 From: stefan-mysten <135084671+stefan-mysten@users.noreply.github.com> Date: Fri, 17 May 2024 09:32:53 -0700 Subject: [PATCH] [GraphQL] Add a separate timeout field in ServiceConfig for mutations (#17742) ## Description Increases the timeout for mutation requests. ## Test plan Updated existing tests to also include this `mutation` timeout case. ``` cargo nextest run --features pg_integration -- tests::test_timeout ``` ![image](https://github.com/MystenLabs/sui/assets/135084671/3e9b9d2d-dac2-4eef-8976-f012d6bc44d9) --- ## Release notes Check each box that your changes affect. If none of the boxes relate to your changes, release notes aren't required. For each box you select, include information after the relevant heading that describes the impact of your changes that a user might notice and any actions they must take to implement updates. - [ ] Protocol: - [ ] Nodes (Validators and Full nodes): - [ ] Indexer: - [ ] JSON-RPC: - [x] GraphQL: Increased the `timeout` value for when executing a transaction block. - [ ] CLI: - [ ] Rust SDK: --- .../schema/current_progress_schema.graphql | 9 ++- crates/sui-graphql-rpc/src/config.rs | 26 ++++++- .../sui-graphql-rpc/src/extensions/timeout.rs | 45 +++++++++--- crates/sui-graphql-rpc/src/server/builder.rs | 72 ++++++++++++++++--- crates/sui-graphql-rpc/tests/e2e_tests.rs | 14 +++- .../snapshot_tests__schema_sdl_export.snap | 9 ++- 6 files changed, 152 insertions(+), 23 deletions(-) diff --git a/crates/sui-graphql-rpc/schema/current_progress_schema.graphql b/crates/sui-graphql-rpc/schema/current_progress_schema.graphql index 87f9440bc57f0..cb7aa0e80470f 100644 --- a/crates/sui-graphql-rpc/schema/current_progress_schema.graphql +++ b/crates/sui-graphql-rpc/schema/current_progress_schema.graphql @@ -3055,7 +3055,14 @@ type ServiceConfig { """ maxPageSize: Int! """ - Maximum time in milliseconds that will be spent to serve one request. + Maximum time in milliseconds spent waiting for a response from fullnode after issuing a + a transaction to execute. Note that the transaction may still succeed even in the case of a + timeout. Transactions are idempotent, so a transaction that times out should be resubmitted + until the network returns a definite response (success or failure, not timeout). + """ + mutationTimeoutMs: Int! + """ + Maximum time in milliseconds that will be spent to serve one query request. """ requestTimeoutMs: Int! """ diff --git a/crates/sui-graphql-rpc/src/config.rs b/crates/sui-graphql-rpc/src/config.rs index 8be80e6a896d4..326f9bb3ea75a 100644 --- a/crates/sui-graphql-rpc/src/config.rs +++ b/crates/sui-graphql-rpc/src/config.rs @@ -28,6 +28,15 @@ const MAX_MOVE_VALUE_DEPTH: u32 = 128; pub(crate) const DEFAULT_REQUEST_TIMEOUT_MS: u64 = 40_000; +/// The time to wait for a transaction to be executed such that the effects can be returned to the +/// GraphQL query. If the transaction takes longer than this time to execute, the query will return +/// a timeout error, but the transaction will continue its execution. +/// +/// It's the sum of pre+post quorum timeouts from [`sui_core::authority_aggregator::TimeoutConfig`] +/// plus a small buffer of 10% rounded up: 60 + 7 => round_up(67 * 1.1) = 74 +/// +pub(crate) const DEFAULT_MUTATION_TIMEOUT_MS: u64 = 74_000; + const DEFAULT_IDE_TITLE: &str = "Sui GraphQL IDE"; pub(crate) const RPC_TIMEOUT_ERR_SLEEP_RETRY_PERIOD: Duration = Duration::from_millis(10_000); @@ -126,6 +135,8 @@ pub struct Limits { #[serde(default)] pub max_page_size: u64, #[serde(default)] + pub mutation_timeout_ms: u64, + #[serde(default)] pub request_timeout_ms: u64, #[serde(default)] pub max_type_argument_depth: u32, @@ -300,7 +311,15 @@ impl ServiceConfig { self.limits.max_page_size } - /// Maximum time in milliseconds that will be spent to serve one request. + /// Maximum time in milliseconds spent waiting for a response from fullnode after issuing a + /// a transaction to execute. Note that the transaction may still succeed even in the case of a + /// timeout. Transactions are idempotent, so a transaction that times out should be resubmitted + /// until the network returns a definite response (success or failure, not timeout). + async fn mutation_timeout_ms(&self) -> u64 { + self.limits.mutation_timeout_ms + } + + /// Maximum time in milliseconds that will be spent to serve one query request. async fn request_timeout_ms(&self) -> u64 { self.limits.request_timeout_ms } @@ -483,6 +502,7 @@ impl Default for Limits { max_db_query_cost: MAX_DB_QUERY_COST, default_page_size: DEFAULT_PAGE_SIZE, max_page_size: MAX_PAGE_SIZE, + mutation_timeout_ms: DEFAULT_MUTATION_TIMEOUT_MS, request_timeout_ms: DEFAULT_REQUEST_TIMEOUT_MS, max_type_argument_depth: MAX_TYPE_ARGUMENT_DEPTH, max_type_argument_width: MAX_TYPE_ARGUMENT_WIDTH, @@ -537,6 +557,7 @@ mod tests { max-db-query-cost = 50 default-page-size = 20 max-page-size = 50 + mutation-timeout-ms = 74000 request-timeout-ms = 27000 max-type-argument-depth = 32 max-type-argument-width = 64 @@ -555,6 +576,7 @@ mod tests { max_db_query_cost: 50, default_page_size: 20, max_page_size: 50, + mutation_timeout_ms: 74_000, request_timeout_ms: 27_000, max_type_argument_depth: 32, max_type_argument_width: 64, @@ -617,6 +639,7 @@ mod tests { max-db-query-cost = 20 default-page-size = 10 max-page-size = 20 + mutation-timeout-ms = 74000 request-timeout-ms = 30000 max-type-argument-depth = 32 max-type-argument-width = 64 @@ -638,6 +661,7 @@ mod tests { max_db_query_cost: 20, default_page_size: 10, max_page_size: 20, + mutation_timeout_ms: 74_000, request_timeout_ms: 30_000, max_type_argument_depth: 32, max_type_argument_width: 64, diff --git a/crates/sui-graphql-rpc/src/extensions/timeout.rs b/crates/sui-graphql-rpc/src/extensions/timeout.rs index 7e206cf3a020a..871521de6fe35 100644 --- a/crates/sui-graphql-rpc/src/extensions/timeout.rs +++ b/crates/sui-graphql-rpc/src/extensions/timeout.rs @@ -3,11 +3,14 @@ use async_graphql::{ extensions::{Extension, ExtensionContext, ExtensionFactory, NextExecute, NextParseQuery}, - parser::types::ExecutableDocument, + parser::types::{ExecutableDocument, OperationType}, Response, ServerError, ServerResult, }; use async_graphql_value::Variables; -use std::sync::Mutex; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Mutex, +}; use std::time::Duration; use std::{net::SocketAddr, sync::Arc}; use tokio::time::timeout; @@ -22,12 +25,14 @@ pub(crate) struct Timeout; #[derive(Debug, Default)] struct TimeoutExt { pub query: Mutex>, + pub is_mutation: AtomicBool, } impl ExtensionFactory for Timeout { fn create(&self) -> Arc { Arc::new(TimeoutExt { query: Mutex::new(None), + is_mutation: AtomicBool::new(false), }) } } @@ -42,7 +47,12 @@ impl Extension for TimeoutExt { next: NextParseQuery<'_>, ) -> ServerResult { let document = next.run(ctx, query, variables).await?; - *self.query.lock().unwrap() = Some(ctx.stringify_execute_doc(&document, variables)); + let is_mutation = document + .operations + .iter() + .any(|(_, operation)| operation.node.ty == OperationType::Mutation); + self.is_mutation.store(is_mutation, Ordering::Relaxed); + Ok(document) } @@ -52,10 +62,18 @@ impl Extension for TimeoutExt { operation_name: Option<&str>, next: NextExecute<'_>, ) -> Response { - let cfg = ctx - .data::() + let cfg: &ServiceConfig = ctx + .data() .expect("No service config provided in schema data"); - let request_timeout = Duration::from_millis(cfg.limits.request_timeout_ms); + + // increase the timeout if the request is a mutation + let is_mutation = self.is_mutation.load(Ordering::Relaxed); + let request_timeout = if is_mutation { + Duration::from_millis(cfg.limits.mutation_timeout_ms) + } else { + Duration::from_millis(cfg.limits.request_timeout_ms) + }; + timeout(request_timeout, next.run(ctx, operation_name)) .await .unwrap_or_else(|_| { @@ -74,13 +92,18 @@ impl Extension for TimeoutExt { %error_code, %query ); - Response::from_errors(vec![ServerError::new( + let error_msg = if is_mutation { format!( - "Request timed out. Limit: {}s", + "Mutation request timed out. Limit: {}s", request_timeout.as_secs_f32() - ), - None, - )]) + ) + } else { + format!( + "Query request timed out. Limit: {}s", + request_timeout.as_secs_f32() + ) + }; + Response::from_errors(vec![ServerError::new(error_msg, None)]) }) } } diff --git a/crates/sui-graphql-rpc/src/server/builder.rs b/crates/sui-graphql-rpc/src/server/builder.rs index 3a40334464548..a3a876e665683 100644 --- a/crates/sui-graphql-rpc/src/server/builder.rs +++ b/crates/sui-graphql-rpc/src/server/builder.rs @@ -577,8 +577,7 @@ pub mod tests { use crate::{ config::{ConnectionConfig, Limits, ServiceConfig, Version}, context_data::db_data_provider::PgManager, - extensions::query_limits_checker::QueryLimitsChecker, - extensions::timeout::Timeout, + extensions::{query_limits_checker::QueryLimitsChecker, timeout::Timeout}, }; use async_graphql::{ extensions::{Extension, ExtensionContext, NextExecute}, @@ -586,6 +585,8 @@ pub mod tests { }; use std::sync::Arc; use std::time::Duration; + use sui_sdk::{wallet_context::WalletContext, SuiClient}; + use sui_types::transaction::TransactionData; use uuid::Uuid; /// Prepares a schema for tests dealing with extensions. Returns a `ServerBuilder` that can be @@ -641,7 +642,7 @@ pub mod tests { Uuid::new_v4() } - pub async fn test_timeout_impl() { + pub async fn test_timeout_impl(wallet: WalletContext) { struct TimedExecuteExt { pub min_req_delay: Duration, } @@ -667,37 +668,92 @@ pub mod tests { } } - async fn test_timeout(delay: Duration, timeout: Duration) -> Response { + async fn test_timeout( + delay: Duration, + timeout: Duration, + query: &str, + sui_client: &SuiClient, + ) -> Response { let mut cfg = ServiceConfig::default(); cfg.limits.request_timeout_ms = timeout.as_millis() as u64; + cfg.limits.mutation_timeout_ms = timeout.as_millis() as u64; let schema = prep_schema(None, Some(cfg)) + .context_data(Some(sui_client.clone())) .extension(Timeout) .extension(TimedExecuteExt { min_req_delay: delay, }) .build_schema(); - schema.execute("{ chainIdentifier }").await + schema.execute(query).await } + let query = "{ chainIdentifier }"; let timeout = Duration::from_millis(1000); let delay = Duration::from_millis(100); + let sui_client = wallet.get_client().await.unwrap(); - test_timeout(delay, timeout) + test_timeout(delay, timeout, query, &sui_client) .await .into_result() .expect("Should complete successfully"); // Should timeout - let errs: Vec<_> = test_timeout(delay, delay) + let errs: Vec<_> = test_timeout(delay, delay, query, &sui_client) .await .into_result() .unwrap_err() .into_iter() .map(|e| e.message) .collect(); - let exp = format!("Request timed out. Limit: {}s", delay.as_secs_f32()); + let exp = format!("Query request timed out. Limit: {}s", delay.as_secs_f32()); + assert_eq!(errs, vec![exp]); + + // Should timeout for mutation + // Create a transaction and sign it, and use the tx_bytes + signatures for the GraphQL + // executeTransactionBlock mutation call. + let addresses = wallet.get_addresses(); + let gas = wallet + .get_one_gas_object_owned_by_address(addresses[0]) + .await + .unwrap(); + let tx_data = TransactionData::new_transfer_sui( + addresses[1], + addresses[0], + Some(1000), + gas.unwrap(), + 1_000_000, + wallet.get_reference_gas_price().await.unwrap(), + ); + + let tx = wallet.sign_transaction(&tx_data); + let (tx_bytes, signatures) = tx.to_tx_bytes_and_signatures(); + + let signature_base64 = &signatures[0]; + let query = format!( + r#" + mutation {{ + executeTransactionBlock(txBytes: "{}", signatures: "{}") {{ + effects {{ + status + }} + }} + }}"#, + tx_bytes.encoded(), + signature_base64.encoded() + ); + let errs: Vec<_> = test_timeout(delay, delay, &query, &sui_client) + .await + .into_result() + .unwrap_err() + .into_iter() + .map(|e| e.message) + .collect(); + let exp = format!( + "Mutation request timed out. Limit: {}s", + delay.as_secs_f32() + ); assert_eq!(errs, vec![exp]); } diff --git a/crates/sui-graphql-rpc/tests/e2e_tests.rs b/crates/sui-graphql-rpc/tests/e2e_tests.rs index 621b347b89160..5cc840e8edbf0 100644 --- a/crates/sui-graphql-rpc/tests/e2e_tests.rs +++ b/crates/sui-graphql-rpc/tests/e2e_tests.rs @@ -852,7 +852,19 @@ mod tests { #[tokio::test] #[serial] async fn test_timeout() { - test_timeout_impl().await; + let _guard = telemetry_subscribers::TelemetryConfig::new() + .with_env() + .init(); + let connection_config = ConnectionConfig::ci_integration_test_cfg(); + let cluster = + sui_graphql_rpc::test_infra::cluster::start_cluster(connection_config, None).await; + cluster + .wait_for_checkpoint_catchup(0, Duration::from_secs(10)) + .await; + // timeout test includes mutation timeout, which requies a [SuiClient] to be able to run + // the test, and a transaction. [WalletContext] gives access to everything that's needed. + let wallet = cluster.validator_fullnode_handle.wallet; + test_timeout_impl(wallet).await; } #[tokio::test] diff --git a/crates/sui-graphql-rpc/tests/snapshots/snapshot_tests__schema_sdl_export.snap b/crates/sui-graphql-rpc/tests/snapshots/snapshot_tests__schema_sdl_export.snap index 2543a258a46ae..93b5cf533a75a 100644 --- a/crates/sui-graphql-rpc/tests/snapshots/snapshot_tests__schema_sdl_export.snap +++ b/crates/sui-graphql-rpc/tests/snapshots/snapshot_tests__schema_sdl_export.snap @@ -3059,7 +3059,14 @@ type ServiceConfig { """ maxPageSize: Int! """ - Maximum time in milliseconds that will be spent to serve one request. + Maximum time in milliseconds spent waiting for a response from fullnode after issuing a + a transaction to execute. Note that the transaction may still succeed even in the case of a + timeout. Transactions are idempotent, so a transaction that times out should be resubmitted + until the network returns a definite response (success or failure, not timeout). + """ + mutationTimeoutMs: Int! + """ + Maximum time in milliseconds that will be spent to serve one query request. """ requestTimeoutMs: Int! """