Skip to content
This repository has been archived by the owner on Dec 26, 2022. It is now read-only.

Commit

Permalink
Add retry logic to grapl-web integration tests on 500 errors (#2066)
Browse files Browse the repository at this point in the history
  • Loading branch information
inickles-grapl authored Oct 19, 2022
1 parent 5995041 commit 2e5b328
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 58 deletions.
6 changes: 4 additions & 2 deletions src/rust/grapl-web-ui/tests/api/auth/check_login.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
async fn auth_unauthenticated_check_login() -> eyre::Result<()> {
let app = crate::test_app::TestApp::init().await?;

let response = app.post("api/auth/checkLogin").send().await?;
let request = app.post("api/auth/checkLogin");
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::UNAUTHORIZED,
Expand All @@ -19,7 +20,8 @@ async fn auth_authenticated_check_login() -> eyre::Result<()> {

app.login_with_test_user().await?;

let response = app.post("api/auth/checkLogin").send().await?;
let request = app.post("api/auth/checkLogin");
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::OK,
Expand Down
28 changes: 12 additions & 16 deletions src/rust/grapl-web-ui/tests/api/auth/sign_in_with_password.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@
async fn auth_password_incorrect_password() -> eyre::Result<()> {
let app = crate::test_app::TestApp::init().await?;

let response = app
let request = app
.post("api/auth/sign_in_with_password")
.json(&serde_json::json!({
"username": app.test_user.username,
"password": "nope",
}))
.send()
.await?;
}));
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::UNAUTHORIZED,
Expand All @@ -24,14 +23,13 @@ async fn auth_password_incorrect_password() -> eyre::Result<()> {
async fn auth_password_nonexistent_user() -> eyre::Result<()> {
let app = crate::test_app::TestApp::init().await?;

let response = app
let request = app
.post("api/auth/sign_in_with_password")
.json(&serde_json::json!({
"username": "nope",
"password": "nope",
}))
.send()
.await?;
}));
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::UNAUTHORIZED,
Expand All @@ -46,14 +44,13 @@ async fn auth_password_nonexistent_user() -> eyre::Result<()> {
async fn auth_password_empty_creds() -> eyre::Result<()> {
let app = crate::test_app::TestApp::init().await?;

let response = app
let request = app
.post("api/auth/sign_in_with_password")
.json(&serde_json::json!({
"username": "",
"password": "",
}))
.send()
.await?;
}));
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::UNAUTHORIZED,
Expand All @@ -68,14 +65,13 @@ async fn auth_password_empty_creds() -> eyre::Result<()> {
async fn auth_password_success() -> eyre::Result<()> {
let app = crate::test_app::TestApp::init().await?;

let response = app
let request = app
.post("api/auth/sign_in_with_password")
.json(&serde_json::json!({
"username": app.test_user.username,
"password": app.test_user.password,
}))
.send()
.await?;
}));
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::OK,
Expand Down
4 changes: 0 additions & 4 deletions src/rust/grapl-web-ui/tests/api/ingress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ async fn publish_log() -> eyre::Result<()> {

let create_response = crate::plugin::create_plugin(&app, plugin_name).await?;

//TODO: this shouldn't be necessary, but we're seeing 500 errors without it.
// I'll file a task to look into it for now so we can unblock frontend work.
std::thread::sleep(std::time::Duration::from_secs(5));

let plugin_metadata =
crate::plugin::get_plugin_metadata(&app, &create_response.plugin_id).await?;

Expand Down
92 changes: 56 additions & 36 deletions src/rust/grapl-web-ui/tests/api/plugin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ async fn plugin_lifecycle() -> eyre::Result<()> {

let create_response = create_plugin(&app, plugin_name).await?;

//TODO: this shouldn't be necessary, but we're seeing 500 errors without it.
// I'll file a task to look into it for now so we can unblock frontend work.
std::thread::sleep(std::time::Duration::from_secs(5));

let plugin_metadata = get_plugin_metadata(&app, &create_response.plugin_id).await?;

eyre::ensure!(
Expand All @@ -59,10 +55,6 @@ async fn plugin_lifecycle() -> eyre::Result<()> {
"plugin health expected to be 'not_deployed'"
);

//TODO: this shouldn't be necessary, but we're seeing 500 errors without it.
// I'll file a task to look into it for now so we can unblock frontend work.
std::thread::sleep(std::time::Duration::from_secs(5));

deploy_plugin(&app, &plugin_id).await?;

let deployment_status = get_deployment(&app, &plugin_id).await?;
Expand Down Expand Up @@ -103,10 +95,52 @@ async fn plugin_lifecycle() -> eyre::Result<()> {
}

pub async fn create_plugin(app: &TestApp, plugin_name: &str) -> eyre::Result<CreateResponse> {
// This includes retry logic that is very similar to, and for the same reasons as,
// TestApp::send_with_retries. We deplicate that logic here because we cannot clone
// the POST body.
//
// This is a (hopefully temporary) mitigation around intermittent errors we're getting from
// the Consul sidecar in Nomad.
// See: https://github.com/grapl-security/issue-tracker/issues/1008
let mut response = _create_plugin(app, plugin_name).await?;

let num_retries = 10;
for _ in 1..num_retries {
let status_code = response.status().as_u16();

if status_code >= 500 && status_code <= 599 {
// We recevied a 500 error, wait a moment before trying the request again
println!("Error: {:?}", response);

let one_sec = std::time::Duration::from_secs(1);
std::thread::sleep(one_sec);

response = _create_plugin(app, plugin_name).await?;

continue;
} else {
break;
}
}

eyre::ensure!(
response.status() == actix_web::http::StatusCode::OK,
"unexpected response: {:?}",
&response
);

let response_body = response.json::<CreateResponse>().await?;

println!("create response body: {:?}", response_body);

Ok(response_body)
}

async fn _create_plugin(app: &TestApp, plugin_name: &str) -> eyre::Result<reqwest::Response> {
let create_metadata_body = serde_json::json!({
"plugin_name": plugin_name,
"plugin_type": "generator",
"event_source_id": uuid::Uuid::new_v4()
"plugin_name": plugin_name,
"plugin_type": "generator",
"event_source_id": uuid::Uuid::new_v4()
});

let generator_bytes = e2e_tests::test_fixtures::get_sysmon_generator()?;
Expand All @@ -123,27 +157,15 @@ pub async fn create_plugin(app: &TestApp, plugin_name: &str) -> eyre::Result<Cre

let response = app.post("api/plugin/create").multipart(form).send().await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::OK,
"unexpected response: {:?}",
&response
);

let response_body = response.json::<CreateResponse>().await?;

println!("create response body: {:?}", response_body);

Ok(response_body)
Ok(response)
}

pub async fn get_plugin_metadata(
app: &TestApp,
plugin_id: &uuid::Uuid,
) -> eyre::Result<GetPluginMetadataResponse> {
let response = app
.get(format!("api/plugin/get_metadata?plugin_id={plugin_id}").as_str())
.send()
.await?;
let request = app.get(format!("api/plugin/get_metadata?plugin_id={plugin_id}").as_str());
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::OK,
Expand All @@ -163,7 +185,8 @@ async fn deploy_plugin(app: &TestApp, plugin_id: &uuid::Uuid) -> eyre::Result<()
"plugin_id": plugin_id,
});

let response = app.post("api/plugin/deploy").json(&body).send().await?;
let request = app.post("api/plugin/deploy").json(&body);
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::OK,
Expand All @@ -178,10 +201,8 @@ async fn get_deployment(
app: &TestApp,
plugin_id: &uuid::Uuid,
) -> eyre::Result<PluginDeploymentResponse> {
let response = app
.get(format!("api/plugin/get_deployment?plugin_id={plugin_id}").as_str())
.send()
.await?;
let request = app.get(format!("api/plugin/get_deployment?plugin_id={plugin_id}").as_str());
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::OK,
Expand All @@ -201,7 +222,8 @@ async fn tear_down(app: &TestApp, plugin_id: &uuid::Uuid) -> eyre::Result<()> {
"plugin_id": plugin_id,
});

let response = app.post("api/plugin/tear_down").json(&body).send().await?;
let request = app.post("api/plugin/tear_down").json(&body);
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::OK,
Expand All @@ -216,10 +238,8 @@ async fn get_health(
app: &TestApp,
plugin_id: &uuid::Uuid,
) -> eyre::Result<GetPluginHealthResponse> {
let response = app
.get(format!("api/plugin/get_health?plugin_id={plugin_id}").as_str())
.send()
.await?;
let request = app.get(format!("api/plugin/get_health?plugin_id={plugin_id}").as_str());
let response = app.send_with_retries(request).await?;

eyre::ensure!(
response.status() == actix_web::http::StatusCode::OK,
Expand Down
45 changes: 45 additions & 0 deletions src/rust/grapl-web-ui/tests/api/test_app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,51 @@ impl TestApp {
})
}

/// Send a reqwest::RequestBuilder and return immediately unless a 500 error was returned.
/// In that case, retry the request up to ten times before returning the last error response.
///
/// This is a (hopefully temporary) mitigation around intermittent errors we're getting from
/// the Consul sidecar in Nomad.
/// See: https://github.com/grapl-security/issue-tracker/issues/1008
pub async fn send_with_retries(
&self,
request: reqwest::RequestBuilder,
) -> eyre::Result<reqwest::Response> {
let num_retries = 10;
let mut response = request
.try_clone()
.ok_or_else(|| eyre::eyre!("Unable to clone request - perhaps it is a stream?"))?
.send()
.await?;

for _ in 1..num_retries {
let status_code = response.status().as_u16();

if status_code >= 500 && status_code <= 599 {
// We recevied a 500 error, wait a moment before trying the request again
println!("Error: {:?}", response);

let one_sec = std::time::Duration::from_secs(1);
std::thread::sleep(one_sec);

response = request
.try_clone()
.ok_or_else(|| {
eyre::eyre!("Unable to clone request - perhaps it is a stream?")
})?
.send()
.await?;

continue;
} else {
// Non-500 error, break the retry loop to return it
break;
}
}

Ok(response)
}

pub fn post(&self, path: &str) -> reqwest::RequestBuilder {
let endpoint_url = self.endpoint_url.as_str();
self.client.post(format!("{endpoint_url}{path}"))
Expand Down

0 comments on commit 2e5b328

Please sign in to comment.