From 93dcf858725334cf72955fce0c787802b8a90351 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Thu, 20 May 2021 19:57:35 +0200 Subject: [PATCH 1/3] gather PaginationOptions with a builder This will allow in the future to have code like: ```rust PaginationOptions::builder() .enable_seek(true) .gather(req)?; ``` --- src/controllers/category.rs | 2 +- src/controllers/helpers/pagination.rs | 42 +++++++++++++++++---------- src/controllers/keyword.rs | 3 +- src/controllers/krate/metadata.rs | 2 +- src/controllers/krate/search.rs | 4 +-- src/controllers/user/me.rs | 4 +-- 6 files changed, 34 insertions(+), 23 deletions(-) diff --git a/src/controllers/category.rs b/src/controllers/category.rs index 6ee010fb6b..be774c9a62 100644 --- a/src/controllers/category.rs +++ b/src/controllers/category.rs @@ -11,7 +11,7 @@ pub fn index(req: &mut dyn RequestExt) -> EndpointResult { // FIXME: There are 69 categories, 47 top level. This isn't going to // grow by an OoM. We need a limit for /summary, but we don't need // to paginate this. - let options = PaginationOptions::new(req)?; + let options = PaginationOptions::builder().gather(req)?; let offset = options.offset().unwrap_or_default(); let sort = query.get("sort").map_or("alpha", String::as_str); diff --git a/src/controllers/helpers/pagination.rs b/src/controllers/helpers/pagination.rs index dd587d1a1f..7f56476836 100644 --- a/src/controllers/helpers/pagination.rs +++ b/src/controllers/helpers/pagination.rs @@ -61,7 +61,23 @@ pub(crate) struct PaginationOptions { } impl PaginationOptions { - pub(crate) fn new(req: &mut dyn RequestExt) -> AppResult { + pub(crate) fn builder() -> PaginationOptionsBuilder { + PaginationOptionsBuilder + } + + pub(crate) fn offset(&self) -> Option { + if let Page::Numeric(p) = self.page { + Some((p - 1) * self.per_page) + } else { + None + } + } +} + +pub(crate) struct PaginationOptionsBuilder; + +impl PaginationOptionsBuilder { + pub(crate) fn gather(self, req: &mut dyn RequestExt) -> AppResult { const DEFAULT_PER_PAGE: u32 = 10; const MAX_PER_PAGE: u32 = 100; @@ -78,27 +94,19 @@ impl PaginationOptions { ))); } - Ok(Self { + Ok(PaginationOptions { page: Page::new(req)?, per_page, }) } - - pub(crate) fn offset(&self) -> Option { - if let Page::Numeric(p) = self.page { - Some((p - 1) * self.per_page) - } else { - None - } - } } pub(crate) trait Paginate: Sized { - fn paginate(self, req: &mut dyn RequestExt) -> AppResult> { - Ok(PaginatedQuery { + fn pages_pagination(self, options: PaginationOptions) -> PaginatedQuery { + PaginatedQuery { query: self, - options: PaginationOptions::new(req)?, - }) + options, + } } } @@ -247,14 +255,16 @@ mod tests { #[test] fn per_page_must_be_a_number() { let mut req = mock("per_page="); - let per_page_error = PaginationOptions::new(&mut req) + let per_page_error = PaginationOptions::builder() + .gather(&mut req) .unwrap_err() .response() .unwrap(); assert_eq!(per_page_error.status(), StatusCode::BAD_REQUEST); let mut req = mock("per_page=not_a_number"); - let per_page_error = PaginationOptions::new(&mut req) + let per_page_error = PaginationOptions::builder() + .gather(&mut req) .unwrap_err() .response() .unwrap(); diff --git a/src/controllers/keyword.rs b/src/controllers/keyword.rs index 19bbedab0d..1b0e09cb97 100644 --- a/src/controllers/keyword.rs +++ b/src/controllers/keyword.rs @@ -1,5 +1,6 @@ use super::prelude::*; +use crate::controllers::helpers::pagination::PaginationOptions; use crate::controllers::helpers::{pagination::Paginated, Paginate}; use crate::models::Keyword; use crate::views::EncodableKeyword; @@ -19,7 +20,7 @@ pub fn index(req: &mut dyn RequestExt) -> EndpointResult { query = query.order(keywords::keyword.asc()); } - let query = query.paginate(req)?; + let query = query.pages_pagination(PaginationOptions::builder().gather(req)?); let conn = req.db_read_only()?; let data: Paginated = query.load(&*conn)?; let total = data.total(); diff --git a/src/controllers/krate/metadata.rs b/src/controllers/krate/metadata.rs index dcae13f992..0d51431ddd 100644 --- a/src/controllers/krate/metadata.rs +++ b/src/controllers/krate/metadata.rs @@ -247,7 +247,7 @@ pub fn versions(req: &mut dyn RequestExt) -> EndpointResult { pub fn reverse_dependencies(req: &mut dyn RequestExt) -> EndpointResult { use diesel::dsl::any; - let pagination_options = PaginationOptions::new(req)?; + let pagination_options = PaginationOptions::builder().gather(req)?; let name = &req.params()["crate_id"]; let conn = req.db_read_only()?; let krate: Crate = Crate::by_name(name).first(&*conn)?; diff --git a/src/controllers/krate/search.rs b/src/controllers/krate/search.rs index 0df3f36626..d7f59111c4 100644 --- a/src/controllers/krate/search.rs +++ b/src/controllers/krate/search.rs @@ -12,7 +12,7 @@ use crate::schema::*; use crate::util::errors::{bad_request, ChainError}; use crate::views::EncodableCrate; -use crate::controllers::helpers::pagination::Paginated; +use crate::controllers::helpers::pagination::{Paginated, PaginationOptions}; use crate::models::krate::{canon_crate_name, ALL_COLUMNS}; /// Handles the `GET /crates` route. @@ -194,7 +194,7 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { query = query.then_order_by(crates::name.asc()) } - let query = query.paginate(req)?; + let query = query.pages_pagination(PaginationOptions::builder().gather(req)?); let conn = req.db_read_only()?; let data: Paginated<(Crate, bool, Option)> = query.load(&*conn)?; let total = data.total(); diff --git a/src/controllers/user/me.rs b/src/controllers/user/me.rs index 1d1640895d..8ccb89e078 100644 --- a/src/controllers/user/me.rs +++ b/src/controllers/user/me.rs @@ -4,7 +4,7 @@ use crate::controllers::frontend_prelude::*; use crate::controllers::helpers::*; -use crate::controllers::helpers::pagination::Paginated; +use crate::controllers::helpers::pagination::{Paginated, PaginationOptions}; use crate::models::{ CrateOwner, Email, Follow, NewEmail, OwnerKind, User, Version, VersionOwnerAction, }; @@ -68,7 +68,7 @@ pub fn updates(req: &mut dyn RequestExt) -> EndpointResult { crates::name, users::all_columns.nullable(), )) - .paginate(req)?; + .pages_pagination(PaginationOptions::builder().gather(req)?); let conn = req.db_conn()?; let data: Paginated<(Version, String, Option)> = query.load(&*conn)?; let more = data.next_page_params().is_some(); From 5b18e0c5d358635f85a77989ea8475d4ce330109 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Fri, 21 May 2021 14:36:34 +0200 Subject: [PATCH 2/3] refactor gathering pagination options and add tests --- src/controllers/helpers/pagination.rs | 200 ++++++++++++++------------ src/controllers/krate/search.rs | 6 +- 2 files changed, 113 insertions(+), 93 deletions(-) diff --git a/src/controllers/helpers/pagination.rs b/src/controllers/helpers/pagination.rs index 7f56476836..f04fdcb6d3 100644 --- a/src/controllers/helpers/pagination.rs +++ b/src/controllers/helpers/pagination.rs @@ -4,65 +4,35 @@ use crate::models::helpers::with_count::*; use crate::util::errors::{bad_request, AppResult}; use crate::util::request_header; +use crate::App; use diesel::pg::Pg; use diesel::query_builder::*; use diesel::query_dsl::LoadQuery; use diesel::sql_types::BigInt; use indexmap::IndexMap; +use std::sync::Arc; -#[derive(Debug, Clone, Copy)] +const MAX_PAGE_BEFORE_SUSPECTED_BOT: u32 = 10; +const DEFAULT_PER_PAGE: u32 = 10; +const MAX_PER_PAGE: u32 = 100; + +#[derive(Debug, Clone, PartialEq, Eq)] pub(crate) enum Page { Numeric(u32), Unspecified, } -impl Page { - fn new(req: &mut dyn RequestExt) -> AppResult { - const MAX_PAGE_BEFORE_SUSPECTED_BOT: u32 = 10; - - let params = req.query(); - if let Some(s) = params.get("page") { - let numeric_page = s.parse().map_err(|e| bad_request(&e))?; - if numeric_page < 1 { - return Err(bad_request(&format_args!( - "page indexing starts from 1, page {} is invalid", - numeric_page, - ))); - } - - if numeric_page > MAX_PAGE_BEFORE_SUSPECTED_BOT { - req.log_metadata("bot", "suspected"); - } - - // Block large offsets for known violators of the crawler policy - let config = &req.app().config; - let user_agent = request_header(req, header::USER_AGENT); - if numeric_page > config.max_allowed_page_offset - && config - .page_offset_ua_blocklist - .iter() - .any(|blocked| user_agent.contains(blocked)) - { - add_custom_metadata(req, "cause", "large page offset"); - return Err(bad_request("requested page offset is too large")); - } - - Ok(Page::Numeric(numeric_page)) - } else { - Ok(Page::Unspecified) - } - } -} - -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone)] pub(crate) struct PaginationOptions { - page: Page, + pub(crate) page: Page, pub(crate) per_page: u32, } impl PaginationOptions { pub(crate) fn builder() -> PaginationOptionsBuilder { - PaginationOptionsBuilder + PaginationOptionsBuilder { + limit_page_numbers: None, + } } pub(crate) fn offset(&self) -> Option { @@ -74,19 +44,57 @@ impl PaginationOptions { } } -pub(crate) struct PaginationOptionsBuilder; +pub(crate) struct PaginationOptionsBuilder { + limit_page_numbers: Option>, +} impl PaginationOptionsBuilder { - pub(crate) fn gather(self, req: &mut dyn RequestExt) -> AppResult { - const DEFAULT_PER_PAGE: u32 = 10; - const MAX_PER_PAGE: u32 = 100; + pub(crate) fn limit_page_numbers(mut self, app: Arc) -> Self { + self.limit_page_numbers = Some(app); + self + } + pub(crate) fn gather(self, req: &mut dyn RequestExt) -> AppResult { let params = req.query(); + let page_param = params.get("page"); + + let page = if let Some(s) = page_param { + let numeric_page = s.parse().map_err(|e| bad_request(&e))?; + if numeric_page < 1 { + return Err(bad_request(&format_args!( + "page indexing starts from 1, page {} is invalid", + numeric_page, + ))); + } + + if numeric_page > MAX_PAGE_BEFORE_SUSPECTED_BOT { + req.log_metadata("bot", "suspected"); + } + + // Block large offsets for known violators of the crawler policy + if let Some(app) = self.limit_page_numbers { + let config = &app.config; + let user_agent = request_header(req, header::USER_AGENT); + if numeric_page > config.max_allowed_page_offset + && config + .page_offset_ua_blocklist + .iter() + .any(|blocked| user_agent.contains(blocked)) + { + add_custom_metadata(req, "cause", "large page offset"); + return Err(bad_request("requested page offset is too large")); + } + } + + Page::Numeric(numeric_page) + } else { + Page::Unspecified + }; + let per_page = params .get("per_page") .map(|s| s.parse().map_err(|e| bad_request(&e))) .unwrap_or(Ok(DEFAULT_PER_PAGE))?; - if per_page > MAX_PER_PAGE { return Err(bad_request(&format_args!( "cannot request more than {} items", @@ -94,10 +102,7 @@ impl PaginationOptionsBuilder { ))); } - Ok(PaginationOptions { - page: Page::new(req)?, - per_page, - }) + Ok(PaginationOptions { page, per_page }) } } @@ -139,14 +144,10 @@ impl Paginated { } pub(crate) fn prev_page_params(&self) -> Option> { - if let Page::Numeric(1) | Page::Unspecified = self.options.page { - return None; - } - let mut opts = IndexMap::new(); match self.options.page { + Page::Numeric(1) | Page::Unspecified => return None, Page::Numeric(n) => opts.insert("page".into(), (n - 1).to_string()), - Page::Unspecified => unreachable!(), }; Some(opts) } @@ -176,7 +177,7 @@ impl PaginatedQuery { where Self: LoadQuery>, { - let options = self.options; + let options = self.options.clone(); let records_and_total = self.internal_load(conn)?; Ok(Paginated { records_and_total, @@ -215,59 +216,74 @@ where #[cfg(test)] mod tests { - use super::{Page, PaginationOptions}; - + use super::*; use conduit::StatusCode; - use conduit_test::MockRequest; + use conduit_test::{MockRequest, ResponseExt}; - fn mock(query: &str) -> MockRequest { - let mut req = MockRequest::new(http::Method::GET, ""); - req.with_query(query); - req + #[test] + fn no_pagination_param() { + let pagination = PaginationOptions::builder().gather(&mut mock("")).unwrap(); + assert_eq!(Page::Unspecified, pagination.page); + assert_eq!(DEFAULT_PER_PAGE, pagination.per_page); } #[test] - fn page_must_be_a_number() { - let mut req = mock("page="); - let page_error = Page::new(&mut req).unwrap_err().response().unwrap(); - - assert_eq!(page_error.status(), StatusCode::BAD_REQUEST); + fn page_param_parsing() { + let assert_error = + |query, msg| assert_pagination_error(PaginationOptions::builder(), query, msg); - let mut req = mock("page=not_a_number"); - let page_error = Page::new(&mut req).unwrap_err().response().unwrap(); + assert_error("page=", "cannot parse integer from empty string"); + assert_error("page=not_a_number", "invalid digit found in string"); + assert_error("page=1.0", "invalid digit found in string"); + assert_error("page=0", "page indexing starts from 1, page 0 is invalid"); - assert_eq!(page_error.status(), StatusCode::BAD_REQUEST); + let pagination = PaginationOptions::builder() + .gather(&mut mock("page=5")) + .unwrap(); + assert_eq!(Page::Numeric(5), pagination.page); } #[test] - fn page_must_be_non_zero_positive_integer() { - let mut req = mock("page=0"); - let page_error = Page::new(&mut req).unwrap_err().response().unwrap(); + fn per_page_param_parsing() { + let assert_error = + |query, msg| assert_pagination_error(PaginationOptions::builder(), query, msg); - assert_eq!(page_error.status(), StatusCode::BAD_REQUEST); + assert_error("per_page=", "cannot parse integer from empty string"); + assert_error("per_page=not_a_number", "invalid digit found in string"); + assert_error("per_page=1.0", "invalid digit found in string"); + assert_error("per_page=101", "cannot request more than 100 items"); - let mut req = mock("page=1.0"); - let page_error = Page::new(&mut req).unwrap_err().response().unwrap(); + let pagination = PaginationOptions::builder() + .gather(&mut mock("per_page=5")) + .unwrap(); + assert_eq!(5, pagination.per_page); + } - assert_eq!(page_error.status(), StatusCode::BAD_REQUEST); + fn mock(query: &str) -> MockRequest { + let mut req = MockRequest::new(http::Method::GET, ""); + req.with_query(query); + req } - #[test] - fn per_page_must_be_a_number() { - let mut req = mock("per_page="); - let per_page_error = PaginationOptions::builder() - .gather(&mut req) + fn assert_pagination_error(options: PaginationOptionsBuilder, query: &str, message: &str) { + let response = options + .gather(&mut mock(query)) .unwrap_err() .response() .unwrap(); - assert_eq!(per_page_error.status(), StatusCode::BAD_REQUEST); - let mut req = mock("per_page=not_a_number"); - let per_page_error = PaginationOptions::builder() - .gather(&mut req) - .unwrap_err() - .response() - .unwrap(); - assert_eq!(per_page_error.status(), StatusCode::BAD_REQUEST); + assert_eq!(StatusCode::BAD_REQUEST, response.status()); + + #[derive(Deserialize)] + struct Parsed { + errors: [ParsedError; 1], + } + #[derive(Deserialize)] + struct ParsedError { + detail: String, + } + + let parsed: Parsed = serde_json::from_slice(&response.into_cow()).unwrap(); + assert_eq!(message, parsed.errors[0].detail); } } diff --git a/src/controllers/krate/search.rs b/src/controllers/krate/search.rs index d7f59111c4..583bbfd7df 100644 --- a/src/controllers/krate/search.rs +++ b/src/controllers/krate/search.rs @@ -194,7 +194,11 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { query = query.then_order_by(crates::name.asc()) } - let query = query.pages_pagination(PaginationOptions::builder().gather(req)?); + let query = query.pages_pagination( + PaginationOptions::builder() + .limit_page_numbers(req.app().clone()) + .gather(req)?, + ); let conn = req.db_read_only()?; let data: Paginated<(Crate, bool, Option)> = query.load(&*conn)?; let total = data.total(); From 5cfdbcaea943909087c12961fc4a270d96cb055c Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Thu, 13 May 2021 17:39:49 +0200 Subject: [PATCH 3/3] add basic support for seek-based pagination in the crates endpoint --- src/controllers/helpers/pagination.rs | 105 ++++++++++++++++++++++- src/controllers/krate/search.rs | 115 +++++++++++++++++++++++--- src/tests/krate/search.rs | 100 ++++++++++++++++++++-- 3 files changed, 301 insertions(+), 19 deletions(-) diff --git a/src/controllers/helpers/pagination.rs b/src/controllers/helpers/pagination.rs index f04fdcb6d3..c42663ce2b 100644 --- a/src/controllers/helpers/pagination.rs +++ b/src/controllers/helpers/pagination.rs @@ -10,6 +10,7 @@ use diesel::query_builder::*; use diesel::query_dsl::LoadQuery; use diesel::sql_types::BigInt; use indexmap::IndexMap; +use serde::{Deserialize, Serialize}; use std::sync::Arc; const MAX_PAGE_BEFORE_SUSPECTED_BOT: u32 = 10; @@ -19,6 +20,7 @@ const MAX_PER_PAGE: u32 = 100; #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) enum Page { Numeric(u32), + Seek(RawSeekPayload), Unspecified, } @@ -32,6 +34,7 @@ impl PaginationOptions { pub(crate) fn builder() -> PaginationOptionsBuilder { PaginationOptionsBuilder { limit_page_numbers: None, + enable_seek: false, } } @@ -46,6 +49,7 @@ impl PaginationOptions { pub(crate) struct PaginationOptionsBuilder { limit_page_numbers: Option>, + enable_seek: bool, } impl PaginationOptionsBuilder { @@ -54,9 +58,21 @@ impl PaginationOptionsBuilder { self } + pub(crate) fn enable_seek(mut self, enable: bool) -> Self { + self.enable_seek = enable; + self + } + pub(crate) fn gather(self, req: &mut dyn RequestExt) -> AppResult { let params = req.query(); let page_param = params.get("page"); + let seek_param = params.get("seek"); + + if seek_param.is_some() && page_param.is_some() { + return Err(bad_request( + "providing both ?page= and ?seek= is unsupported", + )); + } let page = if let Some(s) = page_param { let numeric_page = s.parse().map_err(|e| bad_request(&e))?; @@ -87,6 +103,12 @@ impl PaginationOptionsBuilder { } Page::Numeric(numeric_page) + } else if let Some(s) = seek_param { + if self.enable_seek { + Page::Seek(RawSeekPayload(s.clone())) + } else { + return Err(bad_request("?seek= is not supported for this request")); + } } else { Page::Unspecified }; @@ -139,6 +161,7 @@ impl Paginated { match self.options.page { Page::Numeric(n) => opts.insert("page".into(), (n + 1).to_string()), Page::Unspecified => opts.insert("page".into(), 2.to_string()), + Page::Seek(_) => return None, }; Some(opts) } @@ -146,7 +169,7 @@ impl Paginated { pub(crate) fn prev_page_params(&self) -> Option> { let mut opts = IndexMap::new(); match self.options.page { - Page::Numeric(1) | Page::Unspecified => return None, + Page::Numeric(1) | Page::Unspecified | Page::Seek(_) => return None, Page::Numeric(n) => opts.insert("page".into(), (n - 1).to_string()), }; Some(opts) @@ -214,6 +237,35 @@ where } } +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct RawSeekPayload(String); + +impl RawSeekPayload { + pub(crate) fn decode Deserialize<'a>>(&self) -> AppResult { + decode_seek(&self.0) + } +} + +/// Encode a payload to be used as a seek key. +/// +/// The payload is base64-encoded to hint that it shouldn't be manually constructed. There is no +/// technical measure to prevent API consumers for manually creating or modifying them, but +/// hopefully the base64 will be enough to convey that doing it is unsupported. +pub(crate) fn encode_seek(params: S) -> AppResult { + Ok(base64::encode_config( + &serde_json::to_vec(¶ms)?, + base64::URL_SAFE_NO_PAD, + )) +} + +/// Decode a list of params previously encoded with [`encode_seek`]. +pub(crate) fn decode_seek Deserialize<'a>>(seek: &str) -> AppResult { + Ok(serde_json::from_slice(&base64::decode_config( + seek, + base64::URL_SAFE_NO_PAD, + )?)?) +} + #[cfg(test)] mod tests { use super::*; @@ -259,6 +311,57 @@ mod tests { assert_eq!(5, pagination.per_page); } + #[test] + fn seek_param_parsing() { + assert_pagination_error( + PaginationOptions::builder(), + "seek=OTg", + "?seek= is not supported for this request", + ); + + let pagination = PaginationOptions::builder() + .enable_seek(true) + .gather(&mut mock("seek=OTg")) + .unwrap(); + + if let Page::Seek(raw) = pagination.page { + assert_eq!(98, raw.decode::().unwrap()); + } else { + panic!( + "did not parse a seek page, parsed {:?} instead", + pagination.page + ); + } + } + + #[test] + fn both_page_and_seek() { + assert_pagination_error( + PaginationOptions::builder(), + "page=1&seek=OTg", + "providing both ?page= and ?seek= is unsupported", + ); + assert_pagination_error( + PaginationOptions::builder().enable_seek(true), + "page=1&seek=OTg", + "providing both ?page= and ?seek= is unsupported", + ); + } + + #[test] + fn test_seek_encode_and_decode() { + // Encoding produces the results we expect + assert_eq!("OTg", encode_seek(98).unwrap()); + assert_eq!("WyJmb28iLDQyXQ", encode_seek(("foo", 42)).unwrap()); + + // Encoded values can be then decoded. + assert_eq!(98, decode_seek::(&encode_seek(98).unwrap()).unwrap(),); + assert_eq!( + ("foo".into(), 42), + decode_seek::<(String, i32)>(&encode_seek(("foo", 42)).unwrap()).unwrap(), + ); + } + fn mock(query: &str) -> MockRequest { let mut req = MockRequest::new(http::Method::GET, ""); req.with_query(query); diff --git a/src/controllers/krate/search.rs b/src/controllers/krate/search.rs index 583bbfd7df..917d818c71 100644 --- a/src/controllers/krate/search.rs +++ b/src/controllers/krate/search.rs @@ -2,6 +2,7 @@ use diesel::dsl::*; use diesel_full_text_search::*; +use indexmap::IndexMap; use crate::controllers::cargo_prelude::*; use crate::controllers::helpers::Paginate; @@ -12,7 +13,7 @@ use crate::schema::*; use crate::util::errors::{bad_request, ChainError}; use crate::views::EncodableCrate; -use crate::controllers::helpers::pagination::{Paginated, PaginationOptions}; +use crate::controllers::helpers::pagination::{Page, Paginated, PaginationOptions}; use crate::models::krate::{canon_crate_name, ALL_COLUMNS}; /// Handles the `GET /crates` route. @@ -56,7 +57,13 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { .select(selection) .into_boxed(); + let mut supports_seek = true; + if let Some(q_string) = params.get("q") { + // Searching with a query string always puts the exact match at the start of the results, + // so we can't support seek-based pagination with it. + supports_seek = false; + if !q_string.is_empty() { let sort = params.get("sort").map(|s| &**s).unwrap_or("relevance"); @@ -84,6 +91,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { } if let Some(cat) = params.get("category") { + // Calculating the total number of results with filters is not supported yet. + supports_seek = false; + query = query.filter( crates::id.eq_any( crates_categories::table @@ -102,6 +112,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { use diesel::sql_types::Array; sql_function!(#[aggregate] fn array_agg(x: T) -> Array); + // Calculating the total number of results with filters is not supported yet. + supports_seek = false; + let names: Vec<_> = kws .split_whitespace() .map(|name| name.to_lowercase()) @@ -120,6 +133,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { ), ); } else if let Some(kw) = params.get("keyword") { + // Calculating the total number of results with filters is not supported yet. + supports_seek = false; + query = query.filter( crates::id.eq_any( crates_keywords::table @@ -129,6 +145,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { ), ); } else if let Some(letter) = params.get("letter") { + // Calculating the total number of results with filters is not supported yet. + supports_seek = false; + let pattern = format!( "{}%", letter @@ -140,6 +159,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { ); query = query.filter(canon_crate_name(crates::name).like(pattern)); } else if let Some(user_id) = params.get("user_id").and_then(|s| s.parse::().ok()) { + // Calculating the total number of results with filters is not supported yet. + supports_seek = false; + query = query.filter( crates::id.eq_any( CrateOwner::by_owner_kind(OwnerKind::User) @@ -148,6 +170,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { ), ); } else if let Some(team_id) = params.get("team_id").and_then(|s| s.parse::().ok()) { + // Calculating the total number of results with filters is not supported yet. + supports_seek = false; + query = query.filter( crates::id.eq_any( CrateOwner::by_owner_kind(OwnerKind::Team) @@ -156,6 +181,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { ), ); } else if params.get("following").is_some() { + // Calculating the total number of results with filters is not supported yet. + supports_seek = false; + let user_id = req.authenticate()?.user_id(); query = query.filter( crates::id.eq_any( @@ -165,6 +193,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { ), ); } else if params.get("ids[]").is_some() { + // Calculating the total number of results with filters is not supported yet. + supports_seek = false; + let query_bytes = req.query_string().unwrap_or("").as_bytes(); let ids: Vec<_> = url::form_urlencoded::parse(query_bytes) .filter(|(key, _)| key == "ids[]") @@ -175,6 +206,9 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { } if !include_yanked { + // Calculating the total number of results with filters is not supported yet. + supports_seek = false; + query = query.filter(exists( versions::table .filter(versions::crate_id.eq(crates::id)) @@ -183,28 +217,89 @@ pub fn search(req: &mut dyn RequestExt) -> EndpointResult { } if sort == Some("downloads") { + // Custom sorting is not supported yet with seek. + supports_seek = false; + query = query.then_order_by(crates::downloads.desc()) } else if sort == Some("recent-downloads") { + // Custom sorting is not supported yet with seek. + supports_seek = false; + query = query.then_order_by(recent_crate_downloads::downloads.desc().nulls_last()) } else if sort == Some("recent-updates") { + // Custom sorting is not supported yet with seek. + supports_seek = false; + query = query.order(crates::updated_at.desc()); } else if sort == Some("new") { + // Custom sorting is not supported yet with seek. + supports_seek = false; + query = query.order(crates::created_at.desc()); } else { query = query.then_order_by(crates::name.asc()) } - let query = query.pages_pagination( - PaginationOptions::builder() - .limit_page_numbers(req.app().clone()) - .gather(req)?, - ); + let pagination: PaginationOptions = PaginationOptions::builder() + .limit_page_numbers(req.app().clone()) + .enable_seek(supports_seek) + .gather(req)?; let conn = req.db_read_only()?; - let data: Paginated<(Crate, bool, Option)> = query.load(&*conn)?; - let total = data.total(); - let next_page = data.next_page_params().map(|p| req.query_with_params(p)); - let prev_page = data.prev_page_params().map(|p| req.query_with_params(p)); + let (explicit_page, seek) = match pagination.page.clone() { + Page::Numeric(_) => (true, None), + Page::Seek(s) => (false, Some(s.decode::()?)), + Page::Unspecified => (false, None), + }; + + // To avoid breaking existing users, seek-based pagination is only used if an explicit page has + // not been provided. This way clients relying on meta.next_page will use the faster seek-based + // paginations, while client hardcoding pages handling will use the slower offset-based code. + let (total, next_page, prev_page, data, conn) = if supports_seek && !explicit_page { + // Equivalent of: + // `WHERE name > (SELECT name FROM crates WHERE id = $1) LIMIT $2` + query = query.limit(pagination.per_page as i64); + if let Some(seek) = seek { + let crate_name: String = crates::table + .find(seek) + .select(crates::name) + .get_result(&*conn)?; + query = query.filter(crates::name.gt(crate_name)); + } + + // This does a full index-only scan over the crates table to gather how many crates were + // published. Unfortunately on PostgreSQL counting the rows in a table requires scanning + // the table, and the `total` field is part of the stable registries API. + // + // If this becomes a problem in the future the crates count could be denormalized, at least + // for the filterless happy path. + let total: i64 = crates::table.count().get_result(&*conn)?; + + let results: Vec<(Crate, bool, Option)> = query.load(&*conn)?; + + let next_page = if let Some(last) = results.last() { + let mut params = IndexMap::new(); + params.insert( + "seek".into(), + crate::controllers::helpers::pagination::encode_seek(last.0.id)?, + ); + Some(req.query_with_params(params)) + } else { + None + }; + + (total, next_page, None, results, conn) + } else { + let query = query.pages_pagination(pagination); + let data: Paginated<(Crate, bool, Option)> = query.load(&*conn)?; + ( + data.total(), + data.next_page_params().map(|p| req.query_with_params(p)), + data.prev_page_params().map(|p| req.query_with_params(p)), + data.into_iter().collect::>(), + conn, + ) + }; let perfect_matches = data.iter().map(|&(_, b, _)| b).collect::>(); let recent_downloads = data diff --git a/src/tests/krate/search.rs b/src/tests/krate/search.rs index a20c2648cd..e589f5aa8f 100644 --- a/src/tests/krate/search.rs +++ b/src/tests/krate/search.rs @@ -695,17 +695,101 @@ fn pagination_links_included_if_applicable() { CrateBuilder::new("pagination_links_3", user.id).expect_build(conn); }); - let page1 = anon.search("per_page=1"); - let page2 = anon.search("page=2&per_page=1"); - let page3 = anon.search("page=3&per_page=1"); - let page4 = anon.search("page=4&per_page=1"); + // This uses a filter (`letter=p`) to disable seek-based pagination, as seek-based pagination + // does not return page numbers. If the test fails after expanding the scope of seek-based + // pagination replace the filter with something else still using pages. - assert_eq!(Some("?per_page=1&page=2".to_string()), page1.meta.next_page); + let page1 = anon.search("letter=p&per_page=1"); + let page2 = anon.search("letter=p&page=2&per_page=1"); + let page3 = anon.search("letter=p&page=3&per_page=1"); + let page4 = anon.search("letter=p&page=4&per_page=1"); + + assert_eq!( + Some("?letter=p&per_page=1&page=2".to_string()), + page1.meta.next_page + ); assert_eq!(None, page1.meta.prev_page); - assert_eq!(Some("?page=3&per_page=1".to_string()), page2.meta.next_page); - assert_eq!(Some("?page=1&per_page=1".to_string()), page2.meta.prev_page); + assert_eq!( + Some("?letter=p&page=3&per_page=1".to_string()), + page2.meta.next_page + ); + assert_eq!( + Some("?letter=p&page=1&per_page=1".to_string()), + page2.meta.prev_page + ); assert_eq!(None, page4.meta.next_page); - assert_eq!(Some("?page=2&per_page=1".to_string()), page3.meta.prev_page); + assert_eq!( + Some("?letter=p&page=2&per_page=1".to_string()), + page3.meta.prev_page + ); +} + +#[test] +fn seek_based_pagination() { + let (app, anon, user) = TestApp::init().with_user(); + let user = user.as_model(); + + app.db(|conn| { + CrateBuilder::new("pagination_links_1", user.id).expect_build(conn); + CrateBuilder::new("pagination_links_2", user.id).expect_build(conn); + CrateBuilder::new("pagination_links_3", user.id).expect_build(conn); + }); + + let mut url = Some("?per_page=1".to_string()); + let mut results = Vec::new(); + let mut calls = 0; + while let Some(current_url) = url.take() { + let resp = anon.search(current_url.trim_start_matches('?')); + calls += 1; + + results.append( + &mut resp + .crates + .iter() + .map(|res| res.name.clone()) + .collect::>(), + ); + + if let Some(new_url) = resp.meta.next_page { + assert_eq!(1, resp.crates.len()); + url = Some(new_url); + } else { + assert!(resp.crates.is_empty()); + } + + assert_eq!(None, resp.meta.prev_page); + assert_eq!(3, resp.meta.total); + } + + assert_eq!(4, calls); + assert_eq!( + vec![ + "pagination_links_1", + "pagination_links_2", + "pagination_links_3" + ], + results + ); +} + +#[test] +fn test_pages_work_even_with_seek_based_pagination() { + let (app, anon, user) = TestApp::init().with_user(); + let user = user.as_model(); + + app.db(|conn| { + CrateBuilder::new("pagination_links_1", user.id).expect_build(conn); + CrateBuilder::new("pagination_links_2", user.id).expect_build(conn); + CrateBuilder::new("pagination_links_3", user.id).expect_build(conn); + }); + + // The next_page returned by the request is seek-based + let first = anon.search("per_page=1"); + assert!(first.meta.next_page.unwrap().contains("seek=")); + + // Calling with page=2 will revert to offset-based pagination + let second = anon.search("page=2&per_page=1"); + assert!(second.meta.next_page.unwrap().contains("page=3")); } #[test]