diff --git a/Cargo.toml b/Cargo.toml index 5d67559..5a1f158 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "matchit" -version = "0.7.0" -license = "MIT" +version = "0.7.2" +license = "MIT AND BSD-3-Clause" authors = ["Ibraheem Ahmed "] edition = "2021" description = "A blazing fast URL router." @@ -22,6 +22,11 @@ gonzales = "0.0.3-beta" path-tree = "0.2.2" routefinder = "0.5.2" +# examples +tower = { version = "0.4", features = ["make", "util"] } +tokio = { version = "1", features = ["full"] } +hyper = { version = "0.14", features = ["full"] } + [features] default = [] __test_helpers = [] diff --git a/README.md b/README.md index c36ab6b..51de149 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,10 @@ # `matchit` -[![Documentation](https://img.shields.io/badge/docs-0.7.0-4d76ae?style=for-the-badge)](https://docs.rs/matchit) +[![Documentation](https://img.shields.io/badge/docs-0.7.2-4d76ae?style=for-the-badge)](https://docs.rs/matchit) [![Version](https://img.shields.io/crates/v/matchit?style=for-the-badge)](https://crates.io/crates/matchit) [![License](https://img.shields.io/crates/l/matchit?style=for-the-badge)](https://crates.io/crates/matchit) -[![Actions](https://img.shields.io/github/workflow/status/ibraheemdev/matchit/Rust/master?style=for-the-badge)](https://github.com/ibraheemdev/matchit/actions) -A blazing fast URL router. +A high performance, zero-copy URL router. ```rust use matchit::Router; @@ -48,9 +47,11 @@ Catch-all parameters start with `*` and match everything after the `/`. 
They mus let mut m = Router::new(); m.insert("/*p", true)?; -assert_eq!(m.at("/")?.params.get("p"), Some("")); assert_eq!(m.at("/foo.js")?.params.get("p"), Some("foo.js")); assert_eq!(m.at("/c/bar.css")?.params.get("p"), Some("c/bar.css")); + +// note that this would not match: +assert!(m.at("/").is_err()); ``` ## Routing Priority diff --git a/examples/hyper.rs b/examples/hyper.rs new file mode 100644 index 0000000..803af5f --- /dev/null +++ b/examples/hyper.rs @@ -0,0 +1,87 @@ +use std::collections::HashMap; +use std::convert::Infallible; +use std::sync::{Arc, Mutex}; + +use hyper::server::Server; +use hyper::service::{make_service_fn, service_fn}; +use hyper::{Body, Method, Request, Response}; +use tower::util::BoxCloneService; +use tower::Service as _; + +// GET / +async fn index(_req: Request) -> hyper::Result> { + Ok(Response::new(Body::from("Hello, world!"))) +} + +// GET /blog +async fn blog(_req: Request) -> hyper::Result> { + Ok(Response::new(Body::from("..."))) +} + +// 404 handler +async fn not_found(_req: Request) -> hyper::Result> { + Ok(Response::builder().status(404).body(Body::empty()).unwrap()) +} + +// We can use `BoxCloneService` to erase the type of each handler service. +// +// We still need a `Mutex` around each service because `BoxCloneService` doesn't +// require the service to implement `Sync`. +type Service = Mutex, Response, hyper::Error>>; + +// We use a `HashMap` to hold a `Router` for each HTTP method. This allows us +// to register the same route for multiple methods. 
+type Router = HashMap>; + +async fn route(router: Arc, req: Request) -> hyper::Result> { + // find the subrouter for this request method + let router = match router.get(req.method()) { + Some(router) => router, + // if there are no routes for this method, respond with 405 Method Not Allowed + None => return Ok(Response::builder().status(405).body(Body::empty()).unwrap()), + }; + + // find the service for this request path + match router.at(req.uri().path()) { + Ok(found) => { + // lock the service for a very short time, just to clone the service + let mut service = found.value.lock().unwrap().clone(); + service.call(req).await + } + // if there is no matching service, call the 404 handler + Err(_) => not_found(req).await, + } +} + +#[tokio::main] +async fn main() { + // Create a router and register our routes. + let mut router = Router::new(); + + // GET / => `index` + router + .entry(Method::GET) + .or_default() + .insert("/", BoxCloneService::new(service_fn(index)).into()) + .unwrap(); + + // GET /blog => `blog` + router + .entry(Method::GET) + .or_default() + .insert("/blog", BoxCloneService::new(service_fn(blog)).into()) + .unwrap(); + + // boilerplate for the hyper service + let router = Arc::new(router); + let make_service = make_service_fn(|_| { + let router = router.clone(); + async { Ok::<_, Infallible>(service_fn(move |request| route(router.clone(), request))) } + }); + + // run the server + Server::bind(&([127, 0, 0, 1], 3000).into()) + .serve(make_service) + .await + .unwrap() +} diff --git a/src/lib.rs b/src/lib.rs index ad87f8d..b33c5a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,8 @@ //! # `matchit` //! -//! [![Documentation](https://img.shields.io/badge/docs-0.7.0-4d76ae?style=for-the-badge)](https://docs.rs/matchit) +//! [![Documentation](https://img.shields.io/badge/docs-0.7.2-4d76ae?style=for-the-badge)](https://docs.rs/matchit) //! 
[![Version](https://img.shields.io/crates/v/matchit?style=for-the-badge)](https://crates.io/crates/matchit) //! [![License](https://img.shields.io/crates/l/matchit?style=for-the-badge)](https://crates.io/crates/matchit) -//! [![Actions](https://img.shields.io/github/workflow/status/ibraheemdev/matchit/Rust/master?style=for-the-badge)](https://github.com/ibraheemdev/matchit/actions) //! //! A blazing fast URL router. //! @@ -57,6 +56,9 @@ //! assert_eq!(m.at("/foo.js")?.params.get("p"), Some("foo.js")); //! assert_eq!(m.at("/c/bar.css")?.params.get("p"), Some("c/bar.css")); //! +//! // note that this would not match +//! assert!(m.at("/").is_err()); +//! //! # Ok(()) //! # } //! ``` diff --git a/src/tree.rs b/src/tree.rs index ae71b87..b00937f 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -44,7 +44,7 @@ impl Node { self.priority += 1; - // empty tree + // the tree is empty if self.prefix.is_empty() && self.children.is_empty() { let last = self.insert_child(prefix, &route, val)?; last.param_remapping = param_remapping; @@ -56,46 +56,40 @@ impl Node { 'walk: loop { // find the longest common prefix - // - // this also implies that the common prefix contains - // no ':' or '*', since the existing key can't contain - // those chars - let mut i = 0; - let max = min(prefix.len(), current.prefix.len()); - - while i < max && prefix[i] == current.prefix[i] { - i += 1; - } - - // split edge - if i < current.prefix.len() { - let mut child = Self { - prefix: current.prefix[i..].to_owned(), + let len = min(prefix.len(), current.prefix.len()); + let common_prefix = (0..len) + .find(|&i| prefix[i] != current.prefix[i]) + .unwrap_or(len); + + // the common prefix is a substring of the current node's prefix, split the node + if common_prefix < current.prefix.len() { + let child = Node { + prefix: current.prefix[common_prefix..].to_owned(), + children: mem::take(&mut current.children), wild_child: current.wild_child, indices: current.indices.clone(), value: current.value.take(), 
param_remapping: mem::take(&mut current.param_remapping), priority: current.priority - 1, - ..Self::default() + ..Node::default() }; - mem::swap(&mut current.children, &mut child.children); - + // the current node now holds only the common prefix current.children = vec![child]; - current.indices = current.prefix[i..=i].to_owned(); - current.prefix = prefix[..i].to_owned(); + current.indices = vec![current.prefix[common_prefix]]; + current.prefix = prefix[..common_prefix].to_owned(); current.wild_child = false; } - // make new node a child of this node - if prefix.len() > i { - prefix = &prefix[i..]; + // the route has a common prefix, search deeper + if prefix.len() > common_prefix { + prefix = &prefix[common_prefix..]; - let first = prefix[0]; + let next = prefix[0]; // `/` after param if current.node_type == NodeType::Param - && first == b'/' + && next == b'/' && current.children.len() == 1 { current = &mut current.children[0]; @@ -104,52 +98,64 @@ impl Node { continue 'walk; } - // check if a child with the next path byte exists + // find a child that matches the next path byte for mut i in 0..current.indices.len() { - if first == current.indices[i] { + // found a match + if next == current.indices[i] { i = current.update_child_priority(i); current = &mut current.children[i]; continue 'walk; } } - if first != b':' && first != b'*' && current.node_type != NodeType::CatchAll { - current.indices.push(first); - let mut child = current.add_child(Self::default()); + // not a wildcard and there is no matching child node, create a new one + if !matches!(next, b':' | b'*') && current.node_type != NodeType::CatchAll { + current.indices.push(next); + let mut child = current.add_child(Node::default()); child = current.update_child_priority(child); - current = &mut current.children[child]; - } else if current.wild_child { - // inserting a wildcard node, check if it conflicts with the existing wildcard + + // insert into the new node + let last = 
current.children[child].insert_child(prefix, &route, val)?; + last.param_remapping = param_remapping; + return Ok(()); + } + + // inserting a wildcard, and this node already has a wildcard child + if current.wild_child { + // wildcards are always at the end current = current.children.last_mut().unwrap(); current.priority += 1; - // check if the wildcard matches - if prefix.len() >= current.prefix.len() - && current.prefix == prefix[..current.prefix.len()] - // adding a child to a catchall Node is not possible - && current.node_type != NodeType::CatchAll + // make sure the wildcard matches + if prefix.len() < current.prefix.len() + || current.prefix != prefix[..current.prefix.len()] + // catch-alls cannot have children + || current.node_type == NodeType::CatchAll // check for longer wildcard, e.g. :name and :names - && (current.prefix.len() >= prefix.len() - || prefix[current.prefix.len()] == b'/') + || (current.prefix.len() < prefix.len() + && prefix[current.prefix.len()] != b'/') { - continue 'walk; + return Err(InsertError::conflict(&route, prefix, current)); } - return Err(InsertError::conflict(&route, prefix, current)); + continue 'walk; } + // otherwise, create the wildcard node let last = current.insert_child(prefix, &route, val)?; last.param_remapping = param_remapping; return Ok(()); } - // otherwise add value to current node + // exact match, this node should be empty if current.value.is_some() { return Err(InsertError::conflict(&route, prefix, current)); } + // add the value to current node current.value = Some(UnsafeCell::new(val)); current.param_remapping = param_remapping; + return Ok(()); } } @@ -167,34 +173,36 @@ impl Node { } } - // increments priority of the given child and reorders if necessary - // returns the new position (index) of the child - fn update_child_priority(&mut self, pos: usize) -> usize { - self.children[pos].priority += 1; - let priority = self.children[pos].priority; + // increments priority of the given child and reorders if 
necessary. + // + // returns the new index of the child + fn update_child_priority(&mut self, i: usize) -> usize { + self.children[i].priority += 1; + let priority = self.children[i].priority; // adjust position (move to front) - let mut new_pos = pos; - while new_pos > 0 && self.children[new_pos - 1].priority < priority { + let mut updated = i; + while updated > 0 && self.children[updated - 1].priority < priority { // swap node positions - self.children.swap(new_pos - 1, new_pos); - new_pos -= 1; + self.children.swap(updated - 1, updated); + updated -= 1; } // build new index list - if new_pos != pos { + if updated != i { self.indices = [ - &self.indices[..new_pos], // unchanged prefix, might be empty - &self.indices[pos..=pos], // the index char we move - &self.indices[new_pos..pos], // rest without char at 'pos' - &self.indices[pos + 1..], + &self.indices[..updated], // unchanged prefix, might be empty + &self.indices[i..=i], // the index char we move + &self.indices[updated..i], // rest without char at 'pos' + &self.indices[i + 1..], ] .concat(); } - new_pos + updated } + // insert a child node at this node fn insert_child( &mut self, mut prefix: &[u8], @@ -215,7 +223,7 @@ impl Node { } }; - // route parameter + // regular route parameter if wildcard[0] == b':' { // insert prefix before the current wildcard if wildcard_index > 0 { @@ -251,45 +259,45 @@ impl Node { // otherwise we're done. 
Insert the value in the new leaf current.value = Some(UnsafeCell::new(val)); return Ok(current); - } - // catch all route - assert_eq!(wildcard[0], b'*'); + // catch-all route + } else if wildcard[0] == b'*' { + // "/foo/*x/bar" + if wildcard_index + wildcard.len() != prefix.len() { + return Err(InsertError::InvalidCatchAll); + } - // "/foo/*catchall/bar" - if wildcard_index + wildcard.len() != prefix.len() { - return Err(InsertError::InvalidCatchAll); - } + if let Some(i) = wildcard_index.checked_sub(1) { + // "/foo/bar*x" + if prefix[i] != b'/' { + return Err(InsertError::InvalidCatchAll); + } + } - if let Some(i) = wildcard_index.checked_sub(1) { - // "/foo/bar*catchall" - if prefix[i] != b'/' { + // "*x" without leading `/` + if prefix == route && route[0] != b'/' { return Err(InsertError::InvalidCatchAll); } - } - - // "*catchall" - if prefix == route && route[0] != b'/' { - return Err(InsertError::InvalidCatchAll); - } - if wildcard_index > 0 { - current.prefix = prefix[..wildcard_index].to_owned(); - prefix = &prefix[wildcard_index..]; - } + // insert prefix before the current wildcard + if wildcard_index > 0 { + current.prefix = prefix[..wildcard_index].to_owned(); + prefix = &prefix[wildcard_index..]; + } - let child = Self { - prefix: prefix.to_owned(), - node_type: NodeType::CatchAll, - value: Some(UnsafeCell::new(val)), - priority: 1, - ..Self::default() - }; + let child = Self { + prefix: prefix.to_owned(), + node_type: NodeType::CatchAll, + value: Some(UnsafeCell::new(val)), + priority: 1, + ..Self::default() + }; - let i = current.add_child(child); - current.wild_child = true; + let i = current.add_child(child); + current.wild_child = true; - return Ok(&mut current.children[i]); + return Ok(&mut current.children[i]); + } } } } @@ -322,9 +330,9 @@ macro_rules! 
backtracker { } impl Node { - // It's a bit sad that we have to introduce unsafe here but rust doesn't really have a way - // to abstract over mutability, so UnsafeCell lets us avoid having to duplicate logic between - // `at` and `at_mut`. + // it's a bit sad that we have to introduce unsafe here but rust doesn't really have a way + // to abstract over mutability, so `UnsafeCell` lets us avoid having to duplicate logic between + // `at` and `at_mut` pub fn at<'n, 'p>( &'n self, full_path: &'p [u8], @@ -338,11 +346,11 @@ impl Node { 'walk: loop { backtracker!(skipped_nodes, path, current, params, backtracking, 'walk); - // the path is longer than this node's prefix - we are expecting a child node + // the path is longer than this node's prefix, we are expecting a child node if path.len() > current.prefix.len() { let (prefix, rest) = path.split_at(current.prefix.len()); - // prefix matches + // the prefix matches if prefix == current.prefix { let first = rest[0]; let consumed = path; @@ -376,8 +384,7 @@ impl Node { } } - // we didn't find a match and there are no children with wildcards, - // there is no match + // we didn't find a match and there are no children with wildcards, there is no match if !current.wild_child { // extra trailing slash if path == b"/" && current.value.is_some() { @@ -428,6 +435,11 @@ impl Node { return Err(MatchError::ExtraTrailingSlash); } + // try backtracking + if path != b"/" { + try_backtrack!(); + } + return Err(MatchError::NotFound); } // this is the last path segment @@ -503,7 +515,7 @@ impl Node { try_backtrack!(); } - // TODO: does this always means there is an extra trailing slash? + // TODO: does this *always* mean there is an extra trailing slash? 
if path == b"/" && current.wild_child && current.node_type != NodeType::Root { return Err(MatchError::unsure(full_path)); } @@ -647,7 +659,7 @@ fn find_wildcard(path: &[u8]) -> Result, InsertError> { match c { b'/' => return Ok(Some((&path[start..start + 1 + end], start))), b':' | b'*' => return Err(InsertError::TooManyParams), - _ => (), + _ => {} } } diff --git a/tests/tree.rs b/tests/tree.rs index e439804..99e6d8a 100644 --- a/tests/tree.rs +++ b/tests/tree.rs @@ -1,5 +1,17 @@ use matchit::{InsertError, MatchError, Router}; +#[test] +fn issue_31() { + let mut router = Router::new(); + router.insert("/path/foo/:arg", "foo").unwrap(); + router.insert("/path/*rest", "wildcard").unwrap(); + + assert_eq!( + router.at("/path/foo/myarg/bar/baz").map(|m| *m.value), + Ok("wildcard") + ); +} + #[test] fn issue_22() { let mut x = Router::new(); @@ -27,6 +39,16 @@ match_tests! { "/doc/rust1.26.html", "/ʯ", "/β", + "/sd!here", + "/sd$here", + "/sd&here", + "/sd'here", + "/sd(here", + "/sd)here", + "/sd+here", + "/sd,here", + "/sd;here", + "/sd=here", ], "/a" :: "/a" => {}, "" :: "/" => None, @@ -38,7 +60,17 @@ match_tests! { "" :: "/no" => None, "/ab" :: "/ab" => {}, "/ʯ" :: "/ʯ" => {}, - "/β" :: "/β" => {} + "/β" :: "/β" => {}, + "/sd!here" :: "/sd!here" => {}, + "/sd$here" :: "/sd$here" => {}, + "/sd&here" :: "/sd&here" => {}, + "/sd'here" :: "/sd'here" => {}, + "/sd(here" :: "/sd(here" => {}, + "/sd)here" :: "/sd)here" => {}, + "/sd+here" :: "/sd+here" => {}, + "/sd,here" :: "/sd,here" => {}, + "/sd;here" :: "/sd;here" => {}, + "/sd=here" :: "/sd=here" => {}, }, wildcard { routes = [