diff --git a/Cargo.toml b/Cargo.toml index 26f9144..12902df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,7 @@ chrono = { version = "0.4.31", optional = true, default-features = false, featur ] } libc = "0.2.149" scopeguard = "1.2.0" -url = "2.4.1" +urlencoding = "2.1.3" once_cell = "1.18.0" [target.'cfg(any(target_os = "dragonfly", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))'.dependencies] diff --git a/src/freedesktop.rs b/src/freedesktop.rs index 8373ef9..b8f85fb 100644 --- a/src/freedesktop.rs +++ b/src/freedesktop.rs @@ -7,12 +7,16 @@ //! use std::{ - borrow::Borrow, + borrow::{Borrow, Cow}, collections::HashSet, + ffi::{OsStr, OsString}, fs::{self, File, OpenOptions}, io::{BufRead, BufReader, Write}, - os::unix::{ffi::OsStrExt, fs::PermissionsExt}, - path::{Path, PathBuf}, + os::unix::{ + ffi::{OsStrExt, OsStringExt}, + fs::PermissionsExt, + }, + path::{Component, Path, PathBuf}, }; use log::{debug, warn}; @@ -136,13 +140,16 @@ pub fn list() -> Result, Error> { let value = split.next().unwrap().trim(); if key == "Path" { - let mut value_path = Path::new(value).to_owned(); - if value_path.is_relative() { - value_path = top_dir.join(value_path); - } - let full_path_utf8 = PathBuf::from(parse_uri_path(&value_path)); - name = Some(full_path_utf8.file_name().unwrap().to_str().unwrap().to_owned()); - let parent = full_path_utf8.parent().unwrap(); + let value_path = { + let path = Path::new(value); + if path.is_relative() { + decode_uri_path(top_dir.join(path)) + } else { + decode_uri_path(path) + } + }; + name = value_path.file_name().map(|name| name.to_owned()); + let parent = value_path.parent().expect("Absolute path to trashed item should have a parent"); original_parent = Some(parent.into()); } else if key == "DeletionDate" { #[cfg(feature = "chrono")] @@ -439,15 +446,20 @@ fn move_to_trash( // already exist. This newly created empty file can then be safely overwritten by the src file // using the `rename` function. let filename = src.file_name().unwrap(); - let mut appendage = 0; + let mut appendage = 0usize; loop { appendage += 1; - let in_trash_name = if appendage > 1 { - format!("{}.{}", filename.to_str().unwrap(), appendage) + let in_trash_name: Cow<'_, OsStr> = if appendage > 1 { + let mut trash_name = filename.to_owned(); + trash_name.push(format!(".{appendage}")); + trash_name.into() } else { - filename.to_str().unwrap().into() + filename.into() }; - let info_name = format!("{in_trash_name}.trashinfo"); + // Length of name + length of '.trashinfo' + let mut info_name = OsString::with_capacity(in_trash_name.len() + 10); + info_name.push(&in_trash_name); + info_name.push(".trashinfo"); let info_file_path = info_folder.join(&info_name); let info_result = OpenOptions::new().create_new(true).write(true).open(&info_file_path); match info_result { @@ -574,15 +586,31 @@ fn try_creating_placeholders(src: impl AsRef, dst: impl AsRef) -> Re Ok(()) } -fn parse_uri_path(absolute_file_path: impl AsRef) -> String { - let file_path_chars = absolute_file_path.as_ref().to_str().unwrap().chars(); - let url: String = "file://".chars().chain(file_path_chars).collect(); - return url::Url::parse(&url).unwrap().to_file_path().unwrap().to_str().unwrap().into(); +fn decode_uri_path(path: impl AsRef) -> PathBuf { + // Paths may be invalid Unicode on most Unixes so they should be treated as byte strings + // A higher level crate, such as `url`, can't be used directly since its API intakes valid Rust + // strings. Thus, the easiest way is to manually decode each segment of the path and recombine. + path.as_ref().iter().map(|part| OsString::from_vec(urlencoding::decode_binary(part.as_bytes()).to_vec())).collect() } -fn encode_uri_path(absolute_file_path: impl AsRef) -> String { - let url = url::Url::from_file_path(absolute_file_path.as_ref()).unwrap(); - url.path().to_owned() +fn encode_uri_path(path: impl AsRef) -> String { + // `iter()` cannot be used here because it yields '/' in certain situations, such as + // for root directories. + // Slashes would be encoded and thus mess up the path + let path: PathBuf = path + .as_ref() + .components() + .map(|component| { + // Only encode names and not '/', 'C:\', et cetera + if let Component::Normal(part) = component { + urlencoding::encode_binary(part.as_bytes()).to_string() + } else { + component.as_os_str().to_str().expect("Path components such as '/' are valid Unicode").to_owned() + } + }) + .collect(); + + path.to_str().expect("URL encoded bytes is valid Unicode").to_owned() } #[derive(Eq, PartialEq, Debug)] @@ -849,10 +877,10 @@ mod tests { use std::{ collections::{hash_map::Entry, HashMap}, env, - ffi::OsString, + ffi::{OsStr, OsString}, fmt, fs::File, - os::unix, + os::unix::{self, ffi::OsStringExt}, path::{Path, PathBuf}, process::Command, }; @@ -862,10 +890,13 @@ mod tests { use crate::{ canonicalize_paths, delete, delete_all, os_limited::{list, purge_all, restore_all}, + platform::encode_uri_path, tests::get_unique_name, Error, }; + use super::decode_uri_path; + #[test] #[serial] fn test_list() { @@ -874,7 +905,7 @@ mod tests { let file_name_prefix = get_unique_name(); let batches: usize = 2; let files_per_batch: usize = 3; - let names: Vec<_> = (0..files_per_batch).map(|i| format!("{}#{}", file_name_prefix, i)).collect(); + let names: Vec = (0..files_per_batch).map(|i| format!("{}#{}", file_name_prefix, i).into()).collect(); for _ in 0..batches { for path in &names { File::create(path).unwrap(); @@ -890,8 +921,10 @@ mod tests { } } let items = list().unwrap(); - let items: HashMap<_, Vec<_>> = - items.into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).fold(HashMap::new(), |mut map, x| { + let items: HashMap<_, Vec<_>> = items + .into_iter() + .filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes())) + .fold(HashMap::new(), |mut map, x| { match map.entry(x.name.clone()) { Entry::Occupied(mut entry) => { entry.get_mut().push(x); @@ -905,7 +938,7 @@ mod tests { for name in names { match items.get(&name) { Some(items) => assert_eq!(items.len(), batches), - None => panic!("ERROR Could not find '{}' in {:#?}", name, items), + None => panic!("ERROR Could not find '{:?}' in {:#?}", name, items), } } @@ -921,8 +954,8 @@ mod tests { let file_name_prefix = get_unique_name(); let file_count = 2; - let names: Vec<_> = (0..file_count).map(|i| format!("{}#{}", file_name_prefix, i)).collect(); - let symlink_names: Vec<_> = names.iter().map(|name| format!("{}-symlink", name)).collect(); + let names: Vec = (0..file_count).map(|i| format!("{}#{}", file_name_prefix, i).into()).collect(); + let symlink_names: Vec = names.iter().map(|name| format!("{:?}-symlink", name).into()).collect(); // Test file symbolic link and directory symbolic link File::create(&names[0]).unwrap(); @@ -951,6 +984,34 @@ mod tests { } } + #[test] + fn uri_enc_dec_roundtrip() { + let fake = format!("/tmp/{}", get_unique_name()); + let path = decode_uri_path(&fake); + + assert_eq!(path.to_str().expect("Path is valid Unicode"), fake, "Decoded path shouldn't be different"); + + let encoded = encode_uri_path(&path); + assert_eq!(encoded, fake, "URL encoded alphanumeric String shouldn't change"); + } + + #[test] + fn uri_enc_dec_roundtrip_invalid_unicode() { + let base = OsStr::new(&format!("/tmp/{}/", get_unique_name())).to_os_string(); + + // Add invalid UTF-8 byte + let mut bytes = base.into_encoded_bytes(); + bytes.push(168); + let fake = OsString::from_vec(bytes); + assert!(fake.to_str().is_none(), "Invalid Unicode cannot be a Rust String"); + + let path = decode_uri_path(&fake); + assert_eq!(path.as_os_str().as_encoded_bytes(), fake.as_encoded_bytes()); + + // Shouldn't panic + encode_uri_path(&path); + } + ////////////////////////////////////////////////////////////////////////////////////// /// System ////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/lib.rs b/src/lib.rs index 0369e07..cf624cc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -274,7 +274,7 @@ pub struct TrashItem { /// The name of the item. For example if the folder '/home/user/New Folder' /// was deleted, its `name` is 'New Folder' - pub name: String, + pub name: OsString, /// The path to the parent folder of this item before it was put inside the /// trash. For example if the folder '/home/user/New Folder' is in the diff --git a/src/tests.rs b/src/tests.rs index a614387..72e72ea 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -29,8 +29,12 @@ mod os_limited { use super::{get_unique_name, init_logging}; use serial_test::serial; use std::collections::{hash_map::Entry, HashMap}; + use std::ffi::{OsStr, OsString}; use std::fs::File; + #[cfg(all(unix, not(target_os = "macos"), not(target_os = "ios"), not(target_os = "android")))] + use std::os::unix::ffi::OsStringExt; + use crate as trash; #[test] @@ -45,7 +49,7 @@ mod os_limited { let file_name_prefix = get_unique_name(); let batches: usize = 2; let files_per_batch: usize = 3; - let names: Vec<_> = (0..files_per_batch).map(|i| format!("{}#{}", file_name_prefix, i)).collect(); + let names: Vec = (0..files_per_batch).map(|i| format!("{}#{}", file_name_prefix, i).into()).collect(); for _ in 0..batches { for path in names.iter() { File::create(path).unwrap(); @@ -53,8 +57,10 @@ mod os_limited { trash::delete_all(&names).unwrap(); } let items = trash::os_limited::list().unwrap(); - let items: HashMap<_, Vec<_>> = - items.into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).fold(HashMap::new(), |mut map, x| { + let items: HashMap<_, Vec<_>> = items + .into_iter() + .filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes())) + .fold(HashMap::new(), |mut map, x| { match map.entry(x.name.clone()) { Entry::Occupied(mut entry) => { entry.get_mut().push(x); @@ -82,7 +88,7 @@ mod os_limited { } } } - None => panic!("ERROR Could not find '{}' in {:#?}", name, items), + None => panic!("ERROR Could not find '{:?}' in {:#?}", name, items), } } @@ -91,6 +97,23 @@ mod os_limited { let _ = trash::os_limited::purge_all(items.iter().flat_map(|(_name, item)| item)); } + #[cfg(all(unix, not(target_os = "macos"), not(target_os = "ios"), not(target_os = "android")))] + #[test] + #[serial] + fn list_invalid_utf8() { + let mut name = OsStr::new(&get_unique_name()).to_os_string().into_encoded_bytes(); + name.push(168); + let name = OsString::from_vec(name); + File::create(&name).unwrap(); + + // Delete, list, and remove file with an invalid UTF8 name + // Listing items is already exhaustively checked above, so this test is mainly concerned + // with checking that listing non-Unicode names does not panic + trash::delete(&name).unwrap(); + let item = trash::os_limited::list().unwrap().into_iter().find(|item| item.name == name).unwrap(); + let _ = trash::os_limited::purge_all([item]); + } + #[test] fn purge_empty() { init_logging(); @@ -119,12 +142,18 @@ mod os_limited { } // Collect it because we need the exact number of items gathered. - let targets: Vec<_> = - trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).collect(); + let targets: Vec<_> = trash::os_limited::list() + .unwrap() + .into_iter() + .filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes())) + .collect(); assert_eq!(targets.len(), batches * files_per_batch); trash::os_limited::purge_all(targets).unwrap(); - let remaining = - trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).count(); + let remaining = trash::os_limited::list() + .unwrap() + .into_iter() + .filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes())) + .count(); assert_eq!(remaining, 0); } @@ -141,12 +170,18 @@ mod os_limited { trash::delete_all(&names).unwrap(); // Collect it because we need the exact number of items gathered. - let targets: Vec<_> = - trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).collect(); + let targets: Vec<_> = trash::os_limited::list() + .unwrap() + .into_iter() + .filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes())) + .collect(); assert_eq!(targets.len(), file_count); trash::os_limited::restore_all(targets).unwrap(); - let remaining = - trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).count(); + let remaining = trash::os_limited::list() + .unwrap() + .into_iter() + .filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes())) + .count(); assert_eq!(remaining, 0); // They are not in the trash anymore but they should be at their original location @@ -178,15 +213,18 @@ mod os_limited { for path in names.iter().skip(file_count - collision_remaining) { File::create(path).unwrap(); } - let mut targets: Vec<_> = - trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).collect(); + let mut targets: Vec<_> = trash::os_limited::list() + .unwrap() + .into_iter() + .filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes())) + .collect(); targets.sort_by(|a, b| a.name.cmp(&b.name)); assert_eq!(targets.len(), file_count); let remaining_count = match trash::os_limited::restore_all(targets) { Err(trash::Error::RestoreCollision { remaining_items, .. }) => { let contains = |v: &Vec, name: &String| { for curr in v.iter() { - if *curr.name == *name { + if curr.name.as_encoded_bytes() == name.as_bytes() { return true; } } @@ -208,7 +246,7 @@ mod os_limited { let remaining = trash::os_limited::list() .unwrap() .into_iter() - .filter(|x| x.name.starts_with(&file_name_prefix)) + .filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes())) .collect::>(); assert_eq!(remaining.len(), remaining_count); trash::os_limited::purge_all(remaining).unwrap(); @@ -234,8 +272,11 @@ mod os_limited { File::create(twin_name).unwrap(); trash::delete(twin_name).unwrap(); - let mut targets: Vec<_> = - trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).collect(); + let mut targets: Vec<_> = trash::os_limited::list() + .unwrap() + .into_iter() + .filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes())) + .collect(); targets.sort_by(|a, b| a.name.cmp(&b.name)); assert_eq!(targets.len(), file_count + 1); // plus one for one of the twins match trash::os_limited::restore_all(targets) { diff --git a/src/windows.rs b/src/windows.rs index 9817336..da496f1 100644 --- a/src/windows.rs +++ b/src/windows.rs @@ -97,9 +97,12 @@ pub fn list() -> Result, Error> { let original_location = OsString::from_wide(original_location_bstr.as_wide()); let date_deleted = get_date_deleted_unix(&item2)?; + // NTFS paths are valid Unicode according to this chart: + // https://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations + // Converting a String back to OsString doesn't do extra work item_vec.push(TrashItem { id, - name: name.into_string().map_err(|original| Error::ConvertOsString { original })?, + name: name.into_string().map_err(|original| Error::ConvertOsString { original })?.into(), original_parent: PathBuf::from(original_location), time_deleted: date_deleted, }); diff --git a/tests/trash.rs b/tests/trash.rs index 6d09c3c..936c7fc 100644 --- a/tests/trash.rs +++ b/tests/trash.rs @@ -1,3 +1,4 @@ +use std::ffi::OsStr; use std::fs::{create_dir, File}; use std::path::{Path, PathBuf}; @@ -138,6 +139,15 @@ fn create_remove_single_file() { assert!(File::open(&name).is_err()); } +#[cfg(all(unix, not(target_os = "macos"), not(target_os = "ios"), not(target_os = "android")))] +#[test] +#[serial] +fn create_remove_single_file_invalid_utf8() { + let name = unsafe { OsStr::from_encoded_bytes_unchecked(&[168]) }; + File::create(name).unwrap(); + trash::delete(name).unwrap(); +} + #[test] fn recursive_file_deletion() { let parent_dir = Path::new("remove-me");