fix: Support non-Unicode paths #108

Merged: 6 commits, Jun 18, 2024

Changes from 2 commits
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -46,7 +46,7 @@ chrono = { version = "0.4.31", optional = true, default-features = false, featur
] }
libc = "0.2.149"
scopeguard = "1.2.0"
url = "2.4.1"
urlencoding = "2.1.3"
once_cell = "1.18.0"

[target.'cfg(any(target_os = "dragonfly", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))'.dependencies]
133 changes: 104 additions & 29 deletions src/freedesktop.rs
@@ -7,12 +7,16 @@
//!

use std::{
borrow::Borrow,
borrow::{Borrow, Cow},
collections::HashSet,
ffi::{OsStr, OsString},
fs::{self, File, OpenOptions},
io::{BufRead, BufReader, Write},
os::unix::{ffi::OsStrExt, fs::PermissionsExt},
path::{Path, PathBuf},
os::unix::{
ffi::{OsStrExt, OsStringExt},
fs::PermissionsExt,
},
path::{Component, Path, PathBuf},
};

use log::{debug, warn};
@@ -136,13 +140,16 @@ pub fn list() -> Result<Vec<TrashItem>, Error> {
let value = split.next().unwrap().trim();

if key == "Path" {
let mut value_path = Path::new(value).to_owned();
if value_path.is_relative() {
value_path = top_dir.join(value_path);
}
let full_path_utf8 = PathBuf::from(parse_uri_path(&value_path));
name = Some(full_path_utf8.file_name().unwrap().to_str().unwrap().to_owned());
let parent = full_path_utf8.parent().unwrap();
let value_path = {
let path = Path::new(value);
if path.is_relative() {
decode_uri_path(top_dir.join(path))
} else {
decode_uri_path(path)
}
};
name = value_path.file_name().map(|name| name.to_owned());
let parent = value_path.parent().expect("Absolute path to trashed item should have a parent");
original_parent = Some(parent.into());
} else if key == "DeletionDate" {
#[cfg(feature = "chrono")]
@@ -439,15 +446,23 @@ fn move_to_trash(
// already exist. This newly created empty file can then be safely overwritten by the src file
// using the `rename` function.
let filename = src.file_name().unwrap();
let mut appendage = 0;
let mut appendage = 0usize;
loop {
appendage += 1;
let in_trash_name = if appendage > 1 {
format!("{}.{}", filename.to_str().unwrap(), appendage)
let in_trash_name: Cow<'_, OsStr> = if appendage > 1 {
// Length of the digits plus the dot
Review comment (Owner):

Actually it seems that 999 yields 2, so the +1 is compensating for that, only with the `.` missing from the calculation (see playground).

Personally, I find this memory optimisation a bit of a cognitive burden in code that deals with a comparatively slow disk, despite being guilty of doing it myself when I can.

Here I do recommend removing it; particularly when looking at decode_uri_path, I think avoiding allocations here is futile and not worth the added complexity.
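As a quick standalone illustration of the digit-count arithmetic under discussion (not part of the diff; checked_ilog10 is stable since Rust 1.67):

fn digit_count(n: usize) -> usize {
    // ilog10(999) == 2, so the +1 turns the logarithm into a digit count;
    // an exact capacity would still need one more byte for the '.' separator.
    n.checked_ilog10().unwrap_or_default() as usize + 1
}

fn main() {
    assert_eq!(digit_count(0), 1);
    assert_eq!(digit_count(9), 1);
    assert_eq!(digit_count(10), 2);
    assert_eq!(digit_count(999), 3);
    assert_eq!(digit_count(1000), 4);
}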

let appen_len: usize = appendage.checked_ilog10().unwrap_or_default() as usize + 1;
let mut trash_name = OsString::with_capacity(filename.len() + appen_len);
trash_name.push(filename);
trash_name.push(".");
trash_name.push(appendage.to_string());
trash_name.into()
} else {
filename.to_str().unwrap().into()
filename.into()
};
let info_name = format!("{in_trash_name}.trashinfo");
let mut info_name = OsString::with_capacity(in_trash_name.len() + 10);
info_name.push(&in_trash_name);
info_name.push(".trashinfo");
let info_file_path = info_folder.join(&info_name);
let info_result = OpenOptions::new().create_new(true).write(true).open(&info_file_path);
match info_result {
@@ -574,15 +589,34 @@ fn try_creating_placeholders(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Re
Ok(())
}

fn parse_uri_path(absolute_file_path: impl AsRef<Path>) -> String {
let file_path_chars = absolute_file_path.as_ref().to_str().unwrap().chars();
let url: String = "file://".chars().chain(file_path_chars).collect();
return url::Url::parse(&url).unwrap().to_file_path().unwrap().to_str().unwrap().into();
fn decode_uri_path(path: impl AsRef<Path>) -> PathBuf {
// Paths may be invalid Unicode, so they should be treated as byte strings.
// The `url` crate only accepts valid Rust strings, so paths would have to be URL encoded
// before they could be passed to the `url` functions.
// Simply parsing the path doesn't work because of the possibility of invalid Unicode.
// URL encoding the entire path doesn't work either, because the slashes would be encoded too.
// Thus, the easiest way is to manually decode each segment of the path and recombine them.
path.as_ref().iter().map(|part| OsString::from_vec(urlencoding::decode_binary(part.as_bytes()).to_vec())).collect()
}

fn encode_uri_path(absolute_file_path: impl AsRef<Path>) -> String {
let url = url::Url::from_file_path(absolute_file_path.as_ref()).unwrap();
url.path().to_owned()
fn encode_uri_path(path: impl AsRef<Path>) -> String {
// `iter()` cannot be used here because it yields '/' in certain situations, such as
// for root directories.
// Slashes would be encoded and thus mess up the path
let path: PathBuf = path
.as_ref()
.components()
.map(|component| {
// Only encode names and not '/', 'C:\', et cetera
if let Component::Normal(part) = component {
urlencoding::encode_binary(part.as_bytes()).to_string()
} else {
component.as_os_str().to_str().expect("Path components such as '/' are valid Unicode").to_owned()
}
})
.collect();

path.to_str().expect("URL encoded bytes is valid Unicode").to_owned()
}
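To make the segment-wise approach concrete, here is a small standalone roundtrip sketch (not part of the diff; the example path is made up, and it assumes the urlencoding crate added in Cargo.toml above):

use std::ffi::OsString;
use std::os::unix::ffi::{OsStrExt, OsStringExt};
use std::path::{Component, Path, PathBuf};

fn main() {
    // Mirrors decode_uri_path: each segment is percent-decoded on its own,
    // so the '/' separators are never touched by the decoder.
    let decoded: PathBuf = Path::new("/home/user/New%20Folder")
        .iter()
        .map(|part| OsString::from_vec(urlencoding::decode_binary(part.as_bytes()).to_vec()))
        .collect();
    assert_eq!(decoded, PathBuf::from("/home/user/New Folder"));

    // Mirrors encode_uri_path: only Normal components are encoded, while the
    // RootDir component ('/') is passed through unchanged.
    let encoded: PathBuf = decoded
        .components()
        .map(|component| match component {
            Component::Normal(part) => urlencoding::encode_binary(part.as_bytes()).to_string(),
            other => other.as_os_str().to_str().expect("'/' is valid Unicode").to_owned(),
        })
        .collect();
    assert_eq!(encoded.to_str(), Some("/home/user/New%20Folder"));
}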

#[derive(Eq, PartialEq, Debug)]
@@ -849,7 +883,7 @@ mod tests {
use std::{
collections::{hash_map::Entry, HashMap},
env,
ffi::OsString,
ffi::{OsStr, OsString},
fmt,
fs::File,
os::unix,
@@ -862,10 +896,13 @@
use crate::{
canonicalize_paths, delete, delete_all,
os_limited::{list, purge_all, restore_all},
platform::encode_uri_path,
tests::get_unique_name,
Error,
};

use super::decode_uri_path;

#[test]
#[serial]
fn test_list() {
@@ -874,7 +911,7 @@
let file_name_prefix = get_unique_name();
let batches: usize = 2;
let files_per_batch: usize = 3;
let names: Vec<_> = (0..files_per_batch).map(|i| format!("{}#{}", file_name_prefix, i)).collect();
let names: Vec<OsString> = (0..files_per_batch).map(|i| format!("{}#{}", file_name_prefix, i).into()).collect();
for _ in 0..batches {
for path in &names {
File::create(path).unwrap();
@@ -890,8 +927,10 @@
}
}
let items = list().unwrap();
let items: HashMap<_, Vec<_>> =
items.into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).fold(HashMap::new(), |mut map, x| {
let items: HashMap<_, Vec<_>> = items
.into_iter()
.filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes()))
.fold(HashMap::new(), |mut map, x| {
match map.entry(x.name.clone()) {
Entry::Occupied(mut entry) => {
entry.get_mut().push(x);
@@ -905,7 +944,7 @@
for name in names {
match items.get(&name) {
Some(items) => assert_eq!(items.len(), batches),
None => panic!("ERROR Could not find '{}' in {:#?}", name, items),
None => panic!("ERROR Could not find '{:?}' in {:#?}", name, items),
}
}

Expand All @@ -921,8 +960,8 @@ mod tests {

let file_name_prefix = get_unique_name();
let file_count = 2;
let names: Vec<_> = (0..file_count).map(|i| format!("{}#{}", file_name_prefix, i)).collect();
let symlink_names: Vec<_> = names.iter().map(|name| format!("{}-symlink", name)).collect();
let names: Vec<OsString> = (0..file_count).map(|i| format!("{}#{}", file_name_prefix, i).into()).collect();
let symlink_names: Vec<OsString> = names.iter().map(|name| format!("{:?}-symlink", name).into()).collect();

// Test file symbolic link and directory symbolic link
File::create(&names[0]).unwrap();
@@ -951,6 +990,42 @@
}
}

#[test]
fn uri_enc_dec_roundtrip() {
let fake = format!("/tmp/{}", get_unique_name());
let path = decode_uri_path(&fake);

assert_eq!(fake, path.to_str().expect("Path is valid Unicode"), "Decoded path shouldn't be different");
Review comment (Owner):

Generally, I go with assert_eq(actual, expected), but there might be precedent for doing it the other way around somewhere in the codebase already. If not, let's go with (actual, expected) instead.


let encoded = encode_uri_path(&path);
assert_eq!(fake, encoded, "URL encoded alphanumeric String shouldn't change");
}

#[test]
fn uri_enc_dec_roundtrip_invalid_unicode() {
let base = OsStr::new(&format!("/tmp/{}/", get_unique_name())).to_os_string();

// Add invalid UTF-8 byte
let mut bytes = base.into_encoded_bytes();
bytes.push(168);
Review comment (Owner):

That's the quickest way to do this that I have seen so far! Byte 168, I shall remember that.


// SAFETY:
// * OsString is produced in part from a valid OsStr
// * OsString does not have to be valid Unicode
// * The string isn't written to disk or transferred anywhere where it may be read by a
// different Rust version than produced it
//
// This looks a bit convoluted because [`OsString::from_vec`] is only defined on Unix
let fake = unsafe { OsString::from_encoded_bytes_unchecked(bytes) };
assert!(fake.to_str().is_none(), "Invalid Unicode cannot be a Rust String");

let path = decode_uri_path(&fake);
assert_eq!(fake.as_encoded_bytes(), path.as_os_str().as_encoded_bytes());

// Shouldn't panic
encode_uri_path(&path);
}
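As a side note on why byte 168 works for this (a minimal standalone check, not part of the diff): 0xA8 has the 10xxxxxx bit pattern of a UTF-8 continuation byte, so on its own it can never form valid UTF-8.

fn main() {
    // 168 == 0xA8 is a lone continuation byte and therefore invalid UTF-8.
    assert!(std::str::from_utf8(&[168]).is_err());

    // Any byte in 0x80..=0xBF would serve equally well for building an
    // invalid-Unicode test name.
    for byte in 0x80u8..=0xBF {
        assert!(std::str::from_utf8(&[byte]).is_err());
    }
}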

//////////////////////////////////////////////////////////////////////////////////////
/// System
//////////////////////////////////////////////////////////////////////////////////////
2 changes: 1 addition & 1 deletion src/lib.rs
@@ -274,7 +274,7 @@ pub struct TrashItem {

/// The name of the item. For example if the folder '/home/user/New Folder'
/// was deleted, its `name` is 'New Folder'
pub name: String,
pub name: OsString,

/// The path to the parent folder of this item before it was put inside the
/// trash. For example if the folder '/home/user/New Folder' is in the
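Since name is now an OsString, downstream name-based filtering has to go through raw bytes or a lossy conversion; a hedged sketch of the pattern the updated tests below use (the helper name is made up, and as_encoded_bytes assumes Rust 1.74+):

use std::ffi::OsString;

// Hypothetical helper mirroring the filtering done in the updated tests:
// compare raw bytes so that non-Unicode names are handled as well.
fn has_prefix(name: &OsString, prefix: &str) -> bool {
    name.as_encoded_bytes().starts_with(prefix.as_bytes())
}

fn main() {
    let name = OsString::from("New Folder");
    assert!(has_prefix(&name, "New"));
    // For display purposes, to_string_lossy() remains the usual escape hatch.
    println!("{}", name.to_string_lossy());
}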
57 changes: 39 additions & 18 deletions src/tests.rs
@@ -29,6 +29,7 @@ mod os_limited {
use super::{get_unique_name, init_logging};
use serial_test::serial;
use std::collections::{hash_map::Entry, HashMap};
use std::ffi::OsString;
use std::fs::File;

use crate as trash;
@@ -45,16 +46,18 @@
let file_name_prefix = get_unique_name();
let batches: usize = 2;
let files_per_batch: usize = 3;
let names: Vec<_> = (0..files_per_batch).map(|i| format!("{}#{}", file_name_prefix, i)).collect();
let names: Vec<OsString> = (0..files_per_batch).map(|i| format!("{}#{}", file_name_prefix, i).into()).collect();
for _ in 0..batches {
for path in names.iter() {
File::create(path).unwrap();
}
trash::delete_all(&names).unwrap();
}
let items = trash::os_limited::list().unwrap();
let items: HashMap<_, Vec<_>> =
items.into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).fold(HashMap::new(), |mut map, x| {
let items: HashMap<_, Vec<_>> = items
.into_iter()
.filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes()))
.fold(HashMap::new(), |mut map, x| {
match map.entry(x.name.clone()) {
Entry::Occupied(mut entry) => {
entry.get_mut().push(x);
@@ -82,7 +85,7 @@
}
}
}
None => panic!("ERROR Could not find '{}' in {:#?}", name, items),
None => panic!("ERROR Could not find '{:?}' in {:#?}", name, items),
}
}

@@ -119,12 +122,18 @@
}

// Collect it because we need the exact number of items gathered.
let targets: Vec<_> =
trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).collect();
let targets: Vec<_> = trash::os_limited::list()
.unwrap()
.into_iter()
.filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes()))
.collect();
assert_eq!(targets.len(), batches * files_per_batch);
trash::os_limited::purge_all(targets).unwrap();
let remaining =
trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).count();
let remaining = trash::os_limited::list()
.unwrap()
.into_iter()
.filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes()))
.count();
assert_eq!(remaining, 0);
}

@@ -141,12 +150,18 @@
trash::delete_all(&names).unwrap();

// Collect it because we need the exact number of items gathered.
let targets: Vec<_> =
trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).collect();
let targets: Vec<_> = trash::os_limited::list()
.unwrap()
.into_iter()
.filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes()))
.collect();
assert_eq!(targets.len(), file_count);
trash::os_limited::restore_all(targets).unwrap();
let remaining =
trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).count();
let remaining = trash::os_limited::list()
.unwrap()
.into_iter()
.filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes()))
.count();
assert_eq!(remaining, 0);

// They are not in the trash anymore but they should be at their original location
@@ -178,15 +193,18 @@
for path in names.iter().skip(file_count - collision_remaining) {
File::create(path).unwrap();
}
let mut targets: Vec<_> =
trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).collect();
let mut targets: Vec<_> = trash::os_limited::list()
.unwrap()
.into_iter()
.filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes()))
.collect();
targets.sort_by(|a, b| a.name.cmp(&b.name));
assert_eq!(targets.len(), file_count);
let remaining_count = match trash::os_limited::restore_all(targets) {
Err(trash::Error::RestoreCollision { remaining_items, .. }) => {
let contains = |v: &Vec<trash::TrashItem>, name: &String| {
for curr in v.iter() {
if *curr.name == *name {
if curr.name.as_encoded_bytes() == name.as_bytes() {
return true;
}
}
@@ -208,7 +226,7 @@
let remaining = trash::os_limited::list()
.unwrap()
.into_iter()
.filter(|x| x.name.starts_with(&file_name_prefix))
.filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes()))
.collect::<Vec<_>>();
assert_eq!(remaining.len(), remaining_count);
trash::os_limited::purge_all(remaining).unwrap();
@@ -234,8 +252,11 @@
File::create(twin_name).unwrap();
trash::delete(twin_name).unwrap();

let mut targets: Vec<_> =
trash::os_limited::list().unwrap().into_iter().filter(|x| x.name.starts_with(&file_name_prefix)).collect();
let mut targets: Vec<_> = trash::os_limited::list()
.unwrap()
.into_iter()
.filter(|x| x.name.as_encoded_bytes().starts_with(file_name_prefix.as_bytes()))
.collect();
targets.sort_by(|a, b| a.name.cmp(&b.name));
assert_eq!(targets.len(), file_count + 1); // plus one for one of the twins
match trash::os_limited::restore_all(targets) {
9 changes: 9 additions & 0 deletions tests/trash.rs
@@ -1,3 +1,4 @@
use std::ffi::OsStr;
use std::fs::{create_dir, File};
use std::path::{Path, PathBuf};

@@ -138,6 +139,14 @@ fn create_remove_single_file() {
assert!(File::open(&name).is_err());
}

#[test]
#[serial]
fn create_remove_single_file_invalid_utf8() {
let name = unsafe { OsStr::from_encoded_bytes_unchecked(&[168]) };
File::create(name).unwrap();
trash::delete(name).unwrap();
}

#[test]
fn recursive_file_deletion() {
let parent_dir = Path::new("remove-me");