Skip to content

Commit

Permalink
Merge pull request #2 from chris-ha458/idiomatic_fixes
Browse files Browse the repository at this point in the history
Idiomatic fixes
  • Loading branch information
nickspring authored Sep 24, 2023
2 parents b740ca3 + 3615389 commit 25bf11a
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 36 deletions.
29 changes: 14 additions & 15 deletions src/cd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,27 +55,26 @@ pub(crate) fn encoding_unicode_range(iana_name: &str) -> Result<Vec<&str>, Strin

// Return inferred languages used with a unicode range.
pub(crate) fn unicode_range_languages(primary_range: &str) -> Vec<&'static Language> {
let mut languages = vec![];
if primary_range.is_empty() {
return languages;
return vec![];
}
for (language, characters, _, _) in LANGUAGES.iter() {
for character in characters.chars() {
if unicode_range(&character).unwrap_or("") == primary_range {
languages.push(language);
break;
}
}
}
languages
LANGUAGES
.iter()
.filter_map(|(language, characters, _, _)| {
characters
.chars()
.find(|&character| unicode_range(&character).unwrap_or_default() == primary_range)
.map(|_| language)
})
.collect()
}

// Single-byte encoding language association.
// Some code pages are heavily linked to particular language(s).
// This function does the correspondence.
#[cache(LruCache : LruCache::new(128))]
pub(crate) fn encoding_languages(iana_name: String) -> Vec<&'static Language> {
let unicode_ranges = encoding_unicode_range(&iana_name).unwrap_or(vec![]);
let unicode_ranges = encoding_unicode_range(&iana_name).unwrap_or_default();
let mut primary_range: Option<&str> = None;

for specified_range in unicode_ranges {
Expand Down Expand Up @@ -190,8 +189,8 @@ pub(crate) fn filter_alt_coherence_matches(results: &CoherenceMatches) -> Cohere
*score = result.score.max(*score);
}
index
.iter()
.map(|(&language, &score)| CoherenceMatch { language, score })
.into_iter()
.map(|(language, score)| CoherenceMatch { language, score })
.collect()
}

Expand Down Expand Up @@ -229,7 +228,7 @@ pub(crate) fn coherence_ratio(
include_languages: Option<Vec<&'static Language>>,
) -> Result<CoherenceMatches, String> {
let threshold = f32::from(threshold.unwrap_or(OrderedFloat(0.1)));
let mut include_languages = include_languages.unwrap_or(vec![]);
let mut include_languages = include_languages.unwrap_or_default();
let ignore_non_latin =
include_languages.len() == 1 && include_languages.first() == Some(&&Language::Unknown);
if ignore_non_latin {
Expand Down
4 changes: 2 additions & 2 deletions src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ lazy_static! {

pub static ref MAX_PROCESSED_BYTES: usize = 500_000;
pub static ref TOO_SMALL_SEQUENCE: usize = 32;
pub static ref TOO_BIG_SEQUENCE: usize = 10e6 as usize;
pub static ref TOO_BIG_SEQUENCE: usize = 1_000_000; // 1e6 (NOTE: the replaced expression `10e6 as usize` evaluated to 10_000_000)

pub(crate) static ref UTF8_MAXIMAL_ALLOCATION: usize = 1112064;
pub(crate) static ref UTF8_MAXIMAL_ALLOCATION: usize = 1_112_064;
pub(crate) static ref UNICODE_RANGES_COMBINED: Vec<(&'static str, Range<u32>)> = vec![
("Control character", 0..31 + 1),
("Basic Latin", 32..127 + 1),
Expand Down
16 changes: 7 additions & 9 deletions src/entity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ impl CharsetMatch {
blake3::hash(
obj.decoded_payload
.as_ref()
.unwrap_or(&String::new())
.unwrap_or(&String::default())
.as_bytes()
)
);
Expand Down Expand Up @@ -260,12 +260,10 @@ impl CharsetMatch {
}
// Multibyte usage ratio
pub fn multi_byte_usage(&self) -> f32 {
1.0 - (self
.decoded_payload()
.unwrap_or(String::new().as_ref())
.chars()
.count() as f32)
/ (self.payload.len() as f32)
let decoded_chars = self.decoded_payload().unwrap_or_default().chars().count() as f32;
let payload_len = self.payload.len() as f32;

1.0 - (decoded_chars / payload_len)
}
// Original untouched bytes
pub fn raw(&self) -> &Vec<u8> {
Expand Down Expand Up @@ -310,7 +308,7 @@ impl CharsetMatch {
}
// Returns a sorted list of Unicode ranges (if any)
pub fn unicode_ranges(&self) -> Vec<String> {
let mut ranges: Vec<String> = range_scan(self.decoded_payload().unwrap_or(""))
let mut ranges: Vec<String> = range_scan(self.decoded_payload().unwrap_or_default())
.iter()
.cloned()
.collect();
Expand Down Expand Up @@ -341,7 +339,7 @@ pub struct CharsetMatchesIter<'a> {
impl CharsetMatches {
// Initialization method
pub fn new(items: Option<Vec<CharsetMatch>>) -> Self {
let mut items = items.unwrap_or(vec![]);
let mut items = items.unwrap_or_default();
CharsetMatches::resort(&mut items);
CharsetMatches { items }
}
Expand Down
17 changes: 7 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ use crate::utils::{
};
use encoding::DecoderTrap;
use log::{debug, trace};
use std::fs::File;
use std::fs::{metadata, File};
use std::io::Read;
use std::path::PathBuf;

Expand Down Expand Up @@ -169,7 +169,7 @@ pub mod utils;
pub fn from_bytes(bytes: &Vec<u8>, settings: Option<NormalizerSettings>) -> CharsetMatches {
// init settings with default values if it's None and recheck include_encodings and
// exclude_encodings settings
let mut settings = settings.unwrap_or(NormalizerSettings::default());
let mut settings = settings.unwrap_or_default();
if !settings.include_encodings.is_empty() {
settings.include_encodings = settings
.include_encodings
Expand Down Expand Up @@ -623,15 +623,12 @@ pub fn from_path(
settings: Option<NormalizerSettings>,
) -> Result<CharsetMatches, String> {
// read file
let file = File::open(path);
if file.is_err() {
return Err(String::from("Error opening file"));
}
let mut file = File::open(path).map_err(|e| format!("Error opening file: {e}"))?;
let file_size = metadata(path).map(|m| m.len()).unwrap_or_default();

let mut buffer = Vec::new();
if file.unwrap().read_to_end(&mut buffer).is_err() {
return Err(String::from("Error reading from file"));
}
let mut buffer = Vec::with_capacity(file_size as usize);
file.read_to_end(&mut buffer)
.map_err(|e| format!("Error reading from file: {e}"))?;

// calculate
Ok(from_bytes(&buffer, settings))
Expand Down

0 comments on commit 25bf11a

Please sign in to comment.