Merge pull request #2 from chris-ha458/idiomatic_fixes

Idiomatic fixes
nickspring · Sep 24, 2023 · 25bf11a · 25bf11a
2 parents b740ca3 + 3615389
commit 25bf11a
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 36 deletions.
diff --git a/src/cd.rs b/src/cd.rs
@@ -55,27 +55,26 @@ pub(crate) fn encoding_unicode_range(iana_name: &str) -> Result<Vec<&str>, Strin
 
 // Return inferred languages used with a unicode range.
 pub(crate) fn unicode_range_languages(primary_range: &str) -> Vec<&'static Language> {
-    let mut languages = vec![];
     if primary_range.is_empty() {
-        return languages;
+        return vec![];
     }
-    for (language, characters, _, _) in LANGUAGES.iter() {
-        for character in characters.chars() {
-            if unicode_range(&character).unwrap_or("") == primary_range {
-                languages.push(language);
-                break;
-            }
-        }
-    }
-    languages
+    LANGUAGES
+        .iter()
+        .filter_map(|(language, characters, _, _)| {
+            characters
+                .chars()
+                .find(|&character| unicode_range(&character).unwrap_or_default() == primary_range)
+                .map(|_| language)
+        })
+        .collect()
 }
 
 // Single-byte encoding language association.
 // Some code page are heavily linked to particular language(s).
 // This function does the correspondence.
 #[cache(LruCache : LruCache::new(128))]
 pub(crate) fn encoding_languages(iana_name: String) -> Vec<&'static Language> {
-    let unicode_ranges = encoding_unicode_range(&iana_name).unwrap_or(vec![]);
+    let unicode_ranges = encoding_unicode_range(&iana_name).unwrap_or_default();
     let mut primary_range: Option<&str> = None;
 
     for specified_range in unicode_ranges {
@@ -190,8 +189,8 @@ pub(crate) fn filter_alt_coherence_matches(results: &CoherenceMatches) -> Cohere
         *score = result.score.max(*score);
     }
     index
-        .iter()
-        .map(|(&language, &score)| CoherenceMatch { language, score })
+        .into_iter()
+        .map(|(language, score)| CoherenceMatch { language, score })
         .collect()
 }
 
@@ -229,7 +228,7 @@ pub(crate) fn coherence_ratio(
     include_languages: Option<Vec<&'static Language>>,
 ) -> Result<CoherenceMatches, String> {
     let threshold = f32::from(threshold.unwrap_or(OrderedFloat(0.1)));
-    let mut include_languages = include_languages.unwrap_or(vec![]);
+    let mut include_languages = include_languages.unwrap_or_default();
     let ignore_non_latin =
         include_languages.len() == 1 && include_languages.first() == Some(&&Language::Unknown);
     if ignore_non_latin {

diff --git a/src/consts.rs b/src/consts.rs
@@ -18,9 +18,9 @@ lazy_static! {
 
     pub static ref MAX_PROCESSED_BYTES: usize = 500_000;
     pub static ref TOO_SMALL_SEQUENCE: usize = 32;
-    pub static ref TOO_BIG_SEQUENCE: usize = 10e6 as usize;
+    pub static ref TOO_BIG_SEQUENCE: usize = 10_000_000; // 10e6
 
-    pub(crate) static ref UTF8_MAXIMAL_ALLOCATION: usize = 1112064;
+    pub(crate) static ref UTF8_MAXIMAL_ALLOCATION: usize = 1_112_064;
     pub(crate) static ref UNICODE_RANGES_COMBINED: Vec<(&'static str, Range<u32>)> = vec![
         ("Control character", 0..31 + 1),
         ("Basic Latin", 32..127 + 1),

diff --git a/src/entity.rs b/src/entity.rs
@@ -178,7 +178,7 @@ impl CharsetMatch {
                 blake3::hash(
                     obj.decoded_payload
                         .as_ref()
-                        .unwrap_or(&String::new())
+                        .unwrap_or(&String::default())
                         .as_bytes()
                 )
             );
@@ -260,12 +260,10 @@ impl CharsetMatch {
     }
     // Multibyte usage ratio
     pub fn multi_byte_usage(&self) -> f32 {
-        1.0 - (self
-            .decoded_payload()
-            .unwrap_or(String::new().as_ref())
-            .chars()
-            .count() as f32)
-            / (self.payload.len() as f32)
+        let decoded_chars = self.decoded_payload().unwrap_or_default().chars().count() as f32;
+        let payload_len = self.payload.len() as f32;
+
+        1.0 - (decoded_chars / payload_len)
     }
     // Original untouched bytes
     pub fn raw(&self) -> &Vec<u8> {
@@ -310,7 +308,7 @@ impl CharsetMatch {
     }
     // Returns sorted list of unicode ranges (if exists)
     pub fn unicode_ranges(&self) -> Vec<String> {
-        let mut ranges: Vec<String> = range_scan(self.decoded_payload().unwrap_or(""))
+        let mut ranges: Vec<String> = range_scan(self.decoded_payload().unwrap_or_default())
             .iter()
             .cloned()
             .collect();
@@ -341,7 +339,7 @@ pub struct CharsetMatchesIter<'a> {
 impl CharsetMatches {
     // Initialization method
     pub fn new(items: Option<Vec<CharsetMatch>>) -> Self {
-        let mut items = items.unwrap_or(vec![]);
+        let mut items = items.unwrap_or_default();
         CharsetMatches::resort(&mut items);
         CharsetMatches { items }
     }

diff --git a/src/lib.rs b/src/lib.rs
@@ -140,7 +140,7 @@ use crate::utils::{
 };
 use encoding::DecoderTrap;
 use log::{debug, trace};
-use std::fs::File;
+use std::fs::{metadata, File};
 use std::io::Read;
 use std::path::PathBuf;
 
@@ -169,7 +169,7 @@ pub mod utils;
 pub fn from_bytes(bytes: &Vec<u8>, settings: Option<NormalizerSettings>) -> CharsetMatches {
     // init settings with default values if it's None and recheck include_encodings and
     // exclude_encodings settings
-    let mut settings = settings.unwrap_or(NormalizerSettings::default());
+    let mut settings = settings.unwrap_or_default();
     if !settings.include_encodings.is_empty() {
         settings.include_encodings = settings
             .include_encodings
@@ -623,15 +623,12 @@ pub fn from_path(
     settings: Option<NormalizerSettings>,
 ) -> Result<CharsetMatches, String> {
     // read file
-    let file = File::open(path);
-    if file.is_err() {
-        return Err(String::from("Error opening file"));
-    }
+    let mut file = File::open(path).map_err(|e| format!("Error opening file: {e}"))?;
+    let file_size = metadata(path).map(|m| m.len()).unwrap_or_default();
 
-    let mut buffer = Vec::new();
-    if file.unwrap().read_to_end(&mut buffer).is_err() {
-        return Err(String::from("Error reading from file"));
-    }
+    let mut buffer = Vec::with_capacity(file_size as usize);
+    file.read_to_end(&mut buffer)
+        .map_err(|e| format!("Error reading from file: {e}"))?;
 
     // calculate
     Ok(from_bytes(&buffer, settings))