Skip to content

Commit

Permalink
Merge pull request #7 from nickspring/compare-performance-by-times
Browse files Browse the repository at this point in the history
ci: Performance comparison by multiplier
  • Loading branch information
nickspring authored Sep 26, 2023
2 parents 8689fe5 + 1e36284 commit 74ea353
Showing 1 changed file with 21 additions and 8 deletions.
29 changes: 21 additions & 8 deletions src/performance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use charset_normalizer_rs::utils::{get_large_test_datasets, round_float};
use clap::Parser;
use encoding::label::encoding_from_whatwg_label;
use log::trace;
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::Read;
use std::process;
Expand Down Expand Up @@ -57,15 +57,17 @@ fn performance_compare(args: &PerformanceArgs) -> i32 {

// tested functions
let mut performance_results: HashMap<&str, Vec<PerformanceResult>> = HashMap::new();
let mut tested_functions: HashMap<&str, Box<dyn Fn(&Vec<u8>) -> String>> = HashMap::new();

// we need BTreeMap as we need preserve keys order
let mut tested_functions: BTreeMap<&str, Box<dyn Fn(&Vec<u8>) -> String>> = BTreeMap::new();

/////////////////////////////////////////////////////////////////
// Tested functions (libraries)
/////////////////////////////////////////////////////////////////

// charset-normalizer-rs
tested_functions.insert(
"charset-normalizer-rs",
"A) charset-normalizer-rs",
Box::new(|bytes: &Vec<u8>| {
if let Some(gb) = from_bytes(bytes, None).get_best() {
gb.encoding().to_string()
Expand All @@ -77,7 +79,7 @@ fn performance_compare(args: &PerformanceArgs) -> i32 {

// chardet
tested_functions.insert(
"chardet",
"B) chardet",
Box::new(|bytes: &Vec<u8>| {
let detected = &chardet::detect(bytes).0.to_ascii_lowercase();
let alternative = CHARDET_CORRESPONDENCE.get(&detected.as_str());
Expand All @@ -93,7 +95,7 @@ fn performance_compare(args: &PerformanceArgs) -> i32 {

// chardetng
tested_functions.insert(
"chardetng",
"C) chardetng",
Box::new(|bytes: &Vec<u8>| {
let mut ed = EncodingDetector::new();
ed.feed(bytes, true);
Expand Down Expand Up @@ -146,14 +148,25 @@ fn performance_compare(args: &PerformanceArgs) -> i32 {

// Statistics
let mut our_accuracy = 0.0;
let mut our_total_time: Duration = Duration::new(0, 0);
for (&name, _) in &tested_functions {
if let Some(results) = performance_results.get(name) {
let (total_duration, mean_duration, accuracy) = calc_stat(results);
if name == "charset-normalizer-rs" {
our_accuracy = accuracy;
}
println!("\n------------------------------");
println!("--> {} Conclusions", name);
if name == "A) charset-normalizer-rs" {
our_accuracy = accuracy;
our_total_time = total_duration.clone();
} else {
// compare speed in %
println!(
" --> Faster than charset-normalizer-rs by {:?} times",
round_float(
our_total_time.as_secs_f32() / total_duration.as_secs_f32(),
1
),
);
}
println!(" --> Accuracy: {:?}%", accuracy);
println!(" --> Total time: {:?}", total_duration);
println!(" --> Avg time: {:?}", mean_duration);
Expand Down

0 comments on commit 74ea353

Please sign in to comment.