diff --git a/Cargo.lock b/Cargo.lock index fa8fd3a1..8be7d346 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1343,6 +1343,8 @@ dependencies = [ "clap", "gettext-rs", "plib", + "strum", + "strum_macros", ] [[package]] @@ -1669,6 +1671,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + [[package]] name = "rustyline" version = "14.0.0" @@ -1798,6 +1806,25 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.79", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/i18n/Cargo.toml b/i18n/Cargo.toml index db77e7c0..fdedb491 100644 --- a/i18n/Cargo.toml +++ b/i18n/Cargo.toml @@ -11,8 +11,10 @@ rust-version.workspace = true plib = { path = "../plib" } clap.workspace = true gettext-rs.workspace = true -bytemuck = { version = "1.17.0", features = ["derive"] } -byteorder = "1.5.0" +bytemuck = { version = "1.17", features = ["derive"] } +byteorder = "1.5" +strum = "0.26" +strum_macros = "0.26" [lints] workspace = true @@ -20,3 +22,7 @@ workspace = true [[bin]] name = "gencat" path = "./gencat.rs" + +[[bin]] +name = "iconv" +path = "./iconv.rs" diff --git a/i18n/iconv.rs b/i18n/iconv.rs new file mode 100644 index 00000000..41a7de32 --- /dev/null +++ b/i18n/iconv.rs @@ -0,0 +1,550 @@ +// +// 
Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// + +use clap::Parser; +use gettextrs::{bind_textdomain_codeset, setlocale, textdomain, LocaleCategory}; +use iconv_lib::{ + ascii, + utf_16::{self, UTF16Variant}, + utf_32::{self, UTF32Variant}, + utf_8, +}; +use plib::PROJECT_NAME; +use std::{ + collections::HashMap, + env, + fs::File, + io::{self, BufRead, BufReader, Read, Write}, + path::{Path, PathBuf}, + process::exit, + str::FromStr, +}; +use strum::IntoEnumIterator; +use strum_macros::{Display, EnumIter, EnumString}; + +mod iconv_lib; + +/// iconv — codeset conversion +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Omit invalid characters of the input file from the output + #[arg(short = 'c')] + omit_invalid: bool, + + /// Suppress messages about invalid characters + #[arg(short = 's')] + suppress_messages: bool, + + /// Identify the codeset of the input file + #[arg(short = 'f')] + from_codeset: Option, + + /// List all supported codeset values + #[arg(short = 'l')] + list_codesets: bool, + + /// Identify the codeset for the output file + #[arg(short = 't')] + to_codeset: Option, + + /// Input files (reads from stdin if not provided) + files: Option>, +} + +struct CircularBuffer { + reader: R, + buffer: [u8; 10000], + capacity: usize, + read_pos: usize, + write_pos: usize, + length: usize, +} + +impl CircularBuffer { + fn new(reader: R) -> Self { + CircularBuffer { + reader, + buffer: [0; 10000], + capacity: 10000, + read_pos: 0, + write_pos: 0, + length: 0, + } + } + + fn available_space(&self) -> usize { + self.capacity - self.length + } + + fn fill_buffer(&mut self) -> io::Result<()> { + while self.length < self.capacity { + let mut temp_buf = vec![0; self.available_space()]; + match 
self.reader.read(&mut temp_buf) { + Ok(0) => return Ok(()), // EOF reached + Ok(n) => { + self.write(&temp_buf[..n]); + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e), + } + } + Ok(()) + } + + fn write(&mut self, data: &[u8]) -> usize { + let mut bytes_written = 0; + for &byte in data.iter().take(self.available_space()) { + self.buffer[self.write_pos] = byte; + self.write_pos = (self.write_pos + 1) % self.capacity; + self.length += 1; + bytes_written += 1; + } + bytes_written + } + + fn iter(self) -> CircularBufferIterator { + CircularBufferIterator { buffer: self } + } +} + +struct CircularBufferIterator { + buffer: CircularBuffer, +} + +impl Iterator for CircularBufferIterator { + type Item = u8; + + fn next(&mut self) -> Option { + if self.buffer.length == 0 { + match self.buffer.fill_buffer() { + Ok(()) if self.buffer.length == 0 => return None, // EOF reached + Ok(()) => {} + Err(e) => { + eprintln!("Error: {}", e); + exit(1); + } + } + } + + if self.buffer.length > 0 { + let item = self.buffer.buffer[self.buffer.read_pos]; + self.buffer.read_pos = (self.buffer.read_pos + 1) % self.buffer.capacity; + self.buffer.length -= 1; + Some(item) + } else { + None + } + } +} + +impl IntoIterator for CircularBuffer { + type Item = u8; + type IntoIter = CircularBufferIterator; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +#[derive(EnumString, EnumIter, Debug, PartialEq, Display)] +#[strum(serialize_all = "SCREAMING-KEBAB-CASE")] +#[allow(non_camel_case_types)] +enum Encodings { + ASCII, + UTF_8, + UTF_16, + UTF_16LE, + UTF_16BE, + UTF_32, + UTF_32LE, + UTF_32BE, +} + +impl Encodings { + fn parse(encoding: &str) -> Self { + let cleaned_encoding = encoding.trim_matches('"'); + match Encodings::from_str(cleaned_encoding) { + Ok(encoding) => encoding, + Err(_) => { + eprintln!("Error: Unknown encoding: {}", cleaned_encoding); + exit(1); + } + } + } +} + +fn list_encodings() { + for encoding in 
Encodings::iter() { + println!("{}", encoding); // Display, not Debug: prints the spelling that -f/-t accept (e.g. UTF-8) + } +} + +#[derive(Debug, Default)] +struct CharmapHeader { + code_set_name: Option, + mb_cur_max: usize, + mb_cur_min: usize, + escape_char: char, + comment_char: char, +} + +#[derive(Debug)] +struct CharmapEntry { + symbolic_name: String, + encoding: Vec, + _comments: Option, +} + +#[derive(Debug)] +struct Charmap { + header: CharmapHeader, + entries: HashMap, + width_entries: HashMap, + width_default: usize, +} + +fn parse_encoding( + encoding: &str, + escape_char: char, +) -> Result, Box> { + let mut bytes = Vec::new(); + let mut chars = encoding.chars().peekable(); + + while let Some(&c) = chars.peek() { + if c == escape_char { + chars.next(); // consume escape char + match chars.next() { + Some('d') => { + let num: String = chars.by_ref().take(3).collect(); + bytes.push(num.parse::()?); + } + Some('x') => { + let num: String = chars.by_ref().take(2).collect(); + bytes.push(u8::from_str_radix(&num, 16)?); + } + Some(c) if c.is_digit(8) => { + let num: String = std::iter::once(c).chain(chars.by_ref().take(2)).collect(); + bytes.push(u8::from_str_radix(&num, 8)?); + } + _ => return Err("Invalid encoding format".into()), + } + } else { + chars.next(); // consume char + } + } + + Ok(bytes) +} + +fn parse_charmap(path: &Path) -> Result> { + let file = File::open(path)?; + let reader = BufReader::new(file); + let mut charmap = Charmap { + header: CharmapHeader { mb_cur_max: 1, escape_char: '\\', comment_char: '#', ..CharmapHeader::default() }, // POSIX charmap defaults until the header overrides them + entries: HashMap::new(), + width_entries: HashMap::new(), + width_default: 1, + }; + + let mut in_charmap_section = false; + let mut in_width_section = false; + + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + + if trimmed.is_empty() || trimmed.starts_with(charmap.header.comment_char) { + continue; + } + + if !in_charmap_section && !in_width_section { + if trimmed.starts_with("<code_set_name>") { + charmap.header.code_set_name = + Some(trimmed.split_whitespace().nth(1).unwrap().to_string()); + } else if trimmed.starts_with("<mb_cur_max>") { 
+ charmap.header.mb_cur_max = trimmed.split_whitespace().nth(1).unwrap().parse()?; + } else if trimmed.starts_with("<mb_cur_min>") { + charmap.header.mb_cur_min = trimmed.split_whitespace().nth(1).unwrap().parse()?; + } else if trimmed.starts_with("<escape_char>") { + charmap.header.escape_char = trimmed + .split_whitespace() + .nth(1) + .unwrap() + .chars() + .next() + .expect("Escape char is missing in charmap file") + } else if trimmed.starts_with("<comment_char>") { + charmap.header.comment_char = trimmed + .split_whitespace() + .nth(1) + .unwrap() + .chars() + .next() + .expect("Comment char is missing in charmap file") + } else if trimmed == "CHARMAP" { + in_charmap_section = true; + } else if trimmed == "WIDTH" { // section keywords must be tested here: this branch handles every line outside a section + in_width_section = true; + } else if trimmed.starts_with("WIDTH_DEFAULT") { + charmap.width_default = trimmed.split_whitespace().nth(1).unwrap().parse()?; + } + } else if in_charmap_section { + if trimmed == "END CHARMAP" { + in_charmap_section = false; + } else { + let mut parts = Vec::new(); + let mut remaining = trimmed; + + for _ in 0..3 { + if remaining.is_empty() { + break; + } + let part = remaining + .trim_start() + .split_once(' ') + .map_or((remaining, ""), |(a, b)| (a, b)); + parts.push(part.0); + remaining = part.1; + } + + if parts.len() >= 2 { + let symbolic_name = parts[0].trim_matches(|c| c == '<' || c == '>').to_string(); + let encoding = parse_encoding(parts[1], charmap.header.escape_char)?; + let _comments = parts.get(2).map(|&s| s.to_string()); + charmap.entries.insert( + symbolic_name.clone(), + CharmapEntry { + symbolic_name, + encoding, + _comments, + }, + ); + } + } + } else if in_width_section { + if trimmed == "END WIDTH" { + in_width_section = false; + } else { + let parts: Vec<&str> = trimmed.split_whitespace().collect(); + if parts.len() >= 2 { + let symbolic_name = parts[0].trim_matches(|c| c == '<' || c == '>').to_string(); + let width = parts[1].parse()?; + charmap.width_entries.insert(symbolic_name, width); + } + } + } + } + + Ok(charmap) +} + 
+#[derive(Debug)] +enum CodesetType { + Encoding(Encodings), + Charmap(Charmap), +} + +fn parse_codeset(codeset: &str) -> Result> { + if codeset.contains('/') { + Ok(CodesetType::Charmap(parse_charmap(Path::new(codeset))?)) + } else { + Ok(CodesetType::Encoding(Encodings::parse(codeset))) + } +} + +fn encoding_conversion( + from: &Encodings, + to: &Encodings, + input: CircularBuffer>, + omit_invalid: bool, + supress_error: bool, +) { + let iter = input.into_iter(); + let ucs4 = match from { + Encodings::UTF_8 => utf_8::to_ucs4(iter, omit_invalid, supress_error), + Encodings::UTF_16 => { + utf_16::to_ucs4(iter, omit_invalid, supress_error, UTF16Variant::UTF16) + } + Encodings::UTF_16LE => { + utf_16::to_ucs4(iter, omit_invalid, supress_error, UTF16Variant::UTF16LE) + } + Encodings::UTF_16BE => { + utf_16::to_ucs4(iter, omit_invalid, supress_error, UTF16Variant::UTF16BE) + } + Encodings::UTF_32 => { + utf_32::to_ucs4(iter, omit_invalid, supress_error, UTF32Variant::UTF32) + } + Encodings::UTF_32LE => { + utf_32::to_ucs4(iter, omit_invalid, supress_error, UTF32Variant::UTF32LE) + } + Encodings::UTF_32BE => { + utf_32::to_ucs4(iter, omit_invalid, supress_error, UTF32Variant::UTF32BE) + } + Encodings::ASCII => ascii::to_ucs4(iter, omit_invalid, supress_error), + }; + + let expected = match to { + Encodings::UTF_8 => utf_8::from_ucs4(ucs4, omit_invalid, supress_error), + Encodings::UTF_16 => { + utf_16::from_ucs4(ucs4, omit_invalid, supress_error, UTF16Variant::UTF16) + } + Encodings::UTF_16BE => { + utf_16::from_ucs4(ucs4, omit_invalid, supress_error, UTF16Variant::UTF16BE) + } + Encodings::UTF_16LE => { + utf_16::from_ucs4(ucs4, omit_invalid, supress_error, UTF16Variant::UTF16LE) + } + Encodings::UTF_32 => { + utf_32::from_ucs4(ucs4, omit_invalid, supress_error, UTF32Variant::UTF32) + } + Encodings::UTF_32LE => { + utf_32::from_ucs4(ucs4, omit_invalid, supress_error, UTF32Variant::UTF32LE) + } + Encodings::UTF_32BE => { + utf_32::from_ucs4(ucs4, omit_invalid, 
supress_error, UTF32Variant::UTF32BE) + } + Encodings::ASCII => ascii::from_ucs4(ucs4, omit_invalid, supress_error), + }; + + expected.for_each(|byte| { + io::stdout().write_all(&[byte]).unwrap(); + io::stdout().flush().unwrap(); + }); +} + +fn charmap_conversion( + from: &Charmap, + to: &Charmap, + input: CircularBuffer>, + omit_invalid: bool, + suppress_error: bool, +) { + let mut buffer = Vec::new(); + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + + for byte in input { + buffer.push(byte); + let mut found = false; + for (_, entry) in &from.entries { + if buffer.starts_with(&entry.encoding) { + if let Some(to_entry) = to + .entries + .values() + .find(|e| e.symbolic_name == entry.symbolic_name) + { + if let Err(e) = stdout.write_all(&to_entry.encoding) { + eprintln!("Error writing to stdout: {}", e); + } + if let Err(e) = stdout.flush() { + eprintln!("Error flushing stdout: {}", e); + } + buffer.clear(); + found = true; + break; + } + } + } + if !found && buffer.len() >= from.header.mb_cur_max { + if !suppress_error { + eprintln!("Error: Invalid or unmapped character"); + } + if omit_invalid { + buffer.clear(); + } else { + if let Err(e) = stdout.write_all(&[buffer[0]]) { + eprintln!("Error writing to stdout: {}", e); + } + if let Err(e) = stdout.flush() { + eprintln!("Error flushing stdout: {}", e); + } + buffer.remove(0); + } + } + } + + for &byte in &buffer { + if !omit_invalid { + if let Err(e) = stdout.write_all(&[byte]) { + eprintln!("Error writing to stdout: {}", e); + } + if let Err(e) = stdout.flush() { + eprintln!("Error flushing stdout: {}", e); + } + } + if !suppress_error { + eprintln!("Error: Invalid or unmapped character at end of input"); + } + } +} + +fn main() -> Result<(), Box> { + let args = Args::parse(); + + setlocale(LocaleCategory::LcAll, ""); + textdomain(PROJECT_NAME)?; + bind_textdomain_codeset(PROJECT_NAME, "UTF-8")?; + + if args.list_codesets { + list_encodings(); + exit(0); + } + + let from_codeset = 
args.from_codeset.unwrap_or_else(|| { + env::var("LANG") + .ok() + .and_then(|lang| lang.split('.').nth(1).map(String::from)) + .unwrap_or_else(|| { + eprintln!("Error: Could not find a codeset from your locale"); + exit(1); + }) + }); + + let to_codeset = args.to_codeset.unwrap_or_else(|| { + env::var("LANG") + .ok() + .and_then(|lang| lang.split('.').nth(1).map(String::from)) + .unwrap_or_else(|| { + eprintln!("Error: Could not find a codeset from your locale"); + exit(1); + }) + }); + + let from_codeset = parse_codeset(&from_codeset)?; + let to_codeset = parse_codeset(&to_codeset)?; + + let inputs: Vec> = match args.files { + Some(files) => files + .into_iter() + .map(|file| plib::io::input_stream(&file, true)) + .collect::, _>>()?, + None => vec![Box::new(io::stdin().lock())], + }; + + for input in inputs { + let buf = CircularBuffer::new(input); + match (&from_codeset, &to_codeset) { + (CodesetType::Encoding(from), CodesetType::Encoding(to)) => { + encoding_conversion(from, to, buf, args.omit_invalid, args.suppress_messages); + } + (CodesetType::Charmap(from), CodesetType::Charmap(to)) => { + charmap_conversion(from, to, buf, args.omit_invalid, args.suppress_messages); + } + _ => { + eprintln!( + "Error: Both codesets must be of the same type (either Encoding or Charmap)" + ); + exit(1); + } + } + } + + Ok(()) +} diff --git a/i18n/iconv_lib/ascii.rs b/i18n/iconv_lib/ascii.rs new file mode 100644 index 00000000..12270308 --- /dev/null +++ b/i18n/iconv_lib/ascii.rs @@ -0,0 +1,64 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. 
+// SPDX-License-Identifier: MIT +// + +use std::{iter, process::exit}; + +// Convert ASCII to UCS-4 +pub fn to_ucs4 + 'static>( + mut input: I, + omit_invalid: bool, + suppress_error: bool, +) -> Box> { + let mut position = 0; + + let iter = std::iter::from_fn(move || { + while let Some(code_point) = input.next() { + position += 1; + if code_point <= 127 { + return Some(code_point as u32); + } else if omit_invalid { + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid input position {}", position - 1); + } + std::process::exit(1); // -s silences the message only; invalid input is still a failure + } + } + None + }); + + Box::new(iter) +} + +pub fn from_ucs4 + 'static>( + mut input: I, + omit_invalid: bool, + suppress_error: bool, +) -> Box> { + let mut position = 0; + let iter = iter::from_fn(move || { + while let Some(code_point) = input.next() { + position += 1; + if code_point <= 127 { + return Some(code_point as u8); + } else if omit_invalid { + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid input position {}", position - 1); + } + // -s silences the message only; an unencodable code point still fails the conversion + exit(1) + } + } + None + }); + Box::new(iter) +} diff --git a/i18n/iconv_lib/mod.rs b/i18n/iconv_lib/mod.rs new file mode 100644 index 00000000..f6076675 --- /dev/null +++ b/i18n/iconv_lib/mod.rs @@ -0,0 +1,4 @@ +pub mod ascii; +pub mod utf_16; +pub mod utf_32; +pub mod utf_8; diff --git a/i18n/iconv_lib/utf_16.rs b/i18n/iconv_lib/utf_16.rs new file mode 100644 index 00000000..d3f498fe --- /dev/null +++ b/i18n/iconv_lib/utf_16.rs @@ -0,0 +1,200 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. 
+// SPDX-License-Identifier: MIT +// + +use byteorder::{BigEndian, ByteOrder, LittleEndian}; +use std::{ + iter::{self}, + process::exit, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UTF16Variant { + UTF16LE, + UTF16BE, + UTF16, +} + +const BOM: u16 = 0xFEFF; +const BOM_SWAPPED: u16 = 0xFFFE; + +// Convert UTF-16 to UCS-4 +pub fn to_ucs4 + 'static>( + mut input: I, + omit_invalid: bool, + suppress_error: bool, + variant: UTF16Variant, +) -> Box> { + let mut buffer = Vec::with_capacity(4); + let mut determined_variant = variant; + let mut bom_checked = false; + + let iter = iter::from_fn(move || { + loop { + if buffer.len() < 2 { + buffer.extend(input.by_ref().take(4 - buffer.len())); + if buffer.len() < 2 { + return None; // End of input + } + } + + if !bom_checked { + bom_checked = true; + if variant == UTF16Variant::UTF16 { + let first_word = BigEndian::read_u16(&buffer[0..2]); + if first_word == BOM { + determined_variant = UTF16Variant::UTF16BE; + buffer.drain(0..2); + continue; + } else if first_word == BOM_SWAPPED { + determined_variant = UTF16Variant::UTF16LE; + buffer.drain(0..2); + continue; + } else { + determined_variant = if cfg!(target_endian = "little") { + UTF16Variant::UTF16LE + } else { + UTF16Variant::UTF16BE + }; + } + } + } + + let code_unit = match determined_variant { + UTF16Variant::UTF16LE => LittleEndian::read_u16(&buffer[0..2]), + UTF16Variant::UTF16BE => BigEndian::read_u16(&buffer[0..2]), + UTF16Variant::UTF16 => unreachable!(), + }; + + // Surrogate pair + if (0xD800..=0xDBFF).contains(&code_unit) { + if buffer.len() < 4 { + buffer.extend(input.by_ref().take(4 - buffer.len())); + if buffer.len() < 4 { + if omit_invalid { + buffer.clear(); + continue; + } else { + if !suppress_error { + eprintln!("Error: Unpaired surrogate at end of input"); + } + return None; + } + } + } + + let low_surrogate = match determined_variant { + UTF16Variant::UTF16LE => LittleEndian::read_u16(&buffer[2..4]), + UTF16Variant::UTF16BE => 
BigEndian::read_u16(&buffer[2..4]), + UTF16Variant::UTF16 => unreachable!(), + }; + + if !(0xDC00..=0xDFFF).contains(&low_surrogate) { + if omit_invalid { + buffer.drain(0..2); + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid low surrogate"); + } + return None; + } + } + + let high = u32::from(code_unit - 0xD800); + let low = u32::from(low_surrogate - 0xDC00); + let code_point = (high << 10) + low + 0x10000; + buffer.drain(0..4); + return Some(code_point); + + // Unpaired low surrogate + } else if (0xDC00..=0xDFFF).contains(&code_unit) { + if omit_invalid { + buffer.drain(0..2); + continue; + } else { + if !suppress_error { + eprintln!("Error: Unpaired low surrogate"); + } + return None; + } + } else { + buffer.drain(0..2); + return Some(u32::from(code_unit)); + } + } + }); + + Box::new(iter) +} + +/// Convert UCS-4 to UTF-16 +pub fn from_ucs4 + 'static>( + input: I, + omit_invalid: bool, + suppress_error: bool, + variant: UTF16Variant, +) -> Box> { + let variant = match variant { + UTF16Variant::UTF16LE | UTF16Variant::UTF16BE => variant, + UTF16Variant::UTF16 => { + if cfg!(target_endian = "little") { + UTF16Variant::UTF16LE + } else { + UTF16Variant::UTF16BE + } + } + }; + + let iter = input.flat_map(move |code_point| { + let mut utf16 = Vec::new(); + + if code_point <= 0xD7FF || (0xE000..=0xFFFF).contains(&code_point) { + utf16.push(code_point as u16); + } else if (0xD800..=0xDFFF).contains(&code_point) { + if omit_invalid { + return Vec::new(); // -c: drop the unencodable surrogate and keep converting + } else { + if !suppress_error { + eprintln!("Error: Isolated surrogate code point U+{:04X}", code_point); + } + exit(1) + } + } else if code_point <= 0x10FFFF { + let code_point = code_point - 0x10000; + let high_surrogate = ((code_point >> 10) as u16) + 0xD800; + let low_surrogate = ((code_point & 0x3FF) as u16) + 0xDC00; + utf16.extend_from_slice(&[high_surrogate, low_surrogate]); + } else { + if omit_invalid { + return Vec::new(); // -c: drop the out-of-range code point and keep converting + } else { + if !suppress_error { + eprintln!("Error: 
Invalid Unicode code point U+{:X}", code_point); + } + exit(1) + } + } + + to_bytes(&utf16, variant) + }); + + Box::new(iter) +} + +#[inline] +fn to_bytes(utf16: &[u16], variant: UTF16Variant) -> Vec { + utf16 + .iter() + .flat_map(|&code_unit| match variant { + UTF16Variant::UTF16LE => code_unit.to_le_bytes().to_vec(), + UTF16Variant::UTF16BE => code_unit.to_be_bytes().to_vec(), + _ => unreachable!(), + }) + .collect() +} diff --git a/i18n/iconv_lib/utf_32.rs b/i18n/iconv_lib/utf_32.rs new file mode 100644 index 00000000..f1dec4c9 --- /dev/null +++ b/i18n/iconv_lib/utf_32.rs @@ -0,0 +1,153 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// + +use byteorder::{BigEndian, ByteOrder, LittleEndian}; +use std::{iter, process::exit}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UTF32Variant { + UTF32LE, + UTF32BE, + UTF32, +} + +const BOM: u32 = 0x0000FEFF; +const BOM_OE: u32 = 0xFFFE0000; + +/// Convert UTF-32 to UCS-4 +pub fn to_ucs4 + 'static>( + mut input: I, + omit_invalid: bool, + suppress_error: bool, + variant: UTF32Variant, +) -> Box> { + let mut buffer = Vec::with_capacity(4); + let mut variant = variant; + let mut bom_checked = false; + + let iter = iter::from_fn(move || { + loop { + if buffer.len() < 4 { + buffer.extend(input.by_ref().take(4 - buffer.len())); + if buffer.len() < 4 { + return None; // End of input + } + } + + if !bom_checked { + let first_word = BigEndian::read_u32(&buffer); + match variant { + UTF32Variant::UTF32 => { + if first_word == BOM { + variant = UTF32Variant::UTF32BE; + buffer.clear(); + } else if first_word == BOM_OE { + variant = UTF32Variant::UTF32LE; + buffer.clear(); + } else { + variant = if cfg!(target_endian = "little") { + UTF32Variant::UTF32LE + } else { + UTF32Variant::UTF32BE + }; + } + } + _ => 
{} + } + bom_checked = true; + if buffer.is_empty() { + continue; + } + } + + let code_point = match variant { + UTF32Variant::UTF32LE => LittleEndian::read_u32(&buffer), + UTF32Variant::UTF32BE => BigEndian::read_u32(&buffer), + UTF32Variant::UTF32 => unreachable!(), + }; + + buffer.clear(); + + if code_point >= 0x110000 { + if !suppress_error { + eprintln!("Error: Invalid code point U+{:X}", code_point); + } + if omit_invalid { + continue; + } else { + return None; + } + } + + return Some(code_point); + } + }); + Box::new(iter) +} + +/// Convert UCS-4 to UTF-32 +pub fn from_ucs4 + 'static>( + input: I, + omit_invalid: bool, + suppress_error: bool, + variant: UTF32Variant, +) -> Box> { + let variant = match variant { + UTF32Variant::UTF32LE => UTF32Variant::UTF32LE, + UTF32Variant::UTF32BE => UTF32Variant::UTF32BE, + UTF32Variant::UTF32 => { + if cfg!(target_endian = "little") { + UTF32Variant::UTF32LE + } else { + UTF32Variant::UTF32BE + } + } + }; + + let mut code_point = input.peekable(); + let mut buf = [0u8; 4]; + let mut idx = 4; + + let iter = iter::from_fn(move || loop { + if idx < 4 { + let byte = buf[idx]; + idx += 1; + return Some(byte); + } + + match code_point.next() { + Some(cp) => { + if cp > 0x10FFFF { + if omit_invalid { + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid Unicode code point U+{:X}", cp); + } + exit(1); + } + } + write_u32(&mut buf, cp, variant); // Write code point to buffer + idx = 0; + } + None => return None, + } + }); + + Box::new(iter) +} + +#[inline] +fn write_u32(buffer: &mut [u8; 4], value: u32, variant: UTF32Variant) { + match variant { + UTF32Variant::UTF32LE => LittleEndian::write_u32(buffer, value), + UTF32Variant::UTF32BE => BigEndian::write_u32(buffer, value), + _ => unreachable!(), + } +} diff --git a/i18n/iconv_lib/utf_8.rs b/i18n/iconv_lib/utf_8.rs new file mode 100644 index 00000000..a65ee364 --- /dev/null +++ b/i18n/iconv_lib/utf_8.rs @@ -0,0 +1,244 @@ +// +// Copyright (c) 2024 Jeff Garzik 
+// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// + +use std::{iter, process::exit}; + +/// Convert UTF-8 to UCS-4 +pub fn to_ucs4 + 'static>( + mut input: I, + omit_invalid: bool, + suppress_error: bool, +) -> Box> { + let mut buffer = Vec::with_capacity(4); + let iter = iter::from_fn(move || { + loop { + if buffer.is_empty() { + buffer.extend(input.by_ref().take(4)); + if buffer.is_empty() { + return None; // End of input + } + } + let byte = buffer[0]; + + // 1 byte + if byte <= 0x7F { + let code_point = byte as u32; + buffer.drain(0..1); + return Some(code_point); + + // 2 bytes + } else if byte >= 0xC0 && byte <= 0xDF { + if buffer.len() < 2 { + let add_bytes = input.by_ref().take(2 - buffer.len()).collect::>(); + buffer.extend(add_bytes); + if buffer.len() < 2 { + if omit_invalid { + buffer.clear(); + continue; + } else { + if !suppress_error { + eprintln!("Error: Incomplete 2-byte UTF-8 sequence"); + } + exit(1) + } + } + } + if (buffer[1] & 0xC0) != 0x80 { + if omit_invalid { + buffer.drain(0..1); + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid 2-byte UTF-8 sequence"); + } + exit(1) + } + } + let code_point = ((byte as u32 & 0x1F) << 6) | (buffer[1] as u32 & 0x3F); + if code_point < 0x80 { + if omit_invalid { + buffer.drain(0..2); + continue; + } else { + if !suppress_error { + eprintln!("Error: Overlong 2-byte sequence"); + } + exit(1) + } + } + buffer.drain(0..2); + return Some(code_point); + + // 3 bytes + } else if byte >= 0xE0 && byte <= 0xEF { + if buffer.len() < 3 { + let add_bytes = input.by_ref().take(3 - buffer.len()).collect::>(); + buffer.extend(add_bytes); + if buffer.len() < 3 { + if omit_invalid { + buffer.clear(); + continue; + } else { + if !suppress_error { + eprintln!("Error: Incomplete 3-byte UTF-8 sequence"); + } + exit(1) + } + } + } + if 
(buffer[1] & 0xC0) != 0x80 || (buffer[2] & 0xC0) != 0x80 { + if omit_invalid { + buffer.drain(0..1); + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid 3-byte UTF-8 sequence"); + } + exit(1) + } + } + let code_point = ((byte as u32 & 0x0F) << 12) + | ((buffer[1] as u32 & 0x3F) << 6) + | (buffer[2] as u32 & 0x3F); + if code_point < 0x800 || (0xD800..=0xDFFF).contains(&code_point) { + if omit_invalid { + buffer.drain(0..3); + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid 3-byte sequence"); + } + exit(1) + } + } + buffer.drain(0..3); + return Some(code_point); + + // 4 bytes + } else if byte >= 0xF0 && byte <= 0xF4 { + if buffer.len() < 4 { + let add_bytes = input.by_ref().take(4 - buffer.len()).collect::>(); + buffer.extend(add_bytes); + if buffer.len() < 4 { + if omit_invalid { + buffer.clear(); + continue; + } else { + if !suppress_error { + eprintln!("Error: Incomplete 4-byte UTF-8 sequence"); + } + exit(1) + } + } + } + if (buffer[1] & 0xC0) != 0x80 + || (buffer[2] & 0xC0) != 0x80 + || (buffer[3] & 0xC0) != 0x80 + { + if omit_invalid { + buffer.drain(0..1); + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid 4-byte UTF-8 sequence"); + } + exit(1) + } + } + let code_point = ((byte as u32 & 0x07) << 18) + | ((buffer[1] as u32 & 0x3F) << 12) + | ((buffer[2] as u32 & 0x3F) << 6) + | (buffer[3] as u32 & 0x3F); + if code_point > 0x10FFFF || code_point < 0x10000 { + if omit_invalid { + buffer.drain(0..4); + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid code point in 4-byte sequence"); + } + exit(1) + } + } + buffer.drain(0..4); + return Some(code_point); + } else { + if omit_invalid { + buffer.drain(0..1); + continue; + } else { + if !suppress_error { + eprintln!("Error: Invalid byte"); + } + exit(1); + } + } + } + }); + Box::new(iter) +} + +/// Convert UCS-4 to UTF-8 +pub fn from_ucs4 + 'static>( + input: I, + omit_invalid: bool, + suppress_error: bool, +) -> Box> { + let 
iter = input.flat_map(move |code_point| { + if code_point <= 0x7F { + Some(vec![code_point as u8]) + } else if code_point <= 0x7FF { + Some(vec![ + 0xC0 | ((code_point >> 6) as u8), + 0x80 | ((code_point & 0x3F) as u8), + ]) + } else if code_point <= 0xFFFF { + if (0xD800..=0xDFFF).contains(&code_point) { + if !suppress_error { + eprintln!( + "Error: Surrogate code point U+{:04X} is not allowed in UTF-8", + code_point + ); + } + if omit_invalid { + None + } else { + Some(vec![]) + } + } else { + Some(vec![ + 0xE0 | ((code_point >> 12) as u8), + 0x80 | (((code_point >> 6) & 0x3F) as u8), + 0x80 | ((code_point & 0x3F) as u8), + ]) + } + } else if code_point <= 0x10FFFF { + Some(vec![ + 0xF0 | ((code_point >> 18) as u8), + 0x80 | (((code_point >> 12) & 0x3F) as u8), + 0x80 | (((code_point >> 6) & 0x3F) as u8), + 0x80 | ((code_point & 0x3F) as u8), + ]) + } else { + if omit_invalid { + None + } else { + if !suppress_error { + eprintln!( + "Error: Code point U+{:X} is out of valid Unicode range", + code_point + ); + exit(1) + } + Some(vec![]) + } + } + }); + Box::new(iter.flatten()) +} diff --git a/i18n/tests/i18n-tests.rs b/i18n/tests/i18n-tests.rs index ae771334..5e14bd44 100644 --- a/i18n/tests/i18n-tests.rs +++ b/i18n/tests/i18n-tests.rs @@ -8,3 +8,4 @@ // mod gencat; +mod iconv; diff --git a/i18n/tests/iconv/mod.rs b/i18n/tests/iconv/mod.rs new file mode 100644 index 00000000..3fb4b265 --- /dev/null +++ b/i18n/tests/iconv/mod.rs @@ -0,0 +1,1084 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. 
+// SPDX-License-Identifier: MIT +// + +#![allow(non_snake_case)] +use plib::{run_test_u8, TestPlanU8}; +use std::env; +use std::path::PathBuf; +use std::{fs::File, io::Read}; + +fn iconv_test(args: &[&str], input: Vec, expected_output: Vec, expected_error: Vec) { + let str_args: Vec = args.iter().map(|s| String::from(*s)).collect(); + run_test_u8(TestPlanU8 { + cmd: String::from("iconv"), + args: str_args, + stdin_data: input, + expected_out: expected_output, + expected_err: expected_error, + expected_exit_code: 0, + }) +} + +#[test] +fn iconv_no_flag_data_input() { + let input = "Hello world".as_bytes().to_vec(); + iconv_test(&[], input.clone(), input.clone(), Vec::new()); +} + +#[test] +fn iconv_UTF8_to_ASCII_conversion_with_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf8"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf8_to_ascii_with_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-c", "-f", "UTF-8", "-t", "ASCII", "-"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-c", "-f", "UTF-8", "-t", "ASCII"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF8_to_UTF16LE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf8"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf8_to_utf16le_without_c_flag"); + + let mut expected_output: Vec = 
Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-8", "-t", "UTF-16LE", "-"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-8", "-t", "UTF-16LE"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +#[allow(non_snake_case)] +fn iconv_UTF8_to_UTF16BE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf8"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf8_to_utf16be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-8", "-t", "UTF-16BE", "-"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-8", "-t", "UTF-16BE"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +#[allow(non_snake_case)] +fn iconv_UTF8_to_UTF32BE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf8"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf8_to_utf32be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-8", "-t", "UTF-32BE", "-"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-8", "-t", "UTF-32BE"], + input, + 
expected_output, + Vec::new(), + ); +} + +#[test] +#[allow(non_snake_case)] +fn iconv_UTF8_to_UTF32LE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf8"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf8_to_utf32le_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-8", "-t", "UTF-32LE", "-"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-8", "-t", "UTF-32LE"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_ASCII_to_UTF8_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_ascii"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = cargo_manifest_dir.join("tests/iconv/test_data_ascii"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-8"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-8", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_ASCII_to_UTF16LE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_ascii"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut 
input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_ascii_to_utf16le_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-16LE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-16LE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_ASCII_to_UTF16BE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_ascii"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_ascii_to_utf16be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-16BE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-16BE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_ASCII_to_UTF32LE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_ascii"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_ascii_to_utf32le_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-32LE"], + 
input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-32LE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_ASCII_to_UTF32BE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_ascii"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_ascii_to_utf32be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-32BE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "ASCII", "-t", "UTF-32BE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF32LE_to_UTF32BE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32le_to_utf32be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-32LE", "-t", "UTF-32BE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-32LE", "-t", "UTF-32BE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF32LE_to_ASCII_conversion_with_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let 
input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32le_to_ascii_with_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-c", "-f", "UTF-32LE", "-t", "ASCII"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-c", "-f", "UTF-32LE", "-t", "ASCII", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF32LE_to_UTF8_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32le_to_utf8_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-32LE", "-t", "UTF-8"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-32LE", "-t", "UTF-8", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF32LE_to_UTF16LE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32le_to_utf16le_without_c_flag"); + + let mut expected_output: 
Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-32LE", "-t", "UTF-16LE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-32LE", "-t", "UTF-16LE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF32LE_to_UTF16BE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32le_to_utf16be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-32LE", "-t", "UTF-16BE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-32LE", "-t", "UTF-16BE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF32BE_to_UTF8_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32be_to_utf8_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-32BE", "-t", "UTF-8"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-32BE", "-t", "UTF-8", "-"], + input, + expected_output, + 
Vec::new(), + ); +} + +#[test] +fn iconv_UTF32BE_to_UTF32LE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32be_to_utf32le_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-32BE", "-t", "UTF-32LE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-32BE", "-t", "UTF-32LE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF32BE_to_ASCII_conversion_with_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32be_to_ascii_with_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-c", "-f", "UTF-32BE", "-t", "ASCII"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-c", "-f", "UTF-32BE", "-t", "ASCII", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF32BE_to_UTF16LE_conversion_with_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + 
.read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32be_to_utf16le_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-32BE", "-t", "UTF-16LE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-32BE", "-t", "UTF-16LE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF32BE_to_UTF16BE_conversion_with_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf32be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf32be_to_utf16be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-32BE", "-t", "UTF-16BE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-32BE", "-t", "UTF-16BE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16LE_to_UTF16BE_conversion_with_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf16le_to_utf16be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", 
"UTF-16LE", "-t", "UTF-16BE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-16LE", "-t", "UTF-16BE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16LE_to_ASCII_conversion_with_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf16le_to_ascii_with_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-c", "-f", "UTF-16LE", "-t", "ASCII"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-c", "-f", "UTF-16LE", "-t", "ASCII", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16LE_to_UTF32BE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf16le_to_utf32be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-16LE", "-t", "UTF-32BE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-16LE", "-t", "UTF-32BE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16LE_to_UTF32LE_conversion_without_c_flag() { + let cargo_manifest_dir = 
PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf16le_to_utf32le_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-16LE", "-t", "UTF-32LE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-16LE", "-t", "UTF-32LE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16LE_to_UTF8_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16le"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf16le_to_utf8_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-16LE", "-t", "UTF-8"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-16LE", "-t", "UTF-8", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16BE_to_UTF16LE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + 
cargo_manifest_dir.join("tests/iconv/test_data_utf16be_to_utf16le_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-16BE", "-t", "UTF-16LE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-16BE", "-t", "UTF-16LE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16BE_to_ASCII_conversion_with_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf16be_to_ascii_with_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-c", "-f", "UTF-16BE", "-t", "ASCII"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-c", "-f", "UTF-16BE", "-t", "ASCII", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16BE_to_UTF32BE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf16be_to_utf32be_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-16BE", "-t", "UTF-32BE"], + input.clone(), + 
expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-16BE", "-t", "UTF-32BE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16BE_to_UTF32LE_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf16be_to_utf32le_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-16BE", "-t", "UTF-32LE"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-16BE", "-t", "UTF-32LE", "-"], + input, + expected_output, + Vec::new(), + ); +} + +#[test] +fn iconv_UTF16BE_to_UTF8_conversion_without_c_flag() { + let cargo_manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + + let input_file = cargo_manifest_dir.join("tests/iconv/test_data_utf16be"); + let mut input: Vec = Vec::new(); + File::open(&input_file) + .unwrap() + .read_to_end(&mut input) + .unwrap(); + + let expected_output_file = + cargo_manifest_dir.join("tests/iconv/test_data_utf16be_to_utf8_without_c_flag"); + + let mut expected_output: Vec = Vec::new(); + File::open(&expected_output_file) + .unwrap() + .read_to_end(&mut expected_output) + .unwrap(); + + iconv_test( + &["-f", "UTF-16BE", "-t", "UTF-8"], + input.clone(), + expected_output.clone(), + Vec::new(), + ); + + iconv_test( + &["-f", "UTF-16BE", "-t", "UTF-8", "-"], + input, + expected_output, + Vec::new(), + ); +} diff --git a/i18n/tests/iconv/test_data_ascii b/i18n/tests/iconv/test_data_ascii new file mode 100644 index 00000000..6a9171ff --- /dev/null +++ 
b/i18n/tests/iconv/test_data_ascii @@ -0,0 +1 @@ +these are ascii characters 24234o1203572hkd f98882 2380 !@#$%~^&!(123423*/+*-/////////"""""" diff --git a/i18n/tests/iconv/test_data_ascii_to_utf16be_without_c_flag b/i18n/tests/iconv/test_data_ascii_to_utf16be_without_c_flag new file mode 100644 index 00000000..811e2f6e Binary files /dev/null and b/i18n/tests/iconv/test_data_ascii_to_utf16be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_ascii_to_utf16le_without_c_flag b/i18n/tests/iconv/test_data_ascii_to_utf16le_without_c_flag new file mode 100644 index 00000000..2d41d684 Binary files /dev/null and b/i18n/tests/iconv/test_data_ascii_to_utf16le_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_ascii_to_utf32be_without_c_flag b/i18n/tests/iconv/test_data_ascii_to_utf32be_without_c_flag new file mode 100644 index 00000000..2e9559d7 Binary files /dev/null and b/i18n/tests/iconv/test_data_ascii_to_utf32be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_ascii_to_utf32le_without_c_flag b/i18n/tests/iconv/test_data_ascii_to_utf32le_without_c_flag new file mode 100644 index 00000000..9678fa20 Binary files /dev/null and b/i18n/tests/iconv/test_data_ascii_to_utf32le_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf16be b/i18n/tests/iconv/test_data_utf16be new file mode 100644 index 00000000..0f2d0d5c Binary files /dev/null and b/i18n/tests/iconv/test_data_utf16be differ diff --git a/i18n/tests/iconv/test_data_utf16be_to_ascii_with_c_flag b/i18n/tests/iconv/test_data_utf16be_to_ascii_with_c_flag new file mode 100644 index 00000000..d05b1e10 --- /dev/null +++ b/i18n/tests/iconv/test_data_utf16be_to_ascii_with_c_flag @@ -0,0 +1,19 @@ +This test data was originally written for UTF16BE + +It emojis + latin characters + + + + 8 + 6 + + + + + + + 7 + + + + diff --git a/i18n/tests/iconv/test_data_utf16be_to_utf16le_without_c_flag b/i18n/tests/iconv/test_data_utf16be_to_utf16le_without_c_flag new file mode 100644 index 
00000000..03d35ffe Binary files /dev/null and b/i18n/tests/iconv/test_data_utf16be_to_utf16le_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf16be_to_utf32be_without_c_flag b/i18n/tests/iconv/test_data_utf16be_to_utf32be_without_c_flag new file mode 100644 index 00000000..550d54ec Binary files /dev/null and b/i18n/tests/iconv/test_data_utf16be_to_utf32be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf16be_to_utf32le_without_c_flag b/i18n/tests/iconv/test_data_utf16be_to_utf32le_without_c_flag new file mode 100644 index 00000000..5c63b281 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf16be_to_utf32le_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf16be_to_utf8_without_c_flag b/i18n/tests/iconv/test_data_utf16be_to_utf8_without_c_flag new file mode 100644 index 00000000..5b2fa73f --- /dev/null +++ b/i18n/tests/iconv/test_data_utf16be_to_utf8_without_c_flag @@ -0,0 +1,19 @@ +This test data was originally written for UTF16BE + +It emojis + latin characters + +๐Ÿ”ฅ ๐Ÿฃ ๐Ÿšซ ๐Ÿ…ฐ๏ธ ๐ŸŽ ๐Ÿค– ๐Ÿฒ ๐Ÿ”ณ ๐Ÿ”ท ๐Ÿฉ ๐Ÿ•น ๐Ÿ˜ ๐Ÿ›ซ ๐ŸŒŠ โ„น๏ธ ๐Ÿ˜ฆ ๐Ÿ” ๐Ÿ‡ ๐Ÿจ ๐Ÿฐ ๐Ÿ’ฅ ๐ŸŒ“ โ—พ๏ธ ๐Ÿ„ โŒ›๏ธ ๐Ÿ˜Ž ๐Ÿ‘ +๐Ÿ—ž ๐Ÿšฝ ๐ŸŽธ ๐Ÿ†— ๐Ÿ˜ฏ ๐Ÿ”ฌ ๐ŸŒท ๐Ÿ˜ž โ“‚๏ธ ๐Ÿšฃ ๐Ÿ”ฐ ๐Ÿญ ๐Ÿ’ฏ ๐ŸŸ ๐Ÿš ๐ŸŽป โ™จ๏ธ ๐Ÿš‡ ๐Ÿ’ฉ ๐Ÿ‹ ๐Ÿข โ›ต๏ธ ๐Ÿ†˜ ใŠ™๏ธ ๐Ÿ˜Š ๐Ÿ”ข ๐Ÿ—ฃ +๐Ÿถ โ†˜๏ธ ๐Ÿ˜‰ ๐Ÿ’‡ โš— ๐Ÿบ ๐Ÿ‘ฃ ๐Ÿ’ญ ๐ŸŒ› ๐Ÿ˜ฐ ๐Ÿ‘˜ ๐Ÿš  ๐Ÿฝ ๐Ÿ‘ ๐Ÿ˜ฝ โ„ข๏ธ ๐Ÿฟ ๐Ÿ‘„ ๐Ÿ‘€ ๐Ÿ™‹ ๐Ÿ–ฑ 8๏ธโƒฃ ๐Ÿ•ก ๐Ÿฌ ๐Ÿ“ฎ ๐Ÿœ ๐Ÿ–ผ ๐Ÿ“ฐ +๐ŸŒ™ ๐Ÿˆบ ๐Ÿ”ฃ ๐Ÿฒ โš” ๐Ÿš‚ ๐Ÿ”Ž ๐Ÿ“‘ 6๏ธโƒฃ ๐Ÿ”ค ๐Ÿ’– ๐ŸŸ โ™Œ๏ธ ๐Ÿ’ณ โ™‹๏ธ ๐Ÿ“ข โ†ช๏ธ ๐Ÿฅ โ›…๏ธ ๐Ÿ›ฃ ๐ŸŽ ๐Ÿ”ฑ ๐Ÿ˜ˆ ๐Ÿ˜‘ โฏ ๐ŸŽฅ ๐Ÿ’† ๐ŸŽ’ +๐Ÿ’ โ—ฝ๏ธ ๐Ÿ‘พ ๐Ÿ‘ž ๐Ÿ˜š ๐Ÿš ๐ŸŒพ ๐Ÿ““ ๐Ÿ” ๐Ÿ—„ ๐Ÿ•ฃ โ›ˆ ๐Ÿ•‰ โ˜  โž• ๐ŸŽฟ ๐Ÿ’˜ ๐Ÿ—พ ๐ŸŒค ๐Ÿ™‰ ๐Ÿ„ ๐Ÿšฟ ๐Ÿซ ๐Ÿ•ณ ๐Ÿ’™ ๐Ÿณ โšฐ ๐Ÿ’ง +๐Ÿ—‘ ๐ŸŽ“ ๐ŸŽ‘ โ™๏ธ ๐Ÿ“  ๐Ÿ’— ๐Ÿ„ ๐Ÿ— ๐Ÿƒ ๐ŸŽ— ๐Ÿ”ฝ ๐ŸŒ ๐Ÿ“‡ ๐Ÿ’ข ๐Ÿ˜… ๐Ÿ’• ๐Ÿ’บ ๐Ÿ” ๐Ÿ‘‹ ๐Ÿ ๐Ÿž ๐Ÿ˜ข ๐Ÿ…ฟ๏ธ ๐Ÿฑ ๐Ÿ›ฉ โ˜ฎ โ›ณ๏ธ ๐ŸŠ +๐Ÿ˜ฃ ๐ŸŒ ๐Ÿ’ƒ โ˜ข โ˜”๏ธ ๐Ÿ’Š ๐Ÿ—จ ๐Ÿšข โ›ท ๐Ÿค“ ๐Ÿ€ ๐Ÿ“ซ ๐Ÿ– ๐Ÿ’ ๐Ÿก ๐ŸŽŠ ๐Ÿ”ˆ ๐ŸŒž 
๐Ÿ”† ๐Ÿ•Ž ๐ŸŽ‡ โ›ธ ๐ŸŒฟ ๐Ÿญ ๐Ÿ’ด โœจ โฐ ๐Ÿค” +๐Ÿ”ผ โ™๏ธ ๐Ÿ’ฑ โšก๏ธ ๐ŸŒฎ ๐Ÿ•› ๐ŸŒ— ๐Ÿ›ข ๐ŸŽ‹ ๐ŸŠ ๐Ÿ˜˜ โœ–๏ธ ๐ŸŒฐ ๐Ÿ—œ ๐Ÿ”ƒ ๐Ÿ‘น ๐Ÿ› ๐Ÿ’š ๐Ÿ ๐Ÿš’ ๐ŸŒถ ๐Ÿ‘ฉ ๐Ÿ›ฐ ๐Ÿšฉ ๐Ÿ’ ๐ŸŒซ ๐Ÿ˜ถ +๐Ÿ›ก โคด๏ธ โบ ๐Ÿš€ ๐Ÿค‘ ๐Ÿˆต ๐ŸŽ™ โ–ซ๏ธ ๐Ÿ‡ ๐Ÿšฏ ๐Ÿ“” ๐Ÿ‘ง ๐Ÿ˜ช ๐Ÿต ๐Ÿน ๐Ÿ•  ๐Ÿ•– ๐Ÿค— ๐Ÿ”œ ๐Ÿˆ โซ ๐Ÿ‘ ๐Ÿš– ๐Ÿš ๐Ÿš† ๐Ÿˆ ๐Ÿฅ +๐ŸŒ€ ๐Ÿ“ธ ๐ŸŽฎ ๐ŸšŒ ๐Ÿš” ๐Ÿ’ž โœŒ๏ธ โ—€๏ธ ๐Ÿ‘Œ โžฟ ๐ŸŒ‚ ๐ŸŒ‹ ๐ŸŽข โ˜ช ๐ŸŸ ๐ŸŒฏ ๐Ÿ”„ ๐Ÿ’Œ ๐Ÿ“น ๐Ÿ˜ฅ ๐Ÿฌ ๐Ÿ”บ ๐Ÿ™† ๐Ÿ˜ผ ๐Ÿš ใ€ฝ๏ธ ๐Ÿ‘ˆ +๐ŸšŠ ๐Ÿ‚ โ“ 7๏ธโƒฃ โœ‚๏ธ ๐Ÿญ ๐Ÿ”ป ๐Ÿ†š โ˜ธ ๐ŸŽŽ ๐Ÿˆฒ ๐Ÿ…พ๏ธ ๐Ÿ•ถ ๐Ÿฏ ๐Ÿ•ท ๐Ÿ’ ๐Ÿˆš๏ธ ๐Ÿ— โœ‰๏ธ ๐Ÿ“ง ๐Ÿšค ๐Ÿ†• โ‰๏ธ ๐Ÿ†Ž โ ๐Ÿš„ ๐Ÿˆณ ๐Ÿบ ๐Ÿผ +๐Ÿ“ฆ โ—ผ๏ธ ๐Ÿป โฌ›๏ธ ๐ŸŒฅ โธ ๐Ÿณ ๐Ÿ›ฌ ๐Ÿฆƒ โ›ฐ โ˜ฆ ๐Ÿ’’ ๐Ÿ–ฅ ๐Ÿ“ ๐Ÿซ โ˜ƒ ๐Ÿ•“ โ›‘ ๐Ÿ•ด ๐Ÿ–Š ๐Ÿ›€ ๐Ÿ‘ท ๐Ÿž โšœ ๐Ÿ“ž ๐ŸŒ  ๐ŸŒจ ๐Ÿ—‚ ๐Ÿ˜ฉ +๐Ÿ“‹ ๐Ÿ“ก ๐Ÿ“ฟ ๐ŸŽง ๐Ÿฉ ๐Ÿ˜ก ๐Ÿ’› โ›Ž ๐ŸŒ ๐Ÿ•š โš’ ๐Ÿ”” ๐Ÿ˜ป ๐Ÿ‘ฆ ๐Ÿ”ฆ ๐Ÿˆฏ๏ธ ๐Ÿšœ ๐Ÿ’” ๐Ÿšธ ๐Ÿ‘ฟ ๐Ÿข ยฎ๏ธ ๐Ÿ•ฏ ๐Ÿšฑ ๐Ÿ—“ โ†ฉ๏ธ ๐Ÿ‘› ๐Ÿ”š + +๐Œ€๐Œ๐Œ‚๐Œƒ๐Œ„๐Œ…๐Œ†๐Œ‡๐Œ‰๐ŒŠ๐Œ‹๐ŒŒ๐Œ๐Œ๐Œ๐Œ’๐Œ“๐Œ”๐Œ•๐Œ–๐Œ— diff --git a/i18n/tests/iconv/test_data_utf16le b/i18n/tests/iconv/test_data_utf16le new file mode 100644 index 00000000..370c71b8 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf16le differ diff --git a/i18n/tests/iconv/test_data_utf16le_to_ascii_with_c_flag b/i18n/tests/iconv/test_data_utf16le_to_ascii_with_c_flag new file mode 100644 index 00000000..5c6316a5 --- /dev/null +++ b/i18n/tests/iconv/test_data_utf16le_to_ascii_with_c_flag @@ -0,0 +1,8 @@ +this test data was originally generated for UTF16LE + +Contains emojis + some latin characters + + + + + diff --git a/i18n/tests/iconv/test_data_utf16le_to_utf16be_without_c_flag b/i18n/tests/iconv/test_data_utf16le_to_utf16be_without_c_flag new file mode 100644 index 00000000..cba93a1d Binary files /dev/null and b/i18n/tests/iconv/test_data_utf16le_to_utf16be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf16le_to_utf32be_without_c_flag b/i18n/tests/iconv/test_data_utf16le_to_utf32be_without_c_flag new 
file mode 100644 index 00000000..e3bd3cc2 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf16le_to_utf32be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf16le_to_utf32le_without_c_flag b/i18n/tests/iconv/test_data_utf16le_to_utf32le_without_c_flag new file mode 100644 index 00000000..ebf4bc7f Binary files /dev/null and b/i18n/tests/iconv/test_data_utf16le_to_utf32le_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf16le_to_utf8_without_c_flag b/i18n/tests/iconv/test_data_utf16le_to_utf8_without_c_flag new file mode 100644 index 00000000..5b2ae9e7 --- /dev/null +++ b/i18n/tests/iconv/test_data_utf16le_to_utf8_without_c_flag @@ -0,0 +1,8 @@ +this test data was originally generated for UTF16LE + +Contains emojis + some latin characters +๐Ÿ˜ฃ ๐Ÿ˜– ๐Ÿ˜ซ ๐Ÿ˜ฉ ๐Ÿฅบ ๐Ÿ˜ข ๐Ÿ˜ญ ๐Ÿ˜  ๐Ÿ˜ก ๐Ÿคฌ ๐Ÿคฏ ๐Ÿ˜ณ ๐Ÿฅต ๐Ÿฅถ ๐Ÿ˜ฑ ๐Ÿ˜จ ๐Ÿ˜ฐ ๐Ÿ˜ฅ ๐Ÿ˜“ ๐Ÿซฃ ๐Ÿค— +๐Ÿซก ๐Ÿค” ๐Ÿซข ๐Ÿคญ ๐Ÿคซ ๐Ÿคฅ ๐Ÿ˜ถ ๐Ÿ˜ ๐Ÿ˜‘ ๐Ÿ˜ฌ ๐Ÿซจ ๐Ÿซ  ๐Ÿ™„ ๐Ÿ˜ฏ ๐Ÿ˜ฆ ๐Ÿ˜ง ๐Ÿ˜ฎ ๐Ÿ˜ฒ ๐Ÿฅฑ ๐Ÿ˜ด ๐Ÿคค +๐Ÿ˜ช ๐Ÿ˜ต ๐Ÿซฅ ๐Ÿค ๐Ÿฅด ๐Ÿคข ๐Ÿคฎ ๐Ÿคง +๐Ÿ˜ท ๐Ÿค’ ๐Ÿค• ๐Ÿค‘ ๐Ÿค  ๐Ÿ˜ˆ +รช รซ รฌ รญ รฎ รฏ รฐ รฑ รฒ รณ รด diff --git a/i18n/tests/iconv/test_data_utf32be b/i18n/tests/iconv/test_data_utf32be new file mode 100644 index 00000000..c91213da Binary files /dev/null and b/i18n/tests/iconv/test_data_utf32be differ diff --git a/i18n/tests/iconv/test_data_utf32be_to_ascii_with_c_flag b/i18n/tests/iconv/test_data_utf32be_to_ascii_with_c_flag new file mode 100644 index 00000000..2d8342c4 --- /dev/null +++ b/i18n/tests/iconv/test_data_utf32be_to_ascii_with_c_flag @@ -0,0 +1,13 @@ +Hey there, this was originally written in UTF32BE + + + + 5 + + * + + + 7 + + +Well, we have to put these emojis just to make sure things are working well diff --git a/i18n/tests/iconv/test_data_utf32be_to_utf16be_without_c_flag b/i18n/tests/iconv/test_data_utf32be_to_utf16be_without_c_flag new file mode 100644 index 00000000..7ee4c87a Binary files /dev/null and 
b/i18n/tests/iconv/test_data_utf32be_to_utf16be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf32be_to_utf16le_without_c_flag b/i18n/tests/iconv/test_data_utf32be_to_utf16le_without_c_flag new file mode 100644 index 00000000..66c8e805 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf32be_to_utf16le_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf32be_to_utf32le_without_c_flag b/i18n/tests/iconv/test_data_utf32be_to_utf32le_without_c_flag new file mode 100644 index 00000000..8db2e2ae Binary files /dev/null and b/i18n/tests/iconv/test_data_utf32be_to_utf32le_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf32be_to_utf8_without_c_flag b/i18n/tests/iconv/test_data_utf32be_to_utf8_without_c_flag new file mode 100644 index 00000000..ee2ac081 --- /dev/null +++ b/i18n/tests/iconv/test_data_utf32be_to_utf8_without_c_flag @@ -0,0 +1,13 @@ +Hey there, this was originally written in UTF32BE + +๐Ÿ“ฉ ๐ŸŽ‹ ๐Ÿฒ โ“‚๏ธ ๐Ÿฆ‚ โ™’๏ธ โฏ ๐Ÿ€„๏ธ ๐Ÿšˆ ๐Ÿ’ฎ ๐Ÿต ๐Ÿ‘ฃ ๐ŸŽ โšพ๏ธ ๐Ÿ•› ๐Ÿ’ง ๐Ÿ”ฏ ๐Ÿ™† ๐ŸŒญ ๐Ÿ“ผ ๐ŸŽƒ +๐Ÿ—„ ๐Ÿ”ƒ ๐ŸŒฉ ๐Ÿฆ‚ โ™จ๏ธ โญ๏ธ ๐Ÿ—พ ๐ŸŽบ ๐Ÿช โ™ฅ๏ธ ๐Ÿˆน ๐Ÿšฐ ๐Ÿ‘• โ›…๏ธ ๐Ÿ‘ง ๐Ÿธ ๐Ÿ“ฐ ๐Ÿ“  ๐Ÿ’‚ ๐ŸŽฟ ๐Ÿณ โ˜ข โ• +๐ŸŽถ ๐Ÿ›ฉ 5๏ธโƒฃ ๐Ÿ“บ ๐Ÿ’ค โ™ฅ๏ธ ๐Ÿ“‰ ๐Ÿ“ง ๐Ÿ›‹ โ‡๏ธ ๐Ÿ”ฝ ๐Ÿ›ค โ™“๏ธ ๐Ÿ”น โš“๏ธ ๐Ÿ’‡ ๐Ÿˆ ๐Ÿ‘ธ ๐Ÿ€ ๐Ÿˆณ โ›ต๏ธ ๐Ÿ”ง ๐Ÿ”† +๐Ÿ˜— ๐Ÿ‘… โ›ธ ๐Ÿšฟ ๐Ÿ“ˆ ๐ŸŽฅ ๐ŸŽฌ โ†™๏ธ ๐ŸŽซ โ˜‘๏ธ ๐ŸŒ‡ ๐Ÿ‘Š โŒ›๏ธ ๐Ÿˆท๏ธ โœ… ๐Ÿ’ถ ๐Ÿ‘ ๐Ÿ˜˜ ๐Ÿœ ๐Ÿš„ โšซ๏ธ ๐Ÿ›  โฌ†๏ธ ๐Ÿœ ๐ŸŽ‘ +๐Ÿ•˜ ๐Ÿ“– ๐Ÿ” ๐Ÿ”• ๐ŸŽš โœก ๐ŸŒด โ†—๏ธ ๐Ÿ– ๐Ÿ”ฉ *โƒฃ ๐Ÿ™ ๐Ÿ•‹ ๐ŸŽผ ๐Ÿ”ธ ๐Ÿ“น โฌ‡๏ธ ๐Ÿ’ถ ๐Ÿ˜œ ๐Ÿฐ โœณ๏ธ ๐Ÿ–ฒ ๐Ÿ…ฑ๏ธ ๐Ÿ“ฉ ๐Ÿšค +๐Ÿ’ˆ ๐ŸŽฆ ๐Ÿšณ ๐Ÿ’‰ ๐Ÿฅ โ˜˜ ๐ŸŒ— ๐Ÿ˜  ๐ŸŽ‹ ๐Ÿš ๐Ÿ‘„ ๐Ÿ—จ โœ’๏ธ ๐Ÿ›„ ๐Ÿ ๐ŸŒ€ ๐Ÿ—ฃ ๐Ÿ“ฌ ๐Ÿ‚ ๐ŸŒฒ ๐Ÿ‘ฐ ๐Ÿ†‘ ๐Ÿš† ๐ŸŽจ +๐Ÿš” ๐Ÿฝ ๐ŸŽข ๐Ÿป โช ๐ŸŒ ๐Ÿ“ฌ โญ๏ธ ๐Ÿ… ๐Ÿš ๐Ÿ›Ž ๐Ÿข ๐Ÿ˜† โžฟ ๐Ÿ–– โ˜ฏ ๐Ÿค” ๐Ÿ•ฏ ๐ŸŽง ๐Ÿณ ๐Ÿ•ท ๐ŸŒ… ๐Ÿ“จ ๐Ÿ˜  +๐Ÿ’ท ๐Ÿ˜ฟ ๐Ÿ ๐ŸŽฟ ๐ŸŽ ๐Ÿ‘น ๐Ÿฅ ๐ŸŽณ ๐Ÿ’ญ ๐Ÿšฐ ๐Ÿ’บ ๐Ÿ• ๐Ÿšผ ๐Ÿง โšฝ๏ธ ๐Ÿšซ ๐Ÿ•Š ๐Ÿซ 7๏ธโƒฃ ๐Ÿ’จ ๐ŸŒ” ๐Ÿ•’ +๐Ÿ“• ๐Ÿ’Œ ๐Ÿž ๐Ÿ’ž ๐Ÿš• โ™ฟ๏ธ ๐Ÿ”ฝ ๐Ÿ˜™ ๐Ÿ…ฑ๏ธ 
๐Ÿ”‘ ๐Ÿ—ฝ ๐ŸŒง ๐Ÿ˜… + +Well, we have to put these emojis just to make sure things are working well diff --git a/i18n/tests/iconv/test_data_utf32le b/i18n/tests/iconv/test_data_utf32le new file mode 100644 index 00000000..534f0d3c Binary files /dev/null and b/i18n/tests/iconv/test_data_utf32le differ diff --git a/i18n/tests/iconv/test_data_utf32le_to_ascii_with_c_flag b/i18n/tests/iconv/test_data_utf32le_to_ascii_with_c_flag new file mode 100644 index 00000000..3c918573 --- /dev/null +++ b/i18n/tests/iconv/test_data_utf32le_to_ascii_with_c_flag @@ -0,0 +1,8 @@ +hello from the posixutils everyone +this is was originally written in UTF16LE + +there are lot of emojis here + + + + diff --git a/i18n/tests/iconv/test_data_utf32le_to_utf16be_without_c_flag b/i18n/tests/iconv/test_data_utf32le_to_utf16be_without_c_flag new file mode 100644 index 00000000..4ac5dfb1 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf32le_to_utf16be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf32le_to_utf16le_without_c_flag b/i18n/tests/iconv/test_data_utf32le_to_utf16le_without_c_flag new file mode 100644 index 00000000..71833697 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf32le_to_utf16le_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf32le_to_utf32be_without_c_flag b/i18n/tests/iconv/test_data_utf32le_to_utf32be_without_c_flag new file mode 100644 index 00000000..f5239438 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf32le_to_utf32be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf32le_to_utf8_without_c_flag b/i18n/tests/iconv/test_data_utf32le_to_utf8_without_c_flag new file mode 100644 index 00000000..fb4f24b6 --- /dev/null +++ b/i18n/tests/iconv/test_data_utf32le_to_utf8_without_c_flag @@ -0,0 +1,8 @@ +hello from the posixutils everyone +this is was originally written in UTF16LE + +there are lot of emojis here +๐Ÿ’‡ ๐Ÿ‘ป ๐Ÿ”‚ ๐Ÿ—“ ๐Ÿ—ป ๐Ÿšด ๐ŸŽ‰ ๐Ÿ“ ๐Ÿ˜ฎ โฌ ๐ŸŒ ๐Ÿšญ ๐Ÿ˜ฏ ๐Ÿ™ ๐Ÿด ๐Ÿ™ ๐Ÿ’„ ๐Ÿš 
๐Ÿ’ถ ๐Ÿ˜€ ๐ŸŠ โšก๏ธ ๐Ÿ“‹ โ—ฝ๏ธ โ†—๏ธ โบ ๐Ÿ—ฟ ๐Ÿค ๐Ÿข ๐Ÿก ๐Ÿ˜ˆ ๐Ÿš โธ ๐ŸŽ› ๐Ÿˆน +โ›ต๏ธ ๐Ÿธ ๐Ÿ” โบ ๐Ÿ‘ฝ ๐Ÿ‘ธ ๐Ÿบ ๐Ÿ”ฎ ๐Ÿฝ ๐Ÿš† ๐Ÿšฐ ๐Ÿ”ฉ ๐Ÿ™Ž ๐Ÿ˜™ ๐ŸŽ— ๐Ÿ“ธ ๐Ÿ‘€ ๐Ÿšซ ๐Ÿ™€ ๐ŸŒธ โ˜  ๐Ÿช ๐Ÿ˜œ โฑ ๐Ÿ’Œ ๐Ÿ˜ต ๐Ÿ‘น ๐Ÿ˜ˆ ๐Ÿ›Ž โบ ๐ŸŒ โ™Œ๏ธ โ›ˆ ๐Ÿค– ๐Ÿ“ง ๐Ÿ“„ ๐Ÿ”ฒ +โ›“ โ›ณ๏ธ ๐Ÿ“ผ ๐Ÿ˜ ๐ŸŒณ ๐Ÿš โ˜ฏ ๐Ÿ’Ž ๐Ÿƒ ๐Ÿข ๐Ÿ” ๐Ÿ˜œ ๐Ÿ„ ๐Ÿ˜Œ โฐ โ—ฝ๏ธ ๐Ÿ˜ญ ๐Ÿ’˜ โ˜€๏ธ โœ”๏ธ ๐Ÿ“ซ ๐Ÿšฝ ๐Ÿ–จ โ†ช๏ธ ๐Ÿ›‹ โœˆ๏ธ ๐Ÿ’… ๐Ÿ“Š โ‰๏ธ ๐Ÿ”ป ๐Ÿ’ท ๐ŸŽ โšพ๏ธ โ†˜๏ธ ๐ŸŽ† ๐ŸŒฟ ๐Ÿš€ ๐ŸŽฉ +โ˜บ๏ธ ๐Ÿ† ๐ŸŒฒ ๐ŸŒฑ ๐ŸŠ ๐Ÿ‘ฏ โ—พ๏ธ ๐Ÿ’† ๐Ÿ•ค ๐Ÿ’Š diff --git a/i18n/tests/iconv/test_data_utf8 b/i18n/tests/iconv/test_data_utf8 new file mode 100644 index 00000000..457e42ce --- /dev/null +++ b/i18n/tests/iconv/test_data_utf8 @@ -0,0 +1 @@ +test 1๐Ÿ˜›๐Ÿ˜text 2๐Ÿ˜œ๐Ÿคช๐Ÿคจ๐Ÿง๐Ÿค“๐Ÿ˜Ž๐Ÿฅธ๐Ÿคฉ๐Ÿฅณtext 3๐Ÿฅธ diff --git a/i18n/tests/iconv/test_data_utf8_to_ascii_with_c_flag b/i18n/tests/iconv/test_data_utf8_to_ascii_with_c_flag new file mode 100644 index 00000000..d40b14c7 --- /dev/null +++ b/i18n/tests/iconv/test_data_utf8_to_ascii_with_c_flag @@ -0,0 +1 @@ +test 1text 2text 3 diff --git a/i18n/tests/iconv/test_data_utf8_to_utf16be_without_c_flag b/i18n/tests/iconv/test_data_utf8_to_utf16be_without_c_flag new file mode 100644 index 00000000..1ac9ebe7 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf8_to_utf16be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf8_to_utf16le_without_c_flag b/i18n/tests/iconv/test_data_utf8_to_utf16le_without_c_flag new file mode 100644 index 00000000..9f508d57 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf8_to_utf16le_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf8_to_utf32be_without_c_flag b/i18n/tests/iconv/test_data_utf8_to_utf32be_without_c_flag new file mode 100644 index 00000000..4163b3d8 Binary files /dev/null and b/i18n/tests/iconv/test_data_utf8_to_utf32be_without_c_flag differ diff --git a/i18n/tests/iconv/test_data_utf8_to_utf32le_without_c_flag 
b/i18n/tests/iconv/test_data_utf8_to_utf32le_without_c_flag new file mode 100644 index 00000000..3de2b5eb Binary files /dev/null and b/i18n/tests/iconv/test_data_utf8_to_utf32le_without_c_flag differ