Skip to content

Commit

Permalink
[char_property] Implement *CharProperty traits
Browse files Browse the repository at this point in the history
Character Properties are of different kinds and shapes, and as UNIC
components grow, we need a better way to categorize them by their
shape, and a way to make sure we have a consistent, non-colliding
API for them.

This is the first step toward building a CharProperty taxonomy, with as
little as is necessary to provide the desired assurances.

We hope that the implementation can be improved over time as new
features are added to the language. There are already some proposals on
this front. See these discussions for more details:

* [Traits as contract, without changes to call-sites](https://users.rust-lang.org/t/traits-as-contract-without-changes-to-call-sites/11938/11)

* [RFC: delegation of implementation](rust-lang/rfcs#1406)
  • Loading branch information
behnam committed Aug 9, 2017
1 parent ecd35d0 commit 6888740
Show file tree
Hide file tree
Showing 15 changed files with 266 additions and 16 deletions.
7 changes: 7 additions & 0 deletions unic/ucd/age/src/age.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use std::fmt;
use unic_utils::CharDataTable;

pub use unic_ucd_core::UnicodeVersion;
use unic_utils::CharProperty;


/// Represents values of the Unicode character property
Expand Down Expand Up @@ -41,6 +42,12 @@ pub enum Age {
Unassigned, // Unassigned is older (larger) than any age
}

impl CharProperty for Age {
fn of(ch: char) -> Self {
Self::of(ch)
}
}

use Age::{Assigned, Unassigned};

pub const AGE_TABLE: &'static [(char, char, Age)] = include!("tables/age_values.rsv");
Expand Down
2 changes: 1 addition & 1 deletion unic/ucd/age/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// except according to those terms.


#![forbid(unsafe_code)]
#![forbid(unsafe_code, unconditional_recursion)]
#![deny(missing_docs)]

//! # UNIC — UCD — Character Age
Expand Down
55 changes: 54 additions & 1 deletion unic/ucd/bidi/src/bidi_class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.


use std::fmt;

use unic_utils::CharDataTable;
use unic_utils::{CharDataTable, CharProperty, EnumeratedCharProperty};


/// Represents the Unicode character
/// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property, also known as the
Expand Down Expand Up @@ -48,6 +50,18 @@ pub enum BidiClass {
// [UNIC_UPDATE_ON_UNICODE_UPDATE] Source: `tables/bidi_class_type.rsv`
}

impl CharProperty for BidiClass {
fn of(ch: char) -> Self {
Self::of(ch)
}
}

impl EnumeratedCharProperty for BidiClass {
fn all_values() -> &'static [Self] {
Self::all_values()
}
}


/// Abbreviated name aliases for
/// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property.
Expand Down Expand Up @@ -114,6 +128,37 @@ impl BidiClass {
*BIDI_CLASS_TABLE.find_or(ch, &L)
}

/// Exhaustive list of all `BidiClass` property values.
pub fn all_values() -> &'static [BidiClass] {
use BidiClass::*;
const ALL_VALUES: &[BidiClass] = &[
ArabicLetter,
ArabicNumber,
ParagraphSeparator,
BoundaryNeutral,
CommonSeparator,
EuropeanNumber,
EuropeanSeparator,
EuropeanTerminator,
FirstStrongIsolate,
LeftToRight,
LeftToRightEmbedding,
LeftToRightIsolate,
LeftToRightOverride,
NonspacingMark,
OtherNeutral,
PopDirectionalFormat,
PopDirectionalIsolate,
RightToLeft,
RightToLeftEmbedding,
RightToLeftIsolate,
RightToLeftOverride,
SegmentSeparator,
WhiteSpace,
];
ALL_VALUES
}

/// Abbreviated name of the *Bidi_Class* property value.
///
/// <http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt#Bidi_Class>
Expand Down Expand Up @@ -216,6 +261,14 @@ impl BidiClass {
}
}


impl Default for BidiClass {
fn default() -> Self {
BidiClass::LeftToRight
}
}


impl fmt::Display for BidiClass {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.display())
Expand Down
2 changes: 1 addition & 1 deletion unic/ucd/bidi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// except according to those terms.


#![forbid(unsafe_code)]
#![forbid(unsafe_code, unconditional_recursion)]
#![deny(missing_docs)]

//! # UNIC — UCD — Bidi
Expand Down
2 changes: 1 addition & 1 deletion unic/ucd/category/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ exclude = []
travis-ci = { repository = "behnam/rust-unic", branch = "master" }

[dependencies]
matches = "0.1"
unic-ucd-core = { path = "../core/", version = "0.5.0" }
unic-utils = { path = "../../utils/", version = "0.5.0" }
matches = "0.1"
55 changes: 51 additions & 4 deletions unic/ucd/category/src/category.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,19 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use unic_utils::CharDataTable;

use std::fmt;

use unic_utils::{CharDataTable, CharProperty, EnumeratedCharProperty};


/// Represents the Unicode Character
/// [*General_Category*](http://unicode.org/reports/tr44/#General_Category) property.
///
/// This is a useful breakdown into various character types which can be used as a default
/// categorization in implementations. For the property values, see
/// [*General_Category Values*](http://unicode.org/reports/tr44/#General_Category_Values).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum GeneralCategory {
/// An uppercase letter (Short form: `Lu`)
UppercaseLetter,
Expand Down Expand Up @@ -80,6 +84,21 @@ pub enum GeneralCategory {
Unassigned,
}


impl CharProperty for GeneralCategory {
fn of(ch: char) -> Self {
Self::of(ch)
}
}


impl EnumeratedCharProperty for GeneralCategory {
fn all_values() -> &'static [Self] {
Self::all_values()
}
}


pub mod abbr_names {
pub use super::GeneralCategory::UppercaseLetter as Lu;
pub use super::GeneralCategory::LowercaseLetter as Ll;
Expand Down Expand Up @@ -124,8 +143,6 @@ impl GeneralCategory {
}

/// Exhaustive list of all `GeneralCategory` property values.
///
/// Reference: <http://unicode.org/reports/tr44/#General_Category_Values>
pub fn all_values() -> &'static [GeneralCategory] {
use GeneralCategory::*;
const ALL_VALUES: &[GeneralCategory] = &[
Expand Down Expand Up @@ -162,8 +179,16 @@ impl GeneralCategory {
];
ALL_VALUES
}

/// Human-readable description of the property value.
///
/// For now this is the variant's `Debug` rendering, e.g. `UppercaseLetter`.
// TODO: Improve by returning the long name with underscores replaced by spaces.
#[inline]
pub fn display(&self) -> String {
    // `format!` already produces an owned `String`; calling `.to_owned()` on
    // it would only allocate and copy a second time.
    format!("{:?}", self)
}
}


impl GeneralCategory {
/// `Lu` | `Ll` | `Lt` (Short form: `LC`)
pub fn is_cased_letter(&self) -> bool {
Expand Down Expand Up @@ -206,6 +231,21 @@ impl GeneralCategory {
}
}


impl Default for GeneralCategory {
fn default() -> Self {
GeneralCategory::Unassigned
}
}


/// Formats a `GeneralCategory` using the text returned by `display()`.
impl fmt::Display for GeneralCategory {
    /// Writes the `display()` string to the formatter verbatim; width and
    /// padding flags on the formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered = self.display();
        f.write_str(&rendered)
    }
}


#[cfg(test)]
mod tests {
use super::GeneralCategory as GC;
Expand Down Expand Up @@ -304,4 +344,11 @@ mod tests {
assert_eq!(GC::of(c), GC::Unassigned);
}
}

#[test]
fn test_display() {
    // `Display` currently renders the variant identifier.
    // TODO: expect "Uppercase Letter" for `GC::UppercaseLetter` once
    // `display()` returns the long name with spaces.
    let expectations = [
        (GC::UppercaseLetter, "UppercaseLetter"),
        (GC::Unassigned, "Unassigned"),
    ];
    for &(category, rendered) in expectations.iter() {
        assert_eq!(format!("{}", category), rendered);
    }
}
}
3 changes: 2 additions & 1 deletion unic/ucd/category/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#![deny(unsafe_code, missing_docs)]
#![deny(unsafe_code, missing_docs, unconditional_recursion)]

//! # UNIC — UCD — Category
//!
Expand Down Expand Up @@ -38,6 +38,7 @@

#[macro_use]
extern crate matches;

extern crate unic_ucd_core;
extern crate unic_utils;

Expand Down
2 changes: 1 addition & 1 deletion unic/ucd/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// except according to those terms.


#![forbid(unsafe_code, missing_docs)]
#![forbid(unsafe_code, missing_docs, unconditional_recursion)]

//! # UNIC — UCD — Core
//!
Expand Down
31 changes: 29 additions & 2 deletions unic/ucd/normal/src/canonical_combining_class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
//! Reference: <http://unicode.org/reports/tr44/#Canonical_Combining_Class_Values>


use unic_utils::CharDataTable;
use std::fmt;

use unic_utils::{CharDataTable, CharProperty};


/// Represents *Canonical_Combining_Class* property of a Unicode character.
Expand Down Expand Up @@ -82,15 +84,34 @@ pub mod values {
}


impl CharProperty for CanonicalCombiningClass {
fn of(ch: char) -> Self {
Self::of(ch)
}
}


const CANONICAL_COMBINING_CLASS_VALUES: &'static [(char, char, CanonicalCombiningClass)] =
include!("tables/canonical_combining_class_values.rsv");


impl CanonicalCombiningClass {
    /// Looks up the *Canonical_Combining_Class* property value of `ch`.
    ///
    /// The lookup goes through `CANONICAL_COMBINING_CLASS_VALUES`, with
    /// `CanonicalCombiningClass(0)` supplied as the fallback value.
    pub fn of(ch: char) -> Self {
        *CANONICAL_COMBINING_CLASS_VALUES.find_or(ch, &CanonicalCombiningClass(0))
    }

    /// Human-readable description of the property value.
    ///
    /// For now this is the value of `number()` rendered as text.
    // TODO: Improve by returning the long name with underscores replaced by spaces.
    #[inline]
    pub fn display(&self) -> String {
        self.number().to_string()
    }
}

/// Formats a `CanonicalCombiningClass` using the text returned by `display()`.
impl fmt::Display for CanonicalCombiningClass {
    /// Writes the `display()` string to the formatter verbatim; width and
    /// padding flags on the formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered = self.display();
        f.write_str(&rendered)
    }
}


Expand Down Expand Up @@ -226,4 +247,10 @@ mod tests {
assert_eq!(CCC::of('\u{0315}').number(), 232);
assert_eq!(CCC::of('\u{1e94a}').number(), 7);
}

#[test]
fn test_display() {
    // `Display` renders the numeric class value of the looked-up character.
    let expectations = [('\u{0000}', "0"), ('\u{0300}', "230")];
    for &(ch, rendered) in expectations.iter() {
        assert_eq!(format!("{}", CCC::of(ch)), rendered);
    }
}
}
Loading

0 comments on commit 6888740

Please sign in to comment.