-
Notifications
You must be signed in to change notification settings - Fork 11.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[framework] Implements ascii + utf8 strings all over again #18462
Changes from all commits
ad44730
0409e94
221836b
79738b7
0f2a361
14c5d73
1cb32e8
186496a
6531c7f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,12 +4,13 @@ | |
/// The `ASCII` module defines basic string and char newtypes in Move that verify | ||
/// that characters are valid ASCII, and that strings consist of only valid ASCII characters. | ||
module std::ascii { | ||
|
||
// Allows calling `.to_string()` to convert an `ascii::String` into a `string::String` | ||
public use fun std::string::from_ascii as String.to_string; | ||
|
||
/// An invalid ASCII character was encountered when creating an ASCII string. | ||
const EINVALID_ASCII_CHARACTER: u64 = 0x10000; | ||
const EInvalidASCIICharacter: u64 = 0x10000; | ||
/// An invalid index was encountered when creating a substring. | ||
const EInvalidIndex: u64 = 0x10001; | ||
|
||
/// The `String` struct holds a vector of bytes that all represent | ||
/// valid ASCII characters. Note that these ASCII characters may not all | ||
|
@@ -27,82 +28,139 @@ module std::ascii { | |
|
||
/// Convert a `byte` into a `Char` that is checked to make sure it is valid ASCII. | ||
public fun char(byte: u8): Char { | ||
assert!(is_valid_char(byte), EINVALID_ASCII_CHARACTER); | ||
assert!(is_valid_char(byte), EInvalidASCIICharacter); | ||
Char { byte } | ||
} | ||
|
||
/// Convert a vector of bytes `bytes` into a `String`. Aborts if | ||
/// `bytes` contains non-ASCII characters. | ||
public fun string(bytes: vector<u8>): String { | ||
let x = try_string(bytes); | ||
assert!(x.is_some(), EINVALID_ASCII_CHARACTER); | ||
x.destroy_some() | ||
let x = try_string(bytes); | ||
assert!(x.is_some(), EInvalidASCIICharacter); | ||
x.destroy_some() | ||
} | ||
|
||
/// Convert a vector of bytes `bytes` into a `String`. Returns | ||
/// `Some(<ascii_string>)` if the `bytes` contains all valid ASCII | ||
/// characters. Otherwise returns `None`. | ||
public fun try_string(bytes: vector<u8>): Option<String> { | ||
let len = bytes.length(); | ||
let mut i = 0; | ||
while (i < len) { | ||
let possible_byte = bytes[i]; | ||
if (!is_valid_char(possible_byte)) return option::none(); | ||
i = i + 1; | ||
}; | ||
option::some(String { bytes }) | ||
let is_valid = bytes.all!(|byte| is_valid_char(*byte)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔥 |
||
if (is_valid) option::some(String { bytes }) | ||
else option::none() | ||
} | ||
|
||
/// Returns `true` if all characters in `string` are printable characters. | ||
/// Returns `false` otherwise. Not all `String`s are printable strings. | ||
public fun all_characters_printable(string: &String): bool { | ||
let len = string.bytes.length(); | ||
let mut i = 0; | ||
while (i < len) { | ||
let byte = string.bytes[i]; | ||
if (!is_printable_char(byte)) return false; | ||
i = i + 1; | ||
}; | ||
true | ||
string.bytes.all!(|byte| is_printable_char(*byte)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔥 🔥 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
} | ||
|
||
/// Push a `Char` to the end of the `string`. | ||
public fun push_char(string: &mut String, char: Char) { | ||
string.bytes.push_back(char.byte); | ||
} | ||
|
||
/// Pop a `Char` from the end of the `string`. | ||
public fun pop_char(string: &mut String): Char { | ||
Char { byte: string.bytes.pop_back() } | ||
} | ||
|
||
/// Returns the length of the `string` in bytes. | ||
public fun length(string: &String): u64 { | ||
string.as_bytes().length() | ||
} | ||
|
||
/// Append the `other` string to the end of `string`. | ||
public fun append(string: &mut String, other: String) { | ||
string.bytes.append(other.into_bytes()) | ||
} | ||
|
||
/// Insert the `other` string at the `at` index of `string`. | ||
public fun insert(s: &mut String, at: u64, o: String) { | ||
assert!(at <= s.length(), EInvalidIndex); | ||
o.into_bytes().destroy!(|e| s.bytes.insert(e, at)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔥 🔥 🔥 |
||
} | ||
|
||
/// Copy the slice of the `string` from `i` to `j` into a new `String`. | ||
public fun substring(string: &String, i: u64, j: u64): String { | ||
assert!(i <= j && j <= string.length(), EInvalidIndex); | ||
let mut bytes = vector[]; | ||
i.range_do!(j, |i| bytes.push_back(string.bytes[i])); | ||
Comment on lines
+87
to
+88
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might prefer |
||
String { bytes } | ||
} | ||
|
||
/// Get the inner bytes of the `string` as a reference | ||
public fun as_bytes(string: &String): &vector<u8> { | ||
&string.bytes | ||
&string.bytes | ||
} | ||
|
||
/// Unpack the `string` to get its backing bytes | ||
public fun into_bytes(string: String): vector<u8> { | ||
let String { bytes } = string; | ||
bytes | ||
let String { bytes } = string; | ||
bytes | ||
} | ||
|
||
/// Unpack the `char` into its underlying byte. | ||
/// Unpack the `char` into its underlying byte. | ||
public fun byte(char: Char): u8 { | ||
let Char { byte } = char; | ||
byte | ||
let Char { byte } = char; | ||
byte | ||
} | ||
|
||
/// Returns `true` if `b` is a valid ASCII character. Returns `false` otherwise. | ||
/// Returns `true` if `b` is a valid ASCII character. | ||
/// Returns `false` otherwise. | ||
public fun is_valid_char(b: u8): bool { | ||
b <= 0x7F | ||
b <= 0x7F | ||
} | ||
|
||
/// Returns `true` if `byte` is an printable ASCII character. Returns `false` otherwise. | ||
/// Returns `true` if `byte` is a printable ASCII character. | ||
/// Returns `false` otherwise. | ||
public fun is_printable_char(byte: u8): bool { | ||
byte >= 0x20 && // Disallow metacharacters | ||
byte <= 0x7E // Don't allow DEL metacharacter | ||
byte >= 0x20 && // Disallow metacharacters | ||
byte <= 0x7E // Don't allow DEL metacharacter | ||
} | ||
|
||
/// Returns `true` if `string` is empty. | ||
public fun is_empty(string: &String): bool { | ||
string.bytes.is_empty() | ||
} | ||
|
||
/// Convert a `string` to its uppercase equivalent. | ||
public fun to_uppercase(string: &String): String { | ||
let bytes = string.as_bytes().map_ref!(|byte| char_to_uppercase(*byte)); | ||
String { bytes } | ||
} | ||
|
||
/// Convert a `string` to its lowercase equivalent. | ||
public fun to_lowercase(string: &String): String { | ||
let bytes = string.as_bytes().map_ref!(|byte| char_to_lowercase(*byte)); | ||
String { bytes } | ||
Comment on lines
+129
to
+136
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔥 🔥 🔥 🔥 |
||
} | ||
|
||
/// Computes the index of the first occurrence of the `substr` in the `string`. | ||
/// Returns the length of the `string` if the `substr` is not found. | ||
/// Returns 0 if the `substr` is empty. | ||
Comment on lines
+140
to
+141
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I really hate this API, but I agree it is the right thing to do for consistency There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here, not a fan. |
||
public fun index_of(string: &String, substr: &String): u64 { | ||
let mut i = 0; | ||
let (n, m) = (string.length(), substr.length()); | ||
if (n < m) return n; | ||
while (i <= n - m) { | ||
let mut j = 0; | ||
while (j < m && string.bytes[i + j] == substr.bytes[j]) j = j + 1; | ||
if (j == m) return i; | ||
i = i + 1; | ||
}; | ||
n | ||
} | ||
|
||
/// Convert a `char` to its uppercase equivalent. | ||
fun char_to_uppercase(byte: u8): u8 { | ||
if (byte >= 0x61 && byte <= 0x7A) byte - 0x20 | ||
else byte | ||
} | ||
|
||
/// Convert a `char` to its lowercase equivalent. | ||
fun char_to_lowercase(byte: u8): u8 { | ||
if (byte >= 0x41 && byte <= 0x5A) byte + 0x20 | ||
else byte | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ughhhh but fine this is the right thing to do
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Idk I think we should at some point switch all of the std/sui errors to using clever errors.
It will be a pain, and maybe break someone somewhere, but I feel like it is the right thing to do long term