Skip to content

Commit

Permalink
[framework] Implements ascii + utf8 strings all over again (#18462)
Browse files Browse the repository at this point in the history
## Description 

This PR rolls #17380 (successfully merged once) yet again. 

## Test plan 

Features tests.

## Release notes

Check each box that your changes affect. If none of the boxes relate to
your changes, release notes aren't required.

For each box you select, include information after the relevant heading
that describes the impact of your changes that a user might notice and
any actions they must take to implement updates.

- [x] Protocol: This change updates the [ascii
module](https://docs.sui.io/references/framework/move-stdlib/ascii) in
the following ways:
Adds new methods to `std::ascii`:
- `ascii::append(&mut String, String)`
- `ascii::is_empty(): bool`
- `ascii::substring(&String, i, j): String`
- `ascii::index_of(&String, &String): u64`
- `ascii::to_uppercase(&String): String`
- `ascii::to_lowercase(&String): String`

These additions make the ASCII interface more similar to the UTF8 one.

Renames:
- `string::bytes() to string::as_bytes()`
- `string::sub_string() to string::substring()`

Deprecates:
- `string::sub_string` in favour of `string::substring`
- `string::bytes` in favour of `string::as_bytes`

Additional changes:
- updates `std::type_name` to use `std::substring`
- removes use statements for implicit imports
- renames constants from `E_INDEX` to conventional `EIndexOutOfBounds`
  • Loading branch information
damirka authored Jun 29, 2024
1 parent adf395f commit f6f2584
Show file tree
Hide file tree
Showing 22 changed files with 758 additions and 195 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ A: object(0,0)
task 1 'publish'. lines 8-29:
created: object(1,0)
mutated: object(0,1)
gas summary: computation_cost: 1000000, storage_cost: 6049600, storage_rebate: 0, non_refundable_storage_fee: 0
gas summary: computation_cost: 1000000, storage_cost: 6072400, storage_rebate: 0, non_refundable_storage_fee: 0

task 2 'programmable'. lines 31-34:
Error: Transaction Effects Status: Invalid command argument at 0. The argument cannot be deserialized into a value of the specified type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ module test::m1 {
while (!vector::is_empty(&v)) {
let opt = vector::pop_back(&mut v);
if (option::is_some(&opt)) {
string::utf8(*string::bytes(&option::destroy_some(opt)));
string::utf8(*string::as_bytes(&option::destroy_some(opt)));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ A: object(0,0)
task 1 'publish'. lines 6-49:
created: object(1,0)
mutated: object(0,1)
gas summary: computation_cost: 1000000, storage_cost: 8709600, storage_rebate: 0, non_refundable_storage_fee: 0
gas summary: computation_cost: 1000000, storage_cost: 8732400, storage_rebate: 0, non_refundable_storage_fee: 0

task 2 'programmable'. lines 51-57:
mutated: object(0,1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ module test::m1 {

public entry fun vec_string(mut v: vector<String>) {
while (!vector::is_empty(&v)) {
string::utf8(*string::bytes(&vector::pop_back(&mut v)));
string::utf8(*string::as_bytes(&vector::pop_back(&mut v)));
}
}

Expand All @@ -29,7 +29,7 @@ module test::m1 {
while (!vector::is_empty(&v)) {
let opt = vector::pop_back(&mut v);
if (option::is_some(&opt)) {
string::utf8(*string::bytes(&option::destroy_some(opt)));
string::utf8(*string::as_bytes(&option::destroy_some(opt)));
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions crates/sui-axelar-cgp/move/sources/validators.move
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ module axelar::validators {
let mut data = vector[];
vector::append(&mut data, address::to_bytes(cmd_id));
vector::append(&mut data, address::to_bytes(target_id));
vector::append(&mut data, *string::bytes(&source_chain));
vector::append(&mut data, *string::bytes(&source_address));
vector::append(&mut data, *string::as_bytes(&source_chain));
vector::append(&mut data, *string::as_bytes(&source_address));
vector::append(&mut data, payload_hash);

table::add(&mut axelar.approvals, cmd_id, Approval {
Expand Down Expand Up @@ -205,8 +205,8 @@ module axelar::validators {
let mut data = vector[];
vector::append(&mut data, address::to_bytes(cmd_id));
vector::append(&mut data, address::to_bytes(target_id));
vector::append(&mut data, *string::bytes(&source_chain));
vector::append(&mut data, *string::bytes(&source_address));
vector::append(&mut data, *string::as_bytes(&source_chain));
vector::append(&mut data, *string::as_bytes(&source_address));
vector::append(&mut data, hash::keccak256(&payload));

assert!(hash::keccak256(&data) == approval_hash, EPayloadHashMismatch);
Expand Down
323 changes: 290 additions & 33 deletions crates/sui-framework/docs/move-stdlib/ascii.md

Large diffs are not rendered by default.

155 changes: 119 additions & 36 deletions crates/sui-framework/docs/move-stdlib/string.md

Large diffs are not rendered by default.

17 changes: 9 additions & 8 deletions crates/sui-framework/docs/move-stdlib/type_name.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,15 @@ u8, u16, u32, u64, u128, u256, bool, address, vector.
bytes == &b"u128" ||
bytes == &b"u256" ||
bytes == &b"<b>address</b>" ||
(bytes.length() &gt;= 6 &&
bytes[0] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_V">ASCII_V</a> &&
bytes[1] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_E">ASCII_E</a> &&
bytes[2] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_C">ASCII_C</a> &&
bytes[3] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_T">ASCII_T</a> &&
bytes[4] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_O">ASCII_O</a> &&
bytes[5] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_R">ASCII_R</a>)

(
bytes.length() &gt;= 6 &&
bytes[0] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_V">ASCII_V</a> &&
bytes[1] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_E">ASCII_E</a> &&
bytes[2] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_C">ASCII_C</a> &&
bytes[3] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_T">ASCII_T</a> &&
bytes[4] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_O">ASCII_O</a> &&
bytes[5] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_R">ASCII_R</a>,
)
}
</code></pre>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,8 @@ Sender is not @0x0 the system address.


<pre><code><b>fun</b> <a href="../sui-framework/authenticator_state.md#0x2_authenticator_state_string_bytes_lt">string_bytes_lt</a>(a: &String, b: &String): bool {
<b>let</b> a_bytes = a.bytes();
<b>let</b> b_bytes = b.bytes();
<b>let</b> a_bytes = a.as_bytes();
<b>let</b> b_bytes = b.as_bytes();

<b>if</b> (a_bytes.length() &lt; b_bytes.length()) {
<b>true</b>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ Returns true if <code><b>address</b></code> was created using zklogin with the g
address_seed: u256,
issuer: &String,
): bool {
<a href="zklogin_verified_issuer.md#0x2_zklogin_verified_issuer_check_zklogin_issuer_internal">check_zklogin_issuer_internal</a>(<b>address</b>, address_seed, issuer.bytes())
<a href="zklogin_verified_issuer.md#0x2_zklogin_verified_issuer_check_zklogin_issuer_internal">check_zklogin_issuer_internal</a>(<b>address</b>, address_seed, issuer.as_bytes())
}
</code></pre>

Expand Down
124 changes: 91 additions & 33 deletions crates/sui-framework/packages/move-stdlib/sources/ascii.move
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
/// The `ASCII` module defines basic string and char newtypes in Move that verify
/// that characters are valid ASCII, and that strings consist of only valid ASCII characters.
module std::ascii {

// Allows calling `.to_string()` to convert an `ascii::String` into as `string::String`
public use fun std::string::from_ascii as String.to_string;

/// An invalid ASCII character was encountered when creating an ASCII string.
const EINVALID_ASCII_CHARACTER: u64 = 0x10000;
const EInvalidASCIICharacter: u64 = 0x10000;
/// An invalid index was encountered when creating a substring.
const EInvalidIndex: u64 = 0x10001;

/// The `String` struct holds a vector of bytes that all represent
/// valid ASCII characters. Note that these ASCII characters may not all
Expand All @@ -27,82 +28,139 @@ module std::ascii {

/// Convert a `byte` into a `Char` that is checked to make sure it is valid ASCII.
public fun char(byte: u8): Char {
assert!(is_valid_char(byte), EINVALID_ASCII_CHARACTER);
assert!(is_valid_char(byte), EInvalidASCIICharacter);
Char { byte }
}

/// Convert a vector of bytes `bytes` into an `String`. Aborts if
/// `bytes` contains non-ASCII characters.
public fun string(bytes: vector<u8>): String {
let x = try_string(bytes);
assert!(x.is_some(), EINVALID_ASCII_CHARACTER);
x.destroy_some()
let x = try_string(bytes);
assert!(x.is_some(), EInvalidASCIICharacter);
x.destroy_some()
}

/// Convert a vector of bytes `bytes` into an `String`. Returns
/// `Some(<ascii_string>)` if the `bytes` contains all valid ASCII
/// characters. Otherwise returns `None`.
public fun try_string(bytes: vector<u8>): Option<String> {
let len = bytes.length();
let mut i = 0;
while (i < len) {
let possible_byte = bytes[i];
if (!is_valid_char(possible_byte)) return option::none();
i = i + 1;
};
option::some(String { bytes })
let is_valid = bytes.all!(|byte| is_valid_char(*byte));
if (is_valid) option::some(String { bytes })
else option::none()
}

/// Returns `true` if all characters in `string` are printable characters
/// Returns `false` otherwise. Not all `String`s are printable strings.
public fun all_characters_printable(string: &String): bool {
let len = string.bytes.length();
let mut i = 0;
while (i < len) {
let byte = string.bytes[i];
if (!is_printable_char(byte)) return false;
i = i + 1;
};
true
string.bytes.all!(|byte| is_printable_char(*byte))
}

/// Push a `Char` to the end of the `string`.
public fun push_char(string: &mut String, char: Char) {
string.bytes.push_back(char.byte);
}

/// Pop a `Char` from the end of the `string`.
public fun pop_char(string: &mut String): Char {
Char { byte: string.bytes.pop_back() }
}

/// Returns the length of the `string` in bytes.
public fun length(string: &String): u64 {
string.as_bytes().length()
}

/// Append the `other` string to the end of `string`.
public fun append(string: &mut String, other: String) {
string.bytes.append(other.into_bytes())
}

/// Insert the `other` string at the `at` index of `string`.
public fun insert(s: &mut String, at: u64, o: String) {
assert!(at <= s.length(), EInvalidIndex);
o.into_bytes().destroy!(|e| s.bytes.insert(e, at));
}

/// Copy the slice of the `string` from `i` to `j` into a new `String`.
public fun substring(string: &String, i: u64, j: u64): String {
assert!(i <= j && j <= string.length(), EInvalidIndex);
let mut bytes = vector[];
i.range_do!(j, |i| bytes.push_back(string.bytes[i]));
String { bytes }
}

/// Get the inner bytes of the `string` as a reference
public fun as_bytes(string: &String): &vector<u8> {
&string.bytes
&string.bytes
}

/// Unpack the `string` to get its backing bytes
public fun into_bytes(string: String): vector<u8> {
let String { bytes } = string;
bytes
let String { bytes } = string;
bytes
}

/// Unpack the `char` into its underlying byte.
/// Unpack the `char` into its underlying bytes.
public fun byte(char: Char): u8 {
let Char { byte } = char;
byte
let Char { byte } = char;
byte
}

/// Returns `true` if `b` is a valid ASCII character. Returns `false` otherwise.
/// Returns `true` if `b` is a valid ASCII character.
/// Returns `false` otherwise.
public fun is_valid_char(b: u8): bool {
b <= 0x7F
b <= 0x7F
}

/// Returns `true` if `byte` is an printable ASCII character. Returns `false` otherwise.
/// Returns `true` if `byte` is an printable ASCII character.
/// Returns `false` otherwise.
public fun is_printable_char(byte: u8): bool {
byte >= 0x20 && // Disallow metacharacters
byte <= 0x7E // Don't allow DEL metacharacter
byte >= 0x20 && // Disallow metacharacters
byte <= 0x7E // Don't allow DEL metacharacter
}

/// Returns `true` if `string` is empty.
public fun is_empty(string: &String): bool {
string.bytes.is_empty()
}

/// Convert a `string` to its uppercase equivalent.
public fun to_uppercase(string: &String): String {
let bytes = string.as_bytes().map_ref!(|byte| char_to_uppercase(*byte));
String { bytes }
}

/// Convert a `string` to its lowercase equivalent.
public fun to_lowercase(string: &String): String {
let bytes = string.as_bytes().map_ref!(|byte| char_to_lowercase(*byte));
String { bytes }
}

/// Computes the index of the first occurrence of the `substr` in the `string`.
/// Returns the length of the `string` if the `substr` is not found.
/// Returns 0 if the `substr` is empty.
public fun index_of(string: &String, substr: &String): u64 {
let mut i = 0;
let (n, m) = (string.length(), substr.length());
if (n < m) return n;
while (i <= n - m) {
let mut j = 0;
while (j < m && string.bytes[i + j] == substr.bytes[j]) j = j + 1;
if (j == m) return i;
i = i + 1;
};
n
}

/// Convert a `char` to its lowercase equivalent.
fun char_to_uppercase(byte: u8): u8 {
if (byte >= 0x61 && byte <= 0x7A) byte - 0x20
else byte
}

/// Convert a `char` to its lowercase equivalent.
fun char_to_lowercase(byte: u8): u8 {
if (byte >= 0x41 && byte <= 0x5A) byte + 0x20
else byte
}
}
Loading

0 comments on commit f6f2584

Please sign in to comment.