Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[framework] Implements ascii + utf8 strings all over again #18462

Merged
merged 9 commits into from
Jun 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ A: object(0,0)
task 1 'publish'. lines 8-29:
created: object(1,0)
mutated: object(0,1)
gas summary: computation_cost: 1000000, storage_cost: 6049600, storage_rebate: 0, non_refundable_storage_fee: 0
gas summary: computation_cost: 1000000, storage_cost: 6072400, storage_rebate: 0, non_refundable_storage_fee: 0

task 2 'programmable'. lines 31-34:
Error: Transaction Effects Status: Invalid command argument at 0. The argument cannot be deserialized into a value of the specified type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ module test::m1 {
while (!vector::is_empty(&v)) {
let opt = vector::pop_back(&mut v);
if (option::is_some(&opt)) {
string::utf8(*string::bytes(&option::destroy_some(opt)));
string::utf8(*string::as_bytes(&option::destroy_some(opt)));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ A: object(0,0)
task 1 'publish'. lines 6-49:
created: object(1,0)
mutated: object(0,1)
gas summary: computation_cost: 1000000, storage_cost: 8709600, storage_rebate: 0, non_refundable_storage_fee: 0
gas summary: computation_cost: 1000000, storage_cost: 8732400, storage_rebate: 0, non_refundable_storage_fee: 0

task 2 'programmable'. lines 51-57:
mutated: object(0,1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ module test::m1 {

public entry fun vec_string(mut v: vector<String>) {
while (!vector::is_empty(&v)) {
string::utf8(*string::bytes(&vector::pop_back(&mut v)));
string::utf8(*string::as_bytes(&vector::pop_back(&mut v)));
}
}

Expand All @@ -29,7 +29,7 @@ module test::m1 {
while (!vector::is_empty(&v)) {
let opt = vector::pop_back(&mut v);
if (option::is_some(&opt)) {
string::utf8(*string::bytes(&option::destroy_some(opt)));
string::utf8(*string::as_bytes(&option::destroy_some(opt)));
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions crates/sui-axelar-cgp/move/sources/validators.move
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ module axelar::validators {
let mut data = vector[];
vector::append(&mut data, address::to_bytes(cmd_id));
vector::append(&mut data, address::to_bytes(target_id));
vector::append(&mut data, *string::bytes(&source_chain));
vector::append(&mut data, *string::bytes(&source_address));
vector::append(&mut data, *string::as_bytes(&source_chain));
vector::append(&mut data, *string::as_bytes(&source_address));
vector::append(&mut data, payload_hash);

table::add(&mut axelar.approvals, cmd_id, Approval {
Expand Down Expand Up @@ -205,8 +205,8 @@ module axelar::validators {
let mut data = vector[];
vector::append(&mut data, address::to_bytes(cmd_id));
vector::append(&mut data, address::to_bytes(target_id));
vector::append(&mut data, *string::bytes(&source_chain));
vector::append(&mut data, *string::bytes(&source_address));
vector::append(&mut data, *string::as_bytes(&source_chain));
vector::append(&mut data, *string::as_bytes(&source_address));
vector::append(&mut data, hash::keccak256(&payload));

assert!(hash::keccak256(&data) == approval_hash, EPayloadHashMismatch);
Expand Down
323 changes: 290 additions & 33 deletions crates/sui-framework/docs/move-stdlib/ascii.md

Large diffs are not rendered by default.

155 changes: 119 additions & 36 deletions crates/sui-framework/docs/move-stdlib/string.md

Large diffs are not rendered by default.

17 changes: 9 additions & 8 deletions crates/sui-framework/docs/move-stdlib/type_name.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,15 @@ u8, u16, u32, u64, u128, u256, bool, address, vector.
bytes == &b"u128" ||
bytes == &b"u256" ||
bytes == &b"<b>address</b>" ||
(bytes.length() &gt;= 6 &&
bytes[0] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_V">ASCII_V</a> &&
bytes[1] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_E">ASCII_E</a> &&
bytes[2] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_C">ASCII_C</a> &&
bytes[3] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_T">ASCII_T</a> &&
bytes[4] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_O">ASCII_O</a> &&
bytes[5] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_R">ASCII_R</a>)

(
bytes.length() &gt;= 6 &&
bytes[0] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_V">ASCII_V</a> &&
bytes[1] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_E">ASCII_E</a> &&
bytes[2] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_C">ASCII_C</a> &&
bytes[3] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_T">ASCII_T</a> &&
bytes[4] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_O">ASCII_O</a> &&
bytes[5] == <a href="../move-stdlib/type_name.md#0x1_type_name_ASCII_R">ASCII_R</a>,
)
}
</code></pre>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,8 @@ Sender is not @0x0 the system address.


<pre><code><b>fun</b> <a href="../sui-framework/authenticator_state.md#0x2_authenticator_state_string_bytes_lt">string_bytes_lt</a>(a: &String, b: &String): bool {
<b>let</b> a_bytes = a.bytes();
<b>let</b> b_bytes = b.bytes();
<b>let</b> a_bytes = a.as_bytes();
<b>let</b> b_bytes = b.as_bytes();

<b>if</b> (a_bytes.length() &lt; b_bytes.length()) {
<b>true</b>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ Returns true if <code><b>address</b></code> was created using zklogin with the g
address_seed: u256,
issuer: &String,
): bool {
<a href="zklogin_verified_issuer.md#0x2_zklogin_verified_issuer_check_zklogin_issuer_internal">check_zklogin_issuer_internal</a>(<b>address</b>, address_seed, issuer.bytes())
<a href="zklogin_verified_issuer.md#0x2_zklogin_verified_issuer_check_zklogin_issuer_internal">check_zklogin_issuer_internal</a>(<b>address</b>, address_seed, issuer.as_bytes())
}
</code></pre>

Expand Down
124 changes: 91 additions & 33 deletions crates/sui-framework/packages/move-stdlib/sources/ascii.move
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
/// The `ASCII` module defines basic string and char newtypes in Move that verify
/// that characters are valid ASCII, and that strings consist of only valid ASCII characters.
module std::ascii {

// Allows calling `.to_string()` to convert an `ascii::String` into as `string::String`
public use fun std::string::from_ascii as String.to_string;

/// An invalid ASCII character was encountered when creating an ASCII string.
const EINVALID_ASCII_CHARACTER: u64 = 0x10000;
const EInvalidASCIICharacter: u64 = 0x10000;
/// An invalid index was encountered when creating a substring.
const EInvalidIndex: u64 = 0x10001;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ughhhh but fine this is the right thing to do

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Idk I think we should at some point switch all of the std/sui errors to using clever errors.
It will be a pain, and maybe break someone somewhere, but I feel like it is the right thing to do long term


/// The `String` struct holds a vector of bytes that all represent
/// valid ASCII characters. Note that these ASCII characters may not all
Expand All @@ -27,82 +28,139 @@ module std::ascii {

/// Convert a `byte` into a `Char` that is checked to make sure it is valid ASCII.
public fun char(byte: u8): Char {
assert!(is_valid_char(byte), EINVALID_ASCII_CHARACTER);
assert!(is_valid_char(byte), EInvalidASCIICharacter);
Char { byte }
}

/// Convert a vector of bytes `bytes` into an `String`. Aborts if
/// `bytes` contains non-ASCII characters.
public fun string(bytes: vector<u8>): String {
let x = try_string(bytes);
assert!(x.is_some(), EINVALID_ASCII_CHARACTER);
x.destroy_some()
let x = try_string(bytes);
assert!(x.is_some(), EInvalidASCIICharacter);
x.destroy_some()
}

/// Convert a vector of bytes `bytes` into an `String`. Returns
/// `Some(<ascii_string>)` if the `bytes` contains all valid ASCII
/// characters. Otherwise returns `None`.
public fun try_string(bytes: vector<u8>): Option<String> {
let len = bytes.length();
let mut i = 0;
while (i < len) {
let possible_byte = bytes[i];
if (!is_valid_char(possible_byte)) return option::none();
i = i + 1;
};
option::some(String { bytes })
let is_valid = bytes.all!(|byte| is_valid_char(*byte));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔥

if (is_valid) option::some(String { bytes })
else option::none()
}

/// Returns `true` if all characters in `string` are printable characters
/// Returns `false` otherwise. Not all `String`s are printable strings.
public fun all_characters_printable(string: &String): bool {
let len = string.bytes.length();
let mut i = 0;
while (i < len) {
let byte = string.bytes[i];
if (!is_printable_char(byte)) return false;
i = i + 1;
};
true
string.bytes.all!(|byte| is_printable_char(*byte))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔥 🔥

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}

/// Push a `Char` to the end of the `string`.
public fun push_char(string: &mut String, char: Char) {
string.bytes.push_back(char.byte);
}

/// Pop a `Char` from the end of the `string`.
public fun pop_char(string: &mut String): Char {
Char { byte: string.bytes.pop_back() }
}

/// Returns the length of the `string` in bytes.
public fun length(string: &String): u64 {
string.as_bytes().length()
}

/// Append the `other` string to the end of `string`.
public fun append(string: &mut String, other: String) {
string.bytes.append(other.into_bytes())
}

/// Insert the `other` string at the `at` index of `string`.
public fun insert(s: &mut String, at: u64, o: String) {
assert!(at <= s.length(), EInvalidIndex);
o.into_bytes().destroy!(|e| s.bytes.insert(e, at));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔥 🔥 🔥

}

/// Copy the slice of the `string` from `i` to `j` into a new `String`.
public fun substring(string: &String, i: u64, j: u64): String {
assert!(i <= j && j <= string.length(), EInvalidIndex);
let mut bytes = vector[];
i.range_do!(j, |i| bytes.push_back(string.bytes[i]));
Comment on lines +87 to +88
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might prefer vector::tabulate for this one. But this one is good too :)

String { bytes }
}

/// Get the inner bytes of the `string` as a reference
public fun as_bytes(string: &String): &vector<u8> {
&string.bytes
&string.bytes
}

/// Unpack the `string` to get its backing bytes
public fun into_bytes(string: String): vector<u8> {
let String { bytes } = string;
bytes
let String { bytes } = string;
bytes
}

/// Unpack the `char` into its underlying byte.
/// Unpack the `char` into its underlying bytes.
public fun byte(char: Char): u8 {
let Char { byte } = char;
byte
let Char { byte } = char;
byte
}

/// Returns `true` if `b` is a valid ASCII character. Returns `false` otherwise.
/// Returns `true` if `b` is a valid ASCII character.
/// Returns `false` otherwise.
public fun is_valid_char(b: u8): bool {
b <= 0x7F
b <= 0x7F
}

/// Returns `true` if `byte` is an printable ASCII character. Returns `false` otherwise.
/// Returns `true` if `byte` is an printable ASCII character.
/// Returns `false` otherwise.
public fun is_printable_char(byte: u8): bool {
byte >= 0x20 && // Disallow metacharacters
byte <= 0x7E // Don't allow DEL metacharacter
byte >= 0x20 && // Disallow metacharacters
byte <= 0x7E // Don't allow DEL metacharacter
}

/// Returns `true` if `string` is empty.
public fun is_empty(string: &String): bool {
string.bytes.is_empty()
}

/// Convert a `string` to its uppercase equivalent.
public fun to_uppercase(string: &String): String {
let bytes = string.as_bytes().map_ref!(|byte| char_to_uppercase(*byte));
String { bytes }
}

/// Convert a `string` to its lowercase equivalent.
public fun to_lowercase(string: &String): String {
let bytes = string.as_bytes().map_ref!(|byte| char_to_lowercase(*byte));
String { bytes }
Comment on lines +129 to +136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔥 🔥 🔥 🔥

}

/// Computes the index of the first occurrence of the `substr` in the `string`.
/// Returns the length of the `string` if the `substr` is not found.
/// Returns 0 if the `substr` is empty.
Comment on lines +140 to +141
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really hate this API, but I agree it is the right thing to do for consistency

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, not a fan.

public fun index_of(string: &String, substr: &String): u64 {
let mut i = 0;
let (n, m) = (string.length(), substr.length());
if (n < m) return n;
while (i <= n - m) {
let mut j = 0;
while (j < m && string.bytes[i + j] == substr.bytes[j]) j = j + 1;
if (j == m) return i;
i = i + 1;
};
n
}

/// Convert a `char` to its lowercase equivalent.
fun char_to_uppercase(byte: u8): u8 {
if (byte >= 0x61 && byte <= 0x7A) byte - 0x20
else byte
}

/// Convert a `char` to its lowercase equivalent.
fun char_to_lowercase(byte: u8): u8 {
if (byte >= 0x41 && byte <= 0x5A) byte + 0x20
else byte
}
}
Loading
Loading