Skip to content

Commit

Permalink
chore - codegen: extract c string escaping to a separate file (#1028)
Browse files Browse the repository at this point in the history
  • Loading branch information
gibbz00 authored Apr 19, 2024
1 parent 30b22d3 commit 0381b93
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 138 deletions.
141 changes: 3 additions & 138 deletions prost-build/src/code_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ use crate::ident::{strip_enum_prefix, to_snake, to_upper_camel};
use crate::message_graph::MessageGraph;
use crate::{BytesType, Config, MapType};

mod c_escaping;
use c_escaping::unescape_c_escape_string;

#[derive(PartialEq)]
enum Syntax {
Proto2,
Expand Down Expand Up @@ -1076,112 +1079,6 @@ fn can_pack(field: &FieldDescriptorProto) -> bool {
)
}

/// Based on [`google::protobuf::UnescapeCEscapeString`][1]
/// [1]: https://github.com/google/protobuf/blob/3.3.x/src/google/protobuf/stubs/strutil.cc#L312-L322
fn unescape_c_escape_string(s: &str) -> Vec<u8> {
let src = s.as_bytes();
let len = src.len();
let mut dst = Vec::new();

let mut p = 0;

while p < len {
if src[p] != b'\\' {
dst.push(src[p]);
p += 1;
} else {
p += 1;
if p == len {
panic!(
"invalid c-escaped default binary value ({}): ends with '\'",
s
)
}
match src[p] {
b'a' => {
dst.push(0x07);
p += 1;
}
b'b' => {
dst.push(0x08);
p += 1;
}
b'f' => {
dst.push(0x0C);
p += 1;
}
b'n' => {
dst.push(0x0A);
p += 1;
}
b'r' => {
dst.push(0x0D);
p += 1;
}
b't' => {
dst.push(0x09);
p += 1;
}
b'v' => {
dst.push(0x0B);
p += 1;
}
b'\\' => {
dst.push(0x5C);
p += 1;
}
b'?' => {
dst.push(0x3F);
p += 1;
}
b'\'' => {
dst.push(0x27);
p += 1;
}
b'"' => {
dst.push(0x22);
p += 1;
}
b'0'..=b'7' => {
debug!("another octal: {}, offset: {}", s, &s[p..]);
let mut octal = 0;
for _ in 0..3 {
if p < len && src[p] >= b'0' && src[p] <= b'7' {
debug!("\toctal: {}", octal);
octal = octal * 8 + (src[p] - b'0');
p += 1;
} else {
break;
}
}
dst.push(octal);
}
b'x' | b'X' => {
if p + 3 > len {
panic!(
"invalid c-escaped default binary value ({}): incomplete hex value",
s
)
}
match u8::from_str_radix(&s[p + 1..p + 3], 16) {
Ok(b) => dst.push(b),
_ => panic!(
"invalid c-escaped default binary value ({}): invalid hex value",
&s[p..p + 2]
),
}
p += 3;
}
_ => panic!(
"invalid c-escaped default binary value ({}): invalid escape",
s
),
}
}
}
dst
}

struct EnumVariantMapping<'a> {
path_idx: usize,
proto_name: &'a str,
Expand Down Expand Up @@ -1262,35 +1159,3 @@ impl BytesType {
}
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_unescape_c_escape_string() {
assert_eq!(
&b"hello world"[..],
&unescape_c_escape_string("hello world")[..]
);

assert_eq!(&b"\0"[..], &unescape_c_escape_string(r#"\0"#)[..]);

assert_eq!(
&[0o012, 0o156],
&unescape_c_escape_string(r#"\012\156"#)[..]
);
assert_eq!(&[0x01, 0x02], &unescape_c_escape_string(r#"\x01\x02"#)[..]);

assert_eq!(
&b"\0\x01\x07\x08\x0C\n\r\t\x0B\\\'\"\xFE"[..],
&unescape_c_escape_string(r#"\0\001\a\b\f\n\r\t\v\\\'\"\xfe"#)[..]
);
}

#[test]
#[should_panic(expected = "incomplete hex value")]
fn test_unescape_c_escape_string_incomplete_hex_value() {
unescape_c_escape_string(r#"\x1"#);
}
}
139 changes: 139 additions & 0 deletions prost-build/src/code_generator/c_escaping.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
use log::debug;

/// Based on [`google::protobuf::UnescapeCEscapeString`][1]
/// [1]: https://github.com/google/protobuf/blob/3.3.x/src/google/protobuf/stubs/strutil.cc#L312-L322
pub(super) fn unescape_c_escape_string(s: &str) -> Vec<u8> {
let src = s.as_bytes();
let len = src.len();
let mut dst = Vec::new();

let mut p = 0;

while p < len {
if src[p] != b'\\' {
dst.push(src[p]);
p += 1;
} else {
p += 1;
if p == len {
panic!(
"invalid c-escaped default binary value ({}): ends with '\'",
s
)
}
match src[p] {
b'a' => {
dst.push(0x07);
p += 1;
}
b'b' => {
dst.push(0x08);
p += 1;
}
b'f' => {
dst.push(0x0C);
p += 1;
}
b'n' => {
dst.push(0x0A);
p += 1;
}
b'r' => {
dst.push(0x0D);
p += 1;
}
b't' => {
dst.push(0x09);
p += 1;
}
b'v' => {
dst.push(0x0B);
p += 1;
}
b'\\' => {
dst.push(0x5C);
p += 1;
}
b'?' => {
dst.push(0x3F);
p += 1;
}
b'\'' => {
dst.push(0x27);
p += 1;
}
b'"' => {
dst.push(0x22);
p += 1;
}
b'0'..=b'7' => {
debug!("another octal: {}, offset: {}", s, &s[p..]);
let mut octal = 0;
for _ in 0..3 {
if p < len && src[p] >= b'0' && src[p] <= b'7' {
debug!("\toctal: {}", octal);
octal = octal * 8 + (src[p] - b'0');
p += 1;
} else {
break;
}
}
dst.push(octal);
}
b'x' | b'X' => {
if p + 3 > len {
panic!(
"invalid c-escaped default binary value ({}): incomplete hex value",
s
)
}
match u8::from_str_radix(&s[p + 1..p + 3], 16) {
Ok(b) => dst.push(b),
_ => panic!(
"invalid c-escaped default binary value ({}): invalid hex value",
&s[p..p + 2]
),
}
p += 3;
}
_ => panic!(
"invalid c-escaped default binary value ({}): invalid escape",
s
),
}
}
}
dst
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_unescape_c_escape_string() {
assert_eq!(
&b"hello world"[..],
&unescape_c_escape_string("hello world")[..]
);

assert_eq!(&b"\0"[..], &unescape_c_escape_string(r#"\0"#)[..]);

assert_eq!(
&[0o012, 0o156],
&unescape_c_escape_string(r#"\012\156"#)[..]
);
assert_eq!(&[0x01, 0x02], &unescape_c_escape_string(r#"\x01\x02"#)[..]);

assert_eq!(
&b"\0\x01\x07\x08\x0C\n\r\t\x0B\\\'\"\xFE"[..],
&unescape_c_escape_string(r#"\0\001\a\b\f\n\r\t\v\\\'\"\xfe"#)[..]
);
}

#[test]
#[should_panic(expected = "incomplete hex value")]
fn test_unescape_c_escape_string_incomplete_hex_value() {
unescape_c_escape_string(r#"\x1"#);
}
}

0 comments on commit 0381b93

Please sign in to comment.