//! Protobuf "text format" implementation. //! //! Text format message look like this: //! //! ```text,ignore //! size: 17 //! color: "red" //! children { //! size: 18 //! color: "blue" //! } //! children { //! size: 19 //! color: "green" //! } //! ``` //! //! This format is not specified, but it is implemented by all official //! protobuf implementations, including `protoc` command which can decode //! and encode messages using text format. use crate::message::Message; use crate::reflect::ReflectFieldRef; use crate::reflect::ReflectValueRef; use std; use std::fmt; use std::fmt::Write; mod print; // Used by text format parser and by pure-rust codegen parsed // this it is public but hidden module. // https://github.com/rust-lang/rust/issues/44663 #[doc(hidden)] pub mod lexer; use self::print::print_str_to; #[doc(hidden)] pub use self::print::quote_bytes_to; #[doc(hidden)] pub use self::print::quote_escape_bytes; use crate::text_format::print::quote_escape_bytes_to; #[doc(hidden)] pub fn unescape_string(string: &str) -> Vec { fn parse_if_digit(chars: &mut std::str::Chars) -> u8 { let mut copy = chars.clone(); let f = match copy.next() { None => return 0, Some(f) => f, }; let d = match f { '0'..='9' => (f as u8 - b'0'), _ => return 0, }; *chars = copy; d } fn parse_hex_digit(chars: &mut std::str::Chars) -> u8 { match chars.next().unwrap() { c @ '0'..='9' => (c as u8) - b'0', c @ 'a'..='f' => (c as u8) - b'a' + 10, c @ 'A'..='F' => (c as u8) - b'A' + 10, _ => panic!("incorrect hex escape"), } } fn parse_escape_rem(chars: &mut std::str::Chars) -> u8 { let n = chars.next().unwrap(); match n { 'a' => return b'\x07', 'b' => return b'\x08', 'f' => return b'\x0c', 'n' => return b'\n', 'r' => return b'\r', 't' => return b'\t', 'v' => return b'\x0b', '"' => return b'"', '\'' => return b'\'', '0'..='9' => { let d1 = n as u8 - b'0'; let d2 = parse_if_digit(chars); let d3 = parse_if_digit(chars); return (d1 * 64 + d2 * 8 + d3) as u8; } 'x' => { let d1 = parse_hex_digit(chars); let d2 = parse_hex_digit(chars); return d1 * 16 + d2; } c => return c as u8, // TODO: validate ASCII }; } let mut chars = string.chars(); let mut r = Vec::new(); loop { let f = match chars.next() { None => return r, Some(f) => f, }; if f == '\\' { r.push(parse_escape_rem(&mut chars)); } else { r.push(f as u8); // TODO: escape UTF-8 } } } fn do_indent(buf: &mut String, pretty: bool, indent: usize) { if pretty && indent > 0 { for _ in 0..indent { buf.push_str(" "); } } } fn print_start_field( buf: &mut String, pretty: bool, indent: usize, first: &mut bool, field_name: &str, ) { if !*first && !pretty { buf.push_str(" "); } do_indent(buf, pretty, indent); *first = false; buf.push_str(field_name); } fn print_end_field(buf: &mut String, pretty: bool) { if pretty { buf.push_str("\n"); } } fn print_field( buf: &mut String, pretty: bool, indent: usize, first: &mut bool, field_name: &str, value: ReflectValueRef, ) { print_start_field(buf, pretty, indent, first, field_name); match value { ReflectValueRef::Message(m) => { buf.push_str(" {"); if pretty { buf.push_str("\n"); } print_to_internal(m, buf, pretty, indent + 1); do_indent(buf, pretty, indent); buf.push_str("}"); } ReflectValueRef::Enum(e) => { buf.push_str(": "); buf.push_str(e.name()); } ReflectValueRef::String(s) => { buf.push_str(": "); print_str_to(s, buf); } ReflectValueRef::Bytes(b) => { buf.push_str(": "); quote_escape_bytes_to(b, buf); } ReflectValueRef::I32(v) => { write!(buf, ": {}", v).unwrap(); } ReflectValueRef::I64(v) => { write!(buf, ": {}", v).unwrap(); } ReflectValueRef::U32(v) => { write!(buf, ": {}", v).unwrap(); } ReflectValueRef::U64(v) => { write!(buf, ": {}", v).unwrap(); } ReflectValueRef::Bool(v) => { write!(buf, ": {}", v).unwrap(); } ReflectValueRef::F32(v) => { write!(buf, ": {}", v).unwrap(); } ReflectValueRef::F64(v) => { write!(buf, ": {}", v).unwrap(); } } print_end_field(buf, pretty); } fn print_to_internal(m: &dyn Message, buf: &mut String, pretty: bool, indent: usize) { let d = m.descriptor(); let mut first = true; for f in d.fields() { match f.get_reflect(m) { ReflectFieldRef::Map(map) => { for (k, v) in map { print_start_field(buf, pretty, indent, &mut first, f.name()); buf.push_str(" {"); if pretty { buf.push_str("\n"); } let mut entry_first = true; print_field(buf, pretty, indent + 1, &mut entry_first, "key", k.as_ref()); print_field( buf, pretty, indent + 1, &mut entry_first, "value", v.as_ref(), ); do_indent(buf, pretty, indent); buf.push_str("}"); print_end_field(buf, pretty); } } ReflectFieldRef::Repeated(repeated) => { // TODO: do not print zeros for v3 for v in repeated { print_field(buf, pretty, indent, &mut first, f.name(), v.as_ref()); } } ReflectFieldRef::Optional(optional) => { if let Some(v) = optional { print_field(buf, pretty, indent, &mut first, f.name(), v); } } } } // TODO: unknown fields } /// Text-format pub fn print_to(m: &dyn Message, buf: &mut String) { print_to_internal(m, buf, false, 0) } fn print_to_string_internal(m: &dyn Message, pretty: bool) -> String { let mut r = String::new(); print_to_internal(m, &mut r, pretty, 0); r.to_string() } /// Text-format pub fn print_to_string(m: &dyn Message) -> String { print_to_string_internal(m, false) } /// Text-format to `fmt::Formatter`. pub fn fmt(m: &dyn Message, f: &mut fmt::Formatter) -> fmt::Result { let pretty = f.alternate(); f.write_str(&print_to_string_internal(m, pretty)) } #[cfg(test)] mod test { fn escape(data: &[u8]) -> String { let mut s = String::with_capacity(data.len() * 4); super::quote_bytes_to(data, &mut s); s } fn test_escape_unescape(text: &str, escaped: &str) { assert_eq!(text.as_bytes(), &super::unescape_string(escaped)[..]); assert_eq!(escaped, &escape(text.as_bytes())[..]); } #[test] fn test_print_to_bytes() { assert_eq!("ab", escape(b"ab")); assert_eq!("a\\\\023", escape(b"a\\023")); assert_eq!("a\\r\\n\\t \\'\\\"\\\\", escape(b"a\r\n\t '\"\\")); assert_eq!("\\344\\275\\240\\345\\245\\275", escape("你好".as_bytes())); } #[test] fn test_unescape_string() { test_escape_unescape("", ""); test_escape_unescape("aa", "aa"); test_escape_unescape("\n", "\\n"); test_escape_unescape("\r", "\\r"); test_escape_unescape("\t", "\\t"); test_escape_unescape("你好", "\\344\\275\\240\\345\\245\\275"); // hex assert_eq!(b"aaa\x01bbb", &super::unescape_string("aaa\\x01bbb")[..]); assert_eq!(b"aaa\xcdbbb", &super::unescape_string("aaa\\xCDbbb")[..]); assert_eq!(b"aaa\xcdbbb", &super::unescape_string("aaa\\xCDbbb")[..]); // quotes assert_eq!(b"aaa\"bbb", &super::unescape_string("aaa\\\"bbb")[..]); assert_eq!(b"aaa\'bbb", &super::unescape_string("aaa\\\'bbb")[..]); } }