bevy_reflect/path/
parse.rs

1use std::{
2    fmt::{self, Write},
3    num::ParseIntError,
4    str::from_utf8_unchecked,
5};
6
7use thiserror::Error;
8
9use super::{Access, ReflectPathError};
10
11/// An error that occurs when parsing reflect path strings.
12#[derive(Debug, PartialEq, Eq, Error)]
13#[error(transparent)]
14pub struct ParseError<'a>(Error<'a>);
15
16/// A parse error for a path string.
17#[derive(Debug, PartialEq, Eq, Error)]
18enum Error<'a> {
19    #[error("expected an identifier, but reached end of path string")]
20    NoIdent,
21
22    #[error("expected an identifier, got '{0}' instead")]
23    ExpectedIdent(Token<'a>),
24
25    #[error("failed to parse index as integer")]
26    InvalidIndex(#[from] ParseIntError),
27
28    #[error("a '[' wasn't closed, reached end of path string before finding a ']'")]
29    Unclosed,
30
31    #[error("a '[' wasn't closed properly, got '{0}' instead")]
32    BadClose(Token<'a>),
33
34    #[error("a ']' was found before an opening '['")]
35    CloseBeforeOpen,
36}
37
38pub(super) struct PathParser<'a> {
39    path: &'a str,
40    remaining: &'a [u8],
41}
42impl<'a> PathParser<'a> {
43    pub(super) fn new(path: &'a str) -> Self {
44        let remaining = path.as_bytes();
45        PathParser { path, remaining }
46    }
47
48    fn next_token(&mut self) -> Option<Token<'a>> {
49        let to_parse = self.remaining;
50
51        // Return with `None` if empty.
52        let (first_byte, remaining) = to_parse.split_first()?;
53
54        if let Some(token) = Token::symbol_from_byte(*first_byte) {
55            self.remaining = remaining; // NOTE: all symbols are ASCII
56            return Some(token);
57        }
58        // We are parsing either `0123` or `field`.
59        // If we do not find a subsequent token, we are at the end of the parse string.
60        let ident_len = to_parse.iter().position(|t| Token::SYMBOLS.contains(t));
61        let (ident, remaining) = to_parse.split_at(ident_len.unwrap_or(to_parse.len()));
62        // SAFETY: This relies on `self.remaining` always remaining valid UTF8:
63        // - self.remaining is a slice derived from self.path (valid &str)
64        // - The slice's end is either the same as the valid &str or
65        //   the last byte before an ASCII utf-8 character (ie: it is a char
66        //   boundary).
67        // - The slice always starts after a symbol ie: an ASCII character's boundary.
68        #[allow(unsafe_code)]
69        let ident = unsafe { from_utf8_unchecked(ident) };
70
71        self.remaining = remaining;
72        Some(Token::Ident(Ident(ident)))
73    }
74
75    fn next_ident(&mut self) -> Result<Ident<'a>, Error<'a>> {
76        match self.next_token() {
77            Some(Token::Ident(ident)) => Ok(ident),
78            Some(other) => Err(Error::ExpectedIdent(other)),
79            None => Err(Error::NoIdent),
80        }
81    }
82
83    fn access_following(&mut self, token: Token<'a>) -> Result<Access<'a>, Error<'a>> {
84        match token {
85            Token::Dot => Ok(self.next_ident()?.field()),
86            Token::Pound => self.next_ident()?.field_index(),
87            Token::Ident(ident) => Ok(ident.field()),
88            Token::CloseBracket => Err(Error::CloseBeforeOpen),
89            Token::OpenBracket => {
90                let index_ident = self.next_ident()?.list_index()?;
91                match self.next_token() {
92                    Some(Token::CloseBracket) => Ok(index_ident),
93                    Some(other) => Err(Error::BadClose(other)),
94                    None => Err(Error::Unclosed),
95                }
96            }
97        }
98    }
99
100    fn offset(&self) -> usize {
101        self.path.len() - self.remaining.len()
102    }
103}
104impl<'a> Iterator for PathParser<'a> {
105    type Item = (Result<Access<'a>, ReflectPathError<'a>>, usize);
106
107    fn next(&mut self) -> Option<Self::Item> {
108        let token = self.next_token()?;
109        let offset = self.offset();
110        Some((
111            self.access_following(token)
112                .map_err(|error| ReflectPathError::ParseError {
113                    offset,
114                    path: self.path,
115                    error: ParseError(error),
116                }),
117            offset,
118        ))
119    }
120}
121
/// A run of non-symbol characters taken from the path string: either a field
/// name or the textual form of an index.
#[derive(Debug, PartialEq, Eq)]
struct Ident<'a>(&'a str);
124
125impl<'a> Ident<'a> {
126    fn field(self) -> Access<'a> {
127        let field = |_| Access::Field(self.0.into());
128        self.0.parse().map(Access::TupleIndex).unwrap_or_else(field)
129    }
130    fn field_index(self) -> Result<Access<'a>, Error<'a>> {
131        Ok(Access::FieldIndex(self.0.parse()?))
132    }
133    fn list_index(self) -> Result<Access<'a>, Error<'a>> {
134        Ok(Access::ListIndex(self.0.parse()?))
135    }
136}
137
138// NOTE: We use repr(u8) so that the `match byte` in `Token::symbol_from_byte`
139// becomes a "check `byte` is one of SYMBOLS and forward its value" this makes
140// the optimizer happy, and shaves off a few cycles.
141#[derive(Debug, PartialEq, Eq)]
142#[repr(u8)]
143enum Token<'a> {
144    Dot = b'.',
145    Pound = b'#',
146    OpenBracket = b'[',
147    CloseBracket = b']',
148    Ident(Ident<'a>),
149}
150impl fmt::Display for Token<'_> {
151    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
152        match self {
153            Token::Dot => f.write_char('.'),
154            Token::Pound => f.write_char('#'),
155            Token::OpenBracket => f.write_char('['),
156            Token::CloseBracket => f.write_char(']'),
157            Token::Ident(ident) => f.write_str(ident.0),
158        }
159    }
160}
161impl<'a> Token<'a> {
162    const SYMBOLS: &'static [u8] = b".#[]";
163    fn symbol_from_byte(byte: u8) -> Option<Self> {
164        match byte {
165            b'.' => Some(Self::Dot),
166            b'#' => Some(Self::Pound),
167            b'[' => Some(Self::OpenBracket),
168            b']' => Some(Self::CloseBracket),
169            _ => None,
170        }
171    }
172}
173
#[cfg(test)]
mod test {
    use super::*;
    use crate::path::ParsedPath;

    /// Asserts that parsing `path` fails with exactly `error`, reported at
    /// byte `offset`.
    fn assert_parse_error(path: &'static str, offset: usize, error: Error<'static>) {
        assert_eq!(
            ParsedPath::parse_static(path),
            Err(ReflectPathError::ParseError {
                error: ParseError(error),
                offset,
                path,
            }),
        );
    }

    #[test]
    fn parse_invalid() {
        assert_eq!(
            ParsedPath::parse_static("x.."),
            Err(ReflectPathError::ParseError {
                error: ParseError(Error::ExpectedIdent(Token::Dot)),
                offset: 2,
                path: "x..",
            }),
        );
        // `ParseIntError` cannot be constructed directly, so only the variant
        // and the surrounding fields are matched here.
        assert!(matches!(
            ParsedPath::parse_static("y[badindex]"),
            Err(ReflectPathError::ParseError {
                error: ParseError(Error::InvalidIndex(_)),
                offset: 2,
                path: "y[badindex]",
            }),
        ));
    }

    #[test]
    fn parse_missing_ident() {
        // The path ends right after a symbol that requires an identifier.
        assert_parse_error("x.", 2, Error::NoIdent);
        assert_parse_error("x#", 2, Error::NoIdent);
    }

    #[test]
    fn parse_invalid_field_index() {
        assert!(matches!(
            ParsedPath::parse_static("x#bad"),
            Err(ReflectPathError::ParseError {
                error: ParseError(Error::InvalidIndex(_)),
                offset: 2,
                path: "x#bad",
            }),
        ));
    }

    #[test]
    fn parse_bad_brackets() {
        // End of input after the index, before any `]`.
        assert_parse_error("x[0", 2, Error::Unclosed);
        // A token other than `]` follows the index.
        assert_parse_error("x[0.", 2, Error::BadClose(Token::Dot));
        // A `]` where the start of an access is expected.
        assert_parse_error("x]", 2, Error::CloseBeforeOpen);
    }
}