• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! This module implements syntax validation that the parser doesn't handle.
2 //!
3 //! A failed validation emits a diagnostic.
4 
5 mod block;
6 
7 use rowan::Direction;
8 use rustc_lexer::unescape::{self, unescape_literal, Mode};
9 
10 use crate::{
11     algo,
12     ast::{self, HasAttrs, HasVisibility, IsString},
13     match_ast, AstNode, SyntaxError,
14     SyntaxKind::{CONST, FN, INT_NUMBER, TYPE_ALIAS},
15     SyntaxNode, SyntaxToken, TextSize, T,
16 };
17 
validate(root: &SyntaxNode) -> Vec<SyntaxError>18 pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
19     // FIXME:
20     // * Add unescape validation of raw string literals and raw byte string literals
21     // * Add validation of doc comments are being attached to nodes
22 
23     let mut errors = Vec::new();
24     for node in root.descendants() {
25         match_ast! {
26             match node {
27                 ast::Literal(it) => validate_literal(it, &mut errors),
28                 ast::Const(it) => validate_const(it, &mut errors),
29                 ast::BlockExpr(it) => block::validate_block_expr(it, &mut errors),
30                 ast::FieldExpr(it) => validate_numeric_name(it.name_ref(), &mut errors),
31                 ast::RecordExprField(it) => validate_numeric_name(it.name_ref(), &mut errors),
32                 ast::Visibility(it) => validate_visibility(it, &mut errors),
33                 ast::RangeExpr(it) => validate_range_expr(it, &mut errors),
34                 ast::PathSegment(it) => validate_path_keywords(it, &mut errors),
35                 ast::RefType(it) => validate_trait_object_ref_ty(it, &mut errors),
36                 ast::PtrType(it) => validate_trait_object_ptr_ty(it, &mut errors),
37                 ast::FnPtrType(it) => validate_trait_object_fn_ptr_ret_ty(it, &mut errors),
38                 ast::MacroRules(it) => validate_macro_rules(it, &mut errors),
39                 ast::LetExpr(it) => validate_let_expr(it, &mut errors),
40                 _ => (),
41             }
42         }
43     }
44     errors
45 }
46 
rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str, bool)47 fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str, bool) {
48     use unescape::EscapeError as EE;
49 
50     #[rustfmt::skip]
51     let err_message = match err {
52         EE::ZeroChars => {
53             "Literal must not be empty"
54         }
55         EE::MoreThanOneChar => {
56             "Literal must be one character long"
57         }
58         EE::LoneSlash => {
59             "Character must be escaped: `\\`"
60         }
61         EE::InvalidEscape => {
62             "Invalid escape"
63         }
64         EE::BareCarriageReturn | EE::BareCarriageReturnInRawString => {
65             "Character must be escaped: `\r`"
66         }
67         EE::EscapeOnlyChar => {
68             "Escape character `\\` must be escaped itself"
69         }
70         EE::TooShortHexEscape => {
71             "ASCII hex escape code must have exactly two digits"
72         }
73         EE::InvalidCharInHexEscape => {
74             "ASCII hex escape code must contain only hex characters"
75         }
76         EE::OutOfRangeHexEscape => {
77             "ASCII hex escape code must be at most 0x7F"
78         }
79         EE::NoBraceInUnicodeEscape => {
80             "Missing `{` to begin the unicode escape"
81         }
82         EE::InvalidCharInUnicodeEscape => {
83             "Unicode escape must contain only hex characters and underscores"
84         }
85         EE::EmptyUnicodeEscape => {
86             "Unicode escape must not be empty"
87         }
88         EE::UnclosedUnicodeEscape => {
89             "Missing `}` to terminate the unicode escape"
90         }
91         EE::LeadingUnderscoreUnicodeEscape => {
92             "Unicode escape code must not begin with an underscore"
93         }
94         EE::OverlongUnicodeEscape => {
95             "Unicode escape code must have at most 6 digits"
96         }
97         EE::LoneSurrogateUnicodeEscape => {
98             "Unicode escape code must not be a surrogate"
99         }
100         EE::OutOfRangeUnicodeEscape => {
101             "Unicode escape code must be at most 0x10FFFF"
102         }
103         EE::UnicodeEscapeInByte => {
104             "Byte literals must not contain unicode escapes"
105         }
106         EE::NonAsciiCharInByte  => {
107             "Byte literals must not contain non-ASCII characters"
108         }
109         EE::UnskippedWhitespaceWarning => "Whitespace after this escape is not skipped",
110         EE::MultipleSkippedLinesWarning => "Multiple lines are skipped by this escape",
111 
112     };
113 
114     (err_message, err.is_fatal())
115 }
116 
validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>)117 fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
118     // FIXME: move this function to outer scope (https://github.com/rust-lang/rust-analyzer/pull/2834#discussion_r366196658)
119     fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
120         text.rfind(end_delimiter).and_then(|end| text.get(prefix_len..end))
121     }
122 
123     let token = literal.token();
124     let text = token.text();
125 
126     // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-lang/rust-analyzer/pull/2834#discussion_r366199205)
127     let mut push_err = |prefix_len, off, err: unescape::EscapeError| {
128         let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap();
129         let (message, is_err) = rustc_unescape_error_to_string(err);
130         // FIXME: Emit lexer warnings
131         if is_err {
132             acc.push(SyntaxError::new_at_offset(message, off));
133         }
134     };
135 
136     match literal.kind() {
137         ast::LiteralKind::String(s) => {
138             if !s.is_raw() {
139                 if let Some(without_quotes) = unquote(text, 1, '"') {
140                     unescape_literal(without_quotes, Mode::Str, &mut |range, char| {
141                         if let Err(err) = char {
142                             push_err(1, range.start, err);
143                         }
144                     });
145                 }
146             }
147         }
148         ast::LiteralKind::ByteString(s) => {
149             if !s.is_raw() {
150                 if let Some(without_quotes) = unquote(text, 2, '"') {
151                     unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| {
152                         if let Err(err) = char {
153                             push_err(1, range.start, err);
154                         }
155                     });
156                 }
157             }
158         }
159         ast::LiteralKind::CString(s) => {
160             if !s.is_raw() {
161                 if let Some(without_quotes) = unquote(text, 2, '"') {
162                     unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| {
163                         if let Err(err) = char {
164                             push_err(1, range.start, err);
165                         }
166                     });
167                 }
168             }
169         }
170         ast::LiteralKind::Char(_) => {
171             if let Some(without_quotes) = unquote(text, 1, '\'') {
172                 unescape_literal(without_quotes, Mode::Char, &mut |range, char| {
173                     if let Err(err) = char {
174                         push_err(1, range.start, err);
175                     }
176                 });
177             }
178         }
179         ast::LiteralKind::Byte(_) => {
180             if let Some(without_quotes) = unquote(text, 2, '\'') {
181                 unescape_literal(without_quotes, Mode::Byte, &mut |range, char| {
182                     if let Err(err) = char {
183                         push_err(2, range.start, err);
184                     }
185                 });
186             }
187         }
188         ast::LiteralKind::IntNumber(_)
189         | ast::LiteralKind::FloatNumber(_)
190         | ast::LiteralKind::Bool(_) => {}
191     }
192 }
193 
validate_block_structure(root: &SyntaxNode)194 pub(crate) fn validate_block_structure(root: &SyntaxNode) {
195     let mut stack = Vec::new();
196     for node in root.descendants_with_tokens() {
197         match node.kind() {
198             T!['{'] => stack.push(node),
199             T!['}'] => {
200                 if let Some(pair) = stack.pop() {
201                     assert_eq!(
202                         node.parent(),
203                         pair.parent(),
204                         "\nunpaired curlies:\n{}\n{:#?}\n",
205                         root.text(),
206                         root,
207                     );
208                     assert!(
209                         node.next_sibling_or_token().is_none()
210                             && pair.prev_sibling_or_token().is_none(),
211                         "\nfloating curlies at {:?}\nfile:\n{}\nerror:\n{}\n",
212                         node,
213                         root.text(),
214                         node,
215                     );
216                 }
217             }
218             _ => (),
219         }
220     }
221 }
222 
validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>)223 fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>) {
224     if let Some(int_token) = int_token(name_ref) {
225         if int_token.text().chars().any(|c| !c.is_ascii_digit()) {
226             errors.push(SyntaxError::new(
227                 "Tuple (struct) field access is only allowed through \
228                 decimal integers with no underscores or suffix",
229                 int_token.text_range(),
230             ));
231         }
232     }
233 
234     fn int_token(name_ref: Option<ast::NameRef>) -> Option<SyntaxToken> {
235         name_ref?.syntax().first_child_or_token()?.into_token().filter(|it| it.kind() == INT_NUMBER)
236     }
237 }
238 
validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>)239 fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) {
240     let path_without_in_token = vis.in_token().is_none()
241         && vis.path().and_then(|p| p.as_single_name_ref()).and_then(|n| n.ident_token()).is_some();
242     if path_without_in_token {
243         errors.push(SyntaxError::new("incorrect visibility restriction", vis.syntax.text_range()));
244     }
245     let parent = match vis.syntax().parent() {
246         Some(it) => it,
247         None => return,
248     };
249     match parent.kind() {
250         FN | CONST | TYPE_ALIAS => (),
251         _ => return,
252     }
253 
254     let impl_def = match parent.parent().and_then(|it| it.parent()).and_then(ast::Impl::cast) {
255         Some(it) => it,
256         None => return,
257     };
258     // FIXME: disable validation if there's an attribute, since some proc macros use this syntax.
259     // ideally the validation would run only on the fully expanded code, then this wouldn't be necessary.
260     if impl_def.trait_().is_some() && impl_def.attrs().next().is_none() {
261         errors.push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax.text_range()));
262     }
263 }
264 
validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>)265 fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) {
266     if expr.op_kind() == Some(ast::RangeOp::Inclusive) && expr.end().is_none() {
267         errors.push(SyntaxError::new(
268             "An inclusive range must have an end expression",
269             expr.syntax().text_range(),
270         ));
271     }
272 }
273 
validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>)274 fn validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>) {
275     let path = segment.parent_path();
276     let is_path_start = segment.coloncolon_token().is_none() && path.qualifier().is_none();
277 
278     if let Some(token) = segment.self_token() {
279         if !is_path_start {
280             errors.push(SyntaxError::new(
281                 "The `self` keyword is only allowed as the first segment of a path",
282                 token.text_range(),
283             ));
284         }
285     } else if let Some(token) = segment.crate_token() {
286         if !is_path_start || use_prefix(path).is_some() {
287             errors.push(SyntaxError::new(
288                 "The `crate` keyword is only allowed as the first segment of a path",
289                 token.text_range(),
290             ));
291         }
292     }
293 
294     fn use_prefix(mut path: ast::Path) -> Option<ast::Path> {
295         for node in path.syntax().ancestors().skip(1) {
296             match_ast! {
297                 match node {
298                     ast::UseTree(it) => if let Some(tree_path) = it.path() {
299                         // Even a top-level path exists within a `UseTree` so we must explicitly
300                         // allow our path but disallow anything else
301                         if tree_path != path {
302                             return Some(tree_path);
303                         }
304                     },
305                     ast::UseTreeList(_) => continue,
306                     ast::Path(parent) => path = parent,
307                     _ => return None,
308                 }
309             };
310         }
311         None
312     }
313 }
314 
validate_trait_object_ref_ty(ty: ast::RefType, errors: &mut Vec<SyntaxError>)315 fn validate_trait_object_ref_ty(ty: ast::RefType, errors: &mut Vec<SyntaxError>) {
316     if let Some(ast::Type::DynTraitType(ty)) = ty.ty() {
317         if let Some(err) = validate_trait_object_ty(ty) {
318             errors.push(err);
319         }
320     }
321 }
322 
validate_trait_object_ptr_ty(ty: ast::PtrType, errors: &mut Vec<SyntaxError>)323 fn validate_trait_object_ptr_ty(ty: ast::PtrType, errors: &mut Vec<SyntaxError>) {
324     if let Some(ast::Type::DynTraitType(ty)) = ty.ty() {
325         if let Some(err) = validate_trait_object_ty(ty) {
326             errors.push(err);
327         }
328     }
329 }
330 
validate_trait_object_fn_ptr_ret_ty(ty: ast::FnPtrType, errors: &mut Vec<SyntaxError>)331 fn validate_trait_object_fn_ptr_ret_ty(ty: ast::FnPtrType, errors: &mut Vec<SyntaxError>) {
332     if let Some(ast::Type::DynTraitType(ty)) = ty.ret_type().and_then(|ty| ty.ty()) {
333         if let Some(err) = validate_trait_object_ty(ty) {
334             errors.push(err);
335         }
336     }
337 }
338 
validate_trait_object_ty(ty: ast::DynTraitType) -> Option<SyntaxError>339 fn validate_trait_object_ty(ty: ast::DynTraitType) -> Option<SyntaxError> {
340     let tbl = ty.type_bound_list()?;
341 
342     if tbl.bounds().count() > 1 {
343         let dyn_token = ty.dyn_token()?;
344         let potential_parenthesis =
345             algo::skip_trivia_token(dyn_token.prev_token()?, Direction::Prev)?;
346         let kind = potential_parenthesis.kind();
347         if !matches!(kind, T!['('] | T![<] | T![=]) {
348             return Some(SyntaxError::new("ambiguous `+` in a type", ty.syntax().text_range()));
349         }
350     }
351     None
352 }
353 
validate_macro_rules(mac: ast::MacroRules, errors: &mut Vec<SyntaxError>)354 fn validate_macro_rules(mac: ast::MacroRules, errors: &mut Vec<SyntaxError>) {
355     if let Some(vis) = mac.visibility() {
356         errors.push(SyntaxError::new(
357             "visibilities are not allowed on `macro_rules!` items",
358             vis.syntax().text_range(),
359         ));
360     }
361 }
362 
validate_const(const_: ast::Const, errors: &mut Vec<SyntaxError>)363 fn validate_const(const_: ast::Const, errors: &mut Vec<SyntaxError>) {
364     if let Some(mut_token) = const_
365         .const_token()
366         .and_then(|t| t.next_token())
367         .and_then(|t| algo::skip_trivia_token(t, Direction::Next))
368         .filter(|t| t.kind() == T![mut])
369     {
370         errors.push(SyntaxError::new("const globals cannot be mutable", mut_token.text_range()));
371     }
372 }
373 
validate_let_expr(let_: ast::LetExpr, errors: &mut Vec<SyntaxError>)374 fn validate_let_expr(let_: ast::LetExpr, errors: &mut Vec<SyntaxError>) {
375     let mut token = let_.syntax().clone();
376     loop {
377         token = match token.parent() {
378             Some(it) => it,
379             None => break,
380         };
381 
382         if ast::ParenExpr::can_cast(token.kind()) {
383             continue;
384         } else if let Some(it) = ast::BinExpr::cast(token.clone()) {
385             if it.op_kind() == Some(ast::BinaryOp::LogicOp(ast::LogicOp::And)) {
386                 continue;
387             }
388         } else if ast::IfExpr::can_cast(token.kind())
389             || ast::WhileExpr::can_cast(token.kind())
390             || ast::MatchGuard::can_cast(token.kind())
391         {
392             // It must be part of the condition since the expressions are inside a block.
393             return;
394         }
395 
396         break;
397     }
398     errors.push(SyntaxError::new(
399         "`let` expressions are not supported here",
400         let_.syntax().text_range(),
401     ));
402 }
403