1 //! This module implements syntax validation that the parser doesn't handle.
2 //!
3 //! A failed validation emits a diagnostic.
4
5 mod block;
6
7 use rowan::Direction;
8 use rustc_lexer::unescape::{self, unescape_literal, Mode};
9
10 use crate::{
11 algo,
12 ast::{self, HasAttrs, HasVisibility, IsString},
13 match_ast, AstNode, SyntaxError,
14 SyntaxKind::{CONST, FN, INT_NUMBER, TYPE_ALIAS},
15 SyntaxNode, SyntaxToken, TextSize, T,
16 };
17
validate(root: &SyntaxNode) -> Vec<SyntaxError>18 pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
19 // FIXME:
20 // * Add unescape validation of raw string literals and raw byte string literals
21 // * Add validation of doc comments are being attached to nodes
22
23 let mut errors = Vec::new();
24 for node in root.descendants() {
25 match_ast! {
26 match node {
27 ast::Literal(it) => validate_literal(it, &mut errors),
28 ast::Const(it) => validate_const(it, &mut errors),
29 ast::BlockExpr(it) => block::validate_block_expr(it, &mut errors),
30 ast::FieldExpr(it) => validate_numeric_name(it.name_ref(), &mut errors),
31 ast::RecordExprField(it) => validate_numeric_name(it.name_ref(), &mut errors),
32 ast::Visibility(it) => validate_visibility(it, &mut errors),
33 ast::RangeExpr(it) => validate_range_expr(it, &mut errors),
34 ast::PathSegment(it) => validate_path_keywords(it, &mut errors),
35 ast::RefType(it) => validate_trait_object_ref_ty(it, &mut errors),
36 ast::PtrType(it) => validate_trait_object_ptr_ty(it, &mut errors),
37 ast::FnPtrType(it) => validate_trait_object_fn_ptr_ret_ty(it, &mut errors),
38 ast::MacroRules(it) => validate_macro_rules(it, &mut errors),
39 ast::LetExpr(it) => validate_let_expr(it, &mut errors),
40 _ => (),
41 }
42 }
43 }
44 errors
45 }
46
rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str, bool)47 fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str, bool) {
48 use unescape::EscapeError as EE;
49
50 #[rustfmt::skip]
51 let err_message = match err {
52 EE::ZeroChars => {
53 "Literal must not be empty"
54 }
55 EE::MoreThanOneChar => {
56 "Literal must be one character long"
57 }
58 EE::LoneSlash => {
59 "Character must be escaped: `\\`"
60 }
61 EE::InvalidEscape => {
62 "Invalid escape"
63 }
64 EE::BareCarriageReturn | EE::BareCarriageReturnInRawString => {
65 "Character must be escaped: `\r`"
66 }
67 EE::EscapeOnlyChar => {
68 "Escape character `\\` must be escaped itself"
69 }
70 EE::TooShortHexEscape => {
71 "ASCII hex escape code must have exactly two digits"
72 }
73 EE::InvalidCharInHexEscape => {
74 "ASCII hex escape code must contain only hex characters"
75 }
76 EE::OutOfRangeHexEscape => {
77 "ASCII hex escape code must be at most 0x7F"
78 }
79 EE::NoBraceInUnicodeEscape => {
80 "Missing `{` to begin the unicode escape"
81 }
82 EE::InvalidCharInUnicodeEscape => {
83 "Unicode escape must contain only hex characters and underscores"
84 }
85 EE::EmptyUnicodeEscape => {
86 "Unicode escape must not be empty"
87 }
88 EE::UnclosedUnicodeEscape => {
89 "Missing `}` to terminate the unicode escape"
90 }
91 EE::LeadingUnderscoreUnicodeEscape => {
92 "Unicode escape code must not begin with an underscore"
93 }
94 EE::OverlongUnicodeEscape => {
95 "Unicode escape code must have at most 6 digits"
96 }
97 EE::LoneSurrogateUnicodeEscape => {
98 "Unicode escape code must not be a surrogate"
99 }
100 EE::OutOfRangeUnicodeEscape => {
101 "Unicode escape code must be at most 0x10FFFF"
102 }
103 EE::UnicodeEscapeInByte => {
104 "Byte literals must not contain unicode escapes"
105 }
106 EE::NonAsciiCharInByte => {
107 "Byte literals must not contain non-ASCII characters"
108 }
109 EE::UnskippedWhitespaceWarning => "Whitespace after this escape is not skipped",
110 EE::MultipleSkippedLinesWarning => "Multiple lines are skipped by this escape",
111
112 };
113
114 (err_message, err.is_fatal())
115 }
116
validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>)117 fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
118 // FIXME: move this function to outer scope (https://github.com/rust-lang/rust-analyzer/pull/2834#discussion_r366196658)
119 fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
120 text.rfind(end_delimiter).and_then(|end| text.get(prefix_len..end))
121 }
122
123 let token = literal.token();
124 let text = token.text();
125
126 // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-lang/rust-analyzer/pull/2834#discussion_r366199205)
127 let mut push_err = |prefix_len, off, err: unescape::EscapeError| {
128 let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap();
129 let (message, is_err) = rustc_unescape_error_to_string(err);
130 // FIXME: Emit lexer warnings
131 if is_err {
132 acc.push(SyntaxError::new_at_offset(message, off));
133 }
134 };
135
136 match literal.kind() {
137 ast::LiteralKind::String(s) => {
138 if !s.is_raw() {
139 if let Some(without_quotes) = unquote(text, 1, '"') {
140 unescape_literal(without_quotes, Mode::Str, &mut |range, char| {
141 if let Err(err) = char {
142 push_err(1, range.start, err);
143 }
144 });
145 }
146 }
147 }
148 ast::LiteralKind::ByteString(s) => {
149 if !s.is_raw() {
150 if let Some(without_quotes) = unquote(text, 2, '"') {
151 unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| {
152 if let Err(err) = char {
153 push_err(1, range.start, err);
154 }
155 });
156 }
157 }
158 }
159 ast::LiteralKind::CString(s) => {
160 if !s.is_raw() {
161 if let Some(without_quotes) = unquote(text, 2, '"') {
162 unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| {
163 if let Err(err) = char {
164 push_err(1, range.start, err);
165 }
166 });
167 }
168 }
169 }
170 ast::LiteralKind::Char(_) => {
171 if let Some(without_quotes) = unquote(text, 1, '\'') {
172 unescape_literal(without_quotes, Mode::Char, &mut |range, char| {
173 if let Err(err) = char {
174 push_err(1, range.start, err);
175 }
176 });
177 }
178 }
179 ast::LiteralKind::Byte(_) => {
180 if let Some(without_quotes) = unquote(text, 2, '\'') {
181 unescape_literal(without_quotes, Mode::Byte, &mut |range, char| {
182 if let Err(err) = char {
183 push_err(2, range.start, err);
184 }
185 });
186 }
187 }
188 ast::LiteralKind::IntNumber(_)
189 | ast::LiteralKind::FloatNumber(_)
190 | ast::LiteralKind::Bool(_) => {}
191 }
192 }
193
validate_block_structure(root: &SyntaxNode)194 pub(crate) fn validate_block_structure(root: &SyntaxNode) {
195 let mut stack = Vec::new();
196 for node in root.descendants_with_tokens() {
197 match node.kind() {
198 T!['{'] => stack.push(node),
199 T!['}'] => {
200 if let Some(pair) = stack.pop() {
201 assert_eq!(
202 node.parent(),
203 pair.parent(),
204 "\nunpaired curlies:\n{}\n{:#?}\n",
205 root.text(),
206 root,
207 );
208 assert!(
209 node.next_sibling_or_token().is_none()
210 && pair.prev_sibling_or_token().is_none(),
211 "\nfloating curlies at {:?}\nfile:\n{}\nerror:\n{}\n",
212 node,
213 root.text(),
214 node,
215 );
216 }
217 }
218 _ => (),
219 }
220 }
221 }
222
validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>)223 fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>) {
224 if let Some(int_token) = int_token(name_ref) {
225 if int_token.text().chars().any(|c| !c.is_ascii_digit()) {
226 errors.push(SyntaxError::new(
227 "Tuple (struct) field access is only allowed through \
228 decimal integers with no underscores or suffix",
229 int_token.text_range(),
230 ));
231 }
232 }
233
234 fn int_token(name_ref: Option<ast::NameRef>) -> Option<SyntaxToken> {
235 name_ref?.syntax().first_child_or_token()?.into_token().filter(|it| it.kind() == INT_NUMBER)
236 }
237 }
238
validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>)239 fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) {
240 let path_without_in_token = vis.in_token().is_none()
241 && vis.path().and_then(|p| p.as_single_name_ref()).and_then(|n| n.ident_token()).is_some();
242 if path_without_in_token {
243 errors.push(SyntaxError::new("incorrect visibility restriction", vis.syntax.text_range()));
244 }
245 let parent = match vis.syntax().parent() {
246 Some(it) => it,
247 None => return,
248 };
249 match parent.kind() {
250 FN | CONST | TYPE_ALIAS => (),
251 _ => return,
252 }
253
254 let impl_def = match parent.parent().and_then(|it| it.parent()).and_then(ast::Impl::cast) {
255 Some(it) => it,
256 None => return,
257 };
258 // FIXME: disable validation if there's an attribute, since some proc macros use this syntax.
259 // ideally the validation would run only on the fully expanded code, then this wouldn't be necessary.
260 if impl_def.trait_().is_some() && impl_def.attrs().next().is_none() {
261 errors.push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax.text_range()));
262 }
263 }
264
validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>)265 fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) {
266 if expr.op_kind() == Some(ast::RangeOp::Inclusive) && expr.end().is_none() {
267 errors.push(SyntaxError::new(
268 "An inclusive range must have an end expression",
269 expr.syntax().text_range(),
270 ));
271 }
272 }
273
validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>)274 fn validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>) {
275 let path = segment.parent_path();
276 let is_path_start = segment.coloncolon_token().is_none() && path.qualifier().is_none();
277
278 if let Some(token) = segment.self_token() {
279 if !is_path_start {
280 errors.push(SyntaxError::new(
281 "The `self` keyword is only allowed as the first segment of a path",
282 token.text_range(),
283 ));
284 }
285 } else if let Some(token) = segment.crate_token() {
286 if !is_path_start || use_prefix(path).is_some() {
287 errors.push(SyntaxError::new(
288 "The `crate` keyword is only allowed as the first segment of a path",
289 token.text_range(),
290 ));
291 }
292 }
293
294 fn use_prefix(mut path: ast::Path) -> Option<ast::Path> {
295 for node in path.syntax().ancestors().skip(1) {
296 match_ast! {
297 match node {
298 ast::UseTree(it) => if let Some(tree_path) = it.path() {
299 // Even a top-level path exists within a `UseTree` so we must explicitly
300 // allow our path but disallow anything else
301 if tree_path != path {
302 return Some(tree_path);
303 }
304 },
305 ast::UseTreeList(_) => continue,
306 ast::Path(parent) => path = parent,
307 _ => return None,
308 }
309 };
310 }
311 None
312 }
313 }
314
validate_trait_object_ref_ty(ty: ast::RefType, errors: &mut Vec<SyntaxError>)315 fn validate_trait_object_ref_ty(ty: ast::RefType, errors: &mut Vec<SyntaxError>) {
316 if let Some(ast::Type::DynTraitType(ty)) = ty.ty() {
317 if let Some(err) = validate_trait_object_ty(ty) {
318 errors.push(err);
319 }
320 }
321 }
322
validate_trait_object_ptr_ty(ty: ast::PtrType, errors: &mut Vec<SyntaxError>)323 fn validate_trait_object_ptr_ty(ty: ast::PtrType, errors: &mut Vec<SyntaxError>) {
324 if let Some(ast::Type::DynTraitType(ty)) = ty.ty() {
325 if let Some(err) = validate_trait_object_ty(ty) {
326 errors.push(err);
327 }
328 }
329 }
330
validate_trait_object_fn_ptr_ret_ty(ty: ast::FnPtrType, errors: &mut Vec<SyntaxError>)331 fn validate_trait_object_fn_ptr_ret_ty(ty: ast::FnPtrType, errors: &mut Vec<SyntaxError>) {
332 if let Some(ast::Type::DynTraitType(ty)) = ty.ret_type().and_then(|ty| ty.ty()) {
333 if let Some(err) = validate_trait_object_ty(ty) {
334 errors.push(err);
335 }
336 }
337 }
338
validate_trait_object_ty(ty: ast::DynTraitType) -> Option<SyntaxError>339 fn validate_trait_object_ty(ty: ast::DynTraitType) -> Option<SyntaxError> {
340 let tbl = ty.type_bound_list()?;
341
342 if tbl.bounds().count() > 1 {
343 let dyn_token = ty.dyn_token()?;
344 let potential_parenthesis =
345 algo::skip_trivia_token(dyn_token.prev_token()?, Direction::Prev)?;
346 let kind = potential_parenthesis.kind();
347 if !matches!(kind, T!['('] | T![<] | T![=]) {
348 return Some(SyntaxError::new("ambiguous `+` in a type", ty.syntax().text_range()));
349 }
350 }
351 None
352 }
353
validate_macro_rules(mac: ast::MacroRules, errors: &mut Vec<SyntaxError>)354 fn validate_macro_rules(mac: ast::MacroRules, errors: &mut Vec<SyntaxError>) {
355 if let Some(vis) = mac.visibility() {
356 errors.push(SyntaxError::new(
357 "visibilities are not allowed on `macro_rules!` items",
358 vis.syntax().text_range(),
359 ));
360 }
361 }
362
validate_const(const_: ast::Const, errors: &mut Vec<SyntaxError>)363 fn validate_const(const_: ast::Const, errors: &mut Vec<SyntaxError>) {
364 if let Some(mut_token) = const_
365 .const_token()
366 .and_then(|t| t.next_token())
367 .and_then(|t| algo::skip_trivia_token(t, Direction::Next))
368 .filter(|t| t.kind() == T![mut])
369 {
370 errors.push(SyntaxError::new("const globals cannot be mutable", mut_token.text_range()));
371 }
372 }
373
validate_let_expr(let_: ast::LetExpr, errors: &mut Vec<SyntaxError>)374 fn validate_let_expr(let_: ast::LetExpr, errors: &mut Vec<SyntaxError>) {
375 let mut token = let_.syntax().clone();
376 loop {
377 token = match token.parent() {
378 Some(it) => it,
379 None => break,
380 };
381
382 if ast::ParenExpr::can_cast(token.kind()) {
383 continue;
384 } else if let Some(it) = ast::BinExpr::cast(token.clone()) {
385 if it.op_kind() == Some(ast::BinaryOp::LogicOp(ast::LogicOp::And)) {
386 continue;
387 }
388 } else if ast::IfExpr::can_cast(token.kind())
389 || ast::WhileExpr::can_cast(token.kind())
390 || ast::MatchGuard::can_cast(token.kind())
391 {
392 // It must be part of the condition since the expressions are inside a block.
393 return;
394 }
395
396 break;
397 }
398 errors.push(SyntaxError::new(
399 "`let` expressions are not supported here",
400 let_.syntax().text_range(),
401 ));
402 }
403