]> Witch of Git - ess/blob - src/parser.rs
Merge branch 'documentation' into version/0.4
[ess] / src / parser.rs
1 //! Functions to parse s-expressions and expression atoms.
2 //!
3 //! This module contains the core parsing machinery.
4 //!
5 //! * If you're interested in getting a parsed s-expression that you can use,
6 //! then looking at [`parse`] and [`parse_one`] are your best bet.
7 //! * If you want to write your own parsers that contain s-expressions,
8 //! [`ParseResult`] and [`parse_expression`] will be the most useful to you.
9 //!
10 //! [`parse`]: fn.parse.html
11 //! [`parse_one`]: fn.parse_one.html
12 //! [`ParseResult`]: enum.ParseResult.html
13 //! [`parse_expression`]: fn.parse_expression.html
14
15 use sexp::Sexp;
16 use span::{Span, ByteSpan};
17
18 \f
19 // Parsing Types ///////////////////////////////////////////////////////////////
20
/// The outcome of running one intermediate parser over some input.
///
/// Every intermediate parser in this module hands back a `ParseResult`. If all
/// you want is finished s-expressions you can ignore this type: the top level
/// entry points (`parse` and `parse_one`) translate it into a tuple or a
/// `Result` for you.
///
/// A parser that could not produce a value yields `Error`; one that succeeded
/// yields `Done`, carrying both the value and the text that is still left to
/// be parsed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseResult<'a, T, E> {
    /// Success. Holds the not-yet-consumed remainder of the input first, then
    /// the value the parser produced.
    Done(&'a str, T),
    /// Failure. `E` describes why the parser could not produce a value.
    Error(E),
}
38
39 /// Indicates how parsing failed.
40 ///
41 /// Most `ParseError` variants contain a `Box<ParseError>` that represents the
42 /// cause of that error. Using this, `ParseError` variants can be chained to
43 /// produce a more complete picture of what exactly went wrong during parsing.
44 #[derive(Debug, PartialEq, Eq, Clone)]
45 pub enum ParseError<Loc=ByteSpan> where Loc: Span {
46 /// Parsing reached the end of input where not expecting to, usually this
47 /// will be contained inside another `ParseError` like `String(box
48 /// UnexpectedEof, ...)` which indicates that the closing quote was never
49 /// found.
50 UnexpectedEof,
51 /// Some problem occurred while parsing a list, along with the cause of that
52 /// error.
53 List(Box<ParseError>, Loc),
54 /// Some problem occurred while parsing an s-expression. This will only be
55 /// generated if EOF is reached unexpectedly at the beginning of
56 /// `parse_expression`, so it should probably be removed.
57 Sexp(Box<ParseError>, Loc),
58 /// Some problem occurred while parsing a character literal, along with the
59 /// cause of the error.
60 Char(Box<ParseError>, Loc),
61 /// Some problem occurred while parsing a string literal, along with the
62 /// cause of the error.
63 String(Box<ParseError>, Loc),
64 /// Some problem occurred while parsing a symbol, along with the cause of
65 /// the error.
66 Symbol(Box<ParseError>, Loc),
67 /// Some problem occurred while parsing a number literal, along with the
68 /// cause of the error.
69 Number(Box<ParseError>, Loc),
70 /// An unexpected character was found. This will usually be the root cause
71 /// in some chain of `ParseError`s.
72 Unexpected(char, Loc::Begin),
73 }
74 use self::ParseResult::*;
75
76 \f
77 // Parsing Utilities ///////////////////////////////////////////////////////////
78
/// Recognises values that terminate an atom (a symbol or a number).
trait IsDelimeter {
    /// Returns `true` when `self` ends the token currently being read.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character delimits a token when it is whitespace or one of
    /// `; ( ) [ ] { } " ' `` ` `` ,`.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
93
/// Skips the leading whitespace of `$input`.
///
/// Evaluates to the pair `(remaining_input, new_start_loc)`, where
/// `new_start_loc` is `$start_loc` advanced by the number of bytes skipped.
///
/// If `$input` contains no non-whitespace character at all, the *enclosing
/// function* returns `Error($ErrorFn(Box::new(ParseError::UnexpectedEof), span))`
/// where `span` is an empty span at the end of the input, offset by
/// `$start_loc`.
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        match $input.find(|c: char| !c.is_whitespace()) {
            Some(first) => (&$input[first..], $start_loc + first),
            None => {
                return Error($ErrorFn(
                    Box::new(ParseError::UnexpectedEof),
                    ($input.len(), $input.len()).offset($start_loc)));
            }
        }
    }
}
105
106 \f
107 // Top Level Parsers ///////////////////////////////////////////////////////////
108
109 /// Parse a sequence of s-expressions.
110 ///
111 /// This function returns `(Vec<Sexp>, Option<ParseError>)` so that it can
112 /// return partial results, for when some component parses successfully and a
113 /// later part fails.
114 ///
115 /// # Errors
116 ///
117 /// If the text contains an invalid s-expression (imbalanced parenthesis,
118 /// quotes, invalid numbers like 123q, etc.) then the parser will stop and
119 /// return an error. Every s-expression before that point that successfully
120 /// parsed will still be returned.
121 ///
122 /// # Examples
123 ///
124 /// We can get useful partial results
125 ///
126 /// ```rust
127 /// # use ess::parser::parse;
128 /// let (exprs, err) = parse("1 2 3 ( 4");
129 /// assert_eq!(exprs.len(), 3);
130 /// assert!(err.is_some());
131 /// ```
132 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
133 let mut start_loc = 0;
134 let mut results = Vec::new();
135 loop {
136 match parse_expression(input, start_loc) {
137 Done(rest, result) => {
138 input = rest;
139 start_loc = result.get_loc().1;
140 results.push(result);
141 if rest.trim() == "" {
142 return (results, None);
143 }
144 }
145 Error(err) => {
146 return (results, Some(err));
147 }
148 }
149 }
150 }
151
152 /// Parses a single s-expression, ignoring any trailing text.
153 ///
154 /// This function returns a pair of the parsed s-expression and the tail of the text.
155 ///
156 /// # Errors
157 ///
158 /// If the text begins with an invalid s-expression (imbalanced parenthesis,
159 /// quotes, invalid numbers like 123q, etc.) then the parser will return an
160 /// error. Any text after the first s-expression doesn't affect the parsing.
161 ///
162 /// # Examples
163 ///
164 /// ```rust
165 /// # use ess::parser::parse_one;
166 /// let (expr, rest) = parse_one("1 (").unwrap();
167 /// assert_eq!(rest, " (");
168 /// ```
169 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
170 match parse_expression(input, 0) {
171 Done(rest, result) => Ok((result, rest)),
172 Error(err) => Err(err),
173 }
174 }
175
176 \f
177 // Core Parsers ////////////////////////////////////////////////////////////////
178
179 // TODO: All of these parsers deserve docs, but since they're somewhat internal
180 // parsers, it's less critical than the rest of the API.
181
182 #[allow(missing_docs)]
183 pub fn parse_expression(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
184 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
185
186 match input.chars().next() {
187 Some('0'...'9') => parse_number(input, start_loc),
188 Some('(') => parse_list(input, start_loc),
189 Some('#') => parse_character(input, start_loc),
190 Some('"') => parse_string(input, start_loc),
191 Some(_) => parse_symbol(input, start_loc),
192 None => unreachable!(),
193 }
194 }
195
196 #[allow(missing_docs)]
197 pub fn parse_list(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
198 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::List);
199
200 match input.chars().nth(0) {
201 Some('(') => (),
202 Some(c) =>
203 return Error(ParseError::List(
204 Box::new(ParseError::Unexpected(c, 0)),
205 (0, 0).offset(start_loc))),
206 None => unreachable!(),
207 }
208
209 let mut input = &input[1..];
210 let mut loc = start_loc + 1;
211 let mut members = Vec::new();
212 loop {
213 {
214 let (new_input, new_loc) = consume_whitespace!(input, loc, ParseError::List);
215 input = new_input;
216 loc = new_loc;
217 }
218
219 match input.chars().nth(0) {
220 Some(')') =>
221 return Done(&input[1..],
222 Sexp::List(members, (start_loc, loc+1))),
223 Some(_) => (),
224 None => unreachable!(),
225 }
226
227 match parse_expression(input, loc) {
228 Done(new_input, member) => {
229 loc = member.get_loc().1;
230 members.push(member);
231 input = new_input;
232 }
233 Error(err) =>
234 return Error(ParseError::List(
235 Box::new(err),
236 (0, 0).offset(loc)))
237 }
238 }
239 }
240
241 #[allow(missing_docs)]
242 pub fn parse_number(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
243 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Number);
244
245 match input.chars().next() {
246 Some(c) if !c.is_digit(10) => {
247 return Error(ParseError::Number(
248 Box::new(ParseError::Unexpected(c, start_loc)),
249 (0, c.len_utf8()).offset(start_loc)));
250 }
251 None => return Error(ParseError::Number(
252 Box::new(ParseError::UnexpectedEof),
253 (0, 0).offset(start_loc))),
254 _ => (),
255 }
256
257 let base = 10;
258
259 let mut end = 0;
260 // Before the decimal point
261 for (i, c) in input.char_indices() {
262 if c == '.' {
263 end = i + 1;
264 break;
265 }
266
267 if c.is_delimiter() {
268 return Done(&input[i..],
269 Sexp::Int(input[..i].parse().expect("Already matched digits"),
270 (0, i).offset(start_loc)));
271 }
272
273 if !c.is_digit(base) {
274 return Error(ParseError::Number(
275 Box::new(ParseError::Unexpected(c, start_loc + i)),
276 (i, i).offset(start_loc)));
277 }
278
279 end = i + c.len_utf8();
280 }
281
282 if input[end..].is_empty() {
283 return Done(&input[end..],
284 Sexp::Int(input.parse().expect("Already matched digits"),
285 (0, end).offset(start_loc)));
286 }
287
288 // After the decimal point
289 for (i, c) in input[end..].char_indices() {
290 if c.is_delimiter() {
291 return Done(&input[i+end..],
292 Sexp::Float(input[..end+i].parse().expect("Already matched digits.digits"),
293 (0, end+i).offset(start_loc)));
294 }
295
296 if !c.is_digit(base) {
297 return Error(ParseError::Number(
298 Box::new(ParseError::Unexpected(c, start_loc + i + end)),
299 (i+end, i+end).offset(start_loc)));
300 }
301 }
302
303 Done(&input[input.len()..],
304 Sexp::Float(input.parse().expect("Already matched digits.digits"),
305 (0, input.len()).offset(start_loc)))
306 }
307
308 #[allow(missing_docs)]
309 pub fn parse_symbol(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
310 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Symbol);
311
312 match input.chars().next() {
313 Some(c@'#') | Some(c@':') | Some(c@'0'...'9') =>
314 return Error(ParseError::Symbol(
315 Box::new(ParseError::Unexpected(c, start_loc)),
316 (0, 0).offset(start_loc))),
317 Some(c) if c.is_delimiter() =>
318 return Error(ParseError::Symbol(
319 Box::new(ParseError::Unexpected(c, start_loc)),
320 (0, 0).offset(start_loc))),
321 Some(_) => (),
322 None => unreachable!(),
323 }
324
325 for (i, c) in input.char_indices() {
326 if c.is_delimiter() {
327 return Done(&input[i..],
328 Sexp::Sym(input[..i].into(), (0, i).offset(start_loc)));
329 }
330 }
331
332 Done(&input[input.len()..],
333 Sexp::Sym(input.into(), (0, input.len()).offset(start_loc)))
334 }
335
336 #[allow(missing_docs)]
337 pub fn parse_string(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
338 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::String);
339
340 match input.chars().next() {
341 Some('"') => (),
342 Some(c) =>
343 return Error(ParseError::String(
344 Box::new(ParseError::Unexpected(c, start_loc)),
345 (0, 0).offset(start_loc))),
346 None => unreachable!(),
347 }
348
349 for (i, c) in input[1..].char_indices() {
350 if c == '"' {
351 return Done(&input[2+i..],
352 Sexp::Str(input[1..i+1].into(), (0, i+2).offset(start_loc)));
353 }
354 }
355
356 Error(ParseError::String(
357 Box::new(ParseError::UnexpectedEof),
358 (0, input.len()).offset(start_loc)))
359 }
360
361 #[allow(missing_docs)]
362 pub fn parse_character(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
363 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Char);
364
365 match input.chars().nth(0) {
366 Some('#') => (),
367 Some(c) =>
368 return Error(ParseError::Char(
369 Box::new(ParseError::Unexpected(c, start_loc)),
370 (0, 0).offset(start_loc))),
371 None =>
372 return Error(ParseError::Char(
373 Box::new(ParseError::UnexpectedEof),
374 (0, 0).offset(start_loc))),
375 }
376
377 match input.chars().nth(1) {
378 Some('\\') => (),
379 Some(c) =>
380 return Error(ParseError::Char(
381 Box::new(ParseError::Unexpected(c, start_loc + 1)),
382 (1, 1).offset(start_loc))),
383 None =>
384 return Error(ParseError::Char(
385 Box::new(ParseError::UnexpectedEof),
386 (1, 1).offset(start_loc)))
387 }
388
389 match input.chars().nth(2) {
390 Some(c) =>
391 Done(&input[3..], Sexp::Char(c, (0, 3).offset(start_loc))),
392 None =>
393 Error(ParseError::Char(
394 Box::new(ParseError::UnexpectedEof),
395 (2, 2).offset(start_loc)))
396 }
397 }
398
399 \f
400 // Tests ///////////////////////////////////////////////////////////////////////
401
// Unit tests for the parsers. Locations in the expected values are byte
// offsets into the input literal, so the exact amount of whitespace inside
// each literal matters.
#[cfg(test)]
mod test {
    use sexp::Sexp;
    use span::Span;
    use parser::*;
    use parser::ParseResult::*;

    // `parse` returns every expression up to the first failure.
    #[test]
    fn test_parse() {
        assert_eq!(parse("1 2 3"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3)), Sexp::Int(3, (4, 5))
        ], None));
        assert_eq!(parse("1 2 )"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3))
        ], Some(ParseError::Symbol(Box::new(ParseError::Unexpected(')', 4)), (4, 4)))));
    }

    // `parse_one` stops after the first expression and returns the tail.
    #[test]
    fn test_parse_one() {
        assert_eq!(parse_one("1 2"),
                   Ok((Sexp::Int(1, (0, 1)), " 2")));
    }

    // `parse_expression` dispatches on the first non-whitespace character.
    #[test]
    fn test_parse_expression() {
        assert_eq!(parse_expression(" 1", 0),
                   Done("", Sexp::Int(1, (1, 2))));
        assert_eq!(parse_expression("2.2", 0),
                   Done("", Sexp::Float(2.2, (0, 3))));
        assert_eq!(parse_expression(" a", 0),
                   Done("", Sexp::Sym("a".into(), (1, 2))));
        assert_eq!(parse_expression("#\\c", 0),
                   Done("", Sexp::Char('c', (0, 3))));
        assert_eq!(parse_expression(r#""hi""#, 0),
                   Done("", Sexp::Str("hi".into(), (0, 4))));
        assert_eq!(parse_expression("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_expression("( 1 2 3 )", 0),
                   Done("", Sexp::List(vec![
                       Sexp::Int(1, (2, 3)),
                       Sexp::Int(2, (4, 5)),
                       Sexp::Int(3, (6, 7)),
                   ], (0, 9))));

        assert_eq!(parse_expression("", 0),
                   Error(ParseError::Sexp(Box::new(ParseError::UnexpectedEof), (0, 0))));
    }

    #[test]
    fn test_parse_list() {
        assert_eq!(parse_list("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_list("(1)", 0),
                   Done("", Sexp::List(vec![Sexp::Int(1, (1, 2))], (0, 3))));
        // Irregular spacing on purpose: the input has two leading spaces, four
        // spaces after `1` and two after `2`, matching the offsets below.
        assert_eq!(parse_list("  ( 1    2  3 a )", 0), Done("", Sexp::List(vec![
            Sexp::Int(1, (4, 5)),
            Sexp::Int(2, (9, 10)),
            Sexp::Int(3, (12, 13)),
            Sexp::Sym("a".into(), (14, 15)),
        ], (2, 17))));
    }

    #[test]
    fn test_parse_number() {
        assert_eq!(parse_number("1", 0),
                   Done("", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number(" 13", 0),
                   Done("", Sexp::Int(13, (1, 3))));
        assert_eq!(parse_number("1.2", 0),
                   Done("", Sexp::Float(1.2, (0, 3))));
        // U+3000 is multi-byte whitespace; locations are byte offsets.
        assert_eq!(parse_number("\u{3000}4.2", 0),
                   Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
        // Two leading spaces, so the number starts at byte 2.
        assert_eq!(parse_number("  42 ", 0),
                   Done(" ", Sexp::Int(42, (2, 4))));
        assert_eq!(parse_number(" 4.2 ", 0),
                   Done(" ", Sexp::Float(4.2, (1, 4))));
        assert_eq!(parse_number("1()", 0),
                   Done("()", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number("3.6()", 0),
                   Done("()", Sexp::Float(3.6, (0, 3))));

        assert_eq!(parse_number("", 0),
                   Error(ParseError::Number(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_number("123a", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('a', 3)), (3, 3))));
        assert_eq!(parse_number("66.6+", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('+', 4)), (4, 4))));
    }

    #[test]
    fn test_parse_ident() {
        assert_eq!(parse_symbol("+", 0),
                   Done("", Sexp::Sym("+".into(), (0, 1))));
        assert_eq!(parse_symbol(" nil?", 0),
                   Done("", Sexp::Sym("nil?".into(), (1, 5))));
        assert_eq!(parse_symbol(" ->socket", 0),
                   Done("", Sexp::Sym("->socket".into(), (1, 9))));
        assert_eq!(parse_symbol("fib(", 0),
                   Done("(", Sexp::Sym("fib".into(), (0, 3))));
        assert_eq!(parse_symbol("foo2", 0),
                   Done("", Sexp::Sym("foo2".into(), (0, 4))));

        // We reserve #foo for the implementation to do as it wishes
        assert_eq!(parse_symbol("#hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('#', 0)), (0, 0))));
        // We reserve :foo for keywords
        assert_eq!(parse_symbol(":hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected(':', 0)), (0, 0))));

        assert_eq!(parse_symbol("", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_symbol("0", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('0', 0)), (0, 0))));
        assert_eq!(parse_symbol("()", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('(', 0)), (0, 0))));
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(parse_string(r#""""#, 0),
                   Done("", Sexp::Str("".into(), (0, 2))));
        assert_eq!(parse_string(r#""hello""#, 0),
                   Done("", Sexp::Str("hello".into(), (0, 7))));
        // Two leading spaces and an embedded newline; written with escapes so
        // that source indentation cannot change the literal.
        assert_eq!(parse_string("  \"this is a nice string\nwith 0123 things in it\"", 0),
                   Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into(), (2, 48))));

        assert_eq!(parse_string("", 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_string(r#""hi"#, 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
    }

    #[test]
    fn test_parse_char() {
        assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
        assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
        // Two leading spaces, so the literal occupies bytes 2..5.
        assert_eq!(parse_character(r#"  #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));

        assert_eq!(parse_character("", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_character("#", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
        assert_eq!(parse_character("#\\", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
        assert_eq!(parse_character("a", 0),
                   Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));
    }
}