]>
Witch of Git - ess/blob - src/parser.rs
//! Functions to parse s-expressions and expression atoms.
//!
//! This module contains the core parsing machinery.
//!
//! * If you're interested in getting a parsed s-expression that you can use,
//!   then [`parse`] and [`parse_one`] are your best bet.
//! * If you want to write your own parsers that contain s-expressions,
//!   [`ParseResult`] and [`parse_expression`] will be the most useful to you.
//!
//! [`parse`]: fn.parse.html
//! [`parse_one`]: fn.parse_one.html
//! [`ParseResult`]: enum.ParseResult.html
//! [`parse_expression`]: fn.parse_expression.html
16 use span
::{Span
, ByteSpan
};
19 // Parsing Types ///////////////////////////////////////////////////////////////
/// Represents what to do next in partially completed parsing.
///
/// `ParseResult` is returned from all intermediate parsers. If you just want to
/// get back parsed s-expressions, you won't need to worry about this type since
/// the top level parsers just return a `Result`.
///
/// If the parser failed to produce a result, it will return `Error`, and if it
/// succeeded we'll get the `Done` variant containing the value produced and the
/// rest of the text to work on.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseResult<'a, T, E> {
    /// The parser succeeded, this contains first the un-consumed portion of the
    /// input then the result produced by parsing.
    Done(&'a str, T),
    /// The parser failed, the `E` represents the reason for the failure.
    Error(E),
}
39 /// Indicates how parsing failed.
41 /// Most `ParseError` variants contain a `Box<ParseError>` that represents the
42 /// cause of that error. Using this, `ParseError` variants can be chained to
43 /// produce a more complete picture of what exactly went wrong during parsing.
44 #[derive(Debug, PartialEq, Eq, Clone)]
45 pub enum ParseError
<Loc
=ByteSpan
> where Loc
: Span
{
46 /// Parsing reached the end of input where not expecting to, usually this
47 /// will be contained inside another `ParseError` like `String(box
48 /// UnexpectedEof, ...)` which indicates that the closing quote was never
51 /// Some problem occurred while parsing a list, along with the cause of that
53 List(Box
<ParseError
>, Loc
),
54 /// Some problem occurred while parsing an s-expression. This will only be
55 /// generated if EOF is reached unexpectedly at the beginning of
56 /// `parse_expression`, so it should probably be removed.
57 Sexp(Box
<ParseError
>, Loc
),
58 /// Some problem occurred while parsing a character literal, along with the
59 /// cause of the error.
60 Char(Box
<ParseError
>, Loc
),
61 /// Some problem occurred while parsing a string literal, along with the
62 /// cause of the error.
63 String(Box
<ParseError
>, Loc
),
64 /// Some problem occurred while parsing a symbol, along with the cause of
66 Symbol(Box
<ParseError
>, Loc
),
67 /// Some problem occurred while parsing a number literal, along with the
68 /// cause of the error.
69 Number(Box
<ParseError
>, Loc
),
70 /// An unexpected character was found. This will usually be the root cause
71 /// in some chain of `ParseError`s.
72 Unexpected(char, Loc
::Begin
),
74 use self::ParseResult
::*;
77 // Parsing Utilities ///////////////////////////////////////////////////////////
/// Classifies characters that end the atom currently being read.
///
/// The trait name keeps its existing spelling so that the `impl` below and
/// every `is_delimiter` call site continue to resolve.
trait IsDelimeter {
    /// Returns `true` when `self` is a delimiter character.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter when it is whitespace or one of
    /// `;`, `(`, `)`, `[`, `]`, `{`, `}`, `"`, `'`, `` ` `` or `,`.
    fn is_delimiter(&self) -> bool {
        self.is_whitespace() || *self == ';'
            || *self == '(' || *self == ')'
            || *self == '[' || *self == ']'
            || *self == '{' || *self == '}'
            || *self == '"' || *self == '\''
            || *self == '`' || *self == ','
    }
}
// Skips leading whitespace in `$input`.
//
// On success the macro evaluates to `(rest, loc)`: `rest` is `$input` with its
// leading whitespace removed, and `loc` is `$start_loc` advanced by the number
// of bytes skipped.
//
// If `$input` is empty or entirely whitespace, the macro `return`s from the
// *enclosing function* with `Error($ErrorFn(Box::new(UnexpectedEof), span))`,
// where `span` is the empty span at the end of `$input`, offset by
// `$start_loc`. `$ErrorFn` is the `ParseError` variant constructor to wrap the
// failure in (e.g. `ParseError::List`).
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        if let Some(pos) = $input.find(|c: char| !c.is_whitespace()) {
            (&$input[pos..], $start_loc + pos)
        } else {
            return Error($ErrorFn(
                Box::new(ParseError::UnexpectedEof),
                ($input.len(), $input.len()).offset($start_loc)));
        }
    }
}
107 // Top Level Parsers ///////////////////////////////////////////////////////////
109 /// Parse a sequence of s-expressions.
111 /// This function returns `(Vec<Sexp>, Option<ParseError>)` so that it can
112 /// return partial results, for when some component parses successfully and a
113 /// later part fails.
117 /// If the text contains an invalid s-expression (imbalanced parenthesis,
118 /// quotes, invalid numbers like 123q, etc.) then the parser will stop and
119 /// return an error. Every s-expression before that point that successfully
120 /// parsed will still be returned.
124 /// We can get useful partial results
127 /// # use ess::parser::parse;
128 /// let (exprs, err) = parse("1 2 3 ( 4");
129 /// assert_eq!(exprs.len(), 3);
130 /// assert!(err.is_some());
132 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
133 let mut start_loc = 0;
134 let mut results = Vec::new();
136 match parse_expression(input, start_loc) {
137 Done(rest, result) => {
139 start_loc = result.get_loc().1;
140 results.push(result);
141 if rest.trim() == "" {
142 return (results, None);
146 return (results, Some(err));
152 /// Parses a single s-expression, ignoring any trailing text.
154 /// This function returns a pair of the parsed s-expression and the tail of the text.
158 /// If the text begins with an invalid s-expression (imbalanced parenthesis,
159 /// quotes, invalid numbers like 123q, etc.) then the parser will return an
160 /// error. Any text after the first s-expression doesn't affect the parsing.
165 /// # use ess::parser::parse_one;
166 /// let (expr, rest) = parse_one("1 (").unwrap();
167 /// assert_eq!(rest, " (");
169 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
170 match parse_expression(input, 0) {
171 Done(rest, result) => Ok((result, rest)),
172 Error(err) => Err(err),
177 // Core Parsers ////////////////////////////////////////////////////////////////
179 // TODO: All of these parsers deserve docs, but since they're somewhat internal
180 // parsers, it's less critical than the rest of the API.
182 #[allow(missing_docs)]
183 pub fn parse_expression(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
184 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
186 match input.chars().next() {
187 Some('0'...'9') => parse_number(input, start_loc),
188 Some('(') => parse_list(input, start_loc),
189 Some('#') => parse_character(input, start_loc),
190 Some('"'
) => parse_string(input
, start_loc
),
192 match parse_expression(&input
[1..], start_loc
+ 1) {
193 Done(rest
, result
) => {
194 let span
= *result
.get_loc();
195 let quote_span
= (0, 1).offset(start_loc
);
197 Sexp
::List(vec
![Sexp
::Sym("quote".into
(), quote_span
), result
],
198 quote_span
.un
ion
(&span
)))
203 Some(_
) => parse_symbol(input
, start_loc
),
204 None
=> unreachable
!(),
208 #[allow(missing_docs)]
209 pub fn parse_list(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
210 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::List
);
212 match input
.chars().nth(0) {
215 return Error(ParseError
::List(
216 Box
::new(ParseError
::Unexpected(c
, 0)),
217 (0, 0).offset(start_loc
))),
218 None
=> unreachable
!(),
221 let mut input
= &input
[1..];
222 let mut loc
= start_loc
+ 1;
223 let mut members
= Vec
::new();
226 let (new_input
, new_loc
) = consume_whitespace
!(input
, loc
, ParseError
::List
);
231 match input
.chars().nth(0) {
233 return Done(&input
[1..],
234 Sexp
::List(members
, (start_loc
, loc
+1))),
236 None
=> unreachable
!(),
239 match parse_expression(input
, loc
) {
240 Done(new_input
, member
) => {
241 loc
= member
.get_loc().1;
242 members
.push(member
);
246 return Error(ParseError
::List(
253 #[allow(missing_docs)]
254 pub fn parse_number(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
255 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Number
);
257 match input
.chars().next() {
258 Some(c
) if !c
.is
_d
ig
it
(10) => {
259 return Error(ParseError
::Number(
260 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
261 (0, c
.len_utf8()).offset(start_loc
)));
263 None
=> return Error(ParseError
::Number(
264 Box
::new(ParseError
::UnexpectedEof
),
265 (0, 0).offset(start_loc
))),
272 // Before the decimal point
273 for (i
, c
) in input
.char_indices() {
279 if c
.is
_del
im
iter
() {
280 return Done(&input
[i
..],
281 Sexp
::Int(input
[..i].parse().expect("Already matched digits"),
282 (0, i
).offset(start_loc
)));
285 if !c
.is
_d
ig
it
(base
) {
286 return Error(ParseError
::Number(
287 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
)),
288 (i
, i
).offset(start_loc
)));
291 end
= i
+ c
.len_utf8();
294 if input
[end
..].is
_empty
() {
295 return Done(&input
[end
..],
296 Sexp
::Int(input
.parse().expect("Already matched digits"),
297 (0, end
).offset(start_loc
)));
300 // After the decimal point
301 for (i
, c
) in input
[end
..].char_indices() {
302 if c
.is
_del
im
iter
() {
303 return Done(&input
[i
+end
..],
304 Sexp
::Float(input
[..end
+i
].parse().expect("Already matched digits.digits"),
305 (0, end
+i
).offset(start_loc
)));
308 if !c
.is
_d
ig
it
(base
) {
309 return Error(ParseError
::Number(
310 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
+ end
)),
311 (i
+end
, i
+end
).offset(start_loc
)));
315 Done(&input
[input
.len()..],
316 Sexp
::Float(input
.parse().expect("Already matched digits.digits"),
317 (0, input
.len()).offset(start_loc
)))
320 #[allow(missing_docs)]
321 pub fn parse_symbol(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
322 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Symbol
);
324 match input
.chars().next() {
325 Some(c@'
#') | Some(c@':') | Some(c@'0'...'9') =>
326 return Error(ParseError
::Symbol(
327 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
328 (0, 0).offset(start_loc
))),
329 Some(c
) if c
.is
_del
im
iter
() =>
330 return Error(ParseError
::Symbol(
331 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
332 (0, 0).offset(start_loc
))),
334 None
=> unreachable
!(),
337 for (i
, c
) in input
.char_indices() {
338 if c
.is
_del
im
iter
() {
339 return Done(&input
[i
..],
340 Sexp
::Sym(input
[..i].into
(), (0, i
).offset(start_loc
)));
344 Done(&input
[input
.len()..],
345 Sexp
::Sym(input
.into
(), (0, input
.len()).offset(start_loc
)))
348 #[allow(missing_docs)]
349 pub fn parse_string(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
350 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::String
);
352 match input
.chars().next() {
355 return Error(ParseError::String(
356 Box::new(ParseError::Unexpected(c, start_loc)),
357 (0, 0).offset(start_loc))),
358 None => unreachable!(),
361 for (i, c) in input[1..].char_indices() {
363 return Done(&input
[2+i
..],
364 Sexp
::Str(input
[1..i+1].into
(), (0, i
+2).offset(start_loc
)));
368 Error(ParseError
::String(
369 Box
::new(ParseError
::UnexpectedEof
),
370 (0, input
.len()).offset(start_loc
)))
373 #[allow(missing_docs)]
374 pub fn parse_character(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
375 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Char
);
377 match input
.chars().nth(0) {
380 return Error(ParseError
::Char(
381 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
382 (0, 0).offset(start_loc
))),
384 return Error(ParseError
::Char(
385 Box
::new(ParseError
::UnexpectedEof
),
386 (0, 0).offset(start_loc
))),
389 match input
.chars().nth(1) {
392 return Error(ParseError
::Char(
393 Box
::new(ParseError
::Unexpected(c
, start_loc
+ 1)),
394 (1, 1).offset(start_loc
))),
396 return Error(ParseError
::Char(
397 Box
::new(ParseError
::UnexpectedEof
),
398 (1, 1).offset(start_loc
)))
401 match input
.chars().nth(2) {
403 Done(&input
[3..], Sexp
::Char(c
, (0, 3).offset(start_loc
))),
405 Error(ParseError
::Char(
406 Box
::new(ParseError
::UnexpectedEof
),
407 (2, 2).offset(start_loc
)))
412 // Tests ///////////////////////////////////////////////////////////////////////
419 use parser
::ParseResult
::*;
423 assert_eq
!(parse("1 2 3"), (vec
![
424 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3)), Sexp
::Int(3, (4, 5))
426 assert_eq
!(parse("1 2 )"), (vec
![
427 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3))
428 ], Some(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
)'
, 4)), (4, 4)))));
432 fn test_parse_one() {
433 assert_eq
!(parse_one("1 2"),
434 Ok((Sexp
::Int(1, (0, 1)), " 2")));
438 fn test_parse_expression() {
439 assert_eq
!(parse_expression(" 1", 0),
440 Done("", Sexp
::Int(1, (1, 2))));
441 assert_eq
!(parse_expression("2.2", 0),
442 Done("", Sexp
::Float(2.2, (0, 3))));
443 assert_eq
!(parse_expression(" a", 0),
444 Done("", Sexp
::Sym("a".into
(), (1, 2))));
445 assert_eq
!(parse_expression("#\\c", 0),
446 Done("", Sexp
::Char('c'
, (0, 3))));
447 assert_eq
!(parse_expression(r
#""hi""#, 0),
448 Done("", Sexp
::Str("hi".into
(), (0, 4))));
449 assert_eq
!(parse_expression("()", 0),
450 Done("", Sexp
::List(vec
![], (0, 2))));
451 assert_eq
!(parse_expression("( 1 2 3 )", 0),
452 Done("", Sexp
::List(vec
![
453 Sexp
::Int(1, (2, 3)),
454 Sexp
::Int(2, (4, 5)),
455 Sexp
::Int(3, (6, 7)),
458 assert_eq
!(parse_expression("", 0),
459 Error(ParseError
::Sexp(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
463 fn test_parse_expr_quote() {
464 assert_eq
!(parse_expression("'a", 0),
465 Done("", Sexp
::List(vec
![
466 Sexp
::Sym("quote".into
(), (0, 1)),
467 Sexp
::Sym("a".into
(), (1, 2)),
469 assert_eq
!(parse_expression("'1", 0),
470 Done("", Sexp
::List(vec
![
471 Sexp
::Sym("quote".into
(), (0, 1)),
472 Sexp
::Int(1, (1, 2)),
474 assert_eq
!(parse_expression("' (1 2 3)", 0),
475 Done("", Sexp
::List(vec
![
476 Sexp
::Sym("quote".into
(), (0, 1)),
478 Sexp
::Int(1, (3, 4)),
479 Sexp
::Int(2, (5, 6)),
480 Sexp
::Int(3, (7, 8)),
483 assert_eq
!(parse_expression("'", 0),
484 Error(ParseError
::Sexp(Box
::new(ParseError
::UnexpectedEof
), (1, 1))));
488 fn test_parse_list() {
489 assert_eq
!(parse_list("()", 0),
490 Done("", Sexp
::List(vec
![], (0, 2))));
491 assert_eq
!(parse_list("(1)", 0),
492 Done("", Sexp
::List(vec
![Sexp
::Int(1, (1, 2))], (0, 3))));
493 assert_eq
!(parse_list(" ( 1 2 3 a )", 0), Done("", Sexp
::List(vec
![
494 Sexp
::Int(1, (4, 5)),
495 Sexp
::Int(2, (9, 10)),
496 Sexp
::Int(3, (12, 13)),
497 Sexp
::Sym("a".into
(), (14, 15)),
502 fn test_parse_number() {
503 assert_eq
!(parse_number("1", 0),
504 Done("", Sexp
::Int(1, (0, 1))));
505 assert_eq
!(parse_number(" 13", 0),
506 Done("", Sexp
::Int(13, (1, 3))));
507 assert_eq
!(parse_number("1.2", 0),
508 Done("", Sexp
::Float(1.2, (0, 3))));
509 assert_eq
!(parse_number("\u{3000}4.2", 0),
510 Done("", Sexp
::Float(4.2, (0, 3).offset('
\u{3000}'
.len_utf8()))));
511 assert_eq
!(parse_number(" 42 ", 0),
512 Done(" ", Sexp
::Int(42, (2, 4))));
513 assert_eq
!(parse_number(" 4.2 ", 0),
514 Done(" ", Sexp
::Float(4.2, (1, 4))));
515 assert_eq
!(parse_number("1()", 0),
516 Done("()", Sexp
::Int(1, (0, 1))));
517 assert_eq
!(parse_number("3.6()", 0),
518 Done("()", Sexp
::Float(3.6, (0, 3))));
520 assert_eq
!(parse_number("", 0),
521 Error(ParseError
::Number(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
522 assert_eq
!(parse_number("123a", 0),
523 Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('a'
, 3)), (3, 3))));
524 assert_eq
!(parse_number("66.6+", 0),
525 Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('
+'
, 4)), (4, 4))));
529 fn test_parse_ident() {
530 assert_eq
!(parse_symbol("+", 0),
531 Done("", Sexp
::Sym("+".into
(), (0, 1))));
532 assert_eq
!(parse_symbol(" nil?", 0),
533 Done("", Sexp
::Sym("nil?".into
(), (1, 5))));
534 assert_eq
!(parse_symbol(" ->socket", 0),
535 Done("", Sexp
::Sym("->socket".into
(), (1, 9))));
536 assert_eq
!(parse_symbol("fib(", 0),
537 Done("(", Sexp
::Sym("fib".into
(), (0, 3))));
538 assert_eq
!(parse_symbol("foo2", 0),
539 Done("", Sexp
::Sym("foo2".into
(), (0, 4))));
541 // We reserve #foo for the implementation to do as it wishes
542 assert_eq
!(parse_symbol("#hi", 0),
543 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
#', 0)), (0, 0))));
544 // We reserve :foo for keywords
545 assert_eq
!(parse_symbol(":hi", 0),
546 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
:'
, 0)), (0, 0))));
548 assert_eq
!(parse_symbol("", 0),
549 Error(ParseError
::Symbol(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
550 assert_eq
!(parse_symbol("0", 0),
551 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
0'
, 0)), (0, 0))));
552 assert_eq
!(parse_symbol("()", 0),
553 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
('
, 0)), (0, 0))));
557 fn test_parse_string() {
558 assert_eq
!(parse_string(r
#""""#, 0),
559 Done("", Sexp
::Str("".into
(), (0, 2))));
560 assert_eq
!(parse_string(r
#""hello""#, 0),
561 Done("", Sexp
::Str("hello".into
(), (0, 7))));
562 assert_eq
!(parse_string(r
#" "this is a nice string
563 with
0123 things
in it
""#, 0),
564 Done("", Sexp
::Str("this is a nice string\nwith 0123 things in it".into
(), (2, 48))));
566 assert_eq
!(parse_string("", 0),
567 Error(ParseError
::String(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
568 assert_eq
!(parse_string(r
#""hi"#, 0),
569 Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
573 fn test_parse_char() {
574 assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
575 assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
576 assert_eq!(parse_character(r#" #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));
578 assert_eq!(parse_character("", 0),
579 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
580 assert_eq!(parse_character("#", 0),
581 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
582 assert_eq!(parse_character("#\\", 0),
583 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
584 assert_eq!(parse_character("a", 0),
585 Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));