]>
Witch of Git - ess/blob - src/parser.rs
1 //! Functions to parse s-expressions and expression atoms.
3 //! This module contains the core parsing machinery.
5 //! * If you're interested in getting a parsed s-expression that you can use,
6 //! then [`parse`] and [`parse_one`] are your best bet.
7 //! * If you want to write your own parsers that contain s-expressions,
8 //! [`ParseResult`] and [`parse_expression`] will be the most useful to you.
10 //! [`parse`]: fn.parse.html
11 //! [`parse_one`]: fn.parse_one.html
12 //! [`ParseResult`]: enum.ParseResult.html
13 //! [`parse_expression`]: fn.parse_expression.html
16 use span
::{ByteSpan
, Span
};
18 // Parsing Types ///////////////////////////////////////////////////////////////
20 /// Represents what to do next in partially completed parsing.
22 /// `ParseResult` is returned from all intermediate parsers. If you just want to
23 /// get back parsed s-expressions, you won't need to worry about this type since
24 /// the top level parsers just return a `Result`.
26 /// If the parser failed to produce a result, it will return `Error`, and if it
27 /// succeeded we'll get the `Done` variant containing the value produced and the
28 /// rest of the text to work on.
29 #[derive(Debug, PartialEq, Eq, Clone)]
30 pub enum ParseResult
<'a
, T
, E
> {
31 /// The parser succeeded, this contains first the un-consumed portion of the
32 /// input then the result produced by parsing.
34 /// The parser failed, the `E` represents the reason for the failure.
38 /// Indicates how parsing failed.
40 /// Most `ParseError` variants contain a `Box<ParseError>` that represents the
41 /// cause of that error. Using this, `ParseError` variants can be chained to
42 /// produce a more complete picture of what exactly went wrong during parsing.
43 #[derive(Debug, PartialEq, Eq, Clone)]
44 pub enum ParseError
<Loc
= ByteSpan
>
48 /// Parsing reached the end of input unexpectedly. Usually this
49 /// will be contained inside another `ParseError` like `String(box
50 /// UnexpectedEof, ...)` which indicates that the closing quote was never
53 /// Some problem occurred while parsing a list, along with the cause of that
55 List(Box
<ParseError
>, Loc
),
56 /// Some problem occurred while parsing an s-expression. This will only be
57 /// generated if EOF is reached unexpectedly at the beginning of
58 /// `parse_expression`, so it should probably be removed.
59 Sexp(Box
<ParseError
>, Loc
),
60 /// Some problem occurred while parsing a character literal, along with the
61 /// cause of the error.
62 Char(Box
<ParseError
>, Loc
),
63 /// Some problem occurred while parsing a string literal, along with the
64 /// cause of the error.
65 String(Box
<ParseError
>, Loc
),
66 /// Some problem occurred while parsing a symbol, along with the cause of
68 Symbol(Box
<ParseError
>, Loc
),
69 /// Some problem occurred while parsing a number literal, along with the
70 /// cause of the error.
71 Number(Box
<ParseError
>, Loc
),
72 /// An unexpected character was found. This will usually be the root cause
73 /// in some chain of `ParseError`s.
74 Unexpected(char, Loc
::Begin
),
76 use self::ParseResult
::*;
78 // Parsing Utilities ///////////////////////////////////////////////////////////
81 fn is_delimiter(&self) -> bool
;
84 impl IsDelimeter
for char {
85 fn is_delimiter(&self) -> bool
{
86 let delim_chars
= r
#";()[]{}"\`,"#;
87 self.is_whitespace() || delim_chars.contains(*self)
91 macro_rules! consume_whitespace {
92 ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
93 if let Some(pos) = $input.find(|c: char| !c.is_whitespace()) {
94 (&$input[pos..], $start_loc + pos)
96 return Error($ErrorFn(
97 Box::new(ParseError::UnexpectedEof),
98 ($input.len(), $input.len()).offset($start_loc),
104 // Top Level Parsers ///////////////////////////////////////////////////////////
106 /// Parse a sequence of s-expressions.
108 /// This function returns `(Vec<Sexp>, Option<ParseError>)` so that it can
109 /// return partial results, for when some component parses successfully and a
110 /// later part fails.
114 /// If the text contains an invalid s-expression (unbalanced parentheses or
115 /// quotes, invalid numbers like `123q`, etc.), then the parser will stop and
116 /// return an error. Every s-expression before that point that successfully
117 /// parsed will still be returned.
121 /// We can get useful partial results
124 /// # use ess::parser::parse;
125 /// let (exprs, err) = parse("1 2 3 ( 4");
126 /// assert_eq!(exprs.len(), 3);
127 /// assert!(err.is_some());
129 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
130 let mut start_loc = 0;
131 let mut results = Vec::new();
133 match parse_expression(input, start_loc) {
134 Done(rest, result) => {
136 start_loc = result.get_loc().1;
137 results.push(result);
138 if rest.trim() == "" {
139 return (results, None);
143 return (results, Some(err));
149 /// Parses a single s-expression, ignoring any trailing text.
151 /// This function returns a pair of the parsed s-expression and the tail of the text.
155 /// If the text begins with an invalid s-expression (unbalanced parentheses or
156 /// quotes, invalid numbers like `123q`, etc.), then the parser will return an
157 /// error. Any text after the first s-expression doesn't affect the parsing.
162 /// # use ess::parser::parse_one;
163 /// let (expr, rest) = parse_one("1 (").unwrap();
164 /// assert_eq!(rest, " (");
166 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
167 match parse_expression(input, 0) {
168 Done(rest, result) => Ok((result, rest)),
169 Error(err) => Err(err),
173 // Core Parsers ////////////////////////////////////////////////////////////////
175 // TODO: All of these parsers deserve docs, but since they're somewhat internal
176 // parsers, it's less critical than the rest of the API.
178 #[allow(missing_docs)]
179 pub fn parse_expression(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
180 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
182 match input.chars().next() {
183 Some('0'...'9') => parse_number(input, start_loc),
184 Some('(') | Some('{') | Some('[') => parse_list(input, start_loc),
185 Some('#') => parse_character(input, start_loc),
186 Some('"') => parse_string(input, start_loc),
187 Some('
\''
) => match parse_expression(&input
[1..], start_loc
+ 1) {
188 Done(rest
, result
) => {
189 let span
= *result
.get_loc();
190 let quote_span
= (0, 1).offset(start_loc
);
194 vec
![Sexp
::Sym("quote".into
(), quote_span
), result
],
195 quote_span
.un
ion
(&span
),
201 Some('`'
) => match parse_expression(&input
[1..], start_loc
+ 1) {
202 Done(rest
, result
) => {
203 let span
= *result
.get_loc();
204 let quote_span
= (0, 1).offset(start_loc
);
208 vec
![Sexp
::Sym("quasiquote".into
(), quote_span
), result
],
209 quote_span
.un
ion
(&span
),
216 if input
[1..].starts_with('@'
) {
217 match parse_expression(&input
[2..], start_loc
+ 2) {
218 Done(rest
, result
) => {
219 let span
= *result
.get_loc();
220 let quote_span
= (0, 2).offset(start_loc
);
224 vec
![Sexp
::Sym("unquote-splicing".into
(), quote_span
), result
],
225 quote_span
.un
ion
(&span
),
232 match parse_expression(&input
[1..], start_loc
+ 1) {
233 Done(rest
, result
) => {
234 let span
= *result
.get_loc();
235 let quote_span
= (0, 1).offset(start_loc
);
239 vec
![Sexp
::Sym("unquote".into
(), quote_span
), result
],
240 quote_span
.un
ion
(&span
),
248 Some(_
) => parse_symbol(input
, start_loc
),
249 None
=> unreachable
!(),
253 #[allow(missing_docs)]
254 pub fn parse_list(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
255 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::List
);
257 let first_char
= match input
.chars().nth(0) {
258 Some(c @ '
('
) | Some(c @ '
{'
) | Some(c @ '
['
) => c
,
260 return Error(ParseError
::List(
261 Box
::new(ParseError
::Unexpected(c
, 0)),
262 (0, 0).offset(start_loc
),
265 None
=> unreachable
!(),
268 let mut input
= &input
[1..];
269 let mut loc
= start_loc
+ 1;
270 let mut members
= Vec
::new();
273 let (new_input
, new_loc
) = consume_whitespace
!(input
, loc
, ParseError
::List
);
278 match input
.chars().nth(0) {
279 Some(c @ '
)'
) | Some(c @ '
}'
) | Some(c @ '
]'
) => match (first_char
, c
) {
280 ('
('
, '
)'
) | ('
{'
, '
}'
) | ('
['
, '
]'
) => {
281 return Done(&input
[1..], Sexp
::List(members
, (start_loc
, loc
+ 1)))
284 return Error(ParseError
::List(
285 Box
::new(ParseError
::Unexpected(c
, loc
)),
291 None
=> unreachable
!(),
294 match parse_expression(input
, loc
) {
295 Done(new_input
, member
) => {
296 loc
= member
.get_loc().1;
297 members
.push(member
);
300 Error(err
) => return Error(ParseError
::List(Box
::new(err
), (0, 0).offset(loc
))),
305 #[allow(missing_docs)]
306 pub fn parse_number(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
307 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Number
);
309 match input
.chars().next() {
310 Some(c
) if !c
.is
_d
ig
it
(10) => {
311 return Error(ParseError
::Number(
312 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
313 (0, c
.len_utf8()).offset(start_loc
),
317 return Error(ParseError
::Number(
318 Box
::new(ParseError
::UnexpectedEof
),
319 (0, 0).offset(start_loc
),
328 // Before the decimal point
329 for (i
, c
) in input
.char_indices() {
335 if c
.is
_del
im
iter
() {
339 input
[..i].parse().expect("Already matched digits"),
340 (0, i
).offset(start_loc
),
345 if !c
.is
_d
ig
it
(base
) {
346 return Error(ParseError
::Number(
347 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
)),
348 (i
, i
).offset(start_loc
),
352 end
= i
+ c
.len_utf8();
355 if input
[end
..].is
_empty
() {
359 input
.parse().expect("Already matched digits"),
360 (0, end
).offset(start_loc
),
365 // After the decimal point
366 for (i
, c
) in input
[end
..].char_indices() {
367 if c
.is
_del
im
iter
() {
373 .expect("Already matched digits.digits"),
374 (0, end
+ i
).offset(start_loc
),
379 if !c
.is
_d
ig
it
(base
) {
380 return Error(ParseError
::Number(
381 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
+ end
)),
382 (i
+ end
, i
+ end
).offset(start_loc
),
388 &input
[input
.len()..],
390 input
.parse().expect("Already matched digits.digits"),
391 (0, input
.len()).offset(start_loc
),
396 #[allow(missing_docs)]
397 pub fn parse_symbol(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
398 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Symbol
);
400 match input
.chars().next() {
401 Some(c @ '
#') | Some(c @ ':') | Some(c @ '0'...'9') => {
402 return Error(ParseError
::Symbol(
403 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
404 (0, 0).offset(start_loc
),
407 Some(c
) if c
.is
_del
im
iter
() => {
408 return Error(ParseError
::Symbol(
409 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
410 (0, 0).offset(start_loc
),
414 None
=> unreachable
!(),
417 for (i
, c
) in input
.char_indices() {
418 if c
.is
_del
im
iter
() {
421 Sexp
::Sym(input
[..i].into
(), (0, i
).offset(start_loc
)),
427 &input
[input
.len()..],
428 Sexp
::Sym(input
.into
(), (0, input
.len()).offset(start_loc
)),
432 #[allow(missing_docs)]
433 pub fn parse_string(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
434 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::String
);
436 match input
.chars().next() {
439 return Error(ParseError::String(
440 Box::new(ParseError::Unexpected(c, start_loc)),
441 (0, 0).offset(start_loc),
444 None => unreachable!(),
447 for (i, c) in input[1..].char_indices() {
451 Sexp
::Str(input
[1..=i
].into
(), (0, i
+ 2).offset(start_loc
)),
456 Error(ParseError
::String(
457 Box
::new(ParseError
::UnexpectedEof
),
458 (0, input
.len()).offset(start_loc
),
462 #[allow(missing_docs)]
463 pub fn parse_character(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
464 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Char
);
466 match input
.chars().nth(0) {
469 return Error(ParseError
::Char(
470 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
471 (0, 0).offset(start_loc
),
475 return Error(ParseError
::Char(
476 Box
::new(ParseError
::UnexpectedEof
),
477 (0, 0).offset(start_loc
),
482 match input
.chars().nth(1) {
485 return Error(ParseError
::Char(
486 Box
::new(ParseError
::Unexpected(c
, start_loc
+ 1)),
487 (1, 1).offset(start_loc
),
491 return Error(ParseError
::Char(
492 Box
::new(ParseError
::UnexpectedEof
),
493 (1, 1).offset(start_loc
),
498 match input
.chars().nth(2) {
499 Some(c
) => Done(&input
[3..], Sexp
::Char(c
, (0, 3).offset(start_loc
))),
500 None
=> Error(ParseError
::Char(
501 Box
::new(ParseError
::UnexpectedEof
),
502 (2, 2).offset(start_loc
),
507 // Tests ///////////////////////////////////////////////////////////////////////
511 use parser
::ParseResult
::*;
522 Sexp
::Int(1, (0, 1)),
523 Sexp
::Int(2, (2, 3)),
532 vec
![Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3))],
533 Some(ParseError
::Symbol(
534 Box
::new(ParseError
::Unexpected('
)'
, 4)),
542 fn test_parse_one() {
543 assert_eq
!(parse_one("1 2"), Ok((Sexp
::Int(1, (0, 1)), " 2")));
547 fn test_parse_expression() {
548 assert_eq
!(parse_expression(" 1", 0), Done("", Sexp
::Int(1, (1, 2))));
550 parse_expression("2.2", 0),
551 Done("", Sexp
::Float(2.2, (0, 3)))
554 parse_expression(" a", 0),
555 Done("", Sexp
::Sym("a".into
(), (1, 2)))
558 parse_expression("#\\c", 0),
559 Done("", Sexp
::Char('c'
, (0, 3)))
562 parse_expression(r
#""hi""#, 0),
563 Done("", Sexp
::Str("hi".into
(), (0, 4)))
566 parse_expression("()", 0),
567 Done("", Sexp
::List(vec
![], (0, 2)))
570 parse_expression("( 1 2 3 )", 0),
575 Sexp
::Int(1, (2, 3)),
576 Sexp
::Int(2, (4, 5)),
577 Sexp
::Int(3, (6, 7)),
585 parse_expression("", 0),
586 Error(ParseError
::Sexp(
587 Box
::new(ParseError
::UnexpectedEof
),
594 fn test_parse_expr_quote() {
596 parse_expression("'a", 0),
601 Sexp
::Sym("quote".into
(), (0, 1)),
602 Sexp
::Sym("a".into
(), (1, 2)),
609 parse_expression("'1", 0),
613 vec
![Sexp
::Sym("quote".into
(), (0, 1)), Sexp
::Int(1, (1, 2)),],
619 parse_expression("' (1 2 3)", 0),
624 Sexp
::Sym("quote".into
(), (0, 1)),
627 Sexp
::Int(1, (3, 4)),
628 Sexp
::Int(2, (5, 6)),
629 Sexp
::Int(3, (7, 8)),
640 parse_expression("'", 0),
641 Error(ParseError
::Sexp(
642 Box
::new(ParseError
::UnexpectedEof
),
647 parse_expression("`'", 0),
648 Error(ParseError
::Sexp(
649 Box
::new(ParseError
::UnexpectedEof
),
656 fn test_parse_expr_quasiquote() {
658 parse_expression("`a", 0),
663 Sexp
::Sym("quasiquote".into
(), (0, 1)),
664 Sexp
::Sym("a".into
(), (1, 2)),
671 parse_expression("`1", 0),
675 vec
![Sexp
::Sym("quasiquote".into
(), (0, 1)), Sexp
::Int(1, (1, 2)),],
681 parse_expression("` (1 2 3)", 0),
686 Sexp
::Sym("quasiquote".into
(), (0, 1)),
689 Sexp
::Int(1, (3, 4)),
690 Sexp
::Int(2, (5, 6)),
691 Sexp
::Int(3, (7, 8)),
701 parse_expression("`'a", 0),
706 Sexp
::Sym("quasiquote".into
(), (0, 1)),
709 Sexp
::Sym("quote".into
(), (1, 2)),
710 Sexp
::Sym("a".into
(), (2, 3)),
721 parse_expression("`", 0),
722 Error(ParseError
::Sexp(
723 Box
::new(ParseError
::UnexpectedEof
),
730 fn test_parse_expr_unquote() {
732 parse_expression(",a", 0),
737 Sexp
::Sym("unquote".into
(), (0, 1)),
738 Sexp
::Sym("a".into
(), (1, 2)),
745 parse_expression(",1", 0),
749 vec
![Sexp
::Sym("unquote".into
(), (0, 1)), Sexp
::Int(1, (1, 2)),],
755 parse_expression(", (1 2 3)", 0),
760 Sexp
::Sym("unquote".into
(), (0, 1)),
763 Sexp
::Int(1, (3, 4)),
764 Sexp
::Int(2, (5, 6)),
765 Sexp
::Int(3, (7, 8)),
775 parse_expression("`,a", 0),
780 Sexp
::Sym("quasiquote".into
(), (0, 1)),
783 Sexp
::Sym("unquote".into
(), (1, 2)),
784 Sexp
::Sym("a".into
(), (2, 3)),
794 parse_expression("`(,@a)", 0),
799 Sexp
::Sym("quasiquote".into
(), (0, 1)),
803 Sexp
::Sym("unquote-splicing".into
(), (2, 4)),
804 Sexp
::Sym("a".into
(), (4, 5)),
817 parse_expression(",", 0),
818 Error(ParseError
::Sexp(
819 Box
::new(ParseError
::UnexpectedEof
),
824 parse_expression(",@", 0),
825 Error(ParseError
::Sexp(
826 Box
::new(ParseError
::UnexpectedEof
),
833 fn test_parse_list() {
834 assert_eq
!(parse_list("()", 0), Done("", Sexp
::List(vec
![], (0, 2))));
836 parse_list("(1)", 0),
837 Done("", Sexp
::List(vec
![Sexp
::Int(1, (1, 2))], (0, 3)))
840 parse_list(" ( 1 2 3 a )", 0),
845 Sexp
::Int(1, (4, 5)),
846 Sexp
::Int(2, (9, 10)),
847 Sexp
::Int(3, (12, 13)),
848 Sexp
::Sym("a".into
(), (14, 15)),
857 fn test_parse_number() {
858 assert_eq
!(parse_number("1", 0), Done("", Sexp
::Int(1, (0, 1))));
859 assert_eq
!(parse_number(" 13", 0), Done("", Sexp
::Int(13, (1, 3))));
860 assert_eq
!(parse_number("1.2", 0), Done("", Sexp
::Float(1.2, (0, 3))));
862 parse_number("\u{3000}4.2", 0),
863 Done("", Sexp
::Float(4.2, (0, 3).offset('
\u{3000}'
.len_utf8())))
865 assert_eq
!(parse_number(" 42 ", 0), Done(" ", Sexp
::Int(42, (2, 4))));
867 parse_number(" 4.2 ", 0),
868 Done(" ", Sexp
::Float(4.2, (1, 4)))
870 assert_eq
!(parse_number("1()", 0), Done("()", Sexp
::Int(1, (0, 1))));
872 parse_number("3.6()", 0),
873 Done("()", Sexp
::Float(3.6, (0, 3)))
878 Error(ParseError
::Number(
879 Box
::new(ParseError
::UnexpectedEof
),
884 parse_number("123a", 0),
885 Error(ParseError
::Number(
886 Box
::new(ParseError
::Unexpected('a'
, 3)),
891 parse_number("66.6+", 0),
892 Error(ParseError
::Number(
893 Box
::new(ParseError
::Unexpected('
+'
, 4)),
900 fn test_parse_ident() {
902 parse_symbol("+", 0),
903 Done("", Sexp
::Sym("+".into
(), (0, 1)))
906 parse_symbol(" nil?", 0),
907 Done("", Sexp
::Sym("nil?".into
(), (1, 5)))
910 parse_symbol(" ->socket", 0),
911 Done("", Sexp
::Sym("->socket".into
(), (1, 9)))
914 parse_symbol("fib(", 0),
915 Done("(", Sexp
::Sym("fib".into
(), (0, 3)))
918 parse_symbol("foo2", 0),
919 Done("", Sexp
::Sym("foo2".into
(), (0, 4)))
922 // We reserve #foo for the implementation to do as it wishes
924 parse_symbol("#hi", 0),
925 Error(ParseError
::Symbol(
926 Box
::new(ParseError
::Unexpected('
#', 0)),
930 // We reserve :foo for keywords
932 parse_symbol(":hi", 0),
933 Error(ParseError
::Symbol(
934 Box
::new(ParseError
::Unexpected('
:'
, 0)),
941 Error(ParseError
::Symbol(
942 Box
::new(ParseError
::UnexpectedEof
),
947 parse_symbol("0", 0),
948 Error(ParseError
::Symbol(
949 Box
::new(ParseError
::Unexpected('
0'
, 0)),
954 parse_symbol("()", 0),
955 Error(ParseError
::Symbol(
956 Box
::new(ParseError
::Unexpected('
('
, 0)),
963 fn test_parse_string() {
965 parse_string(r
#""""#, 0),
966 Done("", Sexp
::Str("".into
(), (0, 2)))
969 parse_string(r
#""hello""#, 0),
970 Done("", Sexp
::Str("hello".into
(), (0, 7)))
974 r
#" "this is a nice string
975 with
0123 things
in it
""#,
981 "this is a nice string\nwith 0123 things in it".into
(),
989 Error(ParseError
::String(
990 Box
::new(ParseError
::UnexpectedEof
),
995 parse_string(r
#""hi"#, 0),
996 Error(ParseError::String(
997 Box::new(ParseError::UnexpectedEof),
1004 fn test_parse_char() {
1006 parse_character(r#"#\""#, 0),
1007 Done("", Sexp
::Char('
"', (0, 3)))
1010 parse_character(r#"#\ "#, 0),
1011 Done("", Sexp::Char(' ', (0, 3)))
1014 parse_character(r#" #\\"#, 0),
1015 Done("", Sexp::Char('\\', (2, 5)))
1019 parse_character("", 0),
1020 Error(ParseError::Char(
1021 Box::new(ParseError::UnexpectedEof),
1026 parse_character("#", 0),
1027 Error(ParseError::Char(
1028 Box::new(ParseError::UnexpectedEof),
1033 parse_character("#\\", 0),
1034 Error(ParseError::Char(
1035 Box::new(ParseError::UnexpectedEof),
1040 parse_character("a", 0),
1041 Error(ParseError::Char(
1042 Box::new(ParseError::Unexpected('a', 0)),