]>
Witch of Git - ess/blob - src/parser.rs
//! Functions to parse s-expressions and expression atoms.
//!
//! This module contains the core parsing machinery.
//!
//! * If you're interested in getting a parsed s-expression that you can use,
//!   then [`parse`] and [`parse_one`] are your best bet.
//! * If you want to write your own parsers that contain s-expressions,
//!   [`ParseResult`] and [`parse_expression`] will be the most useful to you.
//!
//! [`parse`]: fn.parse.html
//! [`parse_one`]: fn.parse_one.html
//! [`ParseResult`]: enum.ParseResult.html
//! [`parse_expression`]: fn.parse_expression.html
16 use span
::{ByteSpan
, Span
};
18 // Parsing Types ///////////////////////////////////////////////////////////////
/// Represents what to do next in partially completed parsing.
///
/// `ParseResult` is returned from all intermediate parsers. If you just want to
/// get back parsed s-expressions, you won't need to worry about this type since
/// the top level parsers just return a `Result`.
///
/// If the parser failed to produce a result, it will return `Error`, and if it
/// succeeded we'll get the `Done` variant containing the value produced and the
/// rest of the text to work on.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseResult<'a, T, E> {
    /// The parser succeeded, this contains first the un-consumed portion of the
    /// input then the result produced by parsing.
    Done(&'a str, T),
    /// The parser failed, the `E` represents the reason for the failure.
    Error(E),
}
38 /// Indicates how parsing failed.
40 /// Most `ParseError` variants contain a `Box<ParseError>` that represents the
41 /// cause of that error. Using this, `ParseError` variants can be chained to
42 /// produce a more complete picture of what exactly went wrong during parsing.
43 #[derive(Debug, PartialEq, Eq, Clone)]
44 pub enum ParseError
<Loc
= ByteSpan
>
48 /// Parsing reached the end of input where not expecting to, usually this
49 /// will be contained inside another `ParseError` like `String(box
50 /// UnexpectedEof, ...)` which indicates that the closing quote was never
53 /// Some problem occurred while parsing a list, along with the cause of that
55 List(Box
<ParseError
>, Loc
),
56 /// Some problem occurred while parsing an s-expression. This will only be
57 /// generated if EOF is reached unexpectedly at the beginning of
58 /// `parse_expression`, so it should probably be removed.
59 Sexp(Box
<ParseError
>, Loc
),
60 /// Some problem occurred while parsing a character literal, along with the
61 /// cause of the error.
62 Char(Box
<ParseError
>, Loc
),
63 /// Some problem occurred while parsing a string literal, along with the
64 /// cause of the error.
65 String(Box
<ParseError
>, Loc
),
66 /// Some problem occurred while parsing a symbol, along with the cause of
68 Symbol(Box
<ParseError
>, Loc
),
69 /// Some problem occurred while parsing a number literal, along with the
70 /// cause of the error.
71 Number(Box
<ParseError
>, Loc
),
72 /// An unexpected character was found. This will usually be the root cause
73 /// in some chain of `ParseError`s.
74 Unexpected(char, Loc
::Begin
),
76 use self::ParseResult
::*;
78 // Parsing Utilities ///////////////////////////////////////////////////////////
81 fn is_delimiter(&self) -> bool
;
84 impl IsDelimeter
for char {
85 fn is_delimiter(&self) -> bool
{
// Skips leading whitespace in `$input`.
//
// Evaluates to `(remaining_input, new_start_loc)`, where `new_start_loc` is
// `$start_loc` advanced by the number of bytes skipped. If `$input` contains
// nothing but whitespace (or is empty), this `return`s from the *enclosing
// function* with `Error($ErrorFn(UnexpectedEof, span))`, where the span is the
// empty span at the end of the input. `$ErrorFn` is the `ParseError` variant
// constructor to wrap the EOF error in (e.g. `ParseError::List`).
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        if let Some(pos) = $input.find(|c: char| !c.is_whitespace()) {
            (&$input[pos..], $start_loc + pos)
        } else {
            return Error($ErrorFn(
                Box::new(ParseError::UnexpectedEof),
                ($input.len(), $input.len()).offset($start_loc),
            ));
        }
    };
}
114 // Top Level Parsers ///////////////////////////////////////////////////////////
116 /// Parse a sequence of s-expressions.
118 /// This function returns `(Vec<Sexp>, Option<ParseError>)` so that it can
119 /// return partial results, for when some component parses successfully and a
120 /// later part fails.
124 /// If the text contains an invalid s-expression (imbalanced parenthesis,
125 /// quotes, invalid numbers like 123q, etc.) then the parser will stop and
126 /// return an error. Every s-expression before that point that successfully
127 /// parsed will still be returned.
131 /// We can get useful partial results
134 /// # use ess::parser::parse;
135 /// let (exprs, err) = parse("1 2 3 ( 4");
136 /// assert_eq!(exprs.len(), 3);
137 /// assert!(err.is_some());
139 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
140 let mut start_loc = 0;
141 let mut results = Vec::new();
143 match parse_expression(input, start_loc) {
144 Done(rest, result) => {
146 start_loc = result.get_loc().1;
147 results.push(result);
148 if rest.trim() == "" {
149 return (results, None);
153 return (results, Some(err));
159 /// Parses a single s-expression, ignoring any trailing text.
161 /// This function returns a pair of the parsed s-expression and the tail of the text.
165 /// If the text begins with an invalid s-expression (imbalanced parenthesis,
166 /// quotes, invalid numbers like 123q, etc.) then the parser will return an
167 /// error. Any text after the first s-expression doesn't affect the parsing.
172 /// # use ess::parser::parse_one;
173 /// let (expr, rest) = parse_one("1 (").unwrap();
174 /// assert_eq!(rest, " (");
176 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
177 match parse_expression(input, 0) {
178 Done(rest, result) => Ok((result, rest)),
179 Error(err) => Err(err),
183 // Core Parsers ////////////////////////////////////////////////////////////////
185 // TODO: All of these parsers deserve docs, but since they're somewhat internal
186 // parsers, it's less critical than the rest of the API.
188 #[allow(missing_docs)]
189 pub fn parse_expression(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
190 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
192 match input.chars().next() {
193 Some('0'...'9') => parse_number(input, start_loc),
194 Some('(') | Some('{') | Some('[') => parse_list(input, start_loc),
195 Some('#') => parse_character(input, start_loc),
196 Some('"'
) => parse_string(input
, start_loc
),
197 Some('
\''
) => match parse_expression(&input
[1..], start_loc
+ 1) {
198 Done(rest
, result
) => {
199 let span
= *result
.get_loc();
200 let quote_span
= (0, 1).offset(start_loc
);
204 vec
![Sexp
::Sym("quote".into
(), quote_span
), result
],
205 quote_span
.un
ion
(&span
),
211 Some('`'
) => match parse_expression(&input
[1..], start_loc
+ 1) {
212 Done(rest
, result
) => {
213 let span
= *result
.get_loc();
214 let quote_span
= (0, 1).offset(start_loc
);
218 vec
![Sexp
::Sym("quasiquote".into
(), quote_span
), result
],
219 quote_span
.un
ion
(&span
),
226 if input
[1..].chars().next() == Some('@'
) {
227 match parse_expression(&input
[2..], start_loc
+ 2) {
228 Done(rest
, result
) => {
229 let span
= *result
.get_loc();
230 let quote_span
= (0, 2).offset(start_loc
);
234 vec
![Sexp
::Sym("unquote-splicing".into
(), quote_span
), result
],
235 quote_span
.un
ion
(&span
),
242 match parse_expression(&input
[1..], start_loc
+ 1) {
243 Done(rest
, result
) => {
244 let span
= *result
.get_loc();
245 let quote_span
= (0, 1).offset(start_loc
);
249 vec
![Sexp
::Sym("unquote".into
(), quote_span
), result
],
250 quote_span
.un
ion
(&span
),
258 Some(_
) => parse_symbol(input
, start_loc
),
259 None
=> unreachable
!(),
263 #[allow(missing_docs)]
264 pub fn parse_list(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
265 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::List
);
267 let first_char
= match input
.chars().nth(0) {
268 Some(c @ '
('
) | Some(c @ '
{'
) | Some(c @ '
['
) => c
,
270 return Error(ParseError
::List(
271 Box
::new(ParseError
::Unexpected(c
, 0)),
272 (0, 0).offset(start_loc
),
275 None
=> unreachable
!(),
278 let mut input
= &input
[1..];
279 let mut loc
= start_loc
+ 1;
280 let mut members
= Vec
::new();
283 let (new_input
, new_loc
) = consume_whitespace
!(input
, loc
, ParseError
::List
);
288 match input
.chars().nth(0) {
289 Some(c @ '
)'
) | Some(c @ '
}'
) | Some(c @ '
]'
) => match (first_char
, c
) {
290 ('
('
, '
)'
) | ('
{'
, '
}'
) | ('
['
, '
]'
) => {
291 return Done(&input
[1..], Sexp
::List(members
, (start_loc
, loc
+ 1)))
294 return Error(ParseError
::List(
295 Box
::new(ParseError
::Unexpected(c
, loc
)),
301 None
=> unreachable
!(),
304 match parse_expression(input
, loc
) {
305 Done(new_input
, member
) => {
306 loc
= member
.get_loc().1;
307 members
.push(member
);
310 Error(err
) => return Error(ParseError
::List(Box
::new(err
), (0, 0).offset(loc
))),
315 #[allow(missing_docs)]
316 pub fn parse_number(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
317 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Number
);
319 match input
.chars().next() {
320 Some(c
) if !c
.is
_d
ig
it
(10) => {
321 return Error(ParseError
::Number(
322 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
323 (0, c
.len_utf8()).offset(start_loc
),
327 return Error(ParseError
::Number(
328 Box
::new(ParseError
::UnexpectedEof
),
329 (0, 0).offset(start_loc
),
338 // Before the decimal point
339 for (i
, c
) in input
.char_indices() {
345 if c
.is
_del
im
iter
() {
349 input
[..i].parse().expect("Already matched digits"),
350 (0, i
).offset(start_loc
),
355 if !c
.is
_d
ig
it
(base
) {
356 return Error(ParseError
::Number(
357 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
)),
358 (i
, i
).offset(start_loc
),
362 end
= i
+ c
.len_utf8();
365 if input
[end
..].is
_empty
() {
369 input
.parse().expect("Already matched digits"),
370 (0, end
).offset(start_loc
),
375 // After the decimal point
376 for (i
, c
) in input
[end
..].char_indices() {
377 if c
.is
_del
im
iter
() {
383 .expect("Already matched digits.digits"),
384 (0, end
+ i
).offset(start_loc
),
389 if !c
.is
_d
ig
it
(base
) {
390 return Error(ParseError
::Number(
391 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
+ end
)),
392 (i
+ end
, i
+ end
).offset(start_loc
),
398 &input
[input
.len()..],
400 input
.parse().expect("Already matched digits.digits"),
401 (0, input
.len()).offset(start_loc
),
406 #[allow(missing_docs)]
407 pub fn parse_symbol(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
408 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Symbol
);
410 match input
.chars().next() {
411 Some(c @ '
#') | Some(c @ ':') | Some(c @ '0'...'9') => {
412 return Error(ParseError
::Symbol(
413 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
414 (0, 0).offset(start_loc
),
417 Some(c
) if c
.is
_del
im
iter
() => {
418 return Error(ParseError
::Symbol(
419 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
420 (0, 0).offset(start_loc
),
424 None
=> unreachable
!(),
427 for (i
, c
) in input
.char_indices() {
428 if c
.is
_del
im
iter
() {
431 Sexp
::Sym(input
[..i].into
(), (0, i
).offset(start_loc
)),
437 &input
[input
.len()..],
438 Sexp
::Sym(input
.into
(), (0, input
.len()).offset(start_loc
)),
442 #[allow(missing_docs)]
443 pub fn parse_string(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
444 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::String
);
446 match input
.chars().next() {
449 return Error(ParseError::String(
450 Box::new(ParseError::Unexpected(c, start_loc)),
451 (0, 0).offset(start_loc),
454 None => unreachable!(),
457 for (i, c) in input[1..].char_indices() {
461 Sexp
::Str(input
[1..i + 1].into
(), (0, i
+ 2).offset(start_loc
)),
466 Error(ParseError
::String(
467 Box
::new(ParseError
::UnexpectedEof
),
468 (0, input
.len()).offset(start_loc
),
472 #[allow(missing_docs)]
473 pub fn parse_character(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
474 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Char
);
476 match input
.chars().nth(0) {
479 return Error(ParseError
::Char(
480 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
481 (0, 0).offset(start_loc
),
485 return Error(ParseError
::Char(
486 Box
::new(ParseError
::UnexpectedEof
),
487 (0, 0).offset(start_loc
),
492 match input
.chars().nth(1) {
495 return Error(ParseError
::Char(
496 Box
::new(ParseError
::Unexpected(c
, start_loc
+ 1)),
497 (1, 1).offset(start_loc
),
501 return Error(ParseError
::Char(
502 Box
::new(ParseError
::UnexpectedEof
),
503 (1, 1).offset(start_loc
),
508 match input
.chars().nth(2) {
509 Some(c
) => Done(&input
[3..], Sexp
::Char(c
, (0, 3).offset(start_loc
))),
510 None
=> Error(ParseError
::Char(
511 Box
::new(ParseError
::UnexpectedEof
),
512 (2, 2).offset(start_loc
),
517 // Tests ///////////////////////////////////////////////////////////////////////
521 use parser
::ParseResult
::*;
532 Sexp
::Int(1, (0, 1)),
533 Sexp
::Int(2, (2, 3)),
542 vec
![Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3))],
543 Some(ParseError
::Symbol(
544 Box
::new(ParseError
::Unexpected('
)'
, 4)),
552 fn test_parse_one() {
553 assert_eq
!(parse_one("1 2"), Ok((Sexp
::Int(1, (0, 1)), " 2")));
557 fn test_parse_expression() {
558 assert_eq
!(parse_expression(" 1", 0), Done("", Sexp
::Int(1, (1, 2))));
560 parse_expression("2.2", 0),
561 Done("", Sexp
::Float(2.2, (0, 3)))
564 parse_expression(" a", 0),
565 Done("", Sexp
::Sym("a".into
(), (1, 2)))
568 parse_expression("#\\c", 0),
569 Done("", Sexp
::Char('c'
, (0, 3)))
572 parse_expression(r
#""hi""#, 0),
573 Done("", Sexp
::Str("hi".into
(), (0, 4)))
576 parse_expression("()", 0),
577 Done("", Sexp
::List(vec
![], (0, 2)))
580 parse_expression("( 1 2 3 )", 0),
585 Sexp
::Int(1, (2, 3)),
586 Sexp
::Int(2, (4, 5)),
587 Sexp
::Int(3, (6, 7)),
595 parse_expression("", 0),
596 Error(ParseError
::Sexp(
597 Box
::new(ParseError
::UnexpectedEof
),
604 fn test_parse_expr_quote() {
606 parse_expression("'a", 0),
611 Sexp
::Sym("quote".into
(), (0, 1)),
612 Sexp
::Sym("a".into
(), (1, 2)),
619 parse_expression("'1", 0),
623 vec
![Sexp
::Sym("quote".into
(), (0, 1)), Sexp
::Int(1, (1, 2)),],
629 parse_expression("' (1 2 3)", 0),
634 Sexp
::Sym("quote".into
(), (0, 1)),
637 Sexp
::Int(1, (3, 4)),
638 Sexp
::Int(2, (5, 6)),
639 Sexp
::Int(3, (7, 8)),
650 parse_expression("'", 0),
651 Error(ParseError
::Sexp(
652 Box
::new(ParseError
::UnexpectedEof
),
657 parse_expression("`'", 0),
658 Error(ParseError
::Sexp(
659 Box
::new(ParseError
::UnexpectedEof
),
666 fn test_parse_expr_quasiquote() {
668 parse_expression("`a", 0),
673 Sexp
::Sym("quasiquote".into
(), (0, 1)),
674 Sexp
::Sym("a".into
(), (1, 2)),
681 parse_expression("`1", 0),
685 vec
![Sexp
::Sym("quasiquote".into
(), (0, 1)), Sexp
::Int(1, (1, 2)),],
691 parse_expression("` (1 2 3)", 0),
696 Sexp
::Sym("quasiquote".into
(), (0, 1)),
699 Sexp
::Int(1, (3, 4)),
700 Sexp
::Int(2, (5, 6)),
701 Sexp
::Int(3, (7, 8)),
711 parse_expression("`'a", 0),
716 Sexp
::Sym("quasiquote".into
(), (0, 1)),
719 Sexp
::Sym("quote".into
(), (1, 2)),
720 Sexp
::Sym("a".into
(), (2, 3)),
731 parse_expression("`", 0),
732 Error(ParseError
::Sexp(
733 Box
::new(ParseError
::UnexpectedEof
),
740 fn test_parse_expr_unquote() {
742 parse_expression(",a", 0),
747 Sexp
::Sym("unquote".into
(), (0, 1)),
748 Sexp
::Sym("a".into
(), (1, 2)),
755 parse_expression(",1", 0),
759 vec
![Sexp
::Sym("unquote".into
(), (0, 1)), Sexp
::Int(1, (1, 2)),],
765 parse_expression(", (1 2 3)", 0),
770 Sexp
::Sym("unquote".into
(), (0, 1)),
773 Sexp
::Int(1, (3, 4)),
774 Sexp
::Int(2, (5, 6)),
775 Sexp
::Int(3, (7, 8)),
785 parse_expression("`,a", 0),
790 Sexp
::Sym("quasiquote".into
(), (0, 1)),
793 Sexp
::Sym("unquote".into
(), (1, 2)),
794 Sexp
::Sym("a".into
(), (2, 3)),
804 parse_expression("`(,@a)", 0),
809 Sexp
::Sym("quasiquote".into
(), (0, 1)),
813 Sexp
::Sym("unquote-splicing".into
(), (2, 4)),
814 Sexp
::Sym("a".into
(), (4, 5)),
827 parse_expression(",", 0),
828 Error(ParseError
::Sexp(
829 Box
::new(ParseError
::UnexpectedEof
),
834 parse_expression(",@", 0),
835 Error(ParseError
::Sexp(
836 Box
::new(ParseError
::UnexpectedEof
),
843 fn test_parse_list() {
844 assert_eq
!(parse_list("()", 0), Done("", Sexp
::List(vec
![], (0, 2))));
846 parse_list("(1)", 0),
847 Done("", Sexp
::List(vec
![Sexp
::Int(1, (1, 2))], (0, 3)))
850 parse_list(" ( 1 2 3 a )", 0),
855 Sexp
::Int(1, (4, 5)),
856 Sexp
::Int(2, (9, 10)),
857 Sexp
::Int(3, (12, 13)),
858 Sexp
::Sym("a".into
(), (14, 15)),
867 fn test_parse_number() {
868 assert_eq
!(parse_number("1", 0), Done("", Sexp
::Int(1, (0, 1))));
869 assert_eq
!(parse_number(" 13", 0), Done("", Sexp
::Int(13, (1, 3))));
870 assert_eq
!(parse_number("1.2", 0), Done("", Sexp
::Float(1.2, (0, 3))));
872 parse_number("\u{3000}4.2", 0),
873 Done("", Sexp
::Float(4.2, (0, 3).offset('
\u{3000}'
.len_utf8())))
875 assert_eq
!(parse_number(" 42 ", 0), Done(" ", Sexp
::Int(42, (2, 4))));
877 parse_number(" 4.2 ", 0),
878 Done(" ", Sexp
::Float(4.2, (1, 4)))
880 assert_eq
!(parse_number("1()", 0), Done("()", Sexp
::Int(1, (0, 1))));
882 parse_number("3.6()", 0),
883 Done("()", Sexp
::Float(3.6, (0, 3)))
888 Error(ParseError
::Number(
889 Box
::new(ParseError
::UnexpectedEof
),
894 parse_number("123a", 0),
895 Error(ParseError
::Number(
896 Box
::new(ParseError
::Unexpected('a'
, 3)),
901 parse_number("66.6+", 0),
902 Error(ParseError
::Number(
903 Box
::new(ParseError
::Unexpected('
+'
, 4)),
910 fn test_parse_ident() {
912 parse_symbol("+", 0),
913 Done("", Sexp
::Sym("+".into
(), (0, 1)))
916 parse_symbol(" nil?", 0),
917 Done("", Sexp
::Sym("nil?".into
(), (1, 5)))
920 parse_symbol(" ->socket", 0),
921 Done("", Sexp
::Sym("->socket".into
(), (1, 9)))
924 parse_symbol("fib(", 0),
925 Done("(", Sexp
::Sym("fib".into
(), (0, 3)))
928 parse_symbol("foo2", 0),
929 Done("", Sexp
::Sym("foo2".into
(), (0, 4)))
932 // We reserve #foo for the implementation to do as it wishes
934 parse_symbol("#hi", 0),
935 Error(ParseError
::Symbol(
936 Box
::new(ParseError
::Unexpected('
#', 0)),
940 // We reserve :foo for keywords
942 parse_symbol(":hi", 0),
943 Error(ParseError
::Symbol(
944 Box
::new(ParseError
::Unexpected('
:'
, 0)),
951 Error(ParseError
::Symbol(
952 Box
::new(ParseError
::UnexpectedEof
),
957 parse_symbol("0", 0),
958 Error(ParseError
::Symbol(
959 Box
::new(ParseError
::Unexpected('
0'
, 0)),
964 parse_symbol("()", 0),
965 Error(ParseError
::Symbol(
966 Box
::new(ParseError
::Unexpected('
('
, 0)),
973 fn test_parse_string() {
975 parse_string(r
#""""#, 0),
976 Done("", Sexp
::Str("".into
(), (0, 2)))
979 parse_string(r
#""hello""#, 0),
980 Done("", Sexp
::Str("hello".into
(), (0, 7)))
984 r
#" "this is a nice string
985 with
0123 things
in it
""#,
991 "this is a nice string\nwith 0123 things in it".into
(),
999 Error(ParseError
::String(
1000 Box
::new(ParseError
::UnexpectedEof
),
1005 parse_string(r
#""hi"#, 0),
1006 Error(ParseError::String(
1007 Box::new(ParseError::UnexpectedEof),
1014 fn test_parse_char() {
1016 parse_character(r#"#\""#, 0),
1017 Done("", Sexp
::Char('
"', (0, 3)))
1020 parse_character(r#"#\ "#, 0),
1021 Done("", Sexp::Char(' ', (0, 3)))
1024 parse_character(r#" #\\"#, 0),
1025 Done("", Sexp::Char('\\', (2, 5)))
1029 parse_character("", 0),
1030 Error(ParseError::Char(
1031 Box::new(ParseError::UnexpectedEof),
1036 parse_character("#", 0),
1037 Error(ParseError::Char(
1038 Box::new(ParseError::UnexpectedEof),
1043 parse_character("#\\", 0),
1044 Error(ParseError::Char(
1045 Box::new(ParseError::UnexpectedEof),
1050 parse_character("a", 0),
1051 Error(ParseError::Char(
1052 Box::new(ParseError::Unexpected('a', 0)),