]>
Witch of Git - ess/blob - src/parser.rs
2 use span
::{Span
, ByteSpan
};
5 // Parsing Types ///////////////////////////////////////////////////////////////
/// Represents what to do next in partially completed parsing.
///
/// `ParseResult` is returned from all intermediate parsers. If you just want to
/// get back parsed S-expressions, you won't need to worry about this type since
/// the top level parsers just return a `Result`.
///
/// If the parser failed to produce a result, it will return `Error`, and if it
/// succeeded we'll get the `Done` variant containing the value produced and the
/// rest of the text to work on.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseResult<'a, T, E> {
    /// The parser succeeded, this contains first the un-consumed portion of the
    /// input then the result produced by parsing.
    Done(&'a str, T),
    /// The parser failed, the `E` represents the reason for the failure.
    Error(E),
}
25 /// Indicates how parsing failed.
27 /// Most `ParseError` variants contain a `Box<ParseError>` that represents the
28 /// cause of that error. Using this, `ParseError` variants can be chained to
29 /// produce a more complete picture of what exactly went wrong during parsing.
30 #[derive(Debug, PartialEq, Eq, Clone)]
31 pub enum ParseError
<Loc
=ByteSpan
> where Loc
: Span
{
32 /// Parsing reached the end of input where not expecting to, usually this
33 /// will be contained inside another `ParseError` like `String(box
34 /// UnexpectedEof, ...)` which indicates that the closing quote was never
37 /// Some problem occurred while parsing a list, along with the cause of that
39 List(Box
<ParseError
>, Loc
),
40 /// Some problem occurred while parsing an s-expression. This will only be
41 /// generated if EOF is reached unexpectedly at the beginning of
42 /// `parse_expression`, so it should probably be removed.
43 Sexp(Box
<ParseError
>, Loc
),
44 /// Some problem occurred while parsing a character literal, along with the
45 /// cause of the error.
46 Char(Box
<ParseError
>, Loc
),
47 /// Some problem occurred while parsing a string literal, along with the
48 /// cause of the error.
49 String(Box
<ParseError
>, Loc
),
50 /// Some problem occurred while parsing a symbol, along with the cause of
52 Symbol(Box
<ParseError
>, Loc
),
53 /// Some problem occurred while parsing a number literal, along with the
54 /// cause of the error.
55 Number(Box
<ParseError
>, Loc
),
56 /// An unexpected character was found. This will usually be the root cause
57 /// in some chain of `ParseError`s.
58 Unexpected(char, Loc
::Begin
),
60 use self::ParseResult
::*;
63 // Parsing Utilities ///////////////////////////////////////////////////////////
/// Classifies values that end a token (a number or a symbol) when scanning
/// input.
// NOTE(review): the trait header line was not visible in the extracted source;
// it is assumed to be private — confirm against the repository.
trait IsDelimeter {
    /// Returns `true` if `self` terminates the token currently being read.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter if it is whitespace, the comment marker `;`,
    /// any bracket (`(`, `)`, `[`, `]`, `{`, `}`), a quote character (`"`,
    /// `'`, `` ` ``), or a comma.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
/// Skips the leading whitespace of `$input`.
///
/// Evaluates to a `(rest, loc)` pair where `rest` is `$input` starting at its
/// first non-whitespace character and `loc` is `$start_loc` advanced by the
/// number of bytes skipped.
///
/// If `$input` is empty or contains only whitespace, this *returns from the
/// enclosing function* with `Error($ErrorFn(Box::new(UnexpectedEof), span))`,
/// where `span` is the empty span at the end of `$input`, offset by
/// `$start_loc`. `$ErrorFn` is the `ParseError` variant constructor to wrap
/// the cause in (e.g. `ParseError::List`).
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        if let Some(pos) = $input.find(|c: char| !c.is_whitespace()) {
            (&$input[pos..], $start_loc + pos)
        } else {
            return Error($ErrorFn(
                Box::new(ParseError::UnexpectedEof),
                ($input.len(), $input.len()).offset($start_loc)));
        }
    }
}
93 // Top Level Parsers ///////////////////////////////////////////////////////////
95 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
96 match parse_expression(input, 0) {
97 Done(rest, result) => Ok((result, rest)),
98 Error(err) => Err(err),
102 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
103 let mut start_loc = 0;
104 let mut results = Vec::new();
106 match parse_expression(input, start_loc) {
107 Done(rest, result) => {
109 start_loc = result.get_loc().1;
110 results.push(result);
111 if rest.trim() == "" {
112 return (results, None);
116 return (results, Some(err));
123 // Core Parsers ////////////////////////////////////////////////////////////////
125 pub fn parse_expression(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
126 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
128 match input.chars().next() {
129 Some('0'...'9') => parse_number(input, start_loc),
130 Some('(') => parse_list(input, start_loc),
131 Some('#') => parse_character(input, start_loc),
132 Some('"'
) => parse_string(input
, start_loc
),
133 Some(_
) => parse_symbol(input
, start_loc
),
134 None
=> unreachable
!(),
138 pub fn parse_list(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
139 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::List
);
141 match input
.chars().nth(0) {
144 return Error(ParseError
::List(
145 Box
::new(ParseError
::Unexpected(c
, 0)),
146 (0, 0).offset(start_loc
))),
147 None
=> unreachable
!(),
150 let mut input
= &input
[1..];
151 let mut loc
= start_loc
+ 1;
152 let mut members
= Vec
::new();
155 let (new_input
, new_loc
) = consume_whitespace
!(input
, loc
, ParseError
::List
);
160 match input
.chars().nth(0) {
162 return Done(&input
[1..],
163 Sexp
::List(members
, (start_loc
, loc
+1))),
165 None
=> unreachable
!(),
168 match parse_expression(input
, loc
) {
169 Done(new_input
, member
) => {
170 loc
= member
.get_loc().1;
171 members
.push(member
);
175 return Error(ParseError
::List(
182 pub fn parse_number(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
183 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Number
);
185 match input
.chars().next() {
186 Some(c
) if !c
.is
_d
ig
it
(10) => {
187 return Error(ParseError
::Number(
188 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
189 (0, c
.len_utf8()).offset(start_loc
)));
191 None
=> return Error(ParseError
::Number(
192 Box
::new(ParseError
::UnexpectedEof
),
193 (0, 0).offset(start_loc
))),
200 // Before the decimal point
201 for (i
, c
) in input
.char_indices() {
207 if c
.is
_del
im
iter
() {
208 return Done(&input
[i
..],
209 Sexp
::Int(input
[..i].parse().expect("Already matched digits"),
210 (0, i
).offset(start_loc
)));
213 if !c
.is
_d
ig
it
(base
) {
214 return Error(ParseError
::Number(
215 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
)),
216 (i
, i
).offset(start_loc
)));
219 end
= i
+ c
.len_utf8();
222 if input
[end
..].is
_empty
() {
223 return Done(&input
[end
..],
224 Sexp
::Int(input
.parse().expect("Already matched digits"),
225 (0, end
).offset(start_loc
)));
228 // After the decimal point
229 for (i
, c
) in input
[end
..].char_indices() {
230 if c
.is
_del
im
iter
() {
231 return Done(&input
[i
+end
..],
232 Sexp
::Float(input
[..end
+i
].parse().expect("Already matched digits.digits"),
233 (0, end
+i
).offset(start_loc
)));
236 if !c
.is
_d
ig
it
(base
) {
237 return Error(ParseError
::Number(
238 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
+ end
)),
239 (i
+end
, i
+end
).offset(start_loc
)));
243 Done(&input
[input
.len()..],
244 Sexp
::Float(input
.parse().expect("Already matched digits.digits"),
245 (0, input
.len()).offset(start_loc
)))
248 pub fn parse_symbol(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
249 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Symbol
);
251 match input
.chars().next() {
252 Some(c@'
#') | Some(c@':') | Some(c@'0'...'9') =>
253 return Error(ParseError
::Symbol(
254 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
255 (0, 0).offset(start_loc
))),
256 Some(c
) if c
.is
_del
im
iter
() =>
257 return Error(ParseError
::Symbol(
258 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
259 (0, 0).offset(start_loc
))),
261 None
=> unreachable
!(),
264 for (i
, c
) in input
.char_indices() {
265 if c
.is
_del
im
iter
() {
266 return Done(&input
[i
..],
267 Sexp
::Sym(input
[..i].into
(), (0, i
).offset(start_loc
)));
271 Done(&input
[input
.len()..],
272 Sexp
::Sym(input
.into
(), (0, input
.len()).offset(start_loc
)))
275 pub fn parse_string(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
276 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::String
);
278 match input
.chars().next() {
281 return Error(ParseError::String(
282 Box::new(ParseError::Unexpected(c, start_loc)),
283 (0, 0).offset(start_loc))),
284 None => unreachable!(),
287 for (i, c) in input[1..].char_indices() {
289 return Done(&input
[2+i
..],
290 Sexp
::Str(input
[1..i+1].into
(), (0, i
+2).offset(start_loc
)));
294 Error(ParseError
::String(
295 Box
::new(ParseError
::UnexpectedEof
),
296 (0, input
.len()).offset(start_loc
)))
299 pub fn parse_character(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
300 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Char
);
302 match input
.chars().nth(0) {
305 return Error(ParseError
::Char(
306 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
307 (0, 0).offset(start_loc
))),
309 return Error(ParseError
::Char(
310 Box
::new(ParseError
::UnexpectedEof
),
311 (0, 0).offset(start_loc
))),
314 match input
.chars().nth(1) {
317 return Error(ParseError
::Char(
318 Box
::new(ParseError
::Unexpected(c
, start_loc
+ 1)),
319 (1, 1).offset(start_loc
))),
321 return Error(ParseError
::Char(
322 Box
::new(ParseError
::UnexpectedEof
),
323 (1, 1).offset(start_loc
)))
326 match input
.chars().nth(2) {
328 Done(&input
[3..], Sexp
::Char(c
, (0, 3).offset(start_loc
))),
330 Error(ParseError
::Char(
331 Box
::new(ParseError
::UnexpectedEof
),
332 (2, 2).offset(start_loc
)))
337 // Tests ///////////////////////////////////////////////////////////////////////
344 use parser
::ParseResult
::*;
348 assert_eq
!(parse("1 2 3"), (vec
![
349 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3)), Sexp
::Int(3, (4, 5))
351 assert_eq
!(parse("1 2 )"), (vec
![
352 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3))
353 ], Some(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
)'
, 4)), (4, 4)))));
357 fn test_parse_one() {
358 assert_eq
!(parse_one("1 2"),
359 Ok((Sexp
::Int(1, (0, 1)), " 2")));
363 fn test_parse_expression() {
364 assert_eq
!(parse_expression(" 1", 0),
365 Done("", Sexp
::Int(1, (1, 2))));
366 assert_eq
!(parse_expression("2.2", 0),
367 Done("", Sexp
::Float(2.2, (0, 3))));
368 assert_eq
!(parse_expression(" a", 0),
369 Done("", Sexp
::Sym("a".into
(), (1, 2))));
370 assert_eq
!(parse_expression("#\\c", 0),
371 Done("", Sexp
::Char('c'
, (0, 3))));
372 assert_eq
!(parse_expression(r
#""hi""#, 0),
373 Done("", Sexp
::Str("hi".into
(), (0, 4))));
374 assert_eq
!(parse_expression("()", 0),
375 Done("", Sexp
::List(vec
![], (0, 2))));
376 assert_eq
!(parse_expression("( 1 2 3 )", 0),
377 Done("", Sexp
::List(vec
![
378 Sexp
::Int(1, (2, 3)),
379 Sexp
::Int(2, (4, 5)),
380 Sexp
::Int(3, (6, 7)),
383 assert_eq
!(parse_expression("", 0),
384 Error(ParseError
::Sexp(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
388 fn test_parse_list() {
389 assert_eq
!(parse_list("()", 0),
390 Done("", Sexp
::List(vec
![], (0, 2))));
391 assert_eq
!(parse_list("(1)", 0),
392 Done("", Sexp
::List(vec
![Sexp
::Int(1, (1, 2))], (0, 3))));
393 assert_eq
!(parse_list(" ( 1 2 3 a )", 0), Done("", Sexp
::List(vec
![
394 Sexp
::Int(1, (4, 5)),
395 Sexp
::Int(2, (9, 10)),
396 Sexp
::Int(3, (12, 13)),
397 Sexp
::Sym("a".into
(), (14, 15)),
402 fn test_parse_number() {
403 assert_eq
!(parse_number("1", 0),
404 Done("", Sexp
::Int(1, (0, 1))));
405 assert_eq
!(parse_number(" 13", 0),
406 Done("", Sexp
::Int(13, (1, 3))));
407 assert_eq
!(parse_number("1.2", 0),
408 Done("", Sexp
::Float(1.2, (0, 3))));
409 assert_eq
!(parse_number("\u{3000}4.2", 0),
410 Done("", Sexp
::Float(4.2, (0, 3).offset('
\u{3000}'
.len_utf8()))));
411 assert_eq
!(parse_number(" 42 ", 0),
412 Done(" ", Sexp
::Int(42, (2, 4))));
413 assert_eq
!(parse_number(" 4.2 ", 0),
414 Done(" ", Sexp
::Float(4.2, (1, 4))));
415 assert_eq
!(parse_number("1()", 0),
416 Done("()", Sexp
::Int(1, (0, 1))));
417 assert_eq
!(parse_number("3.6()", 0),
418 Done("()", Sexp
::Float(3.6, (0, 3))));
420 assert_eq
!(parse_number("", 0),
421 Error(ParseError
::Number(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
422 assert_eq
!(parse_number("123a", 0),
423 Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('a'
, 3)), (3, 3))));
424 assert_eq
!(parse_number("66.6+", 0),
425 Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('
+'
, 4)), (4, 4))));
429 fn test_parse_ident() {
430 assert_eq
!(parse_symbol("+", 0),
431 Done("", Sexp
::Sym("+".into
(), (0, 1))));
432 assert_eq
!(parse_symbol(" nil?", 0),
433 Done("", Sexp
::Sym("nil?".into
(), (1, 5))));
434 assert_eq
!(parse_symbol(" ->socket", 0),
435 Done("", Sexp
::Sym("->socket".into
(), (1, 9))));
436 assert_eq
!(parse_symbol("fib(", 0),
437 Done("(", Sexp
::Sym("fib".into
(), (0, 3))));
438 assert_eq
!(parse_symbol("foo2", 0),
439 Done("", Sexp
::Sym("foo2".into
(), (0, 4))));
441 // We reserve #foo for the implementation to do as it wishes
442 assert_eq
!(parse_symbol("#hi", 0),
443 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
#', 0)), (0, 0))));
444 // We reserve :foo for keywords
445 assert_eq
!(parse_symbol(":hi", 0),
446 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
:'
, 0)), (0, 0))));
448 assert_eq
!(parse_symbol("", 0),
449 Error(ParseError
::Symbol(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
450 assert_eq
!(parse_symbol("0", 0),
451 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
0'
, 0)), (0, 0))));
452 assert_eq
!(parse_symbol("()", 0),
453 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
('
, 0)), (0, 0))));
457 fn test_parse_string() {
458 assert_eq
!(parse_string(r
#""""#, 0),
459 Done("", Sexp
::Str("".into
(), (0, 2))));
460 assert_eq
!(parse_string(r
#""hello""#, 0),
461 Done("", Sexp
::Str("hello".into
(), (0, 7))));
462 assert_eq
!(parse_string(r
#" "this is a nice string
463 with
0123 things
in it
""#, 0),
464 Done("", Sexp
::Str("this is a nice string\nwith 0123 things in it".into
(), (2, 48))));
466 assert_eq
!(parse_string("", 0),
467 Error(ParseError
::String(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
468 assert_eq
!(parse_string(r
#""hi"#, 0),
469 Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
473 fn test_parse_char() {
474 assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
475 assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
476 assert_eq!(parse_character(r#" #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));
478 assert_eq!(parse_character("", 0),
479 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
480 assert_eq!(parse_character("#", 0),
481 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
482 assert_eq!(parse_character("#\\", 0),
483 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
484 assert_eq!(parse_character("a", 0),
485 Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));