]>
Witch of Git - ess/blob - src/parser.rs
2 use span
::{Span
, ByteSpan
};
// Parsing Types ///////////////////////////////////////////////////////////////
/// Outcome of running one of the parsers in this module.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseResult<'a, T, E> {
    /// Parsing succeeded: the unconsumed remainder of the input, then the
    /// parsed value.
    Done(&'a str, T),
    /// Parsing failed with the given error.
    Error(E),
}
13 use self::ParseResult
::*;
15 /// Indicates how parsing failed.
16 #[derive(Debug, PartialEq, Eq, Clone)]
17 pub enum ParseError
<Loc
=ByteSpan
> where Loc
: Span
{
19 List(Box
<ParseError
>, Loc
),
20 Sexp(Box
<ParseError
>, Loc
),
21 Char(Box
<ParseError
>, Loc
),
22 String(Box
<ParseError
>, Loc
),
23 Symbol(Box
<ParseError
>, Loc
),
24 Number(Box
<ParseError
>, Loc
),
25 Unexpected(char, Loc
::Begin
),
// Parsing Utilities ///////////////////////////////////////////////////////////
/// Classifies values that terminate the token currently being read.
trait IsDelimeter {
    /// Returns `true` if `self` ends a token.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter if it is whitespace or one of
    /// `;`, `(`, `)`, `[`, `]`, `{`, `}`, `"`, `'`, `` ` `` or `,`.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
/// Skips leading whitespace in `$input`.
///
/// Evaluates to `(rest, loc)`, where `rest` starts at the first
/// non-whitespace character and `loc` is `$start_loc` advanced by the number
/// of bytes skipped.
///
/// If `$input` contains no non-whitespace character, this *returns from the
/// enclosing function* with `Error($ErrorFn(UnexpectedEof, span))`, where the
/// span is the empty range at the end of the input, offset by `$start_loc`.
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        if let Some(pos) = $input.find(|c: char| !c.is_whitespace()) {
            (&$input[pos..], $start_loc + pos)
        } else {
            return Error($ErrorFn(
                Box::new(ParseError::UnexpectedEof),
                ($input.len(), $input.len()).offset($start_loc)));
        }
    }
}
// Top Level Parsers ///////////////////////////////////////////////////////////
62 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
63 match parse_expression(input, 0) {
64 Done(rest, result) => Ok((result, rest)),
65 Error(err) => Err(err),
69 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
70 let mut start_loc = 0;
71 let mut results = Vec::new();
73 match parse_expression(input, start_loc) {
74 Done(rest, result) => {
76 start_loc = result.get_loc().1;
78 if rest.trim() == "" {
79 return (results, None);
83 return (results, Some(err));
// Core Parsers ////////////////////////////////////////////////////////////////
92 pub fn parse_expression(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
93 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
95 match input.chars().next() {
96 Some('0'...'9') => parse_number(input, start_loc),
97 Some('(') => parse_list(input, start_loc),
98 Some('#') => parse_character(input, start_loc),
99 Some('"'
) => parse_string(input
, start_loc
),
100 Some(_
) => parse_symbol(input
, start_loc
),
101 None
=> unreachable
!(),
105 pub fn parse_list(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
106 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::List
);
108 match input
.chars().nth(0) {
111 return Error(ParseError
::List(
112 Box
::new(ParseError
::Unexpected(c
, 0)),
113 (0, 0).offset(start_loc
))),
114 None
=> unreachable
!(),
117 let mut input
= &input
[1..];
118 let mut loc
= start_loc
+ 1;
119 let mut members
= Vec
::new();
122 let (new_input
, new_loc
) = consume_whitespace
!(input
, loc
, ParseError
::List
);
127 match input
.chars().nth(0) {
129 return Done(&input
[1..],
130 Sexp
::List(members
, (start_loc
, loc
+1))),
132 None
=> unreachable
!(),
135 match parse_expression(input
, loc
) {
136 Done(new_input
, member
) => {
137 loc
= member
.get_loc().1;
138 members
.push(member
);
142 return Error(ParseError
::List(
149 pub fn parse_number(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
150 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Number
);
152 match input
.chars().next() {
153 Some(c
) if !c
.is
_d
ig
it
(10) => {
154 return Error(ParseError
::Number(
155 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
156 (0, c
.len_utf8()).offset(start_loc
)));
158 None
=> return Error(ParseError
::Number(
159 Box
::new(ParseError
::UnexpectedEof
),
160 (0, 0).offset(start_loc
))),
167 // Before the decimal point
168 for (i
, c
) in input
.char_indices() {
174 if c
.is
_del
im
iter
() {
175 return Done(&input
[i
..],
176 Sexp
::Int(input
[..i].parse().expect("Already matched digits"),
177 (0, i
).offset(start_loc
)));
180 if !c
.is
_d
ig
it
(base
) {
181 return Error(ParseError
::Number(
182 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
)),
183 (i
, i
).offset(start_loc
)));
186 end
= i
+ c
.len_utf8();
189 if input
[end
..].is
_empty
() {
190 return Done(&input
[end
..],
191 Sexp
::Int(input
.parse().expect("Already matched digits"),
192 (0, end
).offset(start_loc
)));
195 // After the decimal point
196 for (i
, c
) in input
[end
..].char_indices() {
197 if c
.is
_del
im
iter
() {
198 return Done(&input
[i
+end
..],
199 Sexp
::Float(input
[..end
+i
].parse().expect("Already matched digits.digits"),
200 (0, end
+i
).offset(start_loc
)));
203 if !c
.is
_d
ig
it
(base
) {
204 return Error(ParseError
::Number(
205 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
+ end
)),
206 (i
+end
, i
+end
).offset(start_loc
)));
210 Done(&input
[input
.len()..],
211 Sexp
::Float(input
.parse().expect("Already matched digits.digits"),
212 (0, input
.len()).offset(start_loc
)))
215 pub fn parse_symbol(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
216 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Symbol
);
218 match input
.chars().next() {
219 Some(c@'
#') | Some(c@':') | Some(c@'0'...'9') =>
220 return Error(ParseError
::Symbol(
221 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
222 (0, 0).offset(start_loc
))),
223 Some(c
) if c
.is
_del
im
iter
() =>
224 return Error(ParseError
::Symbol(
225 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
226 (0, 0).offset(start_loc
))),
228 None
=> unreachable
!(),
231 for (i
, c
) in input
.char_indices() {
232 if c
.is
_del
im
iter
() {
233 return Done(&input
[i
..],
234 Sexp
::Sym(input
[..i].into
(), (0, i
).offset(start_loc
)));
238 Done(&input
[input
.len()..],
239 Sexp
::Sym(input
.into
(), (0, input
.len()).offset(start_loc
)))
242 pub fn parse_string(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
243 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::String
);
245 match input
.chars().next() {
248 return Error(ParseError::String(
249 Box::new(ParseError::Unexpected(c, start_loc)),
250 (0, 0).offset(start_loc))),
251 None => unreachable!(),
254 for (i, c) in input[1..].char_indices() {
256 return Done(&input
[2+i
..],
257 Sexp
::Str(input
[1..i+1].into
(), (0, i
+2).offset(start_loc
)));
261 Error(ParseError
::String(
262 Box
::new(ParseError
::UnexpectedEof
),
263 (0, input
.len()).offset(start_loc
)))
266 pub fn parse_character(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
267 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Char
);
269 match input
.chars().nth(0) {
272 return Error(ParseError
::Char(
273 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
274 (0, 0).offset(start_loc
))),
276 return Error(ParseError
::Char(
277 Box
::new(ParseError
::UnexpectedEof
),
278 (0, 0).offset(start_loc
))),
281 match input
.chars().nth(1) {
284 return Error(ParseError
::Char(
285 Box
::new(ParseError
::Unexpected(c
, start_loc
+ 1)),
286 (1, 1).offset(start_loc
))),
288 return Error(ParseError
::Char(
289 Box
::new(ParseError
::UnexpectedEof
),
290 (1, 1).offset(start_loc
)))
293 match input
.chars().nth(2) {
295 Done(&input
[3..], Sexp
::Char(c
, (0, 3).offset(start_loc
))),
297 Error(ParseError
::Char(
298 Box
::new(ParseError
::UnexpectedEof
),
299 (2, 2).offset(start_loc
)))
// Tests ///////////////////////////////////////////////////////////////////////
311 use parser
::ParseResult
::*;
315 assert_eq
!(parse("1 2 3"), (vec
![
316 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3)), Sexp
::Int(3, (4, 5))
318 assert_eq
!(parse("1 2 )"), (vec
![
319 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3))
320 ], Some(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
)'
, 4)), (4, 4)))));
/// `parse_one` returns the first expression and leaves the rest of the input
/// untouched.
#[test]
fn test_parse_one() {
    assert_eq!(parse_one("1 2"),
               Ok((Sexp::Int(1, (0, 1)), " 2")));
}
/// `parse_expression` dispatches to the right parser for each kind of form.
#[test]
fn test_parse_expression() {
    assert_eq!(parse_expression(" 1", 0),
               Done("", Sexp::Int(1, (1, 2))));
    assert_eq!(parse_expression("2.2", 0),
               Done("", Sexp::Float(2.2, (0, 3))));
    assert_eq!(parse_expression(" a", 0),
               Done("", Sexp::Sym("a".into(), (1, 2))));
    assert_eq!(parse_expression("#\\c", 0),
               Done("", Sexp::Char('c', (0, 3))));
    assert_eq!(parse_expression(r#""hi""#, 0),
               Done("", Sexp::Str("hi".into(), (0, 4))));
    assert_eq!(parse_expression("()", 0),
               Done("", Sexp::List(vec![], (0, 2))));
    // NOTE(review): the closing line of this assertion was absent from the
    // extracted listing; the list span (0, 9) follows from `parse_list`
    // ending the span one past the closing parenthesis.
    assert_eq!(parse_expression("( 1 2 3 )", 0),
               Done("", Sexp::List(vec![
                   Sexp::Int(1, (2, 3)),
                   Sexp::Int(2, (4, 5)),
                   Sexp::Int(3, (6, 7)),
               ], (0, 9))));

    assert_eq!(parse_expression("", 0),
               Error(ParseError::Sexp(Box::new(ParseError::UnexpectedEof), (0, 0))));
}
355 fn test_parse_list() {
356 assert_eq
!(parse_list("()", 0),
357 Done("", Sexp
::List(vec
![], (0, 2))));
358 assert_eq
!(parse_list("(1)", 0),
359 Done("", Sexp
::List(vec
![Sexp
::Int(1, (1, 2))], (0, 3))));
360 assert_eq
!(parse_list(" ( 1 2 3 a )", 0), Done("", Sexp
::List(vec
![
361 Sexp
::Int(1, (4, 5)),
362 Sexp
::Int(2, (9, 10)),
363 Sexp
::Int(3, (12, 13)),
364 Sexp
::Sym("a".into
(), (14, 15)),
/// `parse_number` handles integers, decimals, surrounding whitespace,
/// trailing delimiters and malformed input.
#[test]
fn test_parse_number() {
    assert_eq!(parse_number("1", 0),
               Done("", Sexp::Int(1, (0, 1))));
    assert_eq!(parse_number(" 13", 0),
               Done("", Sexp::Int(13, (1, 3))));
    assert_eq!(parse_number("1.2", 0),
               Done("", Sexp::Float(1.2, (0, 3))));
    assert_eq!(parse_number("\u{3000}4.2", 0),
               Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
    // Two leading spaces, so the number occupies bytes 2..4.
    assert_eq!(parse_number("  42 ", 0),
               Done(" ", Sexp::Int(42, (2, 4))));
    assert_eq!(parse_number(" 4.2 ", 0),
               Done(" ", Sexp::Float(4.2, (1, 4))));
    assert_eq!(parse_number("1()", 0),
               Done("()", Sexp::Int(1, (0, 1))));
    assert_eq!(parse_number("3.6()", 0),
               Done("()", Sexp::Float(3.6, (0, 3))));

    assert_eq!(parse_number("", 0),
               Error(ParseError::Number(Box::new(ParseError::UnexpectedEof), (0, 0))));
    assert_eq!(parse_number("123a", 0),
               Error(ParseError::Number(Box::new(ParseError::Unexpected('a', 3)), (3, 3))));
    assert_eq!(parse_number("66.6+", 0),
               Error(ParseError::Number(Box::new(ParseError::Unexpected('+', 4)), (4, 4))));
}
/// `parse_symbol` accepts ordinary identifiers and rejects reserved or
/// delimiter first characters.
#[test]
fn test_parse_ident() {
    assert_eq!(parse_symbol("+", 0),
               Done("", Sexp::Sym("+".into(), (0, 1))));
    assert_eq!(parse_symbol(" nil?", 0),
               Done("", Sexp::Sym("nil?".into(), (1, 5))));
    assert_eq!(parse_symbol(" ->socket", 0),
               Done("", Sexp::Sym("->socket".into(), (1, 9))));
    assert_eq!(parse_symbol("fib(", 0),
               Done("(", Sexp::Sym("fib".into(), (0, 3))));
    assert_eq!(parse_symbol("foo2", 0),
               Done("", Sexp::Sym("foo2".into(), (0, 4))));

    // We reserve #foo for the implementation to do as it wishes
    assert_eq!(parse_symbol("#hi", 0),
               Error(ParseError::Symbol(Box::new(ParseError::Unexpected('#', 0)), (0, 0))));
    // We reserve :foo for keywords
    assert_eq!(parse_symbol(":hi", 0),
               Error(ParseError::Symbol(Box::new(ParseError::Unexpected(':', 0)), (0, 0))));

    assert_eq!(parse_symbol("", 0),
               Error(ParseError::Symbol(Box::new(ParseError::UnexpectedEof), (0, 0))));
    assert_eq!(parse_symbol("0", 0),
               Error(ParseError::Symbol(Box::new(ParseError::Unexpected('0', 0)), (0, 0))));
    assert_eq!(parse_symbol("()", 0),
               Error(ParseError::Symbol(Box::new(ParseError::Unexpected('(', 0)), (0, 0))));
}
/// `parse_string` handles empty, single-line and multi-line strings, and
/// reports missing input or a missing closing quote.
#[test]
fn test_parse_string() {
    assert_eq!(parse_string(r#""""#, 0),
               Done("", Sexp::Str("".into(), (0, 2))));
    assert_eq!(parse_string(r#""hello""#, 0),
               Done("", Sexp::Str("hello".into(), (0, 7))));
    // Two leading spaces and an embedded newline: the literal spans bytes 2..48.
    assert_eq!(parse_string("  \"this is a nice string\nwith 0123 things in it\"", 0),
               Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into(), (2, 48))));

    assert_eq!(parse_string("", 0),
               Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 0))));
    assert_eq!(parse_string(r#""hi"#, 0),
               Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
}
/// `parse_character` accepts `#\c` literals and reports truncated or
/// malformed ones.
#[test]
fn test_parse_char() {
    assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
    assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
    // Two leading spaces, so the literal occupies bytes 2..5.
    assert_eq!(parse_character(r#"  #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));

    assert_eq!(parse_character("", 0),
               Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
    assert_eq!(parse_character("#", 0),
               Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
    assert_eq!(parse_character("#\\", 0),
               Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
    assert_eq!(parse_character("a", 0),
               Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));
}