]> Witch of Git - ess/blob - src/parser.rs
Document ParseError
[ess] / src / parser.rs
1 use sexp::Sexp;
2 use span::{Span, ByteSpan};
3
4 \f
5 // Parsing Types ///////////////////////////////////////////////////////////////
6
/// Outcome of running one intermediate parser over a piece of input.
///
/// Every intermediate parser in this module returns a `ParseResult`. Callers
/// that only want finished S-expressions can ignore this type, because the
/// top level parsers convert it into ordinary `Result`/`Option` values.
///
/// A successful parse yields `Done`, carrying both the text that is still left
/// to parse and the value that was produced. A failed parse yields `Error`
/// with the reason for the failure.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ParseResult<'a, T, E> {
    /// Success: the first field is the part of the input that was *not*
    /// consumed, the second field is the value produced by the parser.
    Done(&'a str, T),
    /// Failure: `E` describes why the parser could not produce a value.
    Error(E),
}
24
25 /// Indicates how parsing failed.
26 ///
27 /// Most `ParseError` variants contain a `Box<ParseError>` that represents the
28 /// cause of that error. Using this, `ParseError` variants can be chained to
29 /// produce a more complete picture of what exactly went wrong during parsing.
30 #[derive(Debug, PartialEq, Eq, Clone)]
31 pub enum ParseError<Loc=ByteSpan> where Loc: Span {
32 /// Parsing reached the end of input where not expecting to, usually this
33 /// will be contained inside another `ParseError` like `String(box
34 /// UnexpectedEof, ...)` which indicates that the closing quote was never
35 /// found.
36 UnexpectedEof,
37 /// Some problem occurred while parsing a list, along with the cause of that
38 /// error.
39 List(Box<ParseError>, Loc),
40 /// Some problem occurred while parsing an s-expression. This will only be
41 /// generated if EOF is reached unexpectedly at the beginning of
42 /// `parse_expression`, so it should probably be removed.
43 Sexp(Box<ParseError>, Loc),
44 /// Some problem occurred while parsing a character literal, along with the
45 /// cause of the error.
46 Char(Box<ParseError>, Loc),
47 /// Some problem occurred while parsing a string literal, along with the
48 /// cause of the error.
49 String(Box<ParseError>, Loc),
50 /// Some problem occurred while parsing a symbol, along with the cause of
51 /// the error.
52 Symbol(Box<ParseError>, Loc),
53 /// Some problem occurred while parsing a number literal, along with the
54 /// cause of the error.
55 Number(Box<ParseError>, Loc),
56 /// An unexpected character was found. This will usually be the root cause
57 /// in some chain of `ParseError`s.
58 Unexpected(char, Loc::Begin),
59 }
60 use self::ParseResult::*;
61
62 \f
63 // Parsing Utilities ///////////////////////////////////////////////////////////
64
/// Classifies values that terminate a token (number or symbol).
///
/// The trait name keeps its existing spelling so that any other reference to
/// it keeps resolving.
trait IsDelimeter {
    /// Returns `true` when `self` ends the token currently being read.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter when it is whitespace, the comment marker
    /// `;`, any bracket, a double or single quote, a backtick, or a comma.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
79
// Skips leading whitespace in `$input`.
//
// Evaluates to `(rest, loc)` where `rest` starts at the first non-whitespace
// character and `loc` is `$start_loc` advanced by the number of bytes skipped.
//
// If `$input` is empty or all whitespace, the macro `return`s from the
// *enclosing function* with `Error($ErrorFn(UnexpectedEof, span))`, where the
// span is the empty span at the end of `$input`, offset by `$start_loc`.
// Callers may therefore assume that `rest` is never empty.
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        match $input.find(|c: char| !c.is_whitespace()) {
            Some(pos) => (&$input[pos..], $start_loc + pos),
            None => return Error($ErrorFn(
                Box::new(ParseError::UnexpectedEof),
                ($input.len(), $input.len()).offset($start_loc))),
        }
    }
}
91
92 \f
93 // Top Level Parsers ///////////////////////////////////////////////////////////
94
95 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
96 match parse_expression(input, 0) {
97 Done(rest, result) => Ok((result, rest)),
98 Error(err) => Err(err),
99 }
100 }
101
102 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
103 let mut start_loc = 0;
104 let mut results = Vec::new();
105 loop {
106 match parse_expression(input, start_loc) {
107 Done(rest, result) => {
108 input = rest;
109 start_loc = result.get_loc().1;
110 results.push(result);
111 if rest.trim() == "" {
112 return (results, None);
113 }
114 }
115 Error(err) => {
116 return (results, Some(err));
117 }
118 }
119 }
120 }
121
122 \f
123 // Core Parsers ////////////////////////////////////////////////////////////////
124
125 pub fn parse_expression(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
126 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
127
128 match input.chars().next() {
129 Some('0'...'9') => parse_number(input, start_loc),
130 Some('(') => parse_list(input, start_loc),
131 Some('#') => parse_character(input, start_loc),
132 Some('"') => parse_string(input, start_loc),
133 Some(_) => parse_symbol(input, start_loc),
134 None => unreachable!(),
135 }
136 }
137
138 pub fn parse_list(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
139 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::List);
140
141 match input.chars().nth(0) {
142 Some('(') => (),
143 Some(c) =>
144 return Error(ParseError::List(
145 Box::new(ParseError::Unexpected(c, 0)),
146 (0, 0).offset(start_loc))),
147 None => unreachable!(),
148 }
149
150 let mut input = &input[1..];
151 let mut loc = start_loc + 1;
152 let mut members = Vec::new();
153 loop {
154 {
155 let (new_input, new_loc) = consume_whitespace!(input, loc, ParseError::List);
156 input = new_input;
157 loc = new_loc;
158 }
159
160 match input.chars().nth(0) {
161 Some(')') =>
162 return Done(&input[1..],
163 Sexp::List(members, (start_loc, loc+1))),
164 Some(_) => (),
165 None => unreachable!(),
166 }
167
168 match parse_expression(input, loc) {
169 Done(new_input, member) => {
170 loc = member.get_loc().1;
171 members.push(member);
172 input = new_input;
173 }
174 Error(err) =>
175 return Error(ParseError::List(
176 Box::new(err),
177 (0, 0).offset(loc)))
178 }
179 }
180 }
181
182 pub fn parse_number(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
183 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Number);
184
185 match input.chars().next() {
186 Some(c) if !c.is_digit(10) => {
187 return Error(ParseError::Number(
188 Box::new(ParseError::Unexpected(c, start_loc)),
189 (0, c.len_utf8()).offset(start_loc)));
190 }
191 None => return Error(ParseError::Number(
192 Box::new(ParseError::UnexpectedEof),
193 (0, 0).offset(start_loc))),
194 _ => (),
195 }
196
197 let base = 10;
198
199 let mut end = 0;
200 // Before the decimal point
201 for (i, c) in input.char_indices() {
202 if c == '.' {
203 end = i + 1;
204 break;
205 }
206
207 if c.is_delimiter() {
208 return Done(&input[i..],
209 Sexp::Int(input[..i].parse().expect("Already matched digits"),
210 (0, i).offset(start_loc)));
211 }
212
213 if !c.is_digit(base) {
214 return Error(ParseError::Number(
215 Box::new(ParseError::Unexpected(c, start_loc + i)),
216 (i, i).offset(start_loc)));
217 }
218
219 end = i + c.len_utf8();
220 }
221
222 if input[end..].is_empty() {
223 return Done(&input[end..],
224 Sexp::Int(input.parse().expect("Already matched digits"),
225 (0, end).offset(start_loc)));
226 }
227
228 // After the decimal point
229 for (i, c) in input[end..].char_indices() {
230 if c.is_delimiter() {
231 return Done(&input[i+end..],
232 Sexp::Float(input[..end+i].parse().expect("Already matched digits.digits"),
233 (0, end+i).offset(start_loc)));
234 }
235
236 if !c.is_digit(base) {
237 return Error(ParseError::Number(
238 Box::new(ParseError::Unexpected(c, start_loc + i + end)),
239 (i+end, i+end).offset(start_loc)));
240 }
241 }
242
243 Done(&input[input.len()..],
244 Sexp::Float(input.parse().expect("Already matched digits.digits"),
245 (0, input.len()).offset(start_loc)))
246 }
247
248 pub fn parse_symbol(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
249 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Symbol);
250
251 match input.chars().next() {
252 Some(c@'#') | Some(c@':') | Some(c@'0'...'9') =>
253 return Error(ParseError::Symbol(
254 Box::new(ParseError::Unexpected(c, start_loc)),
255 (0, 0).offset(start_loc))),
256 Some(c) if c.is_delimiter() =>
257 return Error(ParseError::Symbol(
258 Box::new(ParseError::Unexpected(c, start_loc)),
259 (0, 0).offset(start_loc))),
260 Some(_) => (),
261 None => unreachable!(),
262 }
263
264 for (i, c) in input.char_indices() {
265 if c.is_delimiter() {
266 return Done(&input[i..],
267 Sexp::Sym(input[..i].into(), (0, i).offset(start_loc)));
268 }
269 }
270
271 Done(&input[input.len()..],
272 Sexp::Sym(input.into(), (0, input.len()).offset(start_loc)))
273 }
274
275 pub fn parse_string(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
276 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::String);
277
278 match input.chars().next() {
279 Some('"') => (),
280 Some(c) =>
281 return Error(ParseError::String(
282 Box::new(ParseError::Unexpected(c, start_loc)),
283 (0, 0).offset(start_loc))),
284 None => unreachable!(),
285 }
286
287 for (i, c) in input[1..].char_indices() {
288 if c == '"' {
289 return Done(&input[2+i..],
290 Sexp::Str(input[1..i+1].into(), (0, i+2).offset(start_loc)));
291 }
292 }
293
294 Error(ParseError::String(
295 Box::new(ParseError::UnexpectedEof),
296 (0, input.len()).offset(start_loc)))
297 }
298
299 pub fn parse_character(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
300 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Char);
301
302 match input.chars().nth(0) {
303 Some('#') => (),
304 Some(c) =>
305 return Error(ParseError::Char(
306 Box::new(ParseError::Unexpected(c, start_loc)),
307 (0, 0).offset(start_loc))),
308 None =>
309 return Error(ParseError::Char(
310 Box::new(ParseError::UnexpectedEof),
311 (0, 0).offset(start_loc))),
312 }
313
314 match input.chars().nth(1) {
315 Some('\\') => (),
316 Some(c) =>
317 return Error(ParseError::Char(
318 Box::new(ParseError::Unexpected(c, start_loc + 1)),
319 (1, 1).offset(start_loc))),
320 None =>
321 return Error(ParseError::Char(
322 Box::new(ParseError::UnexpectedEof),
323 (1, 1).offset(start_loc)))
324 }
325
326 match input.chars().nth(2) {
327 Some(c) =>
328 Done(&input[3..], Sexp::Char(c, (0, 3).offset(start_loc))),
329 None =>
330 Error(ParseError::Char(
331 Box::new(ParseError::UnexpectedEof),
332 (2, 2).offset(start_loc)))
333 }
334 }
335
336 \f
337 // Tests ///////////////////////////////////////////////////////////////////////
338
// Unit tests for the parsers. Spans are `(begin, end)` byte offsets into the
// input, so the amount of whitespace inside each input literal matters: every
// input below is laid out so that its byte offsets match the asserted spans.
#[cfg(test)]
mod test {
    use sexp::Sexp;
    use span::Span;
    use parser::*;
    use parser::ParseResult::*;

    #[test]
    fn test_parse() {
        assert_eq!(parse("1 2 3"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3)), Sexp::Int(3, (4, 5))
        ], None));
        // A stray `)` is handed to the symbol parser, which rejects it.
        assert_eq!(parse("1 2 )"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3))
        ], Some(ParseError::Symbol(Box::new(ParseError::Unexpected(')', 4)), (4, 4)))));
    }

    #[test]
    fn test_parse_one() {
        assert_eq!(parse_one("1 2"),
                   Ok((Sexp::Int(1, (0, 1)), " 2")));
    }

    #[test]
    fn test_parse_expression() {
        assert_eq!(parse_expression(" 1", 0),
                   Done("", Sexp::Int(1, (1, 2))));
        assert_eq!(parse_expression("2.2", 0),
                   Done("", Sexp::Float(2.2, (0, 3))));
        assert_eq!(parse_expression(" a", 0),
                   Done("", Sexp::Sym("a".into(), (1, 2))));
        assert_eq!(parse_expression("#\\c", 0),
                   Done("", Sexp::Char('c', (0, 3))));
        assert_eq!(parse_expression(r#""hi""#, 0),
                   Done("", Sexp::Str("hi".into(), (0, 4))));
        assert_eq!(parse_expression("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_expression("( 1 2 3 )", 0),
                   Done("", Sexp::List(vec![
                       Sexp::Int(1, (2, 3)),
                       Sexp::Int(2, (4, 5)),
                       Sexp::Int(3, (6, 7)),
                   ], (0, 9))));

        assert_eq!(parse_expression("", 0),
                   Error(ParseError::Sexp(Box::new(ParseError::UnexpectedEof), (0, 0))));
    }

    #[test]
    fn test_parse_list() {
        assert_eq!(parse_list("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_list("(1)", 0),
                   Done("", Sexp::List(vec![Sexp::Int(1, (1, 2))], (0, 3))));
        // Byte layout: `(` at 2, `1` at 4, `2` at 9, `3` at 12, `a` at 14,
        // `)` at 16.
        assert_eq!(parse_list("  ( 1    2  3 a )", 0), Done("", Sexp::List(vec![
            Sexp::Int(1, (4, 5)),
            Sexp::Int(2, (9, 10)),
            Sexp::Int(3, (12, 13)),
            Sexp::Sym("a".into(), (14, 15)),
        ], (2, 17))));
    }

    #[test]
    fn test_parse_number() {
        assert_eq!(parse_number("1", 0),
                   Done("", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number(" 13", 0),
                   Done("", Sexp::Int(13, (1, 3))));
        assert_eq!(parse_number("1.2", 0),
                   Done("", Sexp::Float(1.2, (0, 3))));
        // U+3000 is multi-byte whitespace; spans are byte offsets.
        assert_eq!(parse_number("\u{3000}4.2", 0),
                   Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
        // Two leading spaces, so the literal occupies bytes 2..4.
        assert_eq!(parse_number("  42 ", 0),
                   Done(" ", Sexp::Int(42, (2, 4))));
        assert_eq!(parse_number(" 4.2 ", 0),
                   Done(" ", Sexp::Float(4.2, (1, 4))));
        assert_eq!(parse_number("1()", 0),
                   Done("()", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number("3.6()", 0),
                   Done("()", Sexp::Float(3.6, (0, 3))));

        assert_eq!(parse_number("", 0),
                   Error(ParseError::Number(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_number("123a", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('a', 3)), (3, 3))));
        assert_eq!(parse_number("66.6+", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('+', 4)), (4, 4))));
    }

    #[test]
    fn test_parse_ident() {
        assert_eq!(parse_symbol("+", 0),
                   Done("", Sexp::Sym("+".into(), (0, 1))));
        assert_eq!(parse_symbol(" nil?", 0),
                   Done("", Sexp::Sym("nil?".into(), (1, 5))));
        assert_eq!(parse_symbol(" ->socket", 0),
                   Done("", Sexp::Sym("->socket".into(), (1, 9))));
        assert_eq!(parse_symbol("fib(", 0),
                   Done("(", Sexp::Sym("fib".into(), (0, 3))));
        assert_eq!(parse_symbol("foo2", 0),
                   Done("", Sexp::Sym("foo2".into(), (0, 4))));

        // We reserve #foo for the implementation to do as it wishes
        assert_eq!(parse_symbol("#hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('#', 0)), (0, 0))));
        // We reserve :foo for keywords
        assert_eq!(parse_symbol(":hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected(':', 0)), (0, 0))));

        assert_eq!(parse_symbol("", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_symbol("0", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('0', 0)), (0, 0))));
        assert_eq!(parse_symbol("()", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('(', 0)), (0, 0))));
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(parse_string(r#""""#, 0),
                   Done("", Sexp::Str("".into(), (0, 2))));
        assert_eq!(parse_string(r#""hello""#, 0),
                   Done("", Sexp::Str("hello".into(), (0, 7))));
        // Two leading spaces, then a 44-byte body containing a newline:
        // 2 + 1 (quote) + 44 + 1 (quote) = 48.
        assert_eq!(parse_string("  \"this is a nice string\nwith 0123 things in it\"", 0),
                   Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into(), (2, 48))));

        assert_eq!(parse_string("", 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_string(r#""hi"#, 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
    }

    #[test]
    fn test_parse_char() {
        assert_eq!(parse_character("#\\\"", 0), Done("", Sexp::Char('"', (0, 3))));
        assert_eq!(parse_character("#\\ ", 0), Done("", Sexp::Char(' ', (0, 3))));
        // Two leading spaces, so the literal `#\\` occupies bytes 2..5.
        assert_eq!(parse_character("  #\\\\", 0), Done("", Sexp::Char('\\', (2, 5))));

        assert_eq!(parse_character("", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_character("#", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
        assert_eq!(parse_character("#\\", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
        assert_eq!(parse_character("a", 0),
                   Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));
    }
}