]> Witch of Git - ess/blob - src/parser.rs
Document ParseResult
[ess] / src / parser.rs
1 use sexp::Sexp;
2 use span::{Span, ByteSpan};
3
4 \f
5 // Parsing Types ///////////////////////////////////////////////////////////////
6
/// The outcome of running a single intermediate parser.
///
/// Every parser below the top level hands back a `ParseResult`. If all you
/// want is finished S-expressions you can ignore this type: the top level
/// entry points unwrap it for you.
///
/// A failed parse is reported as `Error`; a successful one as `Done`, which
/// carries both the value that was produced and the text still left to parse.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseResult<'a, T, E> {
    /// Success. The first field is the part of the input that was not
    /// consumed, the second is the value produced by the parser.
    Done(&'a str, T),
    /// Failure. `E` describes why the parser could not produce a value.
    Error(E),
}
24
25 /// Indicates how parsing failed.
26 #[derive(Debug, PartialEq, Eq, Clone)]
27 pub enum ParseError<Loc=ByteSpan> where Loc: Span {
28 UnexpectedEof,
29 List(Box<ParseError>, Loc),
30 Sexp(Box<ParseError>, Loc),
31 Char(Box<ParseError>, Loc),
32 String(Box<ParseError>, Loc),
33 Symbol(Box<ParseError>, Loc),
34 Number(Box<ParseError>, Loc),
35 Unexpected(char, Loc::Begin),
36 }
37 use self::ParseResult::*;
38
39 \f
40 // Parsing Utilities ///////////////////////////////////////////////////////////
41
/// Classifies values that terminate a token such as a symbol or a number.
trait IsDelimeter {
    /// Returns `true` when `self` ends the token that precedes it.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter when it is whitespace or one of the
    /// punctuation characters `; ( ) [ ] { } " ' ` ,`.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
56
// Skips the leading whitespace of `$input`.
//
// Evaluates to a pair of the remaining text (starting at the first
// non-whitespace character) and `$start_loc` advanced by the number of bytes
// skipped. When `$input` holds nothing but whitespace, or is empty, the macro
// makes the *enclosing function* return an `Error` built by `$ErrorFn` around
// `ParseError::UnexpectedEof`, located at the end of the input.
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        match $input.find(|c: char| !c.is_whitespace()) {
            Some(pos) => (&$input[pos..], $start_loc + pos),
            None => {
                return Error($ErrorFn(
                    Box::new(ParseError::UnexpectedEof),
                    ($input.len(), $input.len()).offset($start_loc)));
            }
        }
    }
}
68
69 \f
70 // Top Level Parsers ///////////////////////////////////////////////////////////
71
72 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
73 match parse_expression(input, 0) {
74 Done(rest, result) => Ok((result, rest)),
75 Error(err) => Err(err),
76 }
77 }
78
79 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
80 let mut start_loc = 0;
81 let mut results = Vec::new();
82 loop {
83 match parse_expression(input, start_loc) {
84 Done(rest, result) => {
85 input = rest;
86 start_loc = result.get_loc().1;
87 results.push(result);
88 if rest.trim() == "" {
89 return (results, None);
90 }
91 }
92 Error(err) => {
93 return (results, Some(err));
94 }
95 }
96 }
97 }
98
99 \f
100 // Core Parsers ////////////////////////////////////////////////////////////////
101
102 pub fn parse_expression(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
103 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
104
105 match input.chars().next() {
106 Some('0'...'9') => parse_number(input, start_loc),
107 Some('(') => parse_list(input, start_loc),
108 Some('#') => parse_character(input, start_loc),
109 Some('"') => parse_string(input, start_loc),
110 Some(_) => parse_symbol(input, start_loc),
111 None => unreachable!(),
112 }
113 }
114
115 pub fn parse_list(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
116 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::List);
117
118 match input.chars().nth(0) {
119 Some('(') => (),
120 Some(c) =>
121 return Error(ParseError::List(
122 Box::new(ParseError::Unexpected(c, 0)),
123 (0, 0).offset(start_loc))),
124 None => unreachable!(),
125 }
126
127 let mut input = &input[1..];
128 let mut loc = start_loc + 1;
129 let mut members = Vec::new();
130 loop {
131 {
132 let (new_input, new_loc) = consume_whitespace!(input, loc, ParseError::List);
133 input = new_input;
134 loc = new_loc;
135 }
136
137 match input.chars().nth(0) {
138 Some(')') =>
139 return Done(&input[1..],
140 Sexp::List(members, (start_loc, loc+1))),
141 Some(_) => (),
142 None => unreachable!(),
143 }
144
145 match parse_expression(input, loc) {
146 Done(new_input, member) => {
147 loc = member.get_loc().1;
148 members.push(member);
149 input = new_input;
150 }
151 Error(err) =>
152 return Error(ParseError::List(
153 Box::new(err),
154 (0, 0).offset(loc)))
155 }
156 }
157 }
158
159 pub fn parse_number(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
160 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Number);
161
162 match input.chars().next() {
163 Some(c) if !c.is_digit(10) => {
164 return Error(ParseError::Number(
165 Box::new(ParseError::Unexpected(c, start_loc)),
166 (0, c.len_utf8()).offset(start_loc)));
167 }
168 None => return Error(ParseError::Number(
169 Box::new(ParseError::UnexpectedEof),
170 (0, 0).offset(start_loc))),
171 _ => (),
172 }
173
174 let base = 10;
175
176 let mut end = 0;
177 // Before the decimal point
178 for (i, c) in input.char_indices() {
179 if c == '.' {
180 end = i + 1;
181 break;
182 }
183
184 if c.is_delimiter() {
185 return Done(&input[i..],
186 Sexp::Int(input[..i].parse().expect("Already matched digits"),
187 (0, i).offset(start_loc)));
188 }
189
190 if !c.is_digit(base) {
191 return Error(ParseError::Number(
192 Box::new(ParseError::Unexpected(c, start_loc + i)),
193 (i, i).offset(start_loc)));
194 }
195
196 end = i + c.len_utf8();
197 }
198
199 if input[end..].is_empty() {
200 return Done(&input[end..],
201 Sexp::Int(input.parse().expect("Already matched digits"),
202 (0, end).offset(start_loc)));
203 }
204
205 // After the decimal point
206 for (i, c) in input[end..].char_indices() {
207 if c.is_delimiter() {
208 return Done(&input[i+end..],
209 Sexp::Float(input[..end+i].parse().expect("Already matched digits.digits"),
210 (0, end+i).offset(start_loc)));
211 }
212
213 if !c.is_digit(base) {
214 return Error(ParseError::Number(
215 Box::new(ParseError::Unexpected(c, start_loc + i + end)),
216 (i+end, i+end).offset(start_loc)));
217 }
218 }
219
220 Done(&input[input.len()..],
221 Sexp::Float(input.parse().expect("Already matched digits.digits"),
222 (0, input.len()).offset(start_loc)))
223 }
224
225 pub fn parse_symbol(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
226 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Symbol);
227
228 match input.chars().next() {
229 Some(c@'#') | Some(c@':') | Some(c@'0'...'9') =>
230 return Error(ParseError::Symbol(
231 Box::new(ParseError::Unexpected(c, start_loc)),
232 (0, 0).offset(start_loc))),
233 Some(c) if c.is_delimiter() =>
234 return Error(ParseError::Symbol(
235 Box::new(ParseError::Unexpected(c, start_loc)),
236 (0, 0).offset(start_loc))),
237 Some(_) => (),
238 None => unreachable!(),
239 }
240
241 for (i, c) in input.char_indices() {
242 if c.is_delimiter() {
243 return Done(&input[i..],
244 Sexp::Sym(input[..i].into(), (0, i).offset(start_loc)));
245 }
246 }
247
248 Done(&input[input.len()..],
249 Sexp::Sym(input.into(), (0, input.len()).offset(start_loc)))
250 }
251
252 pub fn parse_string(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
253 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::String);
254
255 match input.chars().next() {
256 Some('"') => (),
257 Some(c) =>
258 return Error(ParseError::String(
259 Box::new(ParseError::Unexpected(c, start_loc)),
260 (0, 0).offset(start_loc))),
261 None => unreachable!(),
262 }
263
264 for (i, c) in input[1..].char_indices() {
265 if c == '"' {
266 return Done(&input[2+i..],
267 Sexp::Str(input[1..i+1].into(), (0, i+2).offset(start_loc)));
268 }
269 }
270
271 Error(ParseError::String(
272 Box::new(ParseError::UnexpectedEof),
273 (0, input.len()).offset(start_loc)))
274 }
275
276 pub fn parse_character(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
277 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Char);
278
279 match input.chars().nth(0) {
280 Some('#') => (),
281 Some(c) =>
282 return Error(ParseError::Char(
283 Box::new(ParseError::Unexpected(c, start_loc)),
284 (0, 0).offset(start_loc))),
285 None =>
286 return Error(ParseError::Char(
287 Box::new(ParseError::UnexpectedEof),
288 (0, 0).offset(start_loc))),
289 }
290
291 match input.chars().nth(1) {
292 Some('\\') => (),
293 Some(c) =>
294 return Error(ParseError::Char(
295 Box::new(ParseError::Unexpected(c, start_loc + 1)),
296 (1, 1).offset(start_loc))),
297 None =>
298 return Error(ParseError::Char(
299 Box::new(ParseError::UnexpectedEof),
300 (1, 1).offset(start_loc)))
301 }
302
303 match input.chars().nth(2) {
304 Some(c) =>
305 Done(&input[3..], Sexp::Char(c, (0, 3).offset(start_loc))),
306 None =>
307 Error(ParseError::Char(
308 Box::new(ParseError::UnexpectedEof),
309 (2, 2).offset(start_loc)))
310 }
311 }
312
313 \f
314 // Tests ///////////////////////////////////////////////////////////////////////
315
#[cfg(test)]
mod test {
    use sexp::Sexp;
    use span::Span;
    use parser::*;
    use parser::ParseResult::*;

    /// `parse` collects every expression and reports the first failure.
    #[test]
    fn test_parse() {
        assert_eq!(parse("1 2 3"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3)), Sexp::Int(3, (4, 5))
        ], None));
        assert_eq!(parse("1 2 )"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3))
        ], Some(ParseError::Symbol(Box::new(ParseError::Unexpected(')', 4)), (4, 4)))));
    }

    /// `parse_one` returns the first expression and the untouched remainder.
    #[test]
    fn test_parse_one() {
        assert_eq!(parse_one("1 2"),
                   Ok((Sexp::Int(1, (0, 1)), " 2")));
    }

    /// `parse_expression` dispatches on the first non-whitespace character.
    #[test]
    fn test_parse_expression() {
        assert_eq!(parse_expression(" 1", 0),
                   Done("", Sexp::Int(1, (1, 2))));
        assert_eq!(parse_expression("2.2", 0),
                   Done("", Sexp::Float(2.2, (0, 3))));
        assert_eq!(parse_expression(" a", 0),
                   Done("", Sexp::Sym("a".into(), (1, 2))));
        assert_eq!(parse_expression("#\\c", 0),
                   Done("", Sexp::Char('c', (0, 3))));
        assert_eq!(parse_expression(r#""hi""#, 0),
                   Done("", Sexp::Str("hi".into(), (0, 4))));
        assert_eq!(parse_expression("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_expression("( 1 2 3 )", 0),
                   Done("", Sexp::List(vec![
                       Sexp::Int(1, (2, 3)),
                       Sexp::Int(2, (4, 5)),
                       Sexp::Int(3, (6, 7)),
                   ], (0, 9))));

        assert_eq!(parse_expression("", 0),
                   Error(ParseError::Sexp(Box::new(ParseError::UnexpectedEof), (0, 0))));
    }

    #[test]
    fn test_parse_list() {
        assert_eq!(parse_list("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_list("(1)", 0),
                   Done("", Sexp::List(vec![Sexp::Int(1, (1, 2))], (0, 3))));
        // Uneven spacing on purpose: the member spans below depend on the
        // exact number of spaces between the members.
        assert_eq!(parse_list("  ( 1    2  3 a )", 0), Done("", Sexp::List(vec![
            Sexp::Int(1, (4, 5)),
            Sexp::Int(2, (9, 10)),
            Sexp::Int(3, (12, 13)),
            Sexp::Sym("a".into(), (14, 15)),
        ], (2, 17))));
    }

    #[test]
    fn test_parse_number() {
        assert_eq!(parse_number("1", 0),
                   Done("", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number(" 13", 0),
                   Done("", Sexp::Int(13, (1, 3))));
        assert_eq!(parse_number("1.2", 0),
                   Done("", Sexp::Float(1.2, (0, 3))));
        assert_eq!(parse_number("\u{3000}4.2", 0),
                   Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
        // Two leading spaces, so the number starts at byte 2.
        assert_eq!(parse_number("  42 ", 0),
                   Done(" ", Sexp::Int(42, (2, 4))));
        assert_eq!(parse_number(" 4.2 ", 0),
                   Done(" ", Sexp::Float(4.2, (1, 4))));
        assert_eq!(parse_number("1()", 0),
                   Done("()", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number("3.6()", 0),
                   Done("()", Sexp::Float(3.6, (0, 3))));

        assert_eq!(parse_number("", 0),
                   Error(ParseError::Number(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_number("123a", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('a', 3)), (3, 3))));
        assert_eq!(parse_number("66.6+", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('+', 4)), (4, 4))));
    }

    #[test]
    fn test_parse_ident() {
        assert_eq!(parse_symbol("+", 0),
                   Done("", Sexp::Sym("+".into(), (0, 1))));
        assert_eq!(parse_symbol(" nil?", 0),
                   Done("", Sexp::Sym("nil?".into(), (1, 5))));
        assert_eq!(parse_symbol(" ->socket", 0),
                   Done("", Sexp::Sym("->socket".into(), (1, 9))));
        assert_eq!(parse_symbol("fib(", 0),
                   Done("(", Sexp::Sym("fib".into(), (0, 3))));
        assert_eq!(parse_symbol("foo2", 0),
                   Done("", Sexp::Sym("foo2".into(), (0, 4))));

        // We reserve #foo for the implementation to do as it wishes
        assert_eq!(parse_symbol("#hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('#', 0)), (0, 0))));
        // We reserve :foo for keywords
        assert_eq!(parse_symbol(":hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected(':', 0)), (0, 0))));

        assert_eq!(parse_symbol("", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_symbol("0", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('0', 0)), (0, 0))));
        assert_eq!(parse_symbol("()", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('(', 0)), (0, 0))));
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(parse_string(r#""""#, 0),
                   Done("", Sexp::Str("".into(), (0, 2))));
        assert_eq!(parse_string(r#""hello""#, 0),
                   Done("", Sexp::Str("hello".into(), (0, 7))));
        // Two leading spaces and an embedded newline. Written with an explicit
        // `\n` so the fixture does not depend on source indentation.
        assert_eq!(parse_string("  \"this is a nice string\nwith 0123 things in it\"", 0),
                   Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into(), (2, 48))));

        assert_eq!(parse_string("", 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_string(r#""hi"#, 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
    }

    #[test]
    fn test_parse_char() {
        assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
        assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
        // Two leading spaces, so the literal starts at byte 2.
        assert_eq!(parse_character(r#"  #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));

        assert_eq!(parse_character("", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_character("#", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
        assert_eq!(parse_character("#\\", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
        assert_eq!(parse_character("a", 0),
                   Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));
    }
}