]> Witch of Git - ess/blob - src/parser.rs
Split the project into multiple modules
[ess] / src / parser.rs
1 use sexp::Sexp;
2 use span::{Span, ByteSpan};
3
4 \f
5 // Parsing Types ///////////////////////////////////////////////////////////////
6
/// Outcome of running one of the parsers in this module over a string slice.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseResult<'a, T, E> {
    /// Success: the part of the input that was not consumed, followed by the
    /// value that was parsed.
    Done(&'a str, T),
    /// Failure, carrying the error that describes it.
    Error(E),
}
12
13 use self::ParseResult::*;
14
15 /// Indicates how parsing failed.
16 #[derive(Debug, PartialEq, Eq, Clone)]
17 pub enum ParseError<Loc=ByteSpan> where Loc: Span {
18 UnexpectedEof,
19 List(Box<ParseError>, Loc),
20 Sexp(Box<ParseError>, Loc),
21 Char(Box<ParseError>, Loc),
22 String(Box<ParseError>, Loc),
23 Symbol(Box<ParseError>, Loc),
24 Number(Box<ParseError>, Loc),
25 Unexpected(char, Loc::Begin),
26 Unimplemented,
27 }
28
29 \f
30 // Parsing Utilities ///////////////////////////////////////////////////////////
31
/// Classifies values that end the token preceding them.
trait IsDelimeter {
    /// Returns `true` when `self` terminates a token.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter if it is whitespace or one of
    /// `; ( ) [ ] { } " ' `` ` `` ,`.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
46
// Skips leading whitespace in `$input`.
//
// Evaluates to `(rest, loc)`, where `rest` is `$input` from its first
// non-whitespace character onward and `loc` is `$start_loc` advanced by the
// number of bytes skipped. If `$input` holds nothing but whitespace (or is
// empty), the *enclosing function* returns `Error($ErrorFn(UnexpectedEof, ..))`
// located at the end of the input.
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        match $input.find(|c: char| !c.is_whitespace()) {
            Some(pos) => (&$input[pos..], $start_loc + pos),
            None => {
                return Error($ErrorFn(
                    Box::new(ParseError::UnexpectedEof),
                    ($input.len(), $input.len()).offset($start_loc)));
            }
        }
    }
}
58
59 \f
60 // Top Level Parsers ///////////////////////////////////////////////////////////
61
62 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
63 match parse_expression(input, 0) {
64 Done(rest, result) => Ok((result, rest)),
65 Error(err) => Err(err),
66 }
67 }
68
69 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
70 let mut start_loc = 0;
71 let mut results = Vec::new();
72 loop {
73 match parse_expression(input, start_loc) {
74 Done(rest, result) => {
75 input = rest;
76 start_loc = result.get_loc().1;
77 results.push(result);
78 if rest.trim() == "" {
79 return (results, None);
80 }
81 }
82 Error(err) => {
83 return (results, Some(err));
84 }
85 }
86 }
87 }
88
89 \f
90 // Core Parsers ////////////////////////////////////////////////////////////////
91
92 pub fn parse_expression(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
93 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
94
95 match input.chars().next() {
96 Some('0'...'9') => parse_number(input, start_loc),
97 Some('(') => parse_list(input, start_loc),
98 Some('#') => parse_character(input, start_loc),
99 Some('"') => parse_string(input, start_loc),
100 Some(_) => parse_symbol(input, start_loc),
101 None => unreachable!(),
102 }
103 }
104
105 pub fn parse_list(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
106 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::List);
107
108 match input.chars().nth(0) {
109 Some('(') => (),
110 Some(c) =>
111 return Error(ParseError::List(
112 Box::new(ParseError::Unexpected(c, 0)),
113 (0, 0).offset(start_loc))),
114 None => unreachable!(),
115 }
116
117 let mut input = &input[1..];
118 let mut loc = start_loc + 1;
119 let mut members = Vec::new();
120 loop {
121 {
122 let (new_input, new_loc) = consume_whitespace!(input, loc, ParseError::List);
123 input = new_input;
124 loc = new_loc;
125 }
126
127 match input.chars().nth(0) {
128 Some(')') =>
129 return Done(&input[1..],
130 Sexp::List(members, (start_loc, loc+1))),
131 Some(_) => (),
132 None => unreachable!(),
133 }
134
135 match parse_expression(input, loc) {
136 Done(new_input, member) => {
137 loc = member.get_loc().1;
138 members.push(member);
139 input = new_input;
140 }
141 Error(err) =>
142 return Error(ParseError::List(
143 Box::new(err),
144 (0, 0).offset(loc)))
145 }
146 }
147 }
148
149 pub fn parse_number(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
150 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Number);
151
152 match input.chars().next() {
153 Some(c) if !c.is_digit(10) => {
154 return Error(ParseError::Number(
155 Box::new(ParseError::Unexpected(c, start_loc)),
156 (0, c.len_utf8()).offset(start_loc)));
157 }
158 None => return Error(ParseError::Number(
159 Box::new(ParseError::UnexpectedEof),
160 (0, 0).offset(start_loc))),
161 _ => (),
162 }
163
164 let base = 10;
165
166 let mut end = 0;
167 // Before the decimal point
168 for (i, c) in input.char_indices() {
169 if c == '.' {
170 end = i + 1;
171 break;
172 }
173
174 if c.is_delimiter() {
175 return Done(&input[i..],
176 Sexp::Int(input[..i].parse().expect("Already matched digits"),
177 (0, i).offset(start_loc)));
178 }
179
180 if !c.is_digit(base) {
181 return Error(ParseError::Number(
182 Box::new(ParseError::Unexpected(c, start_loc + i)),
183 (i, i).offset(start_loc)));
184 }
185
186 end = i + c.len_utf8();
187 }
188
189 if input[end..].is_empty() {
190 return Done(&input[end..],
191 Sexp::Int(input.parse().expect("Already matched digits"),
192 (0, end).offset(start_loc)));
193 }
194
195 // After the decimal point
196 for (i, c) in input[end..].char_indices() {
197 if c.is_delimiter() {
198 return Done(&input[i+end..],
199 Sexp::Float(input[..end+i].parse().expect("Already matched digits.digits"),
200 (0, end+i).offset(start_loc)));
201 }
202
203 if !c.is_digit(base) {
204 return Error(ParseError::Number(
205 Box::new(ParseError::Unexpected(c, start_loc + i + end)),
206 (i+end, i+end).offset(start_loc)));
207 }
208 }
209
210 Done(&input[input.len()..],
211 Sexp::Float(input.parse().expect("Already matched digits.digits"),
212 (0, input.len()).offset(start_loc)))
213 }
214
215 pub fn parse_symbol(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
216 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Symbol);
217
218 match input.chars().next() {
219 Some(c@'#') | Some(c@':') | Some(c@'0'...'9') =>
220 return Error(ParseError::Symbol(
221 Box::new(ParseError::Unexpected(c, start_loc)),
222 (0, 0).offset(start_loc))),
223 Some(c) if c.is_delimiter() =>
224 return Error(ParseError::Symbol(
225 Box::new(ParseError::Unexpected(c, start_loc)),
226 (0, 0).offset(start_loc))),
227 Some(_) => (),
228 None => unreachable!(),
229 }
230
231 for (i, c) in input.char_indices() {
232 if c.is_delimiter() {
233 return Done(&input[i..],
234 Sexp::Sym(input[..i].into(), (0, i).offset(start_loc)));
235 }
236 }
237
238 Done(&input[input.len()..],
239 Sexp::Sym(input.into(), (0, input.len()).offset(start_loc)))
240 }
241
242 pub fn parse_string(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
243 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::String);
244
245 match input.chars().next() {
246 Some('"') => (),
247 Some(c) =>
248 return Error(ParseError::String(
249 Box::new(ParseError::Unexpected(c, start_loc)),
250 (0, 0).offset(start_loc))),
251 None => unreachable!(),
252 }
253
254 for (i, c) in input[1..].char_indices() {
255 if c == '"' {
256 return Done(&input[2+i..],
257 Sexp::Str(input[1..i+1].into(), (0, i+2).offset(start_loc)));
258 }
259 }
260
261 Error(ParseError::String(
262 Box::new(ParseError::UnexpectedEof),
263 (0, input.len()).offset(start_loc)))
264 }
265
266 pub fn parse_character(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
267 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Char);
268
269 match input.chars().nth(0) {
270 Some('#') => (),
271 Some(c) =>
272 return Error(ParseError::Char(
273 Box::new(ParseError::Unexpected(c, start_loc)),
274 (0, 0).offset(start_loc))),
275 None =>
276 return Error(ParseError::Char(
277 Box::new(ParseError::UnexpectedEof),
278 (0, 0).offset(start_loc))),
279 }
280
281 match input.chars().nth(1) {
282 Some('\\') => (),
283 Some(c) =>
284 return Error(ParseError::Char(
285 Box::new(ParseError::Unexpected(c, start_loc + 1)),
286 (1, 1).offset(start_loc))),
287 None =>
288 return Error(ParseError::Char(
289 Box::new(ParseError::UnexpectedEof),
290 (1, 1).offset(start_loc)))
291 }
292
293 match input.chars().nth(2) {
294 Some(c) =>
295 Done(&input[3..], Sexp::Char(c, (0, 3).offset(start_loc))),
296 None =>
297 Error(ParseError::Char(
298 Box::new(ParseError::UnexpectedEof),
299 (2, 2).offset(start_loc)))
300 }
301 }
302
303 \f
304 // Tests ///////////////////////////////////////////////////////////////////////
305
// Unit tests for the parsers above. Inputs with leading or repeated spaces
// are spelled out so that they agree byte-for-byte with the asserted spans.
#[cfg(test)]
mod test {
    use sexp::Sexp;
    use span::Span;
    use parser::*;
    use parser::ParseResult::*;

    #[test]
    fn test_parse() {
        assert_eq!(parse("1 2 3"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3)), Sexp::Int(3, (4, 5))
        ], None));
        assert_eq!(parse("1 2 )"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3))
        ], Some(ParseError::Symbol(Box::new(ParseError::Unexpected(')', 4)), (4, 4)))));
    }

    #[test]
    fn test_parse_one() {
        assert_eq!(parse_one("1 2"),
                   Ok((Sexp::Int(1, (0, 1)), " 2")));
    }

    #[test]
    fn test_parse_expression() {
        assert_eq!(parse_expression(" 1", 0),
                   Done("", Sexp::Int(1, (1, 2))));
        assert_eq!(parse_expression("2.2", 0),
                   Done("", Sexp::Float(2.2, (0, 3))));
        assert_eq!(parse_expression(" a", 0),
                   Done("", Sexp::Sym("a".into(), (1, 2))));
        assert_eq!(parse_expression("#\\c", 0),
                   Done("", Sexp::Char('c', (0, 3))));
        assert_eq!(parse_expression(r#""hi""#, 0),
                   Done("", Sexp::Str("hi".into(), (0, 4))));
        assert_eq!(parse_expression("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_expression("( 1 2 3 )", 0),
                   Done("", Sexp::List(vec![
                       Sexp::Int(1, (2, 3)),
                       Sexp::Int(2, (4, 5)),
                       Sexp::Int(3, (6, 7)),
                   ], (0, 9))));

        assert_eq!(parse_expression("", 0),
                   Error(ParseError::Sexp(Box::new(ParseError::UnexpectedEof), (0, 0))));
    }

    #[test]
    fn test_parse_list() {
        assert_eq!(parse_list("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_list("(1)", 0),
                   Done("", Sexp::List(vec![Sexp::Int(1, (1, 2))], (0, 3))));
        // Byte layout: '(' at 2, '1' at 4, '2' at 9, '3' at 12, 'a' at 14, ')' at 16.
        assert_eq!(parse_list("  ( 1    2  3 a )", 0), Done("", Sexp::List(vec![
            Sexp::Int(1, (4, 5)),
            Sexp::Int(2, (9, 10)),
            Sexp::Int(3, (12, 13)),
            Sexp::Sym("a".into(), (14, 15)),
        ], (2, 17))));
    }

    #[test]
    fn test_parse_number() {
        assert_eq!(parse_number("1", 0),
                   Done("", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number(" 13", 0),
                   Done("", Sexp::Int(13, (1, 3))));
        assert_eq!(parse_number("1.2", 0),
                   Done("", Sexp::Float(1.2, (0, 3))));
        assert_eq!(parse_number("\u{3000}4.2", 0),
                   Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
        // Two leading spaces, so the digits occupy bytes 2..4.
        assert_eq!(parse_number("  42 ", 0),
                   Done(" ", Sexp::Int(42, (2, 4))));
        assert_eq!(parse_number(" 4.2 ", 0),
                   Done(" ", Sexp::Float(4.2, (1, 4))));
        assert_eq!(parse_number("1()", 0),
                   Done("()", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number("3.6()", 0),
                   Done("()", Sexp::Float(3.6, (0, 3))));

        assert_eq!(parse_number("", 0),
                   Error(ParseError::Number(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_number("123a", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('a', 3)), (3, 3))));
        assert_eq!(parse_number("66.6+", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('+', 4)), (4, 4))));
    }

    #[test]
    fn test_parse_ident() {
        assert_eq!(parse_symbol("+", 0),
                   Done("", Sexp::Sym("+".into(), (0, 1))));
        assert_eq!(parse_symbol(" nil?", 0),
                   Done("", Sexp::Sym("nil?".into(), (1, 5))));
        assert_eq!(parse_symbol(" ->socket", 0),
                   Done("", Sexp::Sym("->socket".into(), (1, 9))));
        assert_eq!(parse_symbol("fib(", 0),
                   Done("(", Sexp::Sym("fib".into(), (0, 3))));
        assert_eq!(parse_symbol("foo2", 0),
                   Done("", Sexp::Sym("foo2".into(), (0, 4))));

        // We reserve #foo for the implementation to do as it wishes
        assert_eq!(parse_symbol("#hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('#', 0)), (0, 0))));
        // We reserve :foo for keywords
        assert_eq!(parse_symbol(":hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected(':', 0)), (0, 0))));

        assert_eq!(parse_symbol("", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_symbol("0", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('0', 0)), (0, 0))));
        assert_eq!(parse_symbol("()", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('(', 0)), (0, 0))));
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(parse_string(r#""""#, 0),
                   Done("", Sexp::Str("".into(), (0, 2))));
        assert_eq!(parse_string(r#""hello""#, 0),
                   Done("", Sexp::Str("hello".into(), (0, 7))));
        // Two leading spaces; the quoted text (44 bytes plus 2 quotes) spans 2..48.
        assert_eq!(parse_string("  \"this is a nice string\nwith 0123 things in it\"", 0),
                   Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into(), (2, 48))));

        assert_eq!(parse_string("", 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_string(r#""hi"#, 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
    }

    #[test]
    fn test_parse_char() {
        assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
        assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
        // Two leading spaces, so the literal occupies bytes 2..5.
        assert_eq!(parse_character(r#"  #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));

        assert_eq!(parse_character("", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_character("#", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
        assert_eq!(parse_character("#\\", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
        assert_eq!(parse_character("a", 0),
                   Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));
    }
}