Witch of Git - ess/blob - src/lib.rs
Change test case formatting
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 // #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 use std::borrow::Cow;
7
8 /// A type representing arbitrary symbolic expressions. `Sexp` carries the
9 /// source code location it came from along with it for later diagnostic
10 /// purposes.
11 #[derive(Debug, PartialEq, Clone, PartialOrd)]
12 pub enum Sexp<'a, Loc=ByteSpan> where Loc: Span {
13 /// A value representing a symbol.
14 Sym(Cow<'a, str>, Loc),
15 /// A value representing a string literal.
16 Str(Cow<'a, str>, Loc),
17 /// A value representing a single character.
18 Char(char, Loc),
19 /// A value representing an integer. Any number containing no decimal point
20 /// will be parsed as an `Int`.
21 Int(i64, Loc),
22 /// A value representing a floating point number. Any number containing a
23 /// decimal point will be parsed as a `Float`.
24 Float(f64, Loc),
25 /// A list of subexpressions.
26 List(Vec<Sexp<'a, Loc>>, Loc),
27 }
28
29 impl<'a, Loc> Sexp<'a, Loc> where Loc: Span {
30 pub fn get_loc(&self) -> &Loc {
31 match *self {
32 Sexp::Sym(.., ref l) | Sexp::Str(.., ref l) |
33 Sexp::Char(.., ref l) | Sexp::Int(.., ref l) |
34 Sexp::Float(.., ref l) | Sexp::List(.., ref l) => l,
35 }
36 }
37
38 pub fn get_loc_mut(&mut self) -> &mut Loc {
39 match *self {
40 Sexp::Sym(.., ref mut l) | Sexp::Str(.., ref mut l) |
41 Sexp::Char(.., ref mut l) | Sexp::Int(.., ref mut l) |
42 Sexp::Float(.., ref mut l) | Sexp::List(.., ref mut l) => l,
43 }
44 }
45 }
46
47 \f
48 // General Parsing Types ///////////////////////////////////////////////////////
49
/// A region of source text that can be shifted and merged.
pub trait Span {
    /// The type of the position at which a span starts.
    type Begin;

    /// Returns a copy of this span moved by `begin`.
    fn offset(&self, begin: Self::Begin) -> Self;

    /// Returns the starting position of this span.
    fn begin(&self) -> Self::Begin;

    /// Returns a span combining `self` and `other`.
    fn union(&self, other: &Self) -> Self;
}
57
/// The outcome of running one of the parsers over a piece of input.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseResult<'a, T, E> {
    /// Parsing succeeded: the input left over after the parsed value,
    /// followed by the value itself.
    Done(&'a str, T),

    /// Parsing failed with the given error.
    Error(E),
}
63
64 use ParseResult::*;
65
66 \f
67 // Specific Parsing Types (ParseError, ByteSpan) ///////////////////////////////
68
69 /// Indicates how parsing failed.
70 #[derive(Debug, PartialEq, Eq, Clone)]
71 pub enum ParseError<Loc=ByteSpan> where Loc: Span {
72 UnexpectedEof,
73 List(Box<ParseError>, Loc),
74 Sexp(Box<ParseError>, Loc),
75 Char(Box<ParseError>, Loc),
76 String(Box<ParseError>, Loc),
77 Symbol(Box<ParseError>, Loc),
78 Number(Box<ParseError>, Loc),
79 Unexpected(char, Loc::Begin),
80 Unimplemented,
81 }
82
/// A half-open byte range `(begin, end)` into the source text.
///
/// Public because it is the default location type of the public `Sexp` and
/// `ParseError` enums; users need to be able to name it.
pub type ByteSpan = (usize, usize);
84
85 impl Span for ByteSpan {
86 type Begin = usize;
87
88 fn offset(&self, begin: Self::Begin) -> Self {
89 (self.0 + begin, self.1 + begin)
90 }
91
92 fn begin(&self) -> Self::Begin {
93 self.0
94 }
95
96 fn union(&self, other: &Self) -> Self {
97 use std::cmp::{min, max};
98 (min(self.0, other.0), max(self.1, other.1))
99 }
100 }
101
102
103 \f
104 // Parsing Utilities ///////////////////////////////////////////////////////////
105
/// Classifies values that terminate a token such as a symbol or number.
trait IsDelimiter {
    /// Returns `true` if `self` ends the token currently being read.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimiter for char {
    /// A character is a delimiter if it is one of the listed punctuation
    /// characters or any whitespace.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
120
// Skips leading whitespace in `$input`.
//
// Evaluates to `(remaining_input, new_start_loc)`, where `new_start_loc` is
// `$start_loc` advanced by the number of bytes skipped. If `$input` holds
// nothing but whitespace (or is empty), this *returns from the enclosing
// function* with `Error($ErrorFn(UnexpectedEof, span))`, the span sitting at
// the end of the input.
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        match $input.find(|c: char| !c.is_whitespace()) {
            Some(pos) => (&$input[pos..], $start_loc + pos),
            None => return Error($ErrorFn(
                Box::new(ParseError::UnexpectedEof),
                ($input.len(), $input.len()).offset($start_loc))),
        }
    }
}
132
133 \f
134 // Top Level Parsers ///////////////////////////////////////////////////////////
135
136 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
137 match parse_sexp(input, 0) {
138 Done(rest, result) => Ok((result, rest)),
139 Error(err) => Err(err),
140 }
141 }
142
143 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
144 let mut start_loc = 0;
145 let mut results = Vec::new();
146 loop {
147 match parse_sexp(input, start_loc) {
148 Done(rest, result) => {
149 input = rest;
150 start_loc = result.get_loc().1;
151 results.push(result);
152 if rest.trim() == "" {
153 return (results, None);
154 }
155 }
156 Error(err) => {
157 return (results, Some(err));
158 }
159 }
160 }
161 }
162
163 \f
164 // Core Parsers ////////////////////////////////////////////////////////////////
165
166 pub fn parse_sexp(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
167 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
168
169 match input.chars().next() {
170 Some('0'...'9') => parse_number(input, start_loc),
171 Some('(') => parse_list(input, start_loc),
172 Some('#') => parse_character(input, start_loc),
173 Some('"') => parse_string(input, start_loc),
174 Some(_) => parse_symbol(input, start_loc),
175 None => unreachable!(),
176 }
177 }
178
179 pub fn parse_list(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
180 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::List);
181
182 match input.chars().nth(0) {
183 Some('(') => (),
184 Some(c) =>
185 return Error(ParseError::List(
186 Box::new(ParseError::Unexpected(c, 0)),
187 (0, 0).offset(start_loc))),
188 None => unreachable!(),
189 }
190
191 let mut input = &input[1..];
192 let mut loc = start_loc + 1;
193 let mut members = Vec::new();
194 println!("!{}", loc);
195 loop {
196 {
197 let (new_input, new_loc) = consume_whitespace!(input, loc, ParseError::List);
198 input = new_input;
199 loc = new_loc;
200 println!("{}", loc);
201 }
202
203 match input.chars().nth(0) {
204 Some(')') =>
205 return Done(&input[1..],
206 Sexp::List(members, (start_loc, loc+1))),
207 Some(_) => (),
208 None => unreachable!(),
209 }
210
211 match parse_sexp(input, loc) {
212 Done(new_input, member) => {
213 loc = member.get_loc().1;
214 members.push(member);
215 input = new_input;
216 }
217 Error(err) =>
218 return Error(ParseError::List(
219 Box::new(err),
220 (0, 0).offset(loc)))
221 }
222 }
223 }
224
225 pub fn parse_number(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
226 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Number);
227
228 match input.chars().next() {
229 Some(c) if !c.is_digit(10) => {
230 return Error(ParseError::Number(
231 Box::new(ParseError::Unexpected(c, start_loc)),
232 (0, c.len_utf8()).offset(start_loc)));
233 }
234 None => return Error(ParseError::Number(
235 Box::new(ParseError::UnexpectedEof),
236 (0, 0).offset(start_loc))),
237 _ => (),
238 }
239
240 let base = 10;
241
242 let mut end = 0;
243 // Before the decimal point
244 for (i, c) in input.char_indices() {
245 if c == '.' {
246 end = i + 1;
247 break;
248 }
249
250 if c.is_delimiter() {
251 return Done(&input[i..],
252 Sexp::Int(input[..i].parse().expect("Already matched digits"),
253 (0, i).offset(start_loc)));
254 }
255
256 if !c.is_digit(base) {
257 return Error(ParseError::Number(
258 Box::new(ParseError::Unexpected(c, start_loc + i)),
259 (i, i).offset(start_loc)));
260 }
261
262 end = i + c.len_utf8();
263 }
264
265 if input[end..].is_empty() {
266 return Done(&input[end..],
267 Sexp::Int(input.parse().expect("Already matched digits"),
268 (0, end).offset(start_loc)));
269 }
270
271 // After the decimal point
272 for (i, c) in input[end..].char_indices() {
273 if c.is_delimiter() {
274 return Done(&input[i+end..],
275 Sexp::Float(input[..end+i].parse().expect("Already matched digits.digits"),
276 (0, end+i).offset(start_loc)));
277 }
278
279 if !c.is_digit(base) {
280 return Error(ParseError::Number(
281 Box::new(ParseError::Unexpected(c, start_loc + i + end)),
282 (i+end, i+end).offset(start_loc)));
283 }
284 }
285
286 Done(&input[input.len()..],
287 Sexp::Float(input.parse().expect("Already matched digits.digits"),
288 (0, input.len()).offset(start_loc)))
289 }
290
291 pub fn parse_symbol(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
292 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Symbol);
293
294 match input.chars().next() {
295 Some(c@'#') | Some(c@':') | Some(c@'0'...'9') =>
296 return Error(ParseError::Symbol(
297 Box::new(ParseError::Unexpected(c, start_loc)),
298 (0, 0).offset(start_loc))),
299 Some(c) if c.is_delimiter() =>
300 return Error(ParseError::Symbol(
301 Box::new(ParseError::Unexpected(c, start_loc)),
302 (0, 0).offset(start_loc))),
303 Some(_) => (),
304 None => unreachable!(),
305 }
306
307 for (i, c) in input.char_indices() {
308 if c.is_delimiter() {
309 return Done(&input[i..],
310 Sexp::Sym(input[..i].into(), (0, i).offset(start_loc)));
311 }
312 }
313
314 Done(&input[input.len()..],
315 Sexp::Sym(input.into(), (0, input.len()).offset(start_loc)))
316 }
317
318 pub fn parse_string(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
319 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::String);
320
321 match input.chars().next() {
322 Some('"') => (),
323 Some(c) =>
324 return Error(ParseError::String(
325 Box::new(ParseError::Unexpected(c, start_loc)),
326 (0, 0).offset(start_loc))),
327 None => unreachable!(),
328 }
329
330 for (i, c) in input[1..].char_indices() {
331 if c == '"' {
332 return Done(&input[2+i..],
333 Sexp::Str(input[1..i+1].into(), (0, i+2).offset(start_loc)));
334 }
335 }
336
337 Error(ParseError::String(
338 Box::new(ParseError::UnexpectedEof),
339 (0, input.len()).offset(start_loc)))
340 }
341
342 pub fn parse_character(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
343 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Char);
344
345 match input.chars().nth(0) {
346 Some('#') => (),
347 Some(c) =>
348 return Error(ParseError::Char(
349 Box::new(ParseError::Unexpected(c, start_loc)),
350 (0, 0).offset(start_loc))),
351 None =>
352 return Error(ParseError::Char(
353 Box::new(ParseError::UnexpectedEof),
354 (0, 0).offset(start_loc))),
355 }
356
357 match input.chars().nth(1) {
358 Some('\\') => (),
359 Some(c) =>
360 return Error(ParseError::Char(
361 Box::new(ParseError::Unexpected(c, start_loc + 1)),
362 (1, 1).offset(start_loc))),
363 None =>
364 return Error(ParseError::Char(
365 Box::new(ParseError::UnexpectedEof),
366 (1, 1).offset(start_loc)))
367 }
368
369 match input.chars().nth(2) {
370 Some(c) =>
371 Done(&input[3..], Sexp::Char(c, (0, 3).offset(start_loc))),
372 None =>
373 Error(ParseError::Char(
374 Box::new(ParseError::UnexpectedEof),
375 (2, 2).offset(start_loc)))
376 }
377 }
378
379 \f
380 // Tests ///////////////////////////////////////////////////////////////////////
381
// Unit tests for the top-level and core parsers. Inputs with runs of several
// spaces are deliberate: the asserted byte spans depend on the exact
// whitespace, so it must not be collapsed.
#[cfg(test)]
mod test {
    use super::*;
    use super::ParseResult::*;

    #[test]
    fn test_parse() {
        assert_eq!(parse("1 2 3"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3)), Sexp::Int(3, (4, 5))
        ], None));
        assert_eq!(parse("1 2 )"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3))
        ], Some(ParseError::Symbol(Box::new(ParseError::Unexpected(')', 4)), (4, 4)))));
    }

    #[test]
    fn test_parse_one() {
        assert_eq!(parse_one("1 2"),
                   Ok((Sexp::Int(1, (0, 1)), " 2")));
    }

    #[test]
    fn test_parse_sexp() {
        assert_eq!(parse_sexp(" 1", 0),
                   Done("", Sexp::Int(1, (1, 2))));
        assert_eq!(parse_sexp("2.2", 0),
                   Done("", Sexp::Float(2.2, (0, 3))));
        assert_eq!(parse_sexp(" a", 0),
                   Done("", Sexp::Sym("a".into(), (1, 2))));
        assert_eq!(parse_sexp("#\\c", 0),
                   Done("", Sexp::Char('c', (0, 3))));
        assert_eq!(parse_sexp(r#""hi""#, 0),
                   Done("", Sexp::Str("hi".into(), (0, 4))));
        assert_eq!(parse_sexp("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_sexp("( 1 2 3 )", 0),
                   Done("", Sexp::List(vec![
                       Sexp::Int(1, (2, 3)),
                       Sexp::Int(2, (4, 5)),
                       Sexp::Int(3, (6, 7)),
                   ], (0, 9))));

        assert_eq!(parse_sexp("", 0),
                   Error(ParseError::Sexp(Box::new(ParseError::UnexpectedEof), (0, 0))));
    }

    #[test]
    fn test_parse_list() {
        assert_eq!(parse_list("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_list("(1)", 0),
                   Done("", Sexp::List(vec![Sexp::Int(1, (1, 2))], (0, 3))));
        // Two leading spaces, four spaces after `1`, two after `2`.
        assert_eq!(parse_list("  ( 1    2  3 a )", 0), Done("", Sexp::List(vec![
            Sexp::Int(1, (4, 5)),
            Sexp::Int(2, (9, 10)),
            Sexp::Int(3, (12, 13)),
            Sexp::Sym("a".into(), (14, 15)),
        ], (2, 17))));
    }

    #[test]
    fn test_parse_number() {
        assert_eq!(parse_number("1", 0),
                   Done("", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number(" 13", 0),
                   Done("", Sexp::Int(13, (1, 3))));
        assert_eq!(parse_number("1.2", 0),
                   Done("", Sexp::Float(1.2, (0, 3))));
        assert_eq!(parse_number("\u{3000}4.2", 0),
                   Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
        // Two leading spaces, so the digits sit at bytes 2..4.
        assert_eq!(parse_number("  42  ", 0),
                   Done("  ", Sexp::Int(42, (2, 4))));
        assert_eq!(parse_number(" 4.2  ", 0),
                   Done("  ", Sexp::Float(4.2, (1, 4))));
        assert_eq!(parse_number("1()", 0),
                   Done("()", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number("3.6()", 0),
                   Done("()", Sexp::Float(3.6, (0, 3))));

        assert_eq!(parse_number("", 0),
                   Error(ParseError::Number(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_number("123a", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('a', 3)), (3, 3))));
        assert_eq!(parse_number("66.6+", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('+', 4)), (4, 4))));
    }

    #[test]
    fn test_parse_ident() {
        assert_eq!(parse_symbol("+", 0),
                   Done("", Sexp::Sym("+".into(), (0, 1))));
        assert_eq!(parse_symbol(" nil?", 0),
                   Done("", Sexp::Sym("nil?".into(), (1, 5))));
        assert_eq!(parse_symbol(" ->socket", 0),
                   Done("", Sexp::Sym("->socket".into(), (1, 9))));
        assert_eq!(parse_symbol("fib(", 0),
                   Done("(", Sexp::Sym("fib".into(), (0, 3))));
        assert_eq!(parse_symbol("foo2", 0),
                   Done("", Sexp::Sym("foo2".into(), (0, 4))));

        // We reserve #foo for the implementation to do as it wishes
        assert_eq!(parse_symbol("#hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('#', 0)), (0, 0))));
        // We reserve :foo for keywords
        assert_eq!(parse_symbol(":hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected(':', 0)), (0, 0))));

        assert_eq!(parse_symbol("", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_symbol("0", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('0', 0)), (0, 0))));
        assert_eq!(parse_symbol("()", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('(', 0)), (0, 0))));
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(parse_string(r#""""#, 0),
                   Done("", Sexp::Str("".into(), (0, 2))));
        assert_eq!(parse_string(r#""hello""#, 0),
                   Done("", Sexp::Str("hello".into(), (0, 7))));
        // Two leading spaces; the literal spans bytes 2..48 including quotes.
        assert_eq!(parse_string("  \"this is a nice string\nwith 0123 things in it\"", 0),
                   Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into(), (2, 48))));

        assert_eq!(parse_string("", 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_string(r#""hi"#, 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
    }

    #[test]
    fn test_parse_char() {
        assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
        assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
        // Two leading spaces, so the literal sits at bytes 2..5.
        assert_eq!(parse_character(r#"  #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));

        assert_eq!(parse_character("", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_character("#", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
        assert_eq!(parse_character("#\\", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
        assert_eq!(parse_character("a", 0),
                   Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));
    }
}
528 }