]> Witch of Git - ess/blob - src/lib.rs
Make it possible to take ownership of the strings in a Sexp
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 // #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 use std::borrow::Cow;
7
8 /// A type representing arbitrary symbolic expressions. `Sexp` carries the
9 /// source code location it came from along with it for later diagnostic
10 /// purposes.
11 #[derive(Debug, PartialEq, Clone, PartialOrd)]
12 pub enum Sexp<'a, Loc=ByteSpan> where Loc: Span {
13 /// A value representing a symbol.
14 Sym(Cow<'a, str>, Loc),
15 /// A value representing a string literal.
16 Str(Cow<'a, str>, Loc),
17 /// A value representing a single character.
18 Char(char, Loc),
19 /// A value representing an integer. Any number containing no decimal point
20 /// will be parsed as an `Int`.
21 Int(i64, Loc),
22 /// A value representing a floating point number. Any number containing a
23 /// decimal point will be parsed as a `Float`.
24 Float(f64, Loc),
25 /// A list of subexpressions.
26 List(Vec<Sexp<'a, Loc>>, Loc),
27 }
28
29 impl<'a, Loc> Sexp<'a, Loc> where Loc: Span {
30 pub fn get_loc(&self) -> &Loc {
31 match *self {
32 Sexp::Sym(.., ref l) | Sexp::Str(.., ref l) |
33 Sexp::Char(.., ref l) | Sexp::Int(.., ref l) |
34 Sexp::Float(.., ref l) | Sexp::List(.., ref l) => l,
35 }
36 }
37
38 pub fn get_loc_mut(&mut self) -> &mut Loc {
39 match *self {
40 Sexp::Sym(.., ref mut l) | Sexp::Str(.., ref mut l) |
41 Sexp::Char(.., ref mut l) | Sexp::Int(.., ref mut l) |
42 Sexp::Float(.., ref mut l) | Sexp::List(.., ref mut l) => l,
43 }
44 }
45 }
46
/// Builds a `'static` copy-on-write value from `cow` by copying the data it
/// points at into a freshly owned value. The result is always `Cow::Owned`,
/// whether `cow` was borrowed or owned.
fn extend_cow<'a, T: ?Sized>(cow: &Cow<'a, T>) -> Cow<'static, T>
    where T: ToOwned
{
    // Dereference down to the underlying `T`, then make an owned copy of it.
    let owned = ToOwned::to_owned(&**cow);
    Cow::Owned(owned)
}
52
53 impl<'a, Loc> Sexp<'a, Loc> where Loc: Span + Clone {
54 pub fn to_owned(&self) -> Sexp<'static, Loc> {
55 match *self {
56 Sexp::Sym(ref s, ref l) => Sexp::Sym(extend_cow(s), l.clone()),
57 Sexp::Str(ref s, ref l) => Sexp::Str(extend_cow(s), l.clone()),
58 Sexp::Char(c, ref l) => Sexp::Char(c, l.clone()),
59 Sexp::Int(i, ref l) => Sexp::Int(i, l.clone()),
60 Sexp::Float(f, ref l) => Sexp::Float(f, l.clone()),
61 Sexp::List(ref xs, ref l) =>
62 Sexp::List(xs.iter().map(Sexp::to_owned).collect(),
63 l.clone()),
64 }
65 }
66 }
67
68 \f
69 // General Parsing Types ///////////////////////////////////////////////////////
70
/// A source location that can be shifted and merged.
///
/// The parsers in this file build spans relative to the text they are looking
/// at and then move them into place with `offset`.
pub trait Span {
    /// The type of a single position, as returned by `begin` and accepted by
    /// `offset`.
    type Begin;

    /// Returns the position at which this span starts.
    fn begin(&self) -> Self::Begin;

    /// Returns a copy of this span displaced by `begin`. The `ByteSpan`
    /// implementation adds `begin` to both ends.
    fn offset(&self, begin: Self::Begin) -> Self;

    /// Combines this span with `other`. The `ByteSpan` implementation returns
    /// the smallest span that covers both.
    fn union(&self, other: &Self) -> Self;
}
78
/// The outcome of running a single parser over some input.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseResult<'a, T, E> {
    /// Parsing succeeded: holds the input left over after the parsed value,
    /// followed by the value itself.
    Done(&'a str, T),
    /// Parsing failed with the given error.
    Error(E),
}
84
85 use ParseResult::*;
86
87 \f
88 // Specific Parsing Types (ParseError, ByteSpan) ///////////////////////////////
89
90 /// Indicates how parsing failed.
91 #[derive(Debug, PartialEq, Eq, Clone)]
92 pub enum ParseError<Loc=ByteSpan> where Loc: Span {
93 UnexpectedEof,
94 List(Box<ParseError>, Loc),
95 Sexp(Box<ParseError>, Loc),
96 Char(Box<ParseError>, Loc),
97 String(Box<ParseError>, Loc),
98 Symbol(Box<ParseError>, Loc),
99 Number(Box<ParseError>, Loc),
100 Unexpected(char, Loc::Begin),
101 Unimplemented,
102 }
103
/// A source location expressed as a pair of byte offsets into the input:
/// `(start, end)`.
///
/// Public because it is the default location type of the public `Sexp` and
/// `ParseError` types, so code outside this module needs to be able to name it.
pub type ByteSpan = (usize, usize);
105
106 impl Span for ByteSpan {
107 type Begin = usize;
108
109 fn offset(&self, begin: Self::Begin) -> Self {
110 (self.0 + begin, self.1 + begin)
111 }
112
113 fn begin(&self) -> Self::Begin {
114 self.0
115 }
116
117 fn union(&self, other: &Self) -> Self {
118 use std::cmp::{min, max};
119 (min(self.0, other.0), max(self.1, other.1))
120 }
121 }
122
123
124 \f
125 // Parsing Utilities ///////////////////////////////////////////////////////////
126
/// Classifies values that terminate a token such as a symbol or a number.
trait IsDelimeter {
    /// Returns `true` when the value ends the token being read.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter when it is whitespace or one of
    /// `; ( ) [ ] { } " ' `` ` `` ,`.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
141
/// Skips the whitespace at the front of `$input`.
///
/// Evaluates to `(rest, loc)`: the input starting at its first
/// non-whitespace character, and `$start_loc` advanced by the number of bytes
/// skipped.
///
/// If `$input` is empty or consists only of whitespace, the macro instead
/// `return`s from the *enclosing function* with
/// `Error($ErrorFn(Box::new(ParseError::UnexpectedEof), span))`, where `span`
/// is the zero-width position at the end of the input, offset by
/// `$start_loc`. It therefore relies on `Error`, `ParseError` and a suitable
/// `offset` method being in scope where it is used.
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        match $input.find(|c: char| !c.is_whitespace()) {
            Some(pos) => (&$input[pos..], $start_loc + pos),
            None => {
                return Error($ErrorFn(
                    Box::new(ParseError::UnexpectedEof),
                    ($input.len(), $input.len()).offset($start_loc)));
            }
        }
    }
}
153
154 \f
155 // Top Level Parsers ///////////////////////////////////////////////////////////
156
157 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
158 match parse_sexp(input, 0) {
159 Done(rest, result) => Ok((result, rest)),
160 Error(err) => Err(err),
161 }
162 }
163
164 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
165 let mut start_loc = 0;
166 let mut results = Vec::new();
167 loop {
168 match parse_sexp(input, start_loc) {
169 Done(rest, result) => {
170 input = rest;
171 start_loc = result.get_loc().1;
172 results.push(result);
173 if rest.trim() == "" {
174 return (results, None);
175 }
176 }
177 Error(err) => {
178 return (results, Some(err));
179 }
180 }
181 }
182 }
183
184 \f
185 // Core Parsers ////////////////////////////////////////////////////////////////
186
187 pub fn parse_sexp(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
188 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
189
190 match input.chars().next() {
191 Some('0'...'9') => parse_number(input, start_loc),
192 Some('(') => parse_list(input, start_loc),
193 Some('#') => parse_character(input, start_loc),
194 Some('"') => parse_string(input, start_loc),
195 Some(_) => parse_symbol(input, start_loc),
196 None => unreachable!(),
197 }
198 }
199
200 pub fn parse_list(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
201 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::List);
202
203 match input.chars().nth(0) {
204 Some('(') => (),
205 Some(c) =>
206 return Error(ParseError::List(
207 Box::new(ParseError::Unexpected(c, 0)),
208 (0, 0).offset(start_loc))),
209 None => unreachable!(),
210 }
211
212 let mut input = &input[1..];
213 let mut loc = start_loc + 1;
214 let mut members = Vec::new();
215 loop {
216 {
217 let (new_input, new_loc) = consume_whitespace!(input, loc, ParseError::List);
218 input = new_input;
219 loc = new_loc;
220 }
221
222 match input.chars().nth(0) {
223 Some(')') =>
224 return Done(&input[1..],
225 Sexp::List(members, (start_loc, loc+1))),
226 Some(_) => (),
227 None => unreachable!(),
228 }
229
230 match parse_sexp(input, loc) {
231 Done(new_input, member) => {
232 loc = member.get_loc().1;
233 members.push(member);
234 input = new_input;
235 }
236 Error(err) =>
237 return Error(ParseError::List(
238 Box::new(err),
239 (0, 0).offset(loc)))
240 }
241 }
242 }
243
244 pub fn parse_number(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
245 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Number);
246
247 match input.chars().next() {
248 Some(c) if !c.is_digit(10) => {
249 return Error(ParseError::Number(
250 Box::new(ParseError::Unexpected(c, start_loc)),
251 (0, c.len_utf8()).offset(start_loc)));
252 }
253 None => return Error(ParseError::Number(
254 Box::new(ParseError::UnexpectedEof),
255 (0, 0).offset(start_loc))),
256 _ => (),
257 }
258
259 let base = 10;
260
261 let mut end = 0;
262 // Before the decimal point
263 for (i, c) in input.char_indices() {
264 if c == '.' {
265 end = i + 1;
266 break;
267 }
268
269 if c.is_delimiter() {
270 return Done(&input[i..],
271 Sexp::Int(input[..i].parse().expect("Already matched digits"),
272 (0, i).offset(start_loc)));
273 }
274
275 if !c.is_digit(base) {
276 return Error(ParseError::Number(
277 Box::new(ParseError::Unexpected(c, start_loc + i)),
278 (i, i).offset(start_loc)));
279 }
280
281 end = i + c.len_utf8();
282 }
283
284 if input[end..].is_empty() {
285 return Done(&input[end..],
286 Sexp::Int(input.parse().expect("Already matched digits"),
287 (0, end).offset(start_loc)));
288 }
289
290 // After the decimal point
291 for (i, c) in input[end..].char_indices() {
292 if c.is_delimiter() {
293 return Done(&input[i+end..],
294 Sexp::Float(input[..end+i].parse().expect("Already matched digits.digits"),
295 (0, end+i).offset(start_loc)));
296 }
297
298 if !c.is_digit(base) {
299 return Error(ParseError::Number(
300 Box::new(ParseError::Unexpected(c, start_loc + i + end)),
301 (i+end, i+end).offset(start_loc)));
302 }
303 }
304
305 Done(&input[input.len()..],
306 Sexp::Float(input.parse().expect("Already matched digits.digits"),
307 (0, input.len()).offset(start_loc)))
308 }
309
310 pub fn parse_symbol(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
311 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Symbol);
312
313 match input.chars().next() {
314 Some(c@'#') | Some(c@':') | Some(c@'0'...'9') =>
315 return Error(ParseError::Symbol(
316 Box::new(ParseError::Unexpected(c, start_loc)),
317 (0, 0).offset(start_loc))),
318 Some(c) if c.is_delimiter() =>
319 return Error(ParseError::Symbol(
320 Box::new(ParseError::Unexpected(c, start_loc)),
321 (0, 0).offset(start_loc))),
322 Some(_) => (),
323 None => unreachable!(),
324 }
325
326 for (i, c) in input.char_indices() {
327 if c.is_delimiter() {
328 return Done(&input[i..],
329 Sexp::Sym(input[..i].into(), (0, i).offset(start_loc)));
330 }
331 }
332
333 Done(&input[input.len()..],
334 Sexp::Sym(input.into(), (0, input.len()).offset(start_loc)))
335 }
336
337 pub fn parse_string(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
338 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::String);
339
340 match input.chars().next() {
341 Some('"') => (),
342 Some(c) =>
343 return Error(ParseError::String(
344 Box::new(ParseError::Unexpected(c, start_loc)),
345 (0, 0).offset(start_loc))),
346 None => unreachable!(),
347 }
348
349 for (i, c) in input[1..].char_indices() {
350 if c == '"' {
351 return Done(&input[2+i..],
352 Sexp::Str(input[1..i+1].into(), (0, i+2).offset(start_loc)));
353 }
354 }
355
356 Error(ParseError::String(
357 Box::new(ParseError::UnexpectedEof),
358 (0, input.len()).offset(start_loc)))
359 }
360
361 pub fn parse_character(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
362 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Char);
363
364 match input.chars().nth(0) {
365 Some('#') => (),
366 Some(c) =>
367 return Error(ParseError::Char(
368 Box::new(ParseError::Unexpected(c, start_loc)),
369 (0, 0).offset(start_loc))),
370 None =>
371 return Error(ParseError::Char(
372 Box::new(ParseError::UnexpectedEof),
373 (0, 0).offset(start_loc))),
374 }
375
376 match input.chars().nth(1) {
377 Some('\\') => (),
378 Some(c) =>
379 return Error(ParseError::Char(
380 Box::new(ParseError::Unexpected(c, start_loc + 1)),
381 (1, 1).offset(start_loc))),
382 None =>
383 return Error(ParseError::Char(
384 Box::new(ParseError::UnexpectedEof),
385 (1, 1).offset(start_loc)))
386 }
387
388 match input.chars().nth(2) {
389 Some(c) =>
390 Done(&input[3..], Sexp::Char(c, (0, 3).offset(start_loc))),
391 None =>
392 Error(ParseError::Char(
393 Box::new(ParseError::UnexpectedEof),
394 (2, 2).offset(start_loc)))
395 }
396 }
397
398 \f
399 // Tests ///////////////////////////////////////////////////////////////////////
400
// Unit tests for the top-level and core parsers.
//
// Several inputs deliberately contain runs of whitespace: the asserted
// locations are byte offsets, so the exact amount of whitespace in each
// literal matters.
#[cfg(test)]
mod test {
    use super::*;
    use super::ParseResult::*;

    #[test]
    fn test_parse() {
        assert_eq!(parse("1 2 3"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3)), Sexp::Int(3, (4, 5))
        ], None));
        // Expressions parsed before the failure are still returned.
        assert_eq!(parse("1 2 )"), (vec![
            Sexp::Int(1, (0, 1)), Sexp::Int(2, (2, 3))
        ], Some(ParseError::Symbol(Box::new(ParseError::Unexpected(')', 4)), (4, 4)))));
    }

    #[test]
    fn test_parse_one() {
        assert_eq!(parse_one("1 2"),
                   Ok((Sexp::Int(1, (0, 1)), " 2")));
    }

    #[test]
    fn test_parse_sexp() {
        assert_eq!(parse_sexp(" 1", 0),
                   Done("", Sexp::Int(1, (1, 2))));
        assert_eq!(parse_sexp("2.2", 0),
                   Done("", Sexp::Float(2.2, (0, 3))));
        assert_eq!(parse_sexp(" a", 0),
                   Done("", Sexp::Sym("a".into(), (1, 2))));
        assert_eq!(parse_sexp("#\\c", 0),
                   Done("", Sexp::Char('c', (0, 3))));
        assert_eq!(parse_sexp(r#""hi""#, 0),
                   Done("", Sexp::Str("hi".into(), (0, 4))));
        assert_eq!(parse_sexp("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_sexp("( 1 2 3 )", 0),
                   Done("", Sexp::List(vec![
                       Sexp::Int(1, (2, 3)),
                       Sexp::Int(2, (4, 5)),
                       Sexp::Int(3, (6, 7)),
                   ], (0, 9))));

        assert_eq!(parse_sexp("", 0),
                   Error(ParseError::Sexp(Box::new(ParseError::UnexpectedEof), (0, 0))));
    }

    #[test]
    fn test_parse_list() {
        assert_eq!(parse_list("()", 0),
                   Done("", Sexp::List(vec![], (0, 2))));
        assert_eq!(parse_list("(1)", 0),
                   Done("", Sexp::List(vec![Sexp::Int(1, (1, 2))], (0, 3))));
        // Two leading spaces, four between `1` and `2`, two between `2` and `3`.
        assert_eq!(parse_list("  ( 1    2  3 a )", 0), Done("", Sexp::List(vec![
            Sexp::Int(1, (4, 5)),
            Sexp::Int(2, (9, 10)),
            Sexp::Int(3, (12, 13)),
            Sexp::Sym("a".into(), (14, 15)),
        ], (2, 17))));
    }

    #[test]
    fn test_parse_number() {
        assert_eq!(parse_number("1", 0),
                   Done("", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number(" 13", 0),
                   Done("", Sexp::Int(13, (1, 3))));
        assert_eq!(parse_number("1.2", 0),
                   Done("", Sexp::Float(1.2, (0, 3))));
        assert_eq!(parse_number("\u{3000}4.2", 0),
                   Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
        // Two leading spaces, one trailing.
        assert_eq!(parse_number("  42 ", 0),
                   Done(" ", Sexp::Int(42, (2, 4))));
        assert_eq!(parse_number(" 4.2 ", 0),
                   Done(" ", Sexp::Float(4.2, (1, 4))));
        assert_eq!(parse_number("1()", 0),
                   Done("()", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number("3.6()", 0),
                   Done("()", Sexp::Float(3.6, (0, 3))));

        assert_eq!(parse_number("", 0),
                   Error(ParseError::Number(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_number("123a", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('a', 3)), (3, 3))));
        assert_eq!(parse_number("66.6+", 0),
                   Error(ParseError::Number(Box::new(ParseError::Unexpected('+', 4)), (4, 4))));
    }

    #[test]
    fn test_parse_ident() {
        assert_eq!(parse_symbol("+", 0),
                   Done("", Sexp::Sym("+".into(), (0, 1))));
        assert_eq!(parse_symbol(" nil?", 0),
                   Done("", Sexp::Sym("nil?".into(), (1, 5))));
        assert_eq!(parse_symbol(" ->socket", 0),
                   Done("", Sexp::Sym("->socket".into(), (1, 9))));
        assert_eq!(parse_symbol("fib(", 0),
                   Done("(", Sexp::Sym("fib".into(), (0, 3))));
        assert_eq!(parse_symbol("foo2", 0),
                   Done("", Sexp::Sym("foo2".into(), (0, 4))));

        // We reserve #foo for the implementation to do as it wishes
        assert_eq!(parse_symbol("#hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('#', 0)), (0, 0))));
        // We reserve :foo for keywords
        assert_eq!(parse_symbol(":hi", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected(':', 0)), (0, 0))));

        assert_eq!(parse_symbol("", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_symbol("0", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('0', 0)), (0, 0))));
        assert_eq!(parse_symbol("()", 0),
                   Error(ParseError::Symbol(Box::new(ParseError::Unexpected('(', 0)), (0, 0))));
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(parse_string(r#""""#, 0),
                   Done("", Sexp::Str("".into(), (0, 2))));
        assert_eq!(parse_string(r#""hello""#, 0),
                   Done("", Sexp::Str("hello".into(), (0, 7))));
        // Two leading spaces; the second line of the literal must start at
        // column 0 so that no extra whitespace ends up inside the string.
        assert_eq!(parse_string(r#"  "this is a nice string
with 0123 things in it""#, 0),
                   Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into(), (2, 48))));

        assert_eq!(parse_string("", 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_string(r#""hi"#, 0),
                   Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
    }

    #[test]
    fn test_parse_char() {
        assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
        assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
        // Two leading spaces.
        assert_eq!(parse_character(r#"  #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));

        assert_eq!(parse_character("", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_character("#", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
        assert_eq!(parse_character("#\\", 0),
                   Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
        assert_eq!(parse_character("a", 0),
                   Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));
    }
}