]>
Witch of Git - ess/blob - src/lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
3 // #![warn(missing_docs)]
8 /// A type representing arbitrary symbolic expressions. `Sexp` carries the
9 /// source code location it came from along with it for later diagnostic
11 #[derive(Debug, PartialEq, Clone, PartialOrd)]
12 pub enum Sexp
<'a
, Loc
=ByteSpan
> where Loc
: Span
{
13 /// A value representing a symbol.
14 Sym(Cow
<'a
, str>, Loc
),
15 /// A value representing a string literal.
16 Str(Cow
<'a
, str>, Loc
),
17 /// A value representing a single character.
19 /// A value representing an integer. Any number containing no decimal point
20 /// will be parsed as an `Int`.
22 /// A value representing a floating point number. Any number containing a
23 /// decimal point will be parsed as a `Float`.
25 /// A list of subexpressions.
26 List(Vec
<Sexp
<'a
, Loc
>>, Loc
),
29 impl<'a
, Loc
> Sexp
<'a
, Loc
> where Loc
: Span
{
30 pub fn get_loc(&self) -> &Loc
{
32 Sexp
::Sym(.., ref l
) | Sexp
::Str(.., ref l
) |
33 Sexp
::Char(.., ref l
) | Sexp
::Int(.., ref l
) |
34 Sexp
::Float(.., ref l
) | Sexp
::List(.., ref l
) => l
,
38 pub fn get_loc_mut(&mut self) -> &mut Loc
{
40 Sexp
::Sym(.., ref mut l
) | Sexp
::Str(.., ref mut l
) |
41 Sexp
::Char(.., ref mut l
) | Sexp
::Int(.., ref mut l
) |
42 Sexp
::Float(.., ref mut l
) | Sexp
::List(.., ref mut l
) => l
,
48 // General Parsing Types ///////////////////////////////////////////////////////
53 fn offset(&self, begin
: Self::Begin
) -> Self;
54 fn begin(&self) -> Self::Begin
;
55 fn union(&self, other
: &Self) -> Self;
58 #[derive(Debug, PartialEq, Eq, Clone)]
59 pub enum ParseResult
<'a
, T
, E
> {
67 // Specific Parsing Types (ParseError, ByteSpan) ///////////////////////////////
69 /// Indicates how parsing failed.
70 #[derive(Debug, PartialEq, Eq, Clone)]
71 pub enum ParseError
<Loc
=ByteSpan
> where Loc
: Span
{
73 List(Box
<ParseError
>, Loc
),
74 Sexp(Box
<ParseError
>, Loc
),
75 Char(Box
<ParseError
>, Loc
),
76 String(Box
<ParseError
>, Loc
),
77 Symbol(Box
<ParseError
>, Loc
),
78 Number(Box
<ParseError
>, Loc
),
79 Unexpected(char, Loc
::Begin
),
83 type ByteSpan
= (usize, usize);
85 impl Span
for ByteSpan
{
88 fn offset(&self, begin
: Self::Begin
) -> Self {
89 (self.0 + begin
, self.1 + begin
)
92 fn begin(&self) -> Self::Begin
{
96 fn union(&self, other
: &Self) -> Self {
97 use std
::cmp
::{min
, max
};
98 (min(self.0, other
.0), max(self.1, other
.1))
104 // Parsing Utilities ///////////////////////////////////////////////////////////
/// Classifies values that terminate a token (a number or a symbol).
trait IsDelimeter {
    /// Returns `true` when `self` ends the token currently being read.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter when it is whitespace or one of
    /// `;`, `(`, `)`, `[`, `]`, `{`, `}`, `"`, `'`, `` ` `` or `,`.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            // Anything else only counts when it is (Unicode) whitespace.
            other => other.is_whitespace(),
        }
    }
}
/// Skips leading whitespace in `$input`.
///
/// Evaluates to `(rest, loc)`, where `rest` starts at the first
/// non-whitespace character and `loc` is `$start_loc` advanced by the number
/// of bytes skipped.
///
/// If `$input` contains nothing but whitespace (or is empty) the macro
/// *returns from the enclosing function* with
/// `Error($ErrorFn(Box::new(ParseError::UnexpectedEof), span))`, where `span`
/// is the empty span at the end of the input. It can therefore only be used
/// inside functions that return a `ParseResult`.
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        if let Some(pos) = $input.find(|c: char| !c.is_whitespace()) {
            (&$input[pos..], $start_loc + pos)
        } else {
            return Error($ErrorFn(
                Box::new(ParseError::UnexpectedEof),
                ($input.len(), $input.len()).offset($start_loc)));
        }
    }
}
134 // Top Level Parsers ///////////////////////////////////////////////////////////
136 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
137 match parse_sexp(input, 0) {
138 Done(rest, result) => Ok((result, rest)),
139 Error(err) => Err(err),
143 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
144 let mut start_loc = 0;
145 let mut results = Vec::new();
147 match parse_sexp(input, start_loc) {
148 Done(rest, result) => {
150 start_loc = result.get_loc().1;
151 results.push(result);
152 if rest.trim() == "" {
153 return (results, None);
157 return (results, Some(err));
164 // Core Parsers ////////////////////////////////////////////////////////////////
166 pub fn parse_sexp(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
167 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
169 match input.chars().next() {
170 Some('0'...'9') => parse_number(input, start_loc),
171 Some('(') => parse_list(input, start_loc),
172 Some('#') => parse_character(input, start_loc),
173 Some('"'
) => parse_string(input
, start_loc
),
174 Some(_
) => parse_symbol(input
, start_loc
),
175 None
=> unreachable
!(),
179 pub fn parse_list(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
180 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::List
);
182 match input
.chars().nth(0) {
185 return Error(ParseError
::List(
186 Box
::new(ParseError
::Unexpected(c
, 0)),
187 (0, 0).offset(start_loc
))),
188 None
=> unreachable
!(),
191 let mut input
= &input
[1..];
192 let mut loc
= start_loc
+ 1;
193 let mut members
= Vec
::new();
194 println
!("!{}", loc
);
197 let (new_input
, new_loc
) = consume_whitespace
!(input
, loc
, ParseError
::List
);
203 match input
.chars().nth(0) {
205 return Done(&input
[1..],
206 Sexp
::List(members
, (start_loc
, loc
+1))),
208 None
=> unreachable
!(),
211 match parse_sexp(input
, loc
) {
212 Done(new_input
, member
) => {
213 loc
= member
.get_loc().1;
214 members
.push(member
);
218 return Error(ParseError
::List(
225 pub fn parse_number(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
226 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Number
);
228 match input
.chars().next() {
229 Some(c
) if !c
.is
_d
ig
it
(10) => {
230 return Error(ParseError
::Number(
231 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
232 (0, c
.len_utf8()).offset(start_loc
)));
234 None
=> return Error(ParseError
::Number(
235 Box
::new(ParseError
::UnexpectedEof
),
236 (0, 0).offset(start_loc
))),
243 // Before the decimal point
244 for (i
, c
) in input
.char_indices() {
250 if c
.is
_del
im
iter
() {
251 return Done(&input
[i
..],
252 Sexp
::Int(input
[..i].parse().expect("Already matched digits"),
253 (0, i
).offset(start_loc
)));
256 if !c
.is
_d
ig
it
(base
) {
257 return Error(ParseError
::Number(
258 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
)),
259 (i
, i
).offset(start_loc
)));
262 end
= i
+ c
.len_utf8();
265 if input
[end
..].is
_empty
() {
266 return Done(&input
[end
..],
267 Sexp
::Int(input
.parse().expect("Already matched digits"),
268 (0, end
).offset(start_loc
)));
271 // After the decimal point
272 for (i
, c
) in input
[end
..].char_indices() {
273 if c
.is
_del
im
iter
() {
274 return Done(&input
[i
+end
..],
275 Sexp
::Float(input
[..end
+i
].parse().expect("Already matched digits.digits"),
276 (0, end
+i
).offset(start_loc
)));
279 if !c
.is
_d
ig
it
(base
) {
280 return Error(ParseError
::Number(
281 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
+ end
)),
282 (i
+end
, i
+end
).offset(start_loc
)));
286 Done(&input
[input
.len()..],
287 Sexp
::Float(input
.parse().expect("Already matched digits.digits"),
288 (0, input
.len()).offset(start_loc
)))
291 pub fn parse_symbol(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
292 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Symbol
);
294 match input
.chars().next() {
295 Some(c@'
#') | Some(c@':') | Some(c@'0'...'9') =>
296 return Error(ParseError
::Symbol(
297 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
298 (0, 0).offset(start_loc
))),
299 Some(c
) if c
.is
_del
im
iter
() =>
300 return Error(ParseError
::Symbol(
301 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
302 (0, 0).offset(start_loc
))),
304 None
=> unreachable
!(),
307 for (i
, c
) in input
.char_indices() {
308 if c
.is
_del
im
iter
() {
309 return Done(&input
[i
..],
310 Sexp
::Sym(input
[..i].into
(), (0, i
).offset(start_loc
)));
314 Done(&input
[input
.len()..],
315 Sexp
::Sym(input
.into
(), (0, input
.len()).offset(start_loc
)))
318 pub fn parse_string(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
319 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::String
);
321 match input
.chars().next() {
324 return Error(ParseError::String(
325 Box::new(ParseError::Unexpected(c, start_loc)),
326 (0, 0).offset(start_loc))),
327 None => unreachable!(),
330 for (i, c) in input[1..].char_indices() {
332 return Done(&input
[2+i
..],
333 Sexp
::Str(input
[1..i+1].into
(), (0, i
+2).offset(start_loc
)));
337 Error(ParseError
::String(
338 Box
::new(ParseError
::UnexpectedEof
),
339 (0, input
.len()).offset(start_loc
)))
342 pub fn parse_character(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
343 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Char
);
345 match input
.chars().nth(0) {
348 return Error(ParseError
::Char(
349 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
350 (0, 0).offset(start_loc
))),
352 return Error(ParseError
::Char(
353 Box
::new(ParseError
::UnexpectedEof
),
354 (0, 0).offset(start_loc
))),
357 match input
.chars().nth(1) {
360 return Error(ParseError
::Char(
361 Box
::new(ParseError
::Unexpected(c
, start_loc
+ 1)),
362 (1, 1).offset(start_loc
))),
364 return Error(ParseError
::Char(
365 Box
::new(ParseError
::UnexpectedEof
),
366 (1, 1).offset(start_loc
)))
369 match input
.chars().nth(2) {
371 Done(&input
[3..], Sexp
::Char(c
, (0, 3).offset(start_loc
))),
373 Error(ParseError
::Char(
374 Box
::new(ParseError
::UnexpectedEof
),
375 (2, 2).offset(start_loc
)))
380 // Tests ///////////////////////////////////////////////////////////////////////
385 use super::ParseResult
::*;
389 assert_eq
!(parse("1 2 3"), (vec
![
390 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3)), Sexp
::Int(3, (4, 5))
392 assert_eq
!(parse("1 2 )"), (vec
![
393 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3))
394 ], Some(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
)'
, 4)), (4, 4)))));
398 fn test_parse_one() {
399 assert_eq
!(parse_one("1 2"),
400 Ok((Sexp
::Int(1, (0, 1)), " 2")));
404 fn test_parse_sexp() {
405 assert_eq
!(parse_sexp(" 1", 0),
406 Done("", Sexp
::Int(1, (1, 2))));
407 assert_eq
!(parse_sexp("2.2", 0),
408 Done("", Sexp
::Float(2.2, (0, 3))));
409 assert_eq
!(parse_sexp(" a", 0),
410 Done("", Sexp
::Sym("a".into
(), (1, 2))));
411 assert_eq
!(parse_sexp("#\\c", 0),
412 Done("", Sexp
::Char('c'
, (0, 3))));
413 assert_eq
!(parse_sexp(r
#""hi""#, 0),
414 Done("", Sexp
::Str("hi".into
(), (0, 4))));
415 assert_eq
!(parse_sexp("()", 0),
416 Done("", Sexp
::List(vec
![], (0, 2))));
417 assert_eq
!(parse_sexp("( 1 2 3 )", 0),
418 Done("", Sexp
::List(vec
![
419 Sexp
::Int(1, (2, 3)),
420 Sexp
::Int(2, (4, 5)),
421 Sexp
::Int(3, (6, 7)),
424 assert_eq
!(parse_sexp("", 0),
425 Error(ParseError
::Sexp(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
429 fn test_parse_list() {
430 assert_eq
!(parse_list("()", 0),
431 Done("", Sexp
::List(vec
![], (0, 2))));
432 assert_eq
!(parse_list("(1)", 0),
433 Done("", Sexp
::List(vec
![Sexp
::Int(1, (1, 2))], (0, 3))));
434 assert_eq
!(parse_list(" ( 1 2 3 a )", 0), Done("", Sexp
::List(vec
![
435 Sexp
::Int(1, (4, 5)),
436 Sexp
::Int(2, (9, 10)),
437 Sexp
::Int(3, (12, 13)),
438 Sexp
::Sym("a".into
(), (14, 15)),
443 fn test_parse_number() {
444 assert_eq
!(parse_number("1", 0),
445 Done("", Sexp
::Int(1, (0, 1))));
446 assert_eq
!(parse_number(" 13", 0),
447 Done("", Sexp
::Int(13, (1, 3))));
448 assert_eq
!(parse_number("1.2", 0),
449 Done("", Sexp
::Float(1.2, (0, 3))));
450 assert_eq
!(parse_number("\u{3000}4.2", 0),
451 Done("", Sexp
::Float(4.2, (0, 3).offset('
\u{3000}'
.len_utf8()))));
452 assert_eq
!(parse_number(" 42 ", 0),
453 Done(" ", Sexp
::Int(42, (2, 4))));
454 assert_eq
!(parse_number(" 4.2 ", 0),
455 Done(" ", Sexp
::Float(4.2, (1, 4))));
456 assert_eq
!(parse_number("1()", 0),
457 Done("()", Sexp
::Int(1, (0, 1))));
458 assert_eq
!(parse_number("3.6()", 0),
459 Done("()", Sexp
::Float(3.6, (0, 3))));
461 assert_eq
!(parse_number("", 0),
462 Error(ParseError
::Number(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
463 assert_eq
!(parse_number("123a", 0),
464 Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('a'
, 3)), (3, 3))));
465 assert_eq
!(parse_number("66.6+", 0),
466 Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('
+'
, 4)), (4, 4))));
470 fn test_parse_ident() {
471 assert_eq
!(parse_symbol("+", 0),
472 Done("", Sexp
::Sym("+".into
(), (0, 1))));
473 assert_eq
!(parse_symbol(" nil?", 0),
474 Done("", Sexp
::Sym("nil?".into
(), (1, 5))));
475 assert_eq
!(parse_symbol(" ->socket", 0),
476 Done("", Sexp
::Sym("->socket".into
(), (1, 9))));
477 assert_eq
!(parse_symbol("fib(", 0),
478 Done("(", Sexp
::Sym("fib".into
(), (0, 3))));
479 assert_eq
!(parse_symbol("foo2", 0),
480 Done("", Sexp
::Sym("foo2".into
(), (0, 4))));
482 // We reserve #foo for the implementation to do as it wishes
483 assert_eq
!(parse_symbol("#hi", 0),
484 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
#', 0)), (0, 0))));
485 // We reserve :foo for keywords
486 assert_eq
!(parse_symbol(":hi", 0),
487 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
:'
, 0)), (0, 0))));
489 assert_eq
!(parse_symbol("", 0),
490 Error(ParseError
::Symbol(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
491 assert_eq
!(parse_symbol("0", 0),
492 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
0'
, 0)), (0, 0))));
493 assert_eq
!(parse_symbol("()", 0),
494 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
('
, 0)), (0, 0))));
498 fn test_parse_string() {
499 assert_eq
!(parse_string(r
#""""#, 0),
500 Done("", Sexp
::Str("".into
(), (0, 2))));
501 assert_eq
!(parse_string(r
#""hello""#, 0),
502 Done("", Sexp
::Str("hello".into
(), (0, 7))));
503 assert_eq
!(parse_string(r
#" "this is a nice string
504 with
0123 things
in it
""#, 0),
505 Done("", Sexp
::Str("this is a nice string\nwith 0123 things in it".into
(), (2, 48))));
507 assert_eq
!(parse_string("", 0),
508 Error(ParseError
::String(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
509 assert_eq
!(parse_string(r
#""hi"#, 0),
510 Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
514 fn test_parse_char() {
515 assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
516 assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
517 assert_eq!(parse_character(r#" #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));
519 assert_eq!(parse_character("", 0),
520 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
521 assert_eq!(parse_character("#", 0),
522 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
523 assert_eq!(parse_character("#\\", 0),
524 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
525 assert_eq!(parse_character("a", 0),
526 Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));