]>
Witch of Git - ess/blob - src/lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
3 // #![warn(missing_docs)]
8 /// A type representing arbitrary symbolic expressions. `Sexp` carries the
9 /// source code location it came from along with it for later diagnostic
11 #[derive(Debug, PartialEq, Clone, PartialOrd)]
12 pub enum Sexp
<'a
, Loc
=ByteSpan
> where Loc
: Span
{
13 /// A value representing a symbol.
14 Sym(Cow
<'a
, str>, Loc
),
15 /// A value representing a string literal.
16 Str(Cow
<'a
, str>, Loc
),
17 /// A value representing a single character.
19 /// A value representing an integer. Any number containing no decimal point
20 /// will be parsed as an `Int`.
22 /// A value representing a floating point number. Any number containing a
23 /// decimal point will be parsed as a `Float`.
25 /// A list of subexpressions.
26 List(Vec
<Sexp
<'a
, Loc
>>, Loc
),
29 impl<'a
, Loc
> Sexp
<'a
, Loc
> where Loc
: Span
{
30 pub fn get_loc(&self) -> &Loc
{
32 Sexp
::Sym(.., ref l
) | Sexp
::Str(.., ref l
) |
33 Sexp
::Char(.., ref l
) | Sexp
::Int(.., ref l
) |
34 Sexp
::Float(.., ref l
) | Sexp
::List(.., ref l
) => l
,
38 pub fn get_loc_mut(&mut self) -> &mut Loc
{
40 Sexp
::Sym(.., ref mut l
) | Sexp
::Str(.., ref mut l
) |
41 Sexp
::Char(.., ref mut l
) | Sexp
::Int(.., ref mut l
) |
42 Sexp
::Float(.., ref mut l
) | Sexp
::List(.., ref mut l
) => l
,
/// Produces a `'static` copy of `cow` by always taking ownership of the data.
///
/// Whether `cow` is `Borrowed` or `Owned`, the pointee is copied into a fresh
/// owned value, so the result no longer depends on the `'a` borrow.
fn extend_cow<'a, T: ?Sized>(cow: &Cow<'a, T>) -> Cow<'static, T>
where
    T: ToOwned,
{
    // Deref through the `Cow` to the underlying `T` and copy that directly;
    // this avoids building an intermediate `Cow` just to unwrap it again.
    Cow::Owned((**cow).to_owned())
}
53 impl<'a
, Loc
> Sexp
<'a
, Loc
> where Loc
: Span
+ Clone
{
54 pub fn to_owned(&self) -> Sexp
<'
static, Loc
> {
56 Sexp
::Sym(ref s
, ref l
) => Sexp
::Sym(extend_cow(s
), l
.clone()),
57 Sexp
::Str(ref s
, ref l
) => Sexp
::Str(extend_cow(s
), l
.clone()),
58 Sexp
::Char(c
, ref l
) => Sexp
::Char(c
, l
.clone()),
59 Sexp
::Int(i
, ref l
) => Sexp
::Int(i
, l
.clone()),
60 Sexp
::Float(f
, ref l
) => Sexp
::Float(f
, l
.clone()),
61 Sexp
::List(ref xs
, ref l
) =>
62 Sexp
::List(xs
.iter
().map(Sexp
::to_owned
).collect(),
69 // General Parsing Types ///////////////////////////////////////////////////////
/// A source location that covers a region of the input.
pub trait Span {
    /// The type used to describe where a span begins.
    type Begin;

    /// Returns this span shifted by `begin`.
    fn offset(&self, begin: Self::Begin) -> Self;

    /// Returns the position at which this span begins.
    fn begin(&self) -> Self::Begin;

    /// Returns a span covering both `self` and `other`.
    fn union(&self, other: &Self) -> Self;
}
/// The outcome of running one parser over a piece of input.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseResult<'a, T, E> {
    /// Parsing succeeded: the input left over after the parsed value,
    /// followed by the value itself.
    Done(&'a str, T),
    /// Parsing failed with the given error.
    Error(E),
}
88 // Specific Parsing Types (ParseError, ByteSpan) ///////////////////////////////
90 /// Indicates how parsing failed.
91 #[derive(Debug, PartialEq, Eq, Clone)]
92 pub enum ParseError
<Loc
=ByteSpan
> where Loc
: Span
{
94 List(Box
<ParseError
>, Loc
),
95 Sexp(Box
<ParseError
>, Loc
),
96 Char(Box
<ParseError
>, Loc
),
97 String(Box
<ParseError
>, Loc
),
98 Symbol(Box
<ParseError
>, Loc
),
99 Number(Box
<ParseError
>, Loc
),
100 Unexpected(char, Loc
::Begin
),
104 type ByteSpan
= (usize, usize);
106 impl Span
for ByteSpan
{
109 fn offset(&self, begin
: Self::Begin
) -> Self {
110 (self.0 + begin
, self.1 + begin
)
113 fn begin(&self) -> Self::Begin
{
117 fn union(&self, other
: &Self) -> Self {
118 use std
::cmp
::{min
, max
};
119 (min(self.0, other
.0), max(self.1, other
.1))
125 // Parsing Utilities ///////////////////////////////////////////////////////////
/// Classifies values that terminate a token.
trait IsDelimeter {
    /// Returns `true` when `self` ends the token currently being read.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter when it is whitespace or one of
    /// `; ( ) [ ] { } " ' `` ` `` ,`.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
/// Skips leading whitespace in `$input`.
///
/// Evaluates to `(remaining_input, new_start_loc)`, where the location has
/// been advanced by the number of bytes skipped. If the input contains
/// nothing but whitespace (or is empty), this `return`s from the *enclosing
/// function* with `Error($ErrorFn(UnexpectedEof, span))`, where `span` is the
/// empty span at the end of the input, offset by `$start_loc`.
macro_rules! consume_whitespace {
    ($input:expr, $start_loc:expr, $ErrorFn:expr) => {
        if let Some(pos) = $input.find(|c: char| !c.is_whitespace()) {
            (&$input[pos..], $start_loc + pos)
        } else {
            return Error($ErrorFn(
                Box::new(ParseError::UnexpectedEof),
                ($input.len(), $input.len()).offset($start_loc),
            ));
        }
    };
}
155 // Top Level Parsers ///////////////////////////////////////////////////////////
157 pub fn parse_one(input: &str) -> Result<(Sexp, &str), ParseError> {
158 match parse_sexp(input, 0) {
159 Done(rest, result) => Ok((result, rest)),
160 Error(err) => Err(err),
164 pub fn parse(mut input: &str) -> (Vec<Sexp>, Option<ParseError>) {
165 let mut start_loc = 0;
166 let mut results = Vec::new();
168 match parse_sexp(input, start_loc) {
169 Done(rest, result) => {
171 start_loc = result.get_loc().1;
172 results.push(result);
173 if rest.trim() == "" {
174 return (results, None);
178 return (results, Some(err));
185 // Core Parsers ////////////////////////////////////////////////////////////////
187 pub fn parse_sexp(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
188 let (input, start_loc) = consume_whitespace!(input, start_loc, ParseError::Sexp);
190 match input.chars().next() {
191 Some('0'...'9') => parse_number(input, start_loc),
192 Some('(') => parse_list(input, start_loc),
193 Some('#') => parse_character(input, start_loc),
194 Some('"'
) => parse_string(input
, start_loc
),
195 Some(_
) => parse_symbol(input
, start_loc
),
196 None
=> unreachable
!(),
/// Parses a parenthesised list of expressions from the front of `input`.
///
/// Visible behaviour: leading whitespace is skipped; the first character is
/// checked and anything unexpected produces `ParseError::List` wrapping
/// `ParseError::Unexpected`; the parser then steps past that first character,
/// repeatedly skips whitespace and either finishes with
/// `Sexp::List(members, (start_loc, loc + 1))` or parses one more member with
/// `parse_sexp`, moving `loc` to the end of that member.
///
/// NOTE(review): this listing is extraction-damaged — several original lines
/// (match arms, the loop header, the tail of the final `Error(...)`
/// expression and the closing braces) are not present in this view, so the
/// description above covers only what is visible.
200 pub fn parse_list(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
201 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::List
);
203 match input
.chars().nth(0) {
// NOTE(review): the nested `Unexpected` below records position `0`, whereas
// the other parsers in this file pass `start_loc` here — looks like it should
// be `start_loc`; confirm.
206 return Error(ParseError
::List(
207 Box
::new(ParseError
::Unexpected(c
, 0)),
208 (0, 0).offset(start_loc
))),
209 None
=> unreachable
!(),
// Step past the first character; `loc` tracks the absolute byte offset.
212 let mut input
= &input
[1..];
213 let mut loc
= start_loc
+ 1;
214 let mut members
= Vec
::new();
217 let (new_input
, new_loc
) = consume_whitespace
!(input
, loc
, ParseError
::List
);
222 match input
.chars().nth(0) {
// The list's span runs from its start through the character consumed here.
224 return Done(&input
[1..],
225 Sexp
::List(members
, (start_loc
, loc
+1))),
227 None
=> unreachable
!(),
230 match parse_sexp(input
, loc
) {
231 Done(new_input
, member
) => {
// Continue from the end of the member that was just parsed.
232 loc
= member
.get_loc().1;
233 members
.push(member
);
237 return Error(ParseError
::List(
/// Parses an integer or floating point number from the front of `input`.
///
/// Visible behaviour: leading whitespace is skipped; a first character that
/// is not a decimal digit yields `ParseError::Number` wrapping `Unexpected`,
/// and empty input yields `UnexpectedEof`. The first scan ("before the
/// decimal point") returns `Sexp::Int` when it reaches a delimiter, and
/// reports any other non-digit as `Unexpected` at `start_loc + i`. If nothing
/// follows `end`, the whole input is returned as an `Int`. The second scan
/// ("after the decimal point") does the same for the fractional part and
/// returns `Sexp::Float`.
///
/// NOTE(review): this listing is extraction-damaged — the lines that declare
/// `base` and `end` and that handle the decimal point itself are not present
/// in this view, along with the closing braces.
///
/// NOTE(review): the `expect` calls panic if `parse()` fails. An all-digit
/// slice can presumably still fail to parse when the value does not fit the
/// target integer type, which would turn oversized input into a panic rather
/// than a `ParseError` — confirm.
244 pub fn parse_number(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
245 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Number
);
247 match input
.chars().next() {
248 Some(c
) if !c
.is
_d
ig
it
(10) => {
249 return Error(ParseError
::Number(
250 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
251 (0, c
.len_utf8()).offset(start_loc
)));
253 None
=> return Error(ParseError
::Number(
254 Box
::new(ParseError
::UnexpectedEof
),
255 (0, 0).offset(start_loc
))),
262 // Before the decimal point
263 for (i
, c
) in input
.char_indices() {
269 if c
.is
_del
im
iter
() {
270 return Done(&input
[i
..],
271 Sexp
::Int(input
[..i].parse().expect("Already matched digits"),
272 (0, i
).offset(start_loc
)));
275 if !c
.is
_d
ig
it
(base
) {
276 return Error(ParseError
::Number(
277 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
)),
278 (i
, i
).offset(start_loc
)));
281 end
= i
+ c
.len_utf8();
// Nothing follows `end`: the whole input is treated as an integer.
284 if input
[end
..].is
_empty
() {
285 return Done(&input
[end
..],
286 Sexp
::Int(input
.parse().expect("Already matched digits"),
287 (0, end
).offset(start_loc
)));
290 // After the decimal point
291 for (i
, c
) in input
[end
..].char_indices() {
292 if c
.is
_del
im
iter
() {
293 return Done(&input
[i
+end
..],
294 Sexp
::Float(input
[..end
+i
].parse().expect("Already matched digits.digits"),
295 (0, end
+i
).offset(start_loc
)));
298 if !c
.is
_d
ig
it
(base
) {
299 return Error(ParseError
::Number(
300 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
+ end
)),
301 (i
+end
, i
+end
).offset(start_loc
)));
// The fractional part ran to the end of the input.
305 Done(&input
[input
.len()..],
306 Sexp
::Float(input
.parse().expect("Already matched digits.digits"),
307 (0, input
.len()).offset(start_loc
)))
310 pub fn parse_symbol(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
311 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Symbol
);
313 match input
.chars().next() {
314 Some(c@'
#') | Some(c@':') | Some(c@'0'...'9') =>
315 return Error(ParseError
::Symbol(
316 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
317 (0, 0).offset(start_loc
))),
318 Some(c
) if c
.is
_del
im
iter
() =>
319 return Error(ParseError
::Symbol(
320 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
321 (0, 0).offset(start_loc
))),
323 None
=> unreachable
!(),
326 for (i
, c
) in input
.char_indices() {
327 if c
.is
_del
im
iter
() {
328 return Done(&input
[i
..],
329 Sexp
::Sym(input
[..i].into
(), (0, i
).offset(start_loc
)));
333 Done(&input
[input
.len()..],
334 Sexp
::Sym(input
.into
(), (0, input
.len()).offset(start_loc
)))
337 pub fn parse_string(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
338 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::String
);
340 match input
.chars().next() {
343 return Error(ParseError::String(
344 Box::new(ParseError::Unexpected(c, start_loc)),
345 (0, 0).offset(start_loc))),
346 None => unreachable!(),
349 for (i, c) in input[1..].char_indices() {
351 return Done(&input
[2+i
..],
352 Sexp
::Str(input
[1..i+1].into
(), (0, i
+2).offset(start_loc
)));
356 Error(ParseError
::String(
357 Box
::new(ParseError
::UnexpectedEof
),
358 (0, input
.len()).offset(start_loc
)))
361 pub fn parse_character(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
362 let (input
, start_loc
) = consume_whitespace
!(input
, start_loc
, ParseError
::Char
);
364 match input
.chars().nth(0) {
367 return Error(ParseError
::Char(
368 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
369 (0, 0).offset(start_loc
))),
371 return Error(ParseError
::Char(
372 Box
::new(ParseError
::UnexpectedEof
),
373 (0, 0).offset(start_loc
))),
376 match input
.chars().nth(1) {
379 return Error(ParseError
::Char(
380 Box
::new(ParseError
::Unexpected(c
, start_loc
+ 1)),
381 (1, 1).offset(start_loc
))),
383 return Error(ParseError
::Char(
384 Box
::new(ParseError
::UnexpectedEof
),
385 (1, 1).offset(start_loc
)))
388 match input
.chars().nth(2) {
390 Done(&input
[3..], Sexp
::Char(c
, (0, 3).offset(start_loc
))),
392 Error(ParseError
::Char(
393 Box
::new(ParseError
::UnexpectedEof
),
394 (2, 2).offset(start_loc
)))
399 // Tests ///////////////////////////////////////////////////////////////////////
404 use super::ParseResult
::*;
408 assert_eq
!(parse("1 2 3"), (vec
![
409 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3)), Sexp
::Int(3, (4, 5))
411 assert_eq
!(parse("1 2 )"), (vec
![
412 Sexp
::Int(1, (0, 1)), Sexp
::Int(2, (2, 3))
413 ], Some(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
)'
, 4)), (4, 4)))));
417 fn test_parse_one() {
418 assert_eq
!(parse_one("1 2"),
419 Ok((Sexp
::Int(1, (0, 1)), " 2")));
423 fn test_parse_sexp() {
424 assert_eq
!(parse_sexp(" 1", 0),
425 Done("", Sexp
::Int(1, (1, 2))));
426 assert_eq
!(parse_sexp("2.2", 0),
427 Done("", Sexp
::Float(2.2, (0, 3))));
428 assert_eq
!(parse_sexp(" a", 0),
429 Done("", Sexp
::Sym("a".into
(), (1, 2))));
430 assert_eq
!(parse_sexp("#\\c", 0),
431 Done("", Sexp
::Char('c'
, (0, 3))));
432 assert_eq
!(parse_sexp(r
#""hi""#, 0),
433 Done("", Sexp
::Str("hi".into
(), (0, 4))));
434 assert_eq
!(parse_sexp("()", 0),
435 Done("", Sexp
::List(vec
![], (0, 2))));
436 assert_eq
!(parse_sexp("( 1 2 3 )", 0),
437 Done("", Sexp
::List(vec
![
438 Sexp
::Int(1, (2, 3)),
439 Sexp
::Int(2, (4, 5)),
440 Sexp
::Int(3, (6, 7)),
443 assert_eq
!(parse_sexp("", 0),
444 Error(ParseError
::Sexp(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
448 fn test_parse_list() {
449 assert_eq
!(parse_list("()", 0),
450 Done("", Sexp
::List(vec
![], (0, 2))));
451 assert_eq
!(parse_list("(1)", 0),
452 Done("", Sexp
::List(vec
![Sexp
::Int(1, (1, 2))], (0, 3))));
453 assert_eq
!(parse_list(" ( 1 2 3 a )", 0), Done("", Sexp
::List(vec
![
454 Sexp
::Int(1, (4, 5)),
455 Sexp
::Int(2, (9, 10)),
456 Sexp
::Int(3, (12, 13)),
457 Sexp
::Sym("a".into
(), (14, 15)),
462 fn test_parse_number() {
463 assert_eq
!(parse_number("1", 0),
464 Done("", Sexp
::Int(1, (0, 1))));
465 assert_eq
!(parse_number(" 13", 0),
466 Done("", Sexp
::Int(13, (1, 3))));
467 assert_eq
!(parse_number("1.2", 0),
468 Done("", Sexp
::Float(1.2, (0, 3))));
469 assert_eq
!(parse_number("\u{3000}4.2", 0),
470 Done("", Sexp
::Float(4.2, (0, 3).offset('
\u{3000}'
.len_utf8()))));
471 assert_eq
!(parse_number(" 42 ", 0),
472 Done(" ", Sexp
::Int(42, (2, 4))));
473 assert_eq
!(parse_number(" 4.2 ", 0),
474 Done(" ", Sexp
::Float(4.2, (1, 4))));
475 assert_eq
!(parse_number("1()", 0),
476 Done("()", Sexp
::Int(1, (0, 1))));
477 assert_eq
!(parse_number("3.6()", 0),
478 Done("()", Sexp
::Float(3.6, (0, 3))));
480 assert_eq
!(parse_number("", 0),
481 Error(ParseError
::Number(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
482 assert_eq
!(parse_number("123a", 0),
483 Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('a'
, 3)), (3, 3))));
484 assert_eq
!(parse_number("66.6+", 0),
485 Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('
+'
, 4)), (4, 4))));
489 fn test_parse_ident() {
490 assert_eq
!(parse_symbol("+", 0),
491 Done("", Sexp
::Sym("+".into
(), (0, 1))));
492 assert_eq
!(parse_symbol(" nil?", 0),
493 Done("", Sexp
::Sym("nil?".into
(), (1, 5))));
494 assert_eq
!(parse_symbol(" ->socket", 0),
495 Done("", Sexp
::Sym("->socket".into
(), (1, 9))));
496 assert_eq
!(parse_symbol("fib(", 0),
497 Done("(", Sexp
::Sym("fib".into
(), (0, 3))));
498 assert_eq
!(parse_symbol("foo2", 0),
499 Done("", Sexp
::Sym("foo2".into
(), (0, 4))));
501 // We reserve #foo for the implementation to do as it wishes
502 assert_eq
!(parse_symbol("#hi", 0),
503 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
#', 0)), (0, 0))));
504 // We reserve :foo for keywords
505 assert_eq
!(parse_symbol(":hi", 0),
506 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
:'
, 0)), (0, 0))));
508 assert_eq
!(parse_symbol("", 0),
509 Error(ParseError
::Symbol(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
510 assert_eq
!(parse_symbol("0", 0),
511 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
0'
, 0)), (0, 0))));
512 assert_eq
!(parse_symbol("()", 0),
513 Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
('
, 0)), (0, 0))));
517 fn test_parse_string() {
518 assert_eq
!(parse_string(r
#""""#, 0),
519 Done("", Sexp
::Str("".into
(), (0, 2))));
520 assert_eq
!(parse_string(r
#""hello""#, 0),
521 Done("", Sexp
::Str("hello".into
(), (0, 7))));
522 assert_eq
!(parse_string(r
#" "this is a nice string
523 with
0123 things
in it
""#, 0),
524 Done("", Sexp
::Str("this is a nice string\nwith 0123 things in it".into
(), (2, 48))));
526 assert_eq
!(parse_string("", 0),
527 Error(ParseError
::String(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
528 assert_eq
!(parse_string(r
#""hi"#, 0),
529 Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
533 fn test_parse_char() {
534 assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
535 assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
536 assert_eq!(parse_character(r#" #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));
538 assert_eq!(parse_character("", 0),
539 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (0, 0))));
540 assert_eq!(parse_character("#", 0),
541 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
542 assert_eq!(parse_character("#\\", 0),
543 Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (2, 2))));
544 assert_eq!(parse_character("a", 0),
545 Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));