]>
Witch of Git - ess/blob - src/lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
3 // #![warn(missing_docs)]
8 /// A type representing arbitrary symbolic expressions. `Sexp` carries the
9 /// source code location it came from along with it for later diagnostic
// The location type `Loc` defaults to `ByteSpan` and must implement `Span`.
// Every variant keeps its location as the final field, which is what
// `get_loc` / `get_loc_mut` rely on when they match `(.., ref l)`.
11 #[derive(Debug, PartialEq, Clone, PartialOrd)]
12 pub enum Sexp
<'a
, Loc
=ByteSpan
> where Loc
: Span
{
13 /// A value representing a symbol.
// The text is a `Cow<'a, str>`; the parsers in this file fill it by calling
// `.into()` on a slice of the input.
14 Sym(Cow
<'a
, str>, Loc
),
15 /// A value representing a string literal.
// Holds the text between the quotes; the quotes themselves are not stored.
16 Str(Cow
<'a
, str>, Loc
),
17 /// A value representing a single character.
19 /// A value representing an integer. Any number containing no decimal point
20 /// will be parsed as an `Int`.
22 /// A value representing a floating point number. Any number containing a
23 /// decimal point will be parsed as a `Float`.
25 /// A list of subexpressions.
// The trailing `Loc` is the list's own location; each child `Sexp` carries
// its own location as well.
26 List(Vec
<Sexp
<'a
, Loc
>>, Loc
),
29 impl<'a
, Loc
> Sexp
<'a
, Loc
> where Loc
: Span
{
30 pub fn get_loc(&self) -> &Loc
{
32 Sexp
::Sym(.., ref l
) => l
,
33 Sexp
::Str(.., ref l
) => l
,
34 Sexp
::Char(.., ref l
) => l
,
35 Sexp
::Int(.., ref l
) => l
,
36 Sexp
::Float(.., ref l
) => l
,
37 Sexp
::List(.., ref l
) => l
,
41 pub fn get_loc_mut(&mut self) -> &mut Loc
{
43 Sexp
::Sym(.., ref mut l
) => l
,
44 Sexp
::Str(.., ref mut l
) => l
,
45 Sexp
::Char(.., ref mut l
) => l
,
46 Sexp
::Int(.., ref mut l
) => l
,
47 Sexp
::Float(.., ref mut l
) => l
,
48 Sexp
::List(.., ref mut l
) => l
,
54 // General Parsing Types ///////////////////////////////////////////////////////
/// Returns a copy of this span shifted by `begin`.
///
/// The `ByteSpan` implementation adds `begin` to both endpoints; the parsers
/// use it to turn input-relative positions into absolute ones.
59 fn offset(&self, begin
: Self::Begin
) -> Self;
/// Returns the position at which this span starts.
// NOTE(review): the body of the `ByteSpan` implementation is not visible
// here; presumably it returns the first tuple element - confirm.
60 fn begin(&self) -> Self::Begin
;
/// Returns a span that covers both `self` and `other`.
///
/// For `ByteSpan` this is the smaller of the two starts paired with the
/// larger of the two ends.
61 fn union(&self, other
: &Self) -> Self;
/// The outcome of running a parser.
///
/// The parsers in this file build it as `Done(remaining_input, value)` on
/// success and as `Error(error)` on failure; `'a` is the lifetime of the
/// remaining input slice handed back in `Done`.
64 #[derive(Debug, PartialEq, Eq, Clone)]
65 pub enum ParseResult
<'a
, T
, E
> {
73 // Specific Parsing Types (ParseError, ByteSpan) ///////////////////////////////
75 /// Indicates how parsing failed.
// NOTE(review): the boxed causes below are written `Box<ParseError>` with no
// `Loc` argument, so a nested error always uses the default `ByteSpan`
// location type even when the outer error uses a different `Loc`. Confirm
// whether `Box<ParseError<Loc>>` was intended.
76 #[derive(Debug, PartialEq, Eq, Clone)]
77 pub enum ParseError
<Loc
=ByteSpan
> where Loc
: Span
{
78 /// We can't explain how the parsing failed.
/// Parsing a character literal failed; holds the underlying cause and the
/// span where it happened.
80 Char(Box
<ParseError
>, Loc
),
/// Parsing a string literal failed; holds the underlying cause and the
/// span where it happened.
81 String(Box
<ParseError
>, Loc
),
/// Parsing a symbol failed; holds the underlying cause and the span where
/// it happened.
82 Symbol(Box
<ParseError
>, Loc
),
/// Parsing a number failed; holds the underlying cause and the span where
/// it happened. `parse_sexp` also uses this wrapper when its input contains
/// nothing but whitespace.
83 Number(Box
<ParseError
>, Loc
),
/// A character that is not valid at this point, together with the position
/// at which it was found.
84 Unexpected(char, Loc
::Begin
),
88 type ByteSpan
= (usize, usize);
90 impl Span
for ByteSpan
{
93 fn offset(&self, begin
: Self::Begin
) -> Self {
94 (self.0 + begin
, self.1 + begin
)
97 fn begin(&self) -> Self::Begin
{
101 fn union(&self, other
: &Self) -> Self {
102 use std
::cmp
::{min
, max
};
103 (min(self.0, other
.0), max(self.1, other
.1))
109 // Parsing Utilities ///////////////////////////////////////////////////////////
/// Returns `true` when `self` terminates the token being read.
///
/// The `char` implementation treats whitespace and each of
/// `;`, `(`, `)`, `[`, `]`, `{`, `}`, `"`, `'`, `` ` `` and `,` as delimiters.
112 fn is_delimiter(&self) -> bool
;
115 impl IsDelimeter
for char {
116 fn is_delimiter(&self) -> bool
{
117 self.is
_wh
itespace
() || *self == '
;'
118 || *self == '
('
|| *self == '
)'
119 || *self == '
['
|| *self == '
]'
120 || *self == '
{'
|| *self == '
}'
121 || *self == '
"' || *self == '\''
122 || *self == '`' || *self == ','
127 // Parsers /////////////////////////////////////////////////////////////////////
129 pub fn parse_sexp(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
130 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
133 return Error(ParseError::Number(
134 Box::new(ParseError::UnexpectedEof),
135 (input.len(), input.len()).offset(start_loc)));
138 let input = &input[end_of_white..];
139 let start_loc = start_loc + end_of_white;
141 match input.chars().next() {
142 Some('0'...'9') => parse_number(input, start_loc),
143 Some('(') => unimplemented!(),
144 Some('#') => parse_character(input, start_loc),
145 Some('"'
) => parse_string(input
, start_loc
),
146 Some(_
) => parse_symbol(input
, start_loc
),
147 None
=> unreachable
!(),
151 pub fn parse_number(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
152 // Consume all the whitespace at the beginning of the string
153 let end_of_white
= if let Some(pos
) = input
.find
(|c
: char| !c
.is
_wh
itespace
()) {
156 return Error(ParseError
::Number(
157 Box
::new(ParseError
::UnexpectedEof
),
158 (input
.len(), input
.len()).offset(start_loc
)));
161 let input
= &input
[end_of_white
..];
162 let start_loc
= start_loc
+ end_of_white
;
164 match input
.chars().next() {
165 Some(c
) if !c
.is
_d
ig
it
(10) => {
166 return Error(ParseError
::Number(
167 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
168 (0, c
.len_utf8()).offset(start_loc
)));
170 None
=> return Error(ParseError
::Number(
171 Box
::new(ParseError
::UnexpectedEof
),
172 (0, 0).offset(start_loc
))),
179 // Before the decimal point
180 for (i
, c
) in input
.char_indices() {
186 if c
.is
_del
im
iter
() {
187 return Done(&input
[i
..],
188 Sexp
::Int(input
[..i].parse().expect("Already matched digits"),
189 (0, i
).offset(start_loc
)));
192 if !c
.is
_d
ig
it
(base
) {
193 return Error(ParseError
::Number(
194 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
)),
195 (i
, i
).offset(start_loc
)));
198 end
= i
+ c
.len_utf8();
201 if input
[end
..].is
_empty
() {
202 return Done(&input
[end
..],
203 Sexp
::Int(input
.parse().expect("Already matched digits"),
204 (0, end
).offset(start_loc
)));
207 // After the decimal point
208 for (i
, c
) in input
[end
..].char_indices() {
209 if c
.is
_del
im
iter
() {
210 return Done(&input
[i
+end
..],
211 Sexp
::Float(input
[..end
+i
].parse().expect("Already matched digits.digits"),
212 (0, end
+i
).offset(start_loc
)));
215 if !c
.is
_d
ig
it
(base
) {
216 return Error(ParseError
::Number(
217 Box
::new(ParseError
::Unexpected(c
, start_loc
+ i
+ end
)),
218 (i
+end
, i
+end
).offset(start_loc
)));
222 Done(&input
[input
.len()..],
223 Sexp
::Float(input
.parse().expect("Already matched digits.digits"),
224 (0, input
.len()).offset(start_loc
)))
227 pub fn parse_symbol(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
228 let end_of_white
= if let Some(pos
) = input
.find
(|c
: char| !c
.is
_wh
itespace
()) {
231 return Error(ParseError
::Symbol(
232 Box
::new(ParseError
::UnexpectedEof
),
233 (input
.len(), input
.len()).offset(start_loc
)));
236 let input
= &input
[end_of_white
..];
237 let start_loc
= start_loc
+ end_of_white
;
239 match input
.chars().next() {
240 Some(c@'
#') | Some(c@':') | Some(c@'0'...'9') =>
241 return Error(ParseError
::Symbol(
242 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
243 (0, 0).offset(start_loc
))),
244 Some(c
) if c
.is
_del
im
iter
() =>
245 return Error(ParseError
::Symbol(
246 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
247 (0, 0).offset(start_loc
))),
249 None
=> unreachable
!(),
252 for (i
, c
) in input
.char_indices() {
253 if c
.is
_del
im
iter
() {
254 return Done(&input
[i
..],
255 Sexp
::Sym(input
[..i].into
(), (0, i
).offset(start_loc
)));
259 Done(&input
[input
.len()..],
260 Sexp
::Sym(input
.into
(), (0, input
.len()).offset(start_loc
)))
263 pub fn parse_string(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
264 let end_of_white
= if let Some(pos
) = input
.find
(|c
: char| !c
.is
_wh
itespace
()) {
267 return Error(ParseError
::String(
268 Box
::new(ParseError
::UnexpectedEof
),
269 (input
.len(), input
.len()).offset(start_loc
)));
272 let input
= &input
[end_of_white
..];
273 let start_loc
= start_loc
+ end_of_white
;
275 match input
.chars().next() {
278 return Error(ParseError::String(
279 Box::new(ParseError::Unexpected(c, start_loc)),
280 (0, 0).offset(start_loc))),
281 None => unreachable!(),
284 for (i, c) in input[1..].char_indices() {
286 return Done(&input
[2+i
..],
287 Sexp
::Str(input
[1..i+1].into
(), (0, i
+2).offset(start_loc
)));
291 Error(ParseError
::String(
292 Box
::new(ParseError
::UnexpectedEof
),
293 (0, input
.len()).offset(start_loc
)))
296 pub fn parse_character(input
: &str, start_loc
: usize) -> ParseResult
<Sexp
, ParseError
> {
297 let end_of_white
= if let Some(pos
) = input
.find
(|c
: char| !c
.is
_wh
itespace
()) {
300 return Error(ParseError
::String(
301 Box
::new(ParseError
::UnexpectedEof
),
302 (input
.len(), input
.len()).offset(start_loc
)));
305 let input
= &input
[end_of_white
..];
306 let start_loc
= start_loc
+ end_of_white
;
308 match input
.chars().nth(0) {
311 return Error(ParseError
::Char(
312 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
313 (0, 0).offset(start_loc
))),
315 return Error(ParseError
::Char(
316 Box
::new(ParseError
::UnexpectedEof
),
317 (0, 0).offset(start_loc
))),
320 match input
.chars().nth(1) {
323 return Error(ParseError
::Char(
324 Box
::new(ParseError
::Unexpected(c
, start_loc
+ 1)),
325 (1, 1).offset(start_loc
))),
327 return Error(ParseError
::Char(
328 Box
::new(ParseError
::UnexpectedEof
),
329 (1, 1).offset(start_loc
)))
332 match input
.chars().nth(2) {
334 Done(&input
[3..], Sexp
::Char(c
, (0, 3).offset(start_loc
))),
336 Error(ParseError
::Char(
337 Box
::new(ParseError
::UnexpectedEof
),
338 (2, 2).offset(start_loc
)))
343 // Tests ///////////////////////////////////////////////////////////////////////
348 use super::ParseResult
::*;
// Checks that `parse_sexp` dispatches correctly: one integer, one float, one
// symbol, one character literal and one string literal, each with its
// expected span.
351 fn test_parse_sexp() {
352 assert_eq
!(parse_sexp(" 1", 0), Done("", Sexp
::Int(1, (1, 2))));
353 assert_eq
!(parse_sexp("2.2", 0), Done("", Sexp
::Float(2.2, (0, 3))));
354 assert_eq
!(parse_sexp(" a", 0), Done("", Sexp
::Sym("a".into
(), (1, 2))));
355 assert_eq
!(parse_sexp("#\\c", 0), Done("", Sexp
::Char('c'
, (0, 3))));
356 assert_eq
!(parse_sexp(r
#""hi""#, 0), Done("", Sexp::Str("hi".into(), (0, 4))));
// Covers `parse_number`: integers and floats, with and without surrounding
// whitespace or a trailing delimiter, followed by three failure cases (empty
// input, a letter after the digits, a stray `+` in the fraction).
// NOTE(review): the ` 42 ` case expects the span (2, 4), which implies two
// leading whitespace characters in that literal - verify the literal's exact
// whitespace before relying on it.
360 fn test_parse_number() {
361 assert_eq
!(parse_number("1", 0), Done("", Sexp
::Int(1, (0, 1))));
362 assert_eq
!(parse_number(" 13", 0), Done("", Sexp
::Int(13, (1, 3))));
363 assert_eq
!(parse_number("1.2", 0), Done("", Sexp
::Float(1.2, (0, 3))));
364 assert_eq
!(parse_number("\u{3000}4.2", 0), Done("", Sexp
::Float(4.2, (0, 3).offset('
\u{3000}'
.len_utf8()))));
365 assert_eq
!(parse_number(" 42 ", 0), Done(" ", Sexp
::Int(42, (2, 4))));
366 assert_eq
!(parse_number(" 4.2 ", 0), Done(" ", Sexp
::Float(4.2, (1, 4))));
367 assert_eq
!(parse_number("1()", 0), Done("()", Sexp
::Int(1, (0, 1))));
368 assert_eq
!(parse_number("3.6()", 0), Done("()", Sexp
::Float(3.6, (0, 3))));
// Failure cases.
370 assert_eq
!(parse_number("", 0), Error(ParseError
::Number(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
371 assert_eq
!(parse_number("123a", 0), Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('a'
, 3)), (3, 3))));
372 assert_eq
!(parse_number("66.6+", 0), Error(ParseError
::Number(Box
::new(ParseError
::Unexpected('
+'
, 4)), (4, 4))));
// Covers `parse_symbol`: symbols made of punctuation, symbols with leading
// whitespace, a symbol ended by a delimiter, and a symbol containing a digit,
// followed by the inputs that must be rejected (`#`, `:`, empty input, a
// leading digit, a leading delimiter).
376 fn test_parse_ident() {
377 assert_eq
!(parse_symbol("+", 0), Done("", Sexp
::Sym("+".into
(), (0, 1))));
378 assert_eq
!(parse_symbol(" nil?", 0), Done("", Sexp
::Sym("nil?".into
(), (1, 5))));
379 assert_eq
!(parse_symbol(" ->socket", 0), Done("", Sexp
::Sym("->socket".into
(), (1, 9))));
380 assert_eq
!(parse_symbol("fib(", 0), Done("(", Sexp
::Sym("fib".into
(), (0, 3))));
381 assert_eq
!(parse_symbol("foo2", 0), Done("", Sexp
::Sym("foo2".into
(), (0, 4))));
383 // We reserve #foo for the implementation to do as it wishes
384 assert_eq
!(parse_symbol("#hi", 0), Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
#', 0)), (0, 0))));
385 // We reserve :foo for keywords
386 assert_eq
!(parse_symbol(":hi", 0), Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
:'
, 0)), (0, 0))));
388 assert_eq
!(parse_symbol("", 0), Error(ParseError
::Symbol(Box
::new(ParseError
::UnexpectedEof
), (0, 0))));
389 assert_eq
!(parse_symbol("0", 0), Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
0'
, 0)), (0, 0))));
390 assert_eq
!(parse_symbol("()", 0), Error(ParseError
::Symbol(Box
::new(ParseError
::Unexpected('
('
, 0)), (0, 0))));
// Covers `parse_string`: the empty string, a simple string, a string that
// spans several lines, and an unterminated string that must fail with
// `UnexpectedEof`.
// NOTE(review): the multi-line case expects the span (2, 48), so the exact
// whitespace inside that raw string matters - verify it against the expected
// value before editing.
394 fn test_parse_string() {
395 assert_eq
!(parse_string(r
#""""#, 0), Done("", Sexp::Str("".into(), (0, 2))));
396 assert_eq
!(parse_string(r
#""hello""#, 0), Done("", Sexp::Str("hello".into(), (0, 7))));
397 assert_eq
!(parse_string(r
#" "this is a nice string
398 with
0123 things
in it
""#, 0),
399 Done("", Sexp
::Str("this is a nice string\nwith 0123 things in it".into
(), (2, 48))));
400 assert_eq
!(parse_string(r
#""hi"#, 0), Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
// Covers `parse_character`: a quote, a space and a backslash as the character
// value, then two failures (input ending after `#`, and input that does not
// start with `#`).
404 fn test_parse_char() {
405 assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
406 assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
407 assert_eq!(parse_character(r#" #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));
409 assert_eq!(parse_character("#", 0), Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
410 assert_eq!(parse_character("a", 0), Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));
417 // fn test_parse_list() {
418 // assert_eq!(list("()"), IResult::Done("", vec![]));
419 // assert_eq!(list("(1)"), IResult::Done("", vec![Sexp::Int(1)]));
420 // assert_eq!(list(" ( 1 2 3 a )"), IResult::Done("", vec![
424 // Sexp::Sym("a".into()),
430 // fn test_parse_only_one() {
431 // assert!(parse_one("1 2").is_err());
436 // fn test_parse_expression() {
437 // assert_eq!(parse_one(r#"
439 // (print (str "say " #\" "Hello, World" #\" " today!")))
441 // Ok(Sexp::List(vec![
442 // Sexp::Sym("def".into()),
444 // vec![Sexp::Sym("main".into())]
447 // Sexp::Sym("print".into()),
449 // Sexp::Sym("str".into()),
450 // Sexp::Str("say ".into()),
452 // Sexp::Str("Hello, World".into()),
454 // Sexp::Str(" today!".into()),
462 // fn test_parse_multi() {
463 // assert_eq!(parse(" 1 2 3 "),
464 // Ok(vec![Sexp::Int(1), Sexp::Int(2), Sexp::Int(3)]));