]>
Witch of Git - ess/blob - src/lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
3 // #![warn(missing_docs)]
8 /// A type representing arbitrary symbolic expressions. `Sexp` carries the
9 /// source code location it came from along with it for later diagnostic
11 #[derive(Debug, PartialEq, Clone, PartialOrd)]
12 pub enum Sexp
<'a
, Loc
=ByteSpan
> where Loc
: Span
{
13 /// A value representing a symbol.
14 Sym(Cow
<'a
, str>, Loc
),
15 /// A value representing a string literal.
16 Str(Cow
<'a
, str>, Loc
),
17 /// A value representing a single character.
19 /// A value representing an integer. Any number containing no decimal point
20 /// will be parsed as an `Int`.
22 /// A value representing a floating point number. Any number containing a
23 /// decimal point will be parsed as a `Float`.
25 /// A list of subexpressions.
26 List(Vec
<Sexp
<'a
, Loc
>>, Loc
),
29 impl<'a
, Loc
> Sexp
<'a
, Loc
> where Loc
: Span
{
30 pub fn get_loc(&self) -> &Loc
{
32 Sexp
::Sym(.., ref l
) => l
,
33 Sexp
::Str(.., ref l
) => l
,
34 Sexp
::Char(.., ref l
) => l
,
35 Sexp
::Int(.., ref l
) => l
,
36 Sexp
::Float(.., ref l
) => l
,
37 Sexp
::List(.., ref l
) => l
,
41 pub fn get_loc_mut(&mut self) -> &mut Loc
{
43 Sexp
::Sym(.., ref mut l
) => l
,
44 Sexp
::Str(.., ref mut l
) => l
,
45 Sexp
::Char(.., ref mut l
) => l
,
46 Sexp
::Int(.., ref mut l
) => l
,
47 Sexp
::Float(.., ref mut l
) => l
,
48 Sexp
::List(.., ref mut l
) => l
,
54 // General Parsing Types ///////////////////////////////////////////////////////
59 fn offset(&self, begin
: Self::Begin
) -> Self;
60 fn begin(&self) -> Self::Begin
;
61 fn union(&self, other
: &Self) -> Self;
64 #[derive(Debug, PartialEq, Eq, Clone)]
65 pub enum ParseResult
<'a
, T
, E
> {
73 // Specific Parsing Types (ParseError, ByteSpan) ///////////////////////////////
75 /// Indicates how parsing failed.
76 #[derive(Debug, PartialEq, Eq, Clone)]
77 pub enum ParseError
<Loc
=ByteSpan
> where Loc
: Span
{
78 /// We can't explain how the parsing failed.
80 Char(Box
<ParseError
>, Loc
),
81 String(Box
<ParseError
>, Loc
),
82 Symbol(Box
<ParseError
>, Loc
),
83 Number(Box
<ParseError
>, Loc
),
84 Unexpected(char, Loc
::Begin
),
88 type ByteSpan
= (usize, usize);
90 impl Span
for ByteSpan
{
93 fn offset(&self, begin
: Self::Begin
) -> Self {
94 (self.0 + begin
, self.1 + begin
)
97 fn begin(&self) -> Self::Begin
{
101 fn union(&self, other
: &Self) -> Self {
102 use std
::cmp
::{min
, max
};
103 (min(self.0, other
.0), max(self.1, other
.1))
109 // Parsing Utilities ///////////////////////////////////////////////////////////
112 fn is_delimiter(&self) -> bool
;
115 impl IsDelimeter
for char {
116 fn is_delimiter(&self) -> bool
{
117 self.is
_wh
itespace
() || *self == '
;'
118 || *self == '
('
|| *self == '
)'
119 || *self == '
['
|| *self == '
]'
120 || *self == '
{'
|| *self == '
}'
121 || *self == '
"' || *self == '\''
122 || *self == '`' || *self == ','
127 // Parsers /////////////////////////////////////////////////////////////////////
129 // pub fn parse_one(input: &str) -> Result<Sexp, ParseError>;
131 // pub fn parse(input: &str) -> Result<Vec<Sexp>, ParseError>;
133 pub fn parse_number(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
134 // Consume all the whitespace at the beginning of the string
135 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
138 return Error(ParseError::Number(
139 Box::new(ParseError::UnexpectedEof),
140 (input.len(), input.len()).offset(start_loc)));
143 let input = &input[end_of_white..];
144 let start_loc = start_loc + end_of_white;
146 match input.chars().next() {
147 Some(c) if !c.is_digit(10) => {
148 return Error(ParseError::Number(
149 Box::new(ParseError::Unexpected(c, start_loc)),
150 (0, c.len_utf8()).offset(start_loc)));
152 None => return Error(ParseError::Number(
153 Box::new(ParseError::UnexpectedEof),
154 (0, 0).offset(start_loc))),
161 // Before the decimal point
162 for (i, c) in input.char_indices() {
168 if c.is_delimiter() {
169 return Done(&input[i..],
170 Sexp::Int(input[..i].parse().expect("Already matched digits
"),
171 (0, i).offset(start_loc)));
174 if !c.is_digit(base) {
175 return Error(ParseError::Number(
176 Box::new(ParseError::Unexpected(c, start_loc + i)),
177 (i, i).offset(start_loc)));
180 end = i + c.len_utf8();
183 if input[end..].is_empty() {
184 return Done(&input[end..],
185 Sexp::Int(input.parse().expect("Already matched digits
"),
186 (0, end).offset(start_loc)));
189 // After the decimal point
190 for (i, c) in input[end..].char_indices() {
191 if c.is_delimiter() {
192 return Done(&input[i+end..],
193 Sexp::Float(input[..end+i].parse().expect("Already matched digits
.digits
"),
194 (0, end+i).offset(start_loc)));
197 if !c.is_digit(base) {
198 return Error(ParseError::Number(
199 Box::new(ParseError::Unexpected(c, start_loc + i + end)),
200 (i+end, i+end).offset(start_loc)));
204 Done(&input[input.len()..],
205 Sexp::Float(input.parse().expect("Already matched digits
.digits
"),
206 (0, input.len()).offset(start_loc)))
209 pub fn parse_symbol(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
210 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
213 return Error(ParseError::Symbol(
214 Box::new(ParseError::UnexpectedEof),
215 (input.len(), input.len()).offset(start_loc)));
218 let input = &input[end_of_white..];
219 let start_loc = start_loc + end_of_white;
221 match input.chars().next() {
222 Some(c@'#') | Some(c@':') | Some(c@'0'...'9') =>
223 return Error(ParseError::Symbol(
224 Box::new(ParseError::Unexpected(c, start_loc)),
225 (0, 0).offset(start_loc))),
226 Some(c) if c.is_delimiter() =>
227 return Error(ParseError::Symbol(
228 Box::new(ParseError::Unexpected(c, start_loc)),
229 (0, 0).offset(start_loc))),
231 None => unreachable!(),
234 for (i, c) in input.char_indices() {
235 if c.is_delimiter() {
236 return Done(&input[i..],
237 Sexp::Sym(input[..i].into(), (0, i).offset(start_loc)));
241 Done(&input[input.len()..],
242 Sexp::Sym(input.into(), (0, input.len()).offset(start_loc)))
245 pub fn parse_string(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
246 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
249 return Error(ParseError::String(
250 Box::new(ParseError::UnexpectedEof),
251 (input.len(), input.len()).offset(start_loc)));
254 let input = &input[end_of_white..];
255 let start_loc = start_loc + end_of_white;
257 match input.chars().next() {
260 return Error(ParseError
::String(
261 Box
::new(ParseError
::Unexpected(c
, start_loc
)),
262 (0, 0).offset(start_loc
))),
263 None
=> unreachable
!(),
266 for (i
, c
) in input
[1..].char_indices() {
268 return Done(&input[2+i..],
269 Sexp::Str(input[1..i+1].into(), (0, i+2).offset(start_loc)));
273 Error(ParseError::String(
274 Box::new(ParseError::UnexpectedEof),
275 (0, input.len()).offset(start_loc)))
278 pub fn parse_character(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
279 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
282 return Error(ParseError::String(
283 Box::new(ParseError::UnexpectedEof),
284 (input.len(), input.len()).offset(start_loc)));
287 let input = &input[end_of_white..];
288 let start_loc = start_loc + end_of_white;
290 match input.chars().nth(0) {
293 return Error(ParseError::Char(
294 Box::new(ParseError::Unexpected(c, start_loc)),
295 (0, 0).offset(start_loc))),
297 return Error(ParseError::Char(
298 Box::new(ParseError::UnexpectedEof),
299 (0, 0).offset(start_loc))),
302 match input.chars().nth(1) {
305 return Error(ParseError::Char(
306 Box::new(ParseError::Unexpected(c, start_loc + 1)),
307 (1, 1).offset(start_loc))),
309 return Error(ParseError::Char(
310 Box::new(ParseError::UnexpectedEof),
311 (1, 1).offset(start_loc)))
314 match input.chars().nth(2) {
316 Done(&input[3..], Sexp::Char(c, (0, 3).offset(start_loc))),
318 Error(ParseError::Char(
319 Box::new(ParseError::UnexpectedEof),
320 (2, 2).offset(start_loc)))
325 // Tests ///////////////////////////////////////////////////////////////////////
330 use super::ParseResult::*;
333 fn test_parse_number() {
334 assert_eq!(parse_number("1", 0), Done("", Sexp::Int(1, (0, 1))));
335 assert_eq!(parse_number(" 13", 0), Done("", Sexp::Int(13, (1, 3))));
336 assert_eq!(parse_number("1.2", 0), Done("", Sexp::Float(1.2, (0, 3))));
337 assert_eq!(parse_number("\u{3000}4.2", 0), Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
338 assert_eq!(parse_number(" 42 ", 0), Done(" ", Sexp::Int(42, (2, 4))));
339 assert_eq!(parse_number(" 4.2 ", 0), Done(" ", Sexp::Float(4.2, (1, 4))));
340 assert_eq!(parse_number("1()", 0), Done("()", Sexp::Int(1, (0, 1))));
341 assert_eq!(parse_number("3.6()", 0), Done("()", Sexp::Float(3.6, (0, 3))));
343 assert_eq!(parse_number("", 0), Error(ParseError::Number(Box::new(ParseError::UnexpectedEof), (0, 0))));
344 assert_eq!(parse_number("123a
", 0), Error(ParseError::Number(Box::new(ParseError::Unexpected('a', 3)), (3, 3))));
345 assert_eq!(parse_number("66.6+", 0), Error(ParseError::Number(Box::new(ParseError::Unexpected('+', 4)), (4, 4))));
349 fn test_parse_ident() {
350 assert_eq!(parse_symbol("+", 0), Done("", Sexp::Sym("+".into(), (0, 1))));
351 assert_eq!(parse_symbol(" nil?
", 0), Done("", Sexp::Sym("nil?
".into(), (1, 5))));
352 assert_eq!(parse_symbol(" ->socket
", 0), Done("", Sexp::Sym("->socket
".into(), (1, 9))));
353 assert_eq!(parse_symbol("fib(", 0), Done("(", Sexp::Sym("fib
".into(), (0, 3))));
354 assert_eq!(parse_symbol("foo2
", 0), Done("", Sexp::Sym("foo2
".into(), (0, 4))));
356 // We reserve #foo for the implementation to do as it wishes
357 assert_eq!(parse_symbol("#hi", 0), Error(ParseError::Symbol(Box::new(ParseError::Unexpected('#', 0)), (0, 0))));
358 // We reserve :foo for keywords
359 assert_eq!(parse_symbol(":hi", 0), Error(ParseError::Symbol(Box::new(ParseError::Unexpected(':', 0)), (0, 0))));
361 assert_eq!(parse_symbol("", 0), Error(ParseError::Symbol(Box::new(ParseError::UnexpectedEof), (0, 0))));
362 assert_eq!(parse_symbol("0", 0), Error(ParseError::Symbol(Box::new(ParseError::Unexpected('0', 0)), (0, 0))));
363 assert_eq!(parse_symbol("()", 0), Error(ParseError::Symbol(Box::new(ParseError::Unexpected('(', 0)), (0, 0))));
367 fn test_parse_string() {
368 assert_eq!(parse_string(r#""""#, 0), Done("", Sexp::Str("".into(), (0, 2))));
369 assert_eq!(parse_string(r#""hello""#, 0), Done("", Sexp::Str("hello".into(), (0, 7))));
370 assert_eq!(parse_string(r#" "this is a nice string
371 with 0123 things in it""#, 0),
372 Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into(), (2, 48))));
373 assert_eq!(parse_string(r#""hi"#, 0), Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
377 fn test_parse_char() {
378 assert_eq
!(parse_character(r
#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
379 assert_eq
!(parse_character(r
#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
380 assert_eq
!(parse_character(r
#" #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));
382 assert_eq
!(parse_character("#", 0), Error(ParseError
::Char(Box
::new(ParseError
::UnexpectedEof
), (1, 1))));
383 assert_eq
!(parse_character("a", 0), Error(ParseError
::Char(Box
::new(ParseError
::Unexpected('a'
, 0)), (0, 0))));
390 // fn test_parse_list() {
391 // assert_eq!(list("()"), IResult::Done("", vec![]));
392 // assert_eq!(list("(1)"), IResult::Done("", vec![Sexp::Int(1)]));
393 // assert_eq!(list(" ( 1 2 3 a )"), IResult::Done("", vec![
397 // Sexp::Sym("a".into()),
403 // fn test_parse_only_one() {
404 // assert!(parse_one("1 2").is_err());
409 // fn test_parse_expression() {
410 // assert_eq!(parse_one(r#"
412 // (print (str "say " #\" "Hello, World" #\" " today!")))
414 // Ok(Sexp::List(vec![
415 // Sexp::Sym("def".into()),
417 // vec![Sexp::Sym("main".into())]
420 // Sexp::Sym("print".into()),
422 // Sexp::Sym("str".into()),
423 // Sexp::Str("say ".into()),
425 // Sexp::Str("Hello, World".into()),
427 // Sexp::Str(" today!".into()),
435 // fn test_parse_multi() {
436 // assert_eq!(parse(" 1 2 3 "),
437 // Ok(vec![Sexp::Int(1), Sexp::Int(2), Sexp::Int(3)]));