]> Witch of Git - ess/blob - src/lib.rs
Parse general s-expressions
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 // #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 use std::borrow::Cow;
7
8 /// A type representing arbitrary symbolic expressions. `Sexp` carries the
9 /// source code location it came from along with it for later diagnostic
10 /// purposes.
11 #[derive(Debug, PartialEq, Clone, PartialOrd)]
12 pub enum Sexp<'a, Loc=ByteSpan> where Loc: Span {
13 /// A value representing a symbol.
14 Sym(Cow<'a, str>, Loc),
15 /// A value representing a string literal.
16 Str(Cow<'a, str>, Loc),
17 /// A value representing a single character.
18 Char(char, Loc),
19 /// A value representing an integer. Any number containing no decimal point
20 /// will be parsed as an `Int`.
21 Int(i64, Loc),
22 /// A value representing a floating point number. Any number containing a
23 /// decimal point will be parsed as a `Float`.
24 Float(f64, Loc),
25 /// A list of subexpressions.
26 List(Vec<Sexp<'a, Loc>>, Loc),
27 }
28
29 impl<'a, Loc> Sexp<'a, Loc> where Loc: Span {
30 pub fn get_loc(&self) -> &Loc {
31 match *self {
32 Sexp::Sym(.., ref l) => l,
33 Sexp::Str(.., ref l) => l,
34 Sexp::Char(.., ref l) => l,
35 Sexp::Int(.., ref l) => l,
36 Sexp::Float(.., ref l) => l,
37 Sexp::List(.., ref l) => l,
38 }
39 }
40
41 pub fn get_loc_mut(&mut self) -> &mut Loc {
42 match *self {
43 Sexp::Sym(.., ref mut l) => l,
44 Sexp::Str(.., ref mut l) => l,
45 Sexp::Char(.., ref mut l) => l,
46 Sexp::Int(.., ref mut l) => l,
47 Sexp::Float(.., ref mut l) => l,
48 Sexp::List(.., ref mut l) => l,
49 }
50 }
51 }
52
53 \f
54 // General Parsing Types ///////////////////////////////////////////////////////
55
/// A region of source text that can be attached to parsed values and errors.
pub trait Span {
    /// The type used to describe where a span starts.
    type Begin;

    /// Returns the position at which this span starts.
    fn begin(&self) -> Self::Begin;

    /// Returns a copy of this span moved by `begin`.
    ///
    /// The parsers in this file build spans relative to the text they were
    /// handed and then call this to translate them to absolute positions.
    fn offset(&self, begin: Self::Begin) -> Self;

    /// Combines this span with `other` into a single span.
    fn union(&self, other: &Self) -> Self;
}
63
/// The outcome of running one of the parsers over a string slice.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseResult<'a, T, E> {
    /// Parsing succeeded: holds the unconsumed remainder of the input and the
    /// value that was parsed.
    Done(&'a str, T),
    /// Parsing failed with the contained error.
    Error(E),
}
69
70 use ParseResult::*;
71
72 \f
73 // Specific Parsing Types (ParseError, ByteSpan) ///////////////////////////////
74
75 /// Indicates how parsing failed.
76 #[derive(Debug, PartialEq, Eq, Clone)]
77 pub enum ParseError<Loc=ByteSpan> where Loc: Span {
78 /// We can't explain how the parsing failed.
79 UnexpectedEof,
80 Char(Box<ParseError>, Loc),
81 String(Box<ParseError>, Loc),
82 Symbol(Box<ParseError>, Loc),
83 Number(Box<ParseError>, Loc),
84 Unexpected(char, Loc::Begin),
85 Unimplemented,
86 }
87
88 type ByteSpan = (usize, usize);
89
90 impl Span for ByteSpan {
91 type Begin = usize;
92
93 fn offset(&self, begin: Self::Begin) -> Self {
94 (self.0 + begin, self.1 + begin)
95 }
96
97 fn begin(&self) -> Self::Begin {
98 self.0
99 }
100
101 fn union(&self, other: &Self) -> Self {
102 use std::cmp::{min, max};
103 (min(self.0, other.0), max(self.1, other.1))
104 }
105 }
106
107
108 \f
109 // Parsing Utilities ///////////////////////////////////////////////////////////
110
/// Classification of values that end the token preceding them.
trait IsDelimeter {
    /// Returns `true` if this value terminates a token.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter when it is whitespace or one of
    /// `;`, `(`, `)`, `[`, `]`, `{`, `}`, `"`, `'`, `` ` `` or `,`.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
125
126 \f
127 // Parsers /////////////////////////////////////////////////////////////////////
128
129 pub fn parse_sexp(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
130 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
131 pos
132 } else {
133 return Error(ParseError::Number(
134 Box::new(ParseError::UnexpectedEof),
135 (input.len(), input.len()).offset(start_loc)));
136 };
137
138 let input = &input[end_of_white..];
139 let start_loc = start_loc + end_of_white;
140
141 match input.chars().next() {
142 Some('0'...'9') => parse_number(input, start_loc),
143 Some('(') => unimplemented!(),
144 Some('#') => parse_character(input, start_loc),
145 Some('"') => parse_string(input, start_loc),
146 Some(_) => parse_symbol(input, start_loc),
147 None => unreachable!(),
148 }
149 }
150
151 pub fn parse_number(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
152 // Consume all the whitespace at the beginning of the string
153 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
154 pos
155 } else {
156 return Error(ParseError::Number(
157 Box::new(ParseError::UnexpectedEof),
158 (input.len(), input.len()).offset(start_loc)));
159 };
160
161 let input = &input[end_of_white..];
162 let start_loc = start_loc + end_of_white;
163
164 match input.chars().next() {
165 Some(c) if !c.is_digit(10) => {
166 return Error(ParseError::Number(
167 Box::new(ParseError::Unexpected(c, start_loc)),
168 (0, c.len_utf8()).offset(start_loc)));
169 }
170 None => return Error(ParseError::Number(
171 Box::new(ParseError::UnexpectedEof),
172 (0, 0).offset(start_loc))),
173 _ => (),
174 }
175
176 let base = 10;
177
178 let mut end = 0;
179 // Before the decimal point
180 for (i, c) in input.char_indices() {
181 if c == '.' {
182 end = i + 1;
183 break;
184 }
185
186 if c.is_delimiter() {
187 return Done(&input[i..],
188 Sexp::Int(input[..i].parse().expect("Already matched digits"),
189 (0, i).offset(start_loc)));
190 }
191
192 if !c.is_digit(base) {
193 return Error(ParseError::Number(
194 Box::new(ParseError::Unexpected(c, start_loc + i)),
195 (i, i).offset(start_loc)));
196 }
197
198 end = i + c.len_utf8();
199 }
200
201 if input[end..].is_empty() {
202 return Done(&input[end..],
203 Sexp::Int(input.parse().expect("Already matched digits"),
204 (0, end).offset(start_loc)));
205 }
206
207 // After the decimal point
208 for (i, c) in input[end..].char_indices() {
209 if c.is_delimiter() {
210 return Done(&input[i+end..],
211 Sexp::Float(input[..end+i].parse().expect("Already matched digits.digits"),
212 (0, end+i).offset(start_loc)));
213 }
214
215 if !c.is_digit(base) {
216 return Error(ParseError::Number(
217 Box::new(ParseError::Unexpected(c, start_loc + i + end)),
218 (i+end, i+end).offset(start_loc)));
219 }
220 }
221
222 Done(&input[input.len()..],
223 Sexp::Float(input.parse().expect("Already matched digits.digits"),
224 (0, input.len()).offset(start_loc)))
225 }
226
227 pub fn parse_symbol(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
228 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
229 pos
230 } else {
231 return Error(ParseError::Symbol(
232 Box::new(ParseError::UnexpectedEof),
233 (input.len(), input.len()).offset(start_loc)));
234 };
235
236 let input = &input[end_of_white..];
237 let start_loc = start_loc + end_of_white;
238
239 match input.chars().next() {
240 Some(c@'#') | Some(c@':') | Some(c@'0'...'9') =>
241 return Error(ParseError::Symbol(
242 Box::new(ParseError::Unexpected(c, start_loc)),
243 (0, 0).offset(start_loc))),
244 Some(c) if c.is_delimiter() =>
245 return Error(ParseError::Symbol(
246 Box::new(ParseError::Unexpected(c, start_loc)),
247 (0, 0).offset(start_loc))),
248 Some(_) => (),
249 None => unreachable!(),
250 }
251
252 for (i, c) in input.char_indices() {
253 if c.is_delimiter() {
254 return Done(&input[i..],
255 Sexp::Sym(input[..i].into(), (0, i).offset(start_loc)));
256 }
257 }
258
259 Done(&input[input.len()..],
260 Sexp::Sym(input.into(), (0, input.len()).offset(start_loc)))
261 }
262
263 pub fn parse_string(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
264 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
265 pos
266 } else {
267 return Error(ParseError::String(
268 Box::new(ParseError::UnexpectedEof),
269 (input.len(), input.len()).offset(start_loc)));
270 };
271
272 let input = &input[end_of_white..];
273 let start_loc = start_loc + end_of_white;
274
275 match input.chars().next() {
276 Some('"') => (),
277 Some(c) =>
278 return Error(ParseError::String(
279 Box::new(ParseError::Unexpected(c, start_loc)),
280 (0, 0).offset(start_loc))),
281 None => unreachable!(),
282 }
283
284 for (i, c) in input[1..].char_indices() {
285 if c == '"' {
286 return Done(&input[2+i..],
287 Sexp::Str(input[1..i+1].into(), (0, i+2).offset(start_loc)));
288 }
289 }
290
291 Error(ParseError::String(
292 Box::new(ParseError::UnexpectedEof),
293 (0, input.len()).offset(start_loc)))
294 }
295
296 pub fn parse_character(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
297 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
298 pos
299 } else {
300 return Error(ParseError::String(
301 Box::new(ParseError::UnexpectedEof),
302 (input.len(), input.len()).offset(start_loc)));
303 };
304
305 let input = &input[end_of_white..];
306 let start_loc = start_loc + end_of_white;
307
308 match input.chars().nth(0) {
309 Some('#') => (),
310 Some(c) =>
311 return Error(ParseError::Char(
312 Box::new(ParseError::Unexpected(c, start_loc)),
313 (0, 0).offset(start_loc))),
314 None =>
315 return Error(ParseError::Char(
316 Box::new(ParseError::UnexpectedEof),
317 (0, 0).offset(start_loc))),
318 }
319
320 match input.chars().nth(1) {
321 Some('\\') => (),
322 Some(c) =>
323 return Error(ParseError::Char(
324 Box::new(ParseError::Unexpected(c, start_loc + 1)),
325 (1, 1).offset(start_loc))),
326 None =>
327 return Error(ParseError::Char(
328 Box::new(ParseError::UnexpectedEof),
329 (1, 1).offset(start_loc)))
330 }
331
332 match input.chars().nth(2) {
333 Some(c) =>
334 Done(&input[3..], Sexp::Char(c, (0, 3).offset(start_loc))),
335 None =>
336 Error(ParseError::Char(
337 Box::new(ParseError::UnexpectedEof),
338 (2, 2).offset(start_loc)))
339 }
340 }
341
342 \f
343 // Tests ///////////////////////////////////////////////////////////////////////
344
// Unit tests for the individual parsers. Expected spans are absolute byte
// offsets, so inputs with leading whitespace must contain exactly as many
// bytes of whitespace as the expected span start.
#[cfg(test)]
mod test {
    use super::*;
    use super::ParseResult::*;

    #[test]
    fn test_parse_sexp() {
        assert_eq!(parse_sexp(" 1", 0), Done("", Sexp::Int(1, (1, 2))));
        assert_eq!(parse_sexp("2.2", 0), Done("", Sexp::Float(2.2, (0, 3))));
        assert_eq!(parse_sexp(" a", 0), Done("", Sexp::Sym("a".into(), (1, 2))));
        assert_eq!(parse_sexp("#\\c", 0), Done("", Sexp::Char('c', (0, 3))));
        assert_eq!(parse_sexp(r#""hi""#, 0), Done("", Sexp::Str("hi".into(), (0, 4))));
    }

    #[test]
    fn test_parse_number() {
        assert_eq!(parse_number("1", 0), Done("", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number(" 13", 0), Done("", Sexp::Int(13, (1, 3))));
        assert_eq!(parse_number("1.2", 0), Done("", Sexp::Float(1.2, (0, 3))));
        assert_eq!(parse_number("\u{3000}4.2", 0), Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
        // Two leading spaces, so the number occupies bytes 2..4.
        assert_eq!(parse_number("  42 ", 0), Done(" ", Sexp::Int(42, (2, 4))));
        assert_eq!(parse_number(" 4.2 ", 0), Done(" ", Sexp::Float(4.2, (1, 4))));
        assert_eq!(parse_number("1()", 0), Done("()", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number("3.6()", 0), Done("()", Sexp::Float(3.6, (0, 3))));

        assert_eq!(parse_number("", 0), Error(ParseError::Number(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_number("123a", 0), Error(ParseError::Number(Box::new(ParseError::Unexpected('a', 3)), (3, 3))));
        assert_eq!(parse_number("66.6+", 0), Error(ParseError::Number(Box::new(ParseError::Unexpected('+', 4)), (4, 4))));
    }

    #[test]
    fn test_parse_ident() {
        assert_eq!(parse_symbol("+", 0), Done("", Sexp::Sym("+".into(), (0, 1))));
        assert_eq!(parse_symbol(" nil?", 0), Done("", Sexp::Sym("nil?".into(), (1, 5))));
        assert_eq!(parse_symbol(" ->socket", 0), Done("", Sexp::Sym("->socket".into(), (1, 9))));
        assert_eq!(parse_symbol("fib(", 0), Done("(", Sexp::Sym("fib".into(), (0, 3))));
        assert_eq!(parse_symbol("foo2", 0), Done("", Sexp::Sym("foo2".into(), (0, 4))));

        // We reserve #foo for the implementation to do as it wishes
        assert_eq!(parse_symbol("#hi", 0), Error(ParseError::Symbol(Box::new(ParseError::Unexpected('#', 0)), (0, 0))));
        // We reserve :foo for keywords
        assert_eq!(parse_symbol(":hi", 0), Error(ParseError::Symbol(Box::new(ParseError::Unexpected(':', 0)), (0, 0))));

        assert_eq!(parse_symbol("", 0), Error(ParseError::Symbol(Box::new(ParseError::UnexpectedEof), (0, 0))));
        assert_eq!(parse_symbol("0", 0), Error(ParseError::Symbol(Box::new(ParseError::Unexpected('0', 0)), (0, 0))));
        assert_eq!(parse_symbol("()", 0), Error(ParseError::Symbol(Box::new(ParseError::Unexpected('(', 0)), (0, 0))));
    }

    #[test]
    fn test_parse_string() {
        assert_eq!(parse_string(r#""""#, 0), Done("", Sexp::Str("".into(), (0, 2))));
        assert_eq!(parse_string(r#""hello""#, 0), Done("", Sexp::Str("hello".into(), (0, 7))));
        // Two leading spaces, then a 44-byte string between the quotes,
        // which puts the literal at bytes 2..48.
        assert_eq!(parse_string("  \"this is a nice string\nwith 0123 things in it\"", 0),
                   Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into(), (2, 48))));
        assert_eq!(parse_string(r#""hi"#, 0), Error(ParseError::String(Box::new(ParseError::UnexpectedEof), (0, 3))));
    }

    #[test]
    fn test_parse_char() {
        assert_eq!(parse_character(r#"#\""#, 0), Done("", Sexp::Char('"', (0, 3))));
        assert_eq!(parse_character(r#"#\ "#, 0), Done("", Sexp::Char(' ', (0, 3))));
        // Two leading spaces, so the literal occupies bytes 2..5.
        assert_eq!(parse_character(r#"  #\\"#, 0), Done("", Sexp::Char('\\', (2, 5))));

        assert_eq!(parse_character("#", 0), Error(ParseError::Char(Box::new(ParseError::UnexpectedEof), (1, 1))));
        assert_eq!(parse_character("a", 0), Error(ParseError::Char(Box::new(ParseError::Unexpected('a', 0)), (0, 0))));
    }
}
413
414
415 // #[cfg(test)]
416 // #[test]
417 // fn test_parse_list() {
418 // assert_eq!(list("()"), IResult::Done("", vec![]));
419 // assert_eq!(list("(1)"), IResult::Done("", vec![Sexp::Int(1)]));
420 // assert_eq!(list(" ( 1 2 3 a )"), IResult::Done("", vec![
421 // Sexp::Int(1),
422 // Sexp::Int(2),
423 // Sexp::Int(3),
424 // Sexp::Sym("a".into()),
425 // ]));
426 // }
427
428 // #[cfg(test)]
429 // #[test]
430 // fn test_parse_only_one() {
431 // assert!(parse_one("1 2").is_err());
432 // }
433
434 // #[cfg(test)]
435 // #[test]
436 // fn test_parse_expression() {
437 // assert_eq!(parse_one(r#"
438 // (def (main)
439 // (print (str "say " #\" "Hello, World" #\" " today!")))
440 // "#),
441 // Ok(Sexp::List(vec![
442 // Sexp::Sym("def".into()),
443 // Sexp::List(
444 // vec![Sexp::Sym("main".into())]
445 // ),
446 // Sexp::List(vec![
447 // Sexp::Sym("print".into()),
448 // Sexp::List(vec![
449 // Sexp::Sym("str".into()),
450 // Sexp::Str("say ".into()),
451 // Sexp::Char('"'),
452 // Sexp::Str("Hello, World".into()),
453 // Sexp::Char('"'),
454 // Sexp::Str(" today!".into()),
455 // ])
456 // ])
457 // ])));
458 // }
459
460 // #[cfg(test)]
461 // #[test]
462 // fn test_parse_multi() {
463 // assert_eq!(parse(" 1 2 3 "),
464 // Ok(vec![Sexp::Int(1), Sexp::Int(2), Sexp::Int(3)]));
465 // }