]> Witch of Git - ess/blob - src/lib.rs
Begin a reimplementation that tracks source locations
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 // #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 use std::borrow::Cow;
7
8 /// A type representing arbitrary symbolic expressions. `Sexp` carries the
9 /// source code location it came from along with it for later diagnostic
10 /// purposes.
11 #[derive(Debug, PartialEq, Clone, PartialOrd)]
12 pub enum Sexp<'a, Loc=ByteSpan> where Loc: Span {
13 /// A value representing a symbol. A symbol is an atomic unit
14 Sym(Cow<'a, str>, Loc),
15 /// A value representing a string literal.
16 Str(Cow<'a, str>, Loc),
17 /// A value representing a single character.
18 Char(char, Loc),
19 /// A value representing an integer. Any number containing no decimal point
20 /// will be parsed as an `Int`.
21 Int(i64, Loc),
22 /// A value representing a float. Any number containing a decimal point will
23 /// be parsed as a `Float`.
24 Float(f64, Loc),
25 /// A list of subexpressions
26 List(Vec<Sexp<'a, Loc>>, Loc),
27 }
28
29 impl<'a, Loc> Sexp<'a, Loc> where Loc: Span {
30 pub fn get_loc(&self) -> &Loc {
31 match *self {
32 Sexp::Sym(.., ref l) => l,
33 Sexp::Str(.., ref l) => l,
34 Sexp::Char(.., ref l) => l,
35 Sexp::Int(.., ref l) => l,
36 Sexp::Float(.., ref l) => l,
37 Sexp::List(.., ref l) => l,
38 }
39 }
40
41 pub fn get_loc_mut(&mut self) -> &mut Loc {
42 match *self {
43 Sexp::Sym(.., ref mut l) => l,
44 Sexp::Str(.., ref mut l) => l,
45 Sexp::Char(.., ref mut l) => l,
46 Sexp::Int(.., ref mut l) => l,
47 Sexp::Float(.., ref mut l) => l,
48 Sexp::List(.., ref mut l) => l,
49 }
50 }
51 }
52
53 \f
54 // General Parsing Types ///////////////////////////////////////////////////////
55
/// A region of source text that a parsed value or a parse error can be
/// attributed to.
pub trait Span {
    /// The type of the position at which a span starts.
    type Begin;

    /// Returns the position at which this span starts.
    fn begin(&self) -> Self::Begin;

    /// Returns this span moved by `begin`. The `ByteSpan` implementation adds
    /// `begin` to both of its endpoints.
    fn offset(&self, begin: Self::Begin) -> Self;

    /// Combines this span with `other`. The `ByteSpan` implementation yields
    /// the smallest range that covers both.
    fn union(&self, other: &Self) -> Self;
}
63
/// The outcome of running a parser over a string slice.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseResult<'a, T, E> {
    /// Parsing succeeded. Holds the part of the input that was not consumed,
    /// followed by the parsed value.
    Done(&'a str, T),
    /// Parsing failed with the given error.
    Error(E),
}
69
70 use ParseResult::*;
71
72 \f
73 // Specific Parsing Types (ParseError, ByteSpan) ///////////////////////////////
74
75 /// Indicates how parsing failed.
76 #[derive(Debug, PartialEq, Eq, Clone)]
77 pub enum ParseError<Loc=ByteSpan> where Loc: Span {
78 /// We can't explain how the parsing failed.
79 UnexpectedEof,
80 Number(Option<Box<ParseError>>, Loc),
81 Unexpected(char, Loc::Begin),
82 Unimplemented,
83 }
84
85 type ByteSpan = (usize, usize);
86
87 impl Span for ByteSpan {
88 type Begin = usize;
89
90 fn offset(&self, begin: Self::Begin) -> Self {
91 (self.0 + begin, self.1 + begin)
92 }
93
94 fn begin(&self) -> Self::Begin {
95 self.0
96 }
97
98 fn union(&self, other: &Self) -> Self {
99 use std::cmp::{min, max};
100 (min(self.0, other.0), max(self.1, other.1))
101 }
102 }
103
104
105 \f
106 // Parsing Utilities ///////////////////////////////////////////////////////////
107
/// Classifies values that mark the end of a token.
trait IsDelimeter {
    /// Returns `true` if the value ends the token that is being read.
    fn is_delimiter(&self) -> bool;
}

impl IsDelimeter for char {
    /// A character is a delimiter if it is whitespace or one of
    /// `; ( ) [ ] { } " ' `` ` `` ,`.
    fn is_delimiter(&self) -> bool {
        match *self {
            ';' | '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`' | ',' => true,
            other => other.is_whitespace(),
        }
    }
}
122
123 \f
124 // Parsers /////////////////////////////////////////////////////////////////////
125
126 // pub fn parse_one(input: &str) -> Result<Sexp, ParseError>;
127
128 // pub fn parse(input: &str) -> Result<Vec<Sexp>, ParseError>;
129
130 pub fn parse_number(input: &str, start_loc: usize) -> ParseResult<Sexp, ParseError> {
131 // Consume all the whitespace at the beginning of the string
132 let end_of_white = if let Some(pos) = input.find(|c: char| !c.is_whitespace()) {
133 pos
134 } else {
135 return Error(ParseError::Number(
136 Some(Box::new(ParseError::UnexpectedEof)),
137 (input.len(), input.len()).offset(start_loc)));
138 };
139
140 let input = &input[end_of_white..];
141 let start_loc = start_loc + end_of_white;
142
143 match input.chars().next() {
144 Some(c) if !c.is_digit(10) => {
145 return Error(ParseError::Number(
146 Some(Box::new(ParseError::Unexpected(c, 0))),
147 (0, c.len_utf8()).offset(start_loc)));
148 }
149 None => return Error(ParseError::Number(
150 Some(Box::new(ParseError::UnexpectedEof)),
151 (0, 0).offset(start_loc))),
152 _ => (),
153 }
154
155 let base = 10;
156
157 let mut end = 0;
158 // Before the decimal point
159 for (i, c) in input.char_indices() {
160 if c == '.' {
161 end = i + 1;
162 break;
163 }
164
165 if c.is_delimiter() {
166 return Done(&input[i..],
167 Sexp::Int(input[..i].parse().expect("Already matched digits"),
168 (0, i).offset(start_loc)));
169 }
170
171 if !c.is_digit(base) {
172 return Error(ParseError::Number(
173 Some(Box::new(ParseError::Unexpected(c, i))),
174 (i, i).offset(start_loc)));
175 }
176
177 end = i + c.len_utf8();
178 }
179
180 if input[end..].is_empty() {
181 return Done(&input[end..],
182 Sexp::Int(input.parse().expect("Already matched digits"),
183 (0, end).offset(start_loc)));
184 }
185
186 // After the decimal point
187 for (i, c) in input[end..].char_indices() {
188 if c.is_delimiter() {
189 return Done(&input[i+end..],
190 Sexp::Float(input[..end+i].parse().expect("Already matched digits.digits"),
191 (0, end+i).offset(start_loc)));
192 }
193
194 if !c.is_digit(base) {
195 return Error(ParseError::Number(
196 Some(Box::new(ParseError::Unexpected(c, i + end))),
197 (i+end, i+end).offset(start_loc)));
198 }
199 }
200
201 Done(&input[input.len()..],
202 Sexp::Float(input.parse().expect("Already matched digits.digits"),
203 (0, input.len()).offset(start_loc)))
204 }
205
206 \f
207 // Tests ///////////////////////////////////////////////////////////////////////
208
#[cfg(test)]
mod test {
    use super::*;
    use super::ParseResult::*;

    /// Builds the result `parse_number` is expected to return when it fails
    /// because of `cause` at `span`.
    fn number_error<'a>(cause: ParseError, span: (usize, usize))
                        -> ParseResult<'a, Sexp<'a>, ParseError> {
        Error(ParseError::Number(Some(Box::new(cause)), span))
    }

    #[test]
    fn test_parse_number() {
        // Well-formed numbers, with and without surrounding text.
        assert_eq!(parse_number("1", 0), Done("", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number(" 13", 0), Done("", Sexp::Int(13, (1, 3))));
        assert_eq!(parse_number("1.2", 0), Done("", Sexp::Float(1.2, (0, 3))));
        // U+3000 is a multi-byte whitespace character, so the span starts
        // after all of its bytes.
        assert_eq!(parse_number("\u{3000}4.2", 0),
                   Done("", Sexp::Float(4.2, (0, 3).offset('\u{3000}'.len_utf8()))));
        // Two leading spaces are needed for the number to occupy bytes 2..4.
        assert_eq!(parse_number("  42 ", 0), Done(" ", Sexp::Int(42, (2, 4))));
        assert_eq!(parse_number(" 4.2 ", 0), Done(" ", Sexp::Float(4.2, (1, 4))));
        assert_eq!(parse_number("1()", 0), Done("()", Sexp::Int(1, (0, 1))));
        assert_eq!(parse_number("3.6()", 0), Done("()", Sexp::Float(3.6, (0, 3))));

        // Malformed input.
        assert_eq!(parse_number("", 0),
                   number_error(ParseError::UnexpectedEof, (0, 0)));
        assert_eq!(parse_number("123a", 0),
                   number_error(ParseError::Unexpected('a', 3), (3, 3)));
        assert_eq!(parse_number("66.6+", 0),
                   number_error(ParseError::Unexpected('+', 4), (4, 4)));
    }
}
230
231 // #[cfg(test)]
232 // #[test]
233 // fn test_parse_ident() {
234 // assert_eq!(symbol("+"), IResult::Done("", Sexp::Sym("+".into())));
235 // assert_eq!(symbol(" nil?"), IResult::Done("", Sexp::Sym("nil?".into())));
236 // assert_eq!(symbol(" ->socket"), IResult::Done("", Sexp::Sym("->socket".into())));
237 // assert_eq!(symbol("fib("), IResult::Done("(", Sexp::Sym("fib".into())));
238
239 // // We reserve #foo for the implementation to do as it wishes
240 // assert!(symbol("#hi").is_err());
241
242 // assert!(symbol("0").is_err());
243 // assert!(symbol("()").is_err());
244 // assert!(symbol("").is_incomplete());
245 // }
246
247 // #[cfg(test)]
248 // #[test]
249 // fn test_parse_string() {
250 // assert_eq!(string(r#""hello""#), IResult::Done("", Sexp::Str("hello".into())));
251 // assert_eq!(string(r#" "this is a nice string
252 // with 0123 things in it""#),
253 // IResult::Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into())));
254
255 // assert!(string(r#""hi"#).is_err());
256 // }
257
258 // #[cfg(test)]
259 // #[test]
260 // fn test_parse_char() {
261 // assert_eq!(character(r#"#\""#), IResult::Done("", Sexp::Char('"')));
262 // assert_eq!(character(r#"#\ "#), IResult::Done("", Sexp::Char(' ')));
263 // assert_eq!(character(r#" #\\"#), IResult::Done("", Sexp::Char('\\')));
264
265 // assert!(character("#").is_incomplete());
266 // assert!(character("a").is_err());
267 // }
268
269
270 // #[cfg(test)]
271 // #[test]
272 // fn test_parse_list() {
273 // assert_eq!(list("()"), IResult::Done("", vec![]));
274 // assert_eq!(list("(1)"), IResult::Done("", vec![Sexp::Int(1)]));
275 // assert_eq!(list(" ( 1 2 3 a )"), IResult::Done("", vec![
276 // Sexp::Int(1),
277 // Sexp::Int(2),
278 // Sexp::Int(3),
279 // Sexp::Sym("a".into()),
280 // ]));
281 // }
282
283 // #[cfg(test)]
284 // #[test]
285 // fn test_parse_only_one() {
286 // assert!(parse_one("1 2").is_err());
287 // }
288
289 // #[cfg(test)]
290 // #[test]
291 // fn test_parse_expression() {
292 // assert_eq!(parse_one(r#"
293 // (def (main)
294 // (print (str "say " #\" "Hello, World" #\" " today!")))
295 // "#),
296 // Ok(Sexp::List(vec![
297 // Sexp::Sym("def".into()),
298 // Sexp::List(
299 // vec![Sexp::Sym("main".into())]
300 // ),
301 // Sexp::List(vec![
302 // Sexp::Sym("print".into()),
303 // Sexp::List(vec![
304 // Sexp::Sym("str".into()),
305 // Sexp::Str("say ".into()),
306 // Sexp::Char('"'),
307 // Sexp::Str("Hello, World".into()),
308 // Sexp::Char('"'),
309 // Sexp::Str(" today!".into()),
310 // ])
311 // ])
312 // ])));
313 // }
314
315 // #[cfg(test)]
316 // #[test]
317 // fn test_parse_multi() {
318 // assert_eq!(parse(" 1 2 3 "),
319 // Ok(vec![Sexp::Int(1), Sexp::Int(2), Sexp::Int(3)]));
320 // }