Witch of Git - ess/blob - src/lib.rs
Improve the number parser
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 // #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 #[macro_use]
7 extern crate nom;
8
9 use nom::{digit, multispace, IResult};
10 use std::str::FromStr;
11
/// Indicates how parsing failed.
///
/// There is currently a single catch-all variant; no cause or position is
/// recorded.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum ParseError {
    /// We can't explain how the parsing failed.
    Unspecified,
}

impl std::fmt::Display for ParseError {
    /// Writes a short human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match *self {
            ParseError::Unspecified => f.write_str("unspecified parse error"),
        }
    }
}

/// Lets `ParseError` flow through `?` into `Box<dyn Error>` and other code
/// that expects a standard error type.
impl std::error::Error for ParseError {}
18
/// A parsed S-expression: either one of the atom kinds or a list of nested
/// expressions.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum Sexp {
    /// A symbol, stored as its name.
    Sym(String),
    /// A string literal, stored without the surrounding quotes.
    Str(String),
    /// A single character.
    Char(char),
    /// An integer. Numbers written without a decimal point become `Int`.
    Int(i64),
    /// A floating point value. Numbers written with a decimal point become
    /// `Float`.
    Float(f64),
    /// A list of subexpressions.
    List(Vec<Sexp>),
}
36
37 pub fn parse_one(input: &str) -> Result<Sexp, ParseError> {
38 match do_parse!(input,
39 exp: sexp >>
40 opt!(complete!(multispace)) >>
41 eof!() >>
42 (exp)) {
43 IResult::Done(_, res) => Ok(res),
44 _ => Err(ParseError::Unspecified),
45 }
46 }
47
48 pub fn parse(input: &str) -> Result<Vec<Sexp>, ParseError> {
49 let parse_res: IResult<&str, Vec<Sexp>> =
50 do_parse!(input,
51 exps: many1!(complete!(sexp)) >>
52 opt!(complete!(multispace)) >>
53 eof!() >>
54 (exps));
55 match parse_res {
56 IResult::Done(_, res) => Ok(res),
57 e => {
58 println!("{:#?}", e);
59 Err(ParseError::Unspecified)
60 }
61 }
62 }
63
64 named!(sexp<&str, Sexp>,
65 alt_complete!(
66 list => { |list| Sexp::List(list) }
67 | atom
68 )
69 );
70
71 named!(list<&str, Vec<Sexp> >,
72 do_parse!(
73 opt!(multispace) >>
74 tag_s!("(") >>
75 entries: many0!(sexp) >>
76 opt!(multispace) >>
77 tag_s!(")") >>
78 (entries)
79 )
80 );
81
// Parses any non-list expression, trying the alternatives in this order:
// string literal, symbol, number, character literal.
named!(atom<&str, Sexp>, alt_complete!(string | symbol | number | character));
83
84 named!(string<&str, Sexp>,
85 do_parse!(
86 opt!(multispace) >>
87 tag_s!(r#"""#) >>
88 contents: take_until_s!(r#"""#) >>
89 tag_s!(r#"""#) >>
90 (Sexp::Str(contents.into()))
91 )
92 );
93
94 named!(symbol<&str, Sexp>,
95 do_parse!(
96 opt!(multispace) >>
97 peek!(valid_ident_prefix) >>
98 name: take_while1_s!(valid_ident_char) >>
99 (Sexp::Sym(name.into()))
100 )
101 );
102
103 fn valid_ident_prefix(ident: &str) -> IResult<&str, ()> {
104 match ident.chars().next() {
105 Some(c) if c != '#' && !c.is_digit(10) && valid_ident_char(c) =>
106 IResult::Done(&ident[1..], ()),
107 None => IResult::Incomplete(nom::Needed::Unknown),
108 _ => IResult::Error(nom::ErrorKind::Custom(0)),
109 }
110 }
111
/// Returns `true` when `c` may appear inside a symbol: anything except
/// whitespace, a double quote, or a parenthesis.
fn valid_ident_char(c: char) -> bool {
    match c {
        '"' | '(' | ')' => false,
        other => !other.is_whitespace(),
    }
}
115
116 named!(number<&str, Sexp>,
117 preceded!(opt!(multispace),
118 map_res!(
119 recognize!(do_parse!(
120 digit >>
121 is_float: opt!(complete!(tag_s!("."))) >>
122 opt!(complete!(digit)) >>
123 peek!(not!(valid_ident_prefix)) >>
124 ()
125 )),
126 |text: &str| {
127 if text.contains(".") {
128 f64::from_str(text).map(Sexp::Float).or(Err(()))
129 } else {
130 i64::from_str(text).map(Sexp::Int).or(Err(()))
131 }
132 }
133 )
134 )
135 );
136
137 named!(character<&str, Sexp>,
138 do_parse!(
139 opt!(multispace) >>
140 tag_s!(r#"#\"#) >>
141 character: take_s!(1) >>
142 (Sexp::Char(character.chars().next().unwrap()))
143 )
144 );
145
// Exercises `number` on integers, floats, inputs followed by other tokens,
// and inputs that are not numbers at all.
#[cfg(test)]
#[test]
fn test_parse_number() {
    // Plain integers, including leading zeros and leading whitespace.
    assert_eq!(number("0"), IResult::Done("", Sexp::Int(0)));
    assert_eq!(number("123"), IResult::Done("", Sexp::Int(123)));
    assert_eq!(number("0123456789"), IResult::Done("", Sexp::Int(123456789)));
    assert_eq!(number(" 42"), IResult::Done("", Sexp::Int(42)));

    // Anything with a decimal point is a float, even with no fractional digits.
    assert_eq!(number("4."), IResult::Done("", Sexp::Float(4.)));
    assert_eq!(number("4.2"), IResult::Done("", Sexp::Float(4.2)));
    assert_eq!(number("1.00000000001"),
               IResult::Done("", Sexp::Float(1.00000000001)));

    // A number may not run straight into a symbol, but may be followed by a list.
    assert!(number(" 42a").is_err());
    assert_eq!(number("13()"), IResult::Done("()", Sexp::Int(13)));

    // Non-numeric input is an error; empty input is incomplete.
    assert!(number("abc").is_err());
    assert!(number("()").is_err());
    assert!(number("").is_incomplete());
}
166
// Exercises `symbol` on valid names, names followed by a delimiter, and
// inputs that must not be accepted as symbols.
#[cfg(test)]
#[test]
fn test_parse_ident() {
    assert_eq!(symbol("+"), IResult::Done("", Sexp::Sym("+".into())));
    assert_eq!(symbol(" nil?"), IResult::Done("", Sexp::Sym("nil?".into())));
    assert_eq!(symbol(" ->socket"), IResult::Done("", Sexp::Sym("->socket".into())));
    // The symbol stops at the opening parenthesis, which is left unconsumed.
    assert_eq!(symbol("fib("), IResult::Done("(", Sexp::Sym("fib".into())));

    // We reserve #foo for the implementation to do as it wishes
    assert!(symbol("#hi").is_err());

    // Digits and parentheses cannot start a symbol; empty input is incomplete.
    assert!(symbol("0").is_err());
    assert!(symbol("()").is_err());
    assert!(symbol("").is_incomplete());
}
182
// Exercises `string` on a simple literal, a literal spanning two lines, and
// a literal with no closing quote.
#[cfg(test)]
#[test]
fn test_parse_string() {
    assert_eq!(string(r#""hello""#), IResult::Done("", Sexp::Str("hello".into())));
    // The newline inside the literal is kept as part of the string contents.
    assert_eq!(string(r#" "this is a nice string
with 0123 things in it""#),
               IResult::Done("", Sexp::Str("this is a nice string\nwith 0123 things in it".into())));

    // Missing closing quote.
    assert!(string(r#""hi"#).is_err());
}
193
// Exercises `character` on a quote, a space and a backslash, plus truncated
// and non-matching input.
#[cfg(test)]
#[test]
fn test_parse_char() {
    assert_eq!(character(r#"#\""#), IResult::Done("", Sexp::Char('"')));
    assert_eq!(character(r#"#\ "#), IResult::Done("", Sexp::Char(' ')));
    assert_eq!(character(r#" #\\"#), IResult::Done("", Sexp::Char('\\')));

    // A lone `#` is only half of the `#\` marker; `a` does not match it at all.
    assert!(character("#").is_incomplete());
    assert!(character("a").is_err());
}
204
// Exercises `list` on an empty list, a single-element list, and a list with
// whitespace around and between its elements.
#[cfg(test)]
#[test]
fn test_parse_list() {
    assert_eq!(list("()"), IResult::Done("", vec![]));
    assert_eq!(list("(1)"), IResult::Done("", vec![Sexp::Int(1)]));
    assert_eq!(list(" ( 1 2 3 a )"), IResult::Done("", vec![
        Sexp::Int(1),
        Sexp::Int(2),
        Sexp::Int(3),
        Sexp::Sym("a".into()),
    ]));
}
217
// `parse_one` must reject input that contains more than one expression.
#[cfg(test)]
#[test]
fn test_parse_only_one() {
    assert!(parse_one("1 2").is_err());
}
223
// End-to-end check of `parse_one` on a nested expression that mixes symbols,
// lists, string literals and character literals.
#[cfg(test)]
#[test]
fn test_parse_expression() {
    assert_eq!(parse_one(r#"
(def (main)
(print (str "say " #\" "Hello, World" #\" " today!")))
"#),
               Ok(Sexp::List(vec![
                   Sexp::Sym("def".into()),
                   Sexp::List(
                       vec![Sexp::Sym("main".into())]
                   ),
                   Sexp::List(vec![
                       Sexp::Sym("print".into()),
                       Sexp::List(vec![
                           Sexp::Sym("str".into()),
                           Sexp::Str("say ".into()),
                           Sexp::Char('"'),
                           Sexp::Str("Hello, World".into()),
                           Sexp::Char('"'),
                           Sexp::Str(" today!".into()),
                       ])
                   ])
               ])));
}
249
// `parse` returns every top-level expression in order and tolerates
// surrounding whitespace.
#[cfg(test)]
#[test]
fn test_parse_multi() {
    assert_eq!(parse(" 1 2 3 "),
               Ok(vec![Sexp::Int(1), Sexp::Int(2), Sexp::Int(3)]));
}