Witch of Git - ess/blob - src/lib.rs
Change parse to parse multiple expressions and add parse_one
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 // #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 #[macro_use]
7 extern crate nom;
8
9 use nom::{digit, multispace, IResult};
10
/// Indicates how parsing failed.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so callers can
/// print it or propagate it with `?` into a boxed error.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum ParseError {
    /// We can't explain how the parsing failed.
    Unspecified,
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Matched exhaustively so that adding a variant forces a message here.
        match *self {
            ParseError::Unspecified => f.write_str("unspecified parse error"),
        }
    }
}

impl std::error::Error for ParseError {}
17
/// An `Atom` is a non-composite value: a leaf of an S-expression.
// NOTE(review): the variant order is significant for the derived `PartialOrd`.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum Atom {
    /// A symbol, stored by name.
    Sym(String),
    /// A string literal, stored without its surrounding quotes.
    Str(String),
    /// A single character.
    Char(char),
    /// An integer. Numbers written without a decimal point are meant to be
    /// parsed as an `Int`.
    Int(i64),
    /// A float. Numbers written with a decimal point are meant to be parsed
    /// as a `Float`.
    // NOTE(review): the `number` parser in this file only builds `Int`;
    // confirm whether anything produces `Float` yet.
    Float(f64),
}
34
35 #[derive(Debug, PartialEq, Clone, PartialOrd)]
36 /// A `Sexp` represents either an `Atom` or a `List`. It encompasses all
37 /// possible lisp expressions.
38 pub enum Sexp {
39 /// A wrapper around the atom type
40 Atom {
41 atom: Atom,
42 },
43 /// A list of subexpressions
44 List {
45 list: Vec<Sexp>,
46 }
47 }
48
49 pub fn parse_one(input: &str) -> Result<Sexp, ParseError> {
50 match do_parse!(input,
51 exp: sexp >>
52 opt!(complete!(multispace)) >>
53 eof!() >>
54 (exp)) {
55 IResult::Done(_, res) => Ok(res),
56 _ => Err(ParseError::Unspecified),
57 }
58 }
59
60 pub fn parse(input: &str) -> Result<Vec<Sexp>, ParseError> {
61 let parse_res: IResult<&str, Vec<Sexp>> =
62 do_parse!(input,
63 exps: many1!(complete!(sexp)) >>
64 opt!(complete!(multispace)) >>
65 eof!() >>
66 (exps));
67 match parse_res {
68 IResult::Done(_, res) => Ok(res),
69 e => {
70 println!("{:#?}", e);
71 Err(ParseError::Unspecified)
72 }
73 }
74 }
75
76 named!(sexp<&str, Sexp>,
77 alt_complete!(
78 list => { |list| Sexp::List { list: list } }
79 | atom => { |atom| Sexp::Atom { atom: atom } }
80 )
81 );
82
83 named!(list<&str, Vec<Sexp> >,
84 do_parse!(
85 opt!(multispace) >>
86 tag_s!("(") >>
87 entries: many0!(sexp) >>
88 opt!(multispace) >>
89 tag_s!(")") >>
90 (entries)
91 )
92 );
93
94 named!(atom<&str, Atom>, alt_complete!(string | symbol | number | character));
95
96 named!(string<&str, Atom>,
97 do_parse!(
98 opt!(multispace) >>
99 tag_s!(r#"""#) >>
100 contents: take_until_s!(r#"""#) >>
101 tag_s!(r#"""#) >>
102 (Atom::Str(contents.into()))
103 )
104 );
105
106 named!(symbol<&str, Atom>,
107 do_parse!(
108 opt!(multispace) >>
109 peek!(valid_ident_prefix) >>
110 name: take_while1_s!(valid_ident_char) >>
111 (Atom::Sym(name.into()))
112 )
113 );
114
115 fn valid_ident_prefix(ident: &str) -> IResult<&str, ()> {
116 match ident.chars().next() {
117 Some(c) if c != '#' && !c.is_digit(10) && valid_ident_char(c) =>
118 IResult::Done(&ident[1..], ()),
119 None => IResult::Incomplete(nom::Needed::Unknown),
120 _ => IResult::Error(nom::ErrorKind::Custom(0)),
121 }
122 }
123
/// Returns `true` when `c` may appear inside a symbol: anything except
/// whitespace, a double quote, or a parenthesis.
fn valid_ident_char(c: char) -> bool {
    match c {
        '"' | '(' | ')' => false,
        other => !other.is_whitespace(),
    }
}
127
128 named!(number<&str, Atom>,
129 do_parse!(
130 opt!(multispace) >>
131 integral: digit >>
132 peek!(not!(valid_ident_prefix)) >>
133 (Atom::Int(integral.chars().fold(0, |i, c| i * 10 + c as i64 - '0' as i64)))
134 )
135 );
136
137 named!(character<&str, Atom>,
138 do_parse!(
139 opt!(multispace) >>
140 tag_s!(r#"#\"#) >>
141 character: take_s!(1) >>
142 (Atom::Char(character.chars().next().unwrap()))
143 )
144 );
145
#[cfg(test)]
#[test]
fn test_parse_number() {
    // (input, leftover input, parsed value) for inputs that must succeed.
    let accepted = [
        ("0", "", 0),
        ("123", "", 123),
        ("0123456789", "", 123456789),
        (" 42", "", 42),
        ("13()", "()", 13),
    ];
    for &(input, rest, value) in accepted.iter() {
        assert_eq!(number(input), IResult::Done(rest, Atom::Int(value)));
    }

    // Digits glued to an identifier, and non-numeric input, are errors.
    for &input in [" 42a", "abc", "()"].iter() {
        assert!(number(input).is_err());
    }

    // Empty input is incomplete rather than an error.
    assert!(number("").is_incomplete());
}
161
#[cfg(test)]
#[test]
fn test_parse_ident() {
    // (input, leftover input, symbol name) for inputs that must succeed.
    let accepted = [
        ("+", "", "+"),
        (" nil?", "", "nil?"),
        (" ->socket", "", "->socket"),
        ("fib(", "(", "fib"),
    ];
    for &(input, rest, name) in accepted.iter() {
        assert_eq!(symbol(input), IResult::Done(rest, Atom::Sym(name.into())));
    }

    // We reserve #foo for the implementation to do as it wishes; digits and
    // parentheses cannot start a symbol either.
    for &input in ["#hi", "0", "()"].iter() {
        assert!(symbol(input).is_err());
    }

    assert!(symbol("").is_incomplete());
}
177
#[cfg(test)]
#[test]
fn test_parse_string() {
    let simple = string(r#""hello""#);
    assert_eq!(simple, IResult::Done("", Atom::Str("hello".into())));

    // A literal may span lines; the newline is kept in the contents.
    let multiline = r#" "this is a nice string
with 0123 things in it""#;
    let expected = Atom::Str("this is a nice string\nwith 0123 things in it".into());
    assert_eq!(string(multiline), IResult::Done("", expected));

    // A missing closing quote is an error.
    let unterminated = string(r#""hi"#);
    assert!(unterminated.is_err());
}
188
#[cfg(test)]
#[test]
fn test_parse_char() {
    // (input, expected character); every case consumes the whole input.
    let accepted = [
        (r#"#\""#, '"'),
        (r#"#\ "#, ' '),
        (r#" #\\"#, '\\'),
    ];
    for &(input, expected) in accepted.iter() {
        assert_eq!(character(input), IResult::Done("", Atom::Char(expected)));
    }

    // A lone `#` needs more input; anything else is rejected outright.
    assert!(character("#").is_incomplete());
    assert!(character("a").is_err());
}
199
#[cfg(test)]
#[test]
fn test_parse_list() {
    // Shorthand for an integer atom wrapped as an expression.
    fn int(n: i64) -> Sexp {
        Sexp::Atom { atom: Atom::Int(n) }
    }

    assert_eq!(list("()"), IResult::Done("", vec![]));
    assert_eq!(list("(1)"), IResult::Done("", vec![int(1)]));

    let mixed = vec![
        int(1),
        int(2),
        int(3),
        Sexp::Atom { atom: Atom::Sym("a".into()) },
    ];
    assert_eq!(list(" ( 1 2 3 a )"), IResult::Done("", mixed));
}
212
#[cfg(test)]
#[test]
fn test_parse_only_one() {
    // Two expressions are one too many for `parse_one`.
    let outcome = parse_one("1 2");
    assert!(outcome.is_err());
}
218
#[cfg(test)]
#[test]
fn test_parse_expression() {
    // Small constructors so the expected tree reads like the source text.
    fn sym(name: &str) -> Sexp {
        Sexp::Atom { atom: Atom::Sym(name.into()) }
    }
    fn text(value: &str) -> Sexp {
        Sexp::Atom { atom: Atom::Str(value.into()) }
    }
    fn ch(c: char) -> Sexp {
        Sexp::Atom { atom: Atom::Char(c) }
    }
    fn seq(items: Vec<Sexp>) -> Sexp {
        Sexp::List { list: items }
    }

    let source = r#"
(def (main)
(print (str "say " #\" "Hello, World" #\" " today!")))
"#;

    let expected = seq(vec![
        sym("def"),
        seq(vec![sym("main")]),
        seq(vec![
            sym("print"),
            seq(vec![
                sym("str"),
                text("say "),
                ch('"'),
                text("Hello, World"),
                ch('"'),
                text(" today!"),
            ]),
        ]),
    ]);

    assert_eq!(parse_one(source), Ok(expected));
}
260
#[cfg(test)]
#[test]
fn test_parse_multi() {
    // Three whitespace-separated integers come back as three expressions.
    let expected: Vec<Sexp> = (1..4)
        .map(|n| Sexp::Atom { atom: Atom::Int(n) })
        .collect();
    assert_eq!(parse(" 1 2 3 "), Ok(expected));
}
268 }