Witch of Git - ess/blob - src/lib.rs
Change parse to return a ParseError instead of ()
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 // #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 #[macro_use]
7 extern crate nom;
8
9 use nom::{digit, multispace, IResult};
10
/// Indicates how parsing failed.
///
/// Implements `Display` and `std::error::Error` so it can be reported to users
/// and propagated through generic error-handling code.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum ParseError {
    /// We can't explain how the parsing failed.
    Unspecified,
}

impl ::std::fmt::Display for ParseError {
    /// Writes a short, human-readable description of the failure.
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a new message.
        match *self {
            ParseError::Unspecified => f.write_str("failed to parse s-expression"),
        }
    }
}

impl ::std::error::Error for ParseError {}
17
/// An `Atom` is a non-composite value: a leaf of an s-expression.
///
/// The derived ordering compares variants in declaration order first, so the
/// order of the variants below is significant.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum Atom {
    /// A symbol, stored by name.
    Sym(String),
    /// A string literal, stored without its surrounding quotes.
    Str(String),
    /// A single character.
    Char(char),
    /// An integer: a number written without a decimal point.
    Int(i64),
    /// A floating-point number: intended for numbers written with a decimal
    /// point.
    Float(f64),
}
34
35 #[derive(Debug, PartialEq, Clone, PartialOrd)]
36 /// A `Sexp` represents either an `Atom` or a `List`. It encompasses all
37 /// possible lisp expressions.
38 pub enum Sexp {
39 /// A wrapper around the atom type
40 Atom {
41 atom: Atom,
42 },
43 /// A list of subexpressions
44 List {
45 list: Vec<Sexp>,
46 }
47 }
48
49 pub fn parse(input: &str) -> Result<Sexp, ParseError> {
50 match do_parse!(input, exp: sexp >> opt!(multispace) >> eof!() >> (exp)) {
51 IResult::Done(_, res) => Ok(res),
52 _ => Err(ParseError::Unspecified),
53 }
54 }
55
56 named!(sexp<&str, Sexp>,
57 alt!(
58 list => { |list| Sexp::List { list: list } }
59 | atom => { |atom| Sexp::Atom { atom: atom } }
60 )
61 );
62
63 named!(list<&str, Vec<Sexp> >,
64 do_parse!(
65 opt!(multispace) >>
66 tag_s!("(") >>
67 entries: many0!(sexp) >>
68 opt!(multispace) >>
69 tag_s!(")") >>
70 (entries)
71 )
72 );
73
74 named!(atom<&str, Atom>, alt!(string | symbol | number | character));
75
76 named!(string<&str, Atom>,
77 do_parse!(
78 opt!(multispace) >>
79 tag_s!("\"") >>
80 contents: take_until_s!("\"") >>
81 tag_s!("\"") >>
82 (Atom::Str(contents.into()))
83 )
84 );
85
86 named!(symbol<&str, Atom>,
87 do_parse!(
88 opt!(multispace) >>
89 peek!(valid_ident_prefix) >>
90 name: take_while1_s!(valid_ident_char) >>
91 (Atom::Sym(name.into()))
92 )
93 );
94
95 fn valid_ident_prefix(ident: &str) -> IResult<&str, ()> {
96 match ident.chars().next() {
97 Some(c) if c != '#' && !c.is_digit(10) && valid_ident_char(c) =>
98 IResult::Done(&ident[1..], ()),
99 None => IResult::Incomplete(nom::Needed::Unknown),
100 _ => IResult::Error(nom::ErrorKind::Custom(0)),
101 }
102 }
103
/// Reports whether `c` may appear inside a symbol.
///
/// Every character is allowed except whitespace, the double quote, and the
/// two parentheses.
fn valid_ident_char(c: char) -> bool {
    match c {
        '"' | '(' | ')' => false,
        other => !other.is_whitespace(),
    }
}
107
108 named!(number<&str, Atom>,
109 do_parse!(
110 opt!(multispace) >>
111 integral: digit >>
112 peek!(not!(valid_ident_prefix)) >>
113 (Atom::Int(integral.chars().fold(0, |i, c| i * 10 + c as i64 - '0' as i64)))
114 )
115 );
116
117 named!(character<&str, Atom>,
118 do_parse!(
119 opt!(multispace) >>
120 tag_s!("#\\") >>
121 character: take_s!(1) >>
122 (Atom::Char(character.chars().next().unwrap()))
123 )
124 );
125
#[cfg(test)]
#[test]
fn test_parse_number() {
    // Inputs that are consumed entirely, paired with the expected value.
    let accepted = [("0", 0), ("123", 123), ("0123456789", 123456789), (" 42", 42)];
    for &(text, value) in accepted.iter() {
        assert_eq!(number(text), IResult::Done("", Atom::Int(value)));
    }

    // Digits may not run straight into identifier characters...
    assert!(number(" 42a").is_err());
    // ...but a delimiter may follow immediately and is left unconsumed.
    assert_eq!(number("13()"), IResult::Done("()", Atom::Int(13)));

    // Input that does not start with a digit is rejected.
    for &text in ["abc", "()"].iter() {
        assert!(number(text).is_err());
    }
    assert!(number("").is_incomplete());
}
141
#[cfg(test)]
#[test]
fn test_parse_ident() {
    // (input, unconsumed remainder, expected symbol name)
    let accepted = [
        ("+", "", "+"),
        (" nil?", "", "nil?"),
        (" ->socket", "", "->socket"),
        ("fib(", "(", "fib"),
    ];
    for &(text, rest, name) in accepted.iter() {
        assert_eq!(symbol(text), IResult::Done(rest, Atom::Sym(name.to_owned())));
    }

    // We reserve #foo for the implementation to do as it wishes
    assert!(symbol("#hi").is_err());

    // Neither a digit nor a parenthesis can start a symbol.
    for &text in ["0", "()"].iter() {
        assert!(symbol(text).is_err());
    }
    assert!(symbol("").is_incomplete());
}
157
#[cfg(test)]
#[test]
fn test_parse_string() {
    let simple = string("\"hello\"");
    assert_eq!(simple, IResult::Done("", Atom::Str("hello".to_owned())));

    // Leading whitespace is skipped, and a literal may span several lines.
    let multiline = string(" \"this is a nice string\nwith 0123 things in it\"");
    let expected = Atom::Str("this is a nice string\nwith 0123 things in it".to_owned());
    assert_eq!(multiline, IResult::Done("", expected));

    // A literal with no closing quote is not accepted.
    assert!(string("\"hi").is_err());
}
168
#[cfg(test)]
#[test]
fn test_parse_char() {
    // Any single character may follow the `#\` marker, including a quote, a
    // space, and a backslash.
    let accepted = [("#\\\"", '"'), ("#\\ ", ' '), (" #\\\\", '\\')];
    for &(text, value) in accepted.iter() {
        assert_eq!(character(text), IResult::Done("", Atom::Char(value)));
    }

    // A lone `#` is only the start of the marker.
    assert!(character("#").is_incomplete());
    assert!(character("a").is_err());
}
179
#[cfg(test)]
#[test]
fn test_parse_list() {
    // Shorthand for an integer atom wrapped as an expression.
    let int = |n: i64| Sexp::Atom { atom: Atom::Int(n) };

    let empty: Vec<Sexp> = Vec::new();
    assert_eq!(list("()"), IResult::Done("", empty));

    assert_eq!(list("(1)"), IResult::Done("", vec![int(1)]));

    // Whitespace around the parentheses and between entries is ignored.
    let mixed = vec![
        int(1),
        int(2),
        int(3),
        Sexp::Atom { atom: Atom::Sym("a".to_owned()) },
    ];
    assert_eq!(list(" ( 1 2 3 a )"), IResult::Done("", mixed));
}
192
#[cfg(test)]
#[test]
fn test_cant_parse() {
    // Two top-level expressions are not a single s-expression.
    let outcome = parse("1 2");
    assert_eq!(outcome, Err(ParseError::Unspecified));
}
198
#[cfg(test)]
#[test]
fn test_parse_expression() {
    // Small constructors keep the expected tree readable.
    let sym = |name: &str| Sexp::Atom { atom: Atom::Sym(name.to_owned()) };
    let text = |body: &str| Sexp::Atom { atom: Atom::Str(body.to_owned()) };
    let quote = Sexp::Atom { atom: Atom::Char('"') };

    // Build the expected tree from the innermost call outwards.
    let call_str = Sexp::List {
        list: vec![
            sym("str"),
            text("say "),
            quote.clone(),
            text("Hello, World"),
            quote,
            text(" today!"),
        ],
    };
    let call_print = Sexp::List { list: vec![sym("print"), call_str] };
    let signature = Sexp::List { list: vec![sym("main")] };
    let expected = Sexp::List { list: vec![sym("def"), signature, call_print] };

    let source = r#"
(def (main)
(print (str "say " #\" "Hello, World" #\" " today!")))
"#;
    assert_eq!(parse(source), Ok(expected));
}