]> Witch of Git - ess/blob - src/lib.rs
Add more tests
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 #[macro_use]
7 extern crate nom;
8
9 use nom::{digit, multispace, IResult};
10
/// An atomic (non-list) value appearing in an S-expression.
///
/// Atoms are the leaves of a [`Sexp`] tree: symbols, string literals,
/// character literals and numbers.
#[derive(Debug, PartialEq, Clone, PartialOrd)]
pub enum Atom {
    /// A value representing a symbol. A symbol is an atomic unit of text
    /// that is not a string, character or number literal, such as `nil?`
    /// or `->socket`.
    Sym(String),
    /// A value representing a string literal.
    Str(String),
    /// A value representing a single character.
    Char(char),
    /// A value representing an integer. Any number containing no decimal point
    /// will be parsed as an `Int`.
    Int(i64),
    /// A value representing a float. Any number containing a decimal point will
    /// be parsed as a `Float`.
    ///
    /// NOTE(review): the `number` parser in this file only produces `Int`;
    /// confirm where (or whether) float literals are actually parsed.
    Float(f64),
}
26
27 #[derive(Debug, PartialEq, Clone, PartialOrd)]
28 pub enum Sexp {
29 /// A wrapper around the atom type
30 Atom {
31 atom: Atom,
32 },
33 /// A list of subexpressions
34 List {
35 list: Vec<Sexp>,
36 }
37 }
38
39 pub fn parse(input: &str) -> Result<Sexp, ()> {
40 match do_parse!(input, exp: sexp >> opt!(multispace) >> eof!() >> (exp)) {
41 IResult::Done(_, res) => Ok(res),
42 _ => Err(()),
43 }
44 }
45
46 named!(sexp<&str, Sexp>,
47 alt!(
48 list => { |list| Sexp::List { list: list } }
49 | atom => { |atom| Sexp::Atom { atom: atom } }
50 )
51 );
52
53 named!(list<&str, Vec<Sexp> >,
54 do_parse!(
55 opt!(multispace) >>
56 tag_s!("(") >>
57 entries: many0!(sexp) >>
58 opt!(multispace) >>
59 tag_s!(")") >>
60 (entries)
61 )
62 );
63
64 named!(atom<&str, Atom>, alt!(string | symbol | number | character));
65
66 named!(string<&str, Atom>,
67 do_parse!(
68 opt!(multispace) >>
69 tag_s!("\"") >>
70 contents: take_until_s!("\"") >>
71 tag_s!("\"") >>
72 (Atom::Str(contents.into()))
73 )
74 );
75
76 named!(symbol<&str, Atom>,
77 do_parse!(
78 opt!(multispace) >>
79 peek!(valid_ident_prefix) >>
80 name: take_while1_s!(valid_ident_char) >>
81 (Atom::Sym(name.into()))
82 )
83 );
84
85 fn valid_ident_prefix(ident: &str) -> IResult<&str, ()> {
86 match ident.chars().next() {
87 Some(c) if c != '#' && !c.is_digit(10) && valid_ident_char(c) =>
88 IResult::Done(&ident[1..], ()),
89 None => IResult::Incomplete(nom::Needed::Unknown),
90 _ => IResult::Error(nom::ErrorKind::Custom(0)),
91 }
92 }
93
// Reports whether `c` may appear inside a symbol: anything except
// whitespace, a double quote, or a parenthesis.
fn valid_ident_char(c: char) -> bool {
    match c {
        '"' | '(' | ')' => false,
        other => !other.is_whitespace(),
    }
}
97
98 named!(number<&str, Atom>,
99 do_parse!(
100 opt!(multispace) >>
101 integral: digit >>
102 peek!(not!(valid_ident_prefix)) >>
103 (Atom::Int(integral.chars().fold(0, |i, c| i * 10 + c as i64 - '0' as i64)))
104 )
105 );
106
107 named!(character<&str, Atom>,
108 do_parse!(
109 opt!(multispace) >>
110 tag_s!("#\\") >>
111 character: take_s!(1) >>
112 (Atom::Char(character.chars().next().unwrap()))
113 )
114 );
115
#[cfg(test)]
#[test]
fn test_parse_number() {
    // Inputs that are consumed entirely and yield the given integer.
    let whole: [(&str, i64); 4] = [
        ("0", 0),
        ("123", 123),
        ("0123456789", 123456789),
        (" 42", 42),
    ];
    for &(text, value) in whole.iter() {
        assert_eq!(number(text), IResult::Done("", Atom::Int(value)));
    }

    // Digits may not run straight into identifier characters...
    assert!(number(" 42a").is_err());
    // ...but a delimiter ends the number and is left in the remaining input.
    assert_eq!(number("13()"), IResult::Done("()", Atom::Int(13)));

    // Non-numeric input is an error; empty input is incomplete.
    for &text in ["abc", "()"].iter() {
        assert!(number(text).is_err());
    }
    assert!(number("").is_incomplete());
}
131
#[cfg(test)]
#[test]
fn test_parse_ident() {
    // (input, remaining input, expected symbol name)
    let accepted = [
        ("+", "", "+"),
        (" nil?", "", "nil?"),
        (" ->socket", "", "->socket"),
        ("fib(", "(", "fib"),
    ];
    for &(text, rest, name) in accepted.iter() {
        assert_eq!(symbol(text), IResult::Done(rest, Atom::Sym(name.into())));
    }

    // We reserve #foo for the implementation to do as it wishes
    assert!(symbol("#hi").is_err());

    // Digits and delimiters cannot start a symbol; empty input is incomplete.
    for &text in ["0", "()"].iter() {
        assert!(symbol(text).is_err());
    }
    assert!(symbol("").is_incomplete());
}
147
#[cfg(test)]
#[test]
fn test_parse_string() {
    // A plain single-line literal.
    let simple = string(r#""hello""#);
    assert_eq!(simple, IResult::Done("", Atom::Str("hello".into())));

    // Leading whitespace is skipped and embedded newlines are preserved.
    let multiline = string(r#" "this is a nice string
with 0123 things in it""#);
    assert_eq!(
        multiline,
        IResult::Done("", Atom::Str("this is a nice string\nwith 0123 things in it".into()))
    );

    // A literal without its closing quote is rejected.
    let unterminated = string(r#""hi"#);
    assert!(unterminated.is_err());
}
158
#[cfg(test)]
#[test]
fn test_parse_char() {
    // (input, expected character); each input is consumed entirely.
    let accepted = [
        ("#\\\"", '"'),
        ("#\\ ", ' '),
        (" #\\\\", '\\'),
    ];
    for &(text, expected) in accepted.iter() {
        assert_eq!(character(text), IResult::Done("", Atom::Char(expected)));
    }

    // A lone `#` needs more input; anything without the prefix is an error.
    assert!(character("#").is_incomplete());
    assert!(character("a").is_err());
}
169
#[cfg(test)]
#[test]
fn test_parse_list() {
    // Shorthand for an integer atom wrapped as an expression.
    fn int(n: i64) -> Sexp {
        Sexp::Atom { atom: Atom::Int(n) }
    }

    assert_eq!(list("()"), IResult::Done("", vec![]));
    assert_eq!(list("(1)"), IResult::Done("", vec![int(1)]));

    // Whitespace around the parentheses and between entries is ignored.
    let mixed = vec![
        int(1),
        int(2),
        int(3),
        Sexp::Atom { atom: Atom::Sym("a".into()) },
    ];
    assert_eq!(list(" ( 1 2 3 a )"), IResult::Done("", mixed));
}
182
#[cfg(test)]
#[test]
fn test_cant_parse() {
    // Two top-level expressions do not form a single S-expression.
    let outcome = parse("1 2");
    assert!(outcome.is_err());
}
188
#[cfg(test)]
#[test]
fn test_parse_expression() {
    // Small constructors to keep the expected tree readable.
    fn leaf(atom: Atom) -> Sexp {
        Sexp::Atom { atom: atom }
    }
    fn sym(name: &str) -> Sexp {
        leaf(Atom::Sym(name.into()))
    }
    fn text(contents: &str) -> Sexp {
        leaf(Atom::Str(contents.into()))
    }
    fn branch(children: Vec<Sexp>) -> Sexp {
        Sexp::List { list: children }
    }

    let source = r#"
(def (main)
(print (str "say " #\" "Hello, World" #\" " today!")))
"#;

    let expected = branch(vec![
        sym("def"),
        branch(vec![sym("main")]),
        branch(vec![
            sym("print"),
            branch(vec![
                sym("str"),
                text("say "),
                leaf(Atom::Char('"')),
                text("Hello, World"),
                leaf(Atom::Char('"')),
                text(" today!"),
            ]),
        ]),
    ]);

    assert_eq!(parse(source), Ok(expected));
}