Witch of Git - ess/blob - src/lib.rs
Remove the indirection introduced by separating Atom from Sexp
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 // #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 #[macro_use]
7 extern crate nom;
8
9 use nom::{digit, multispace, IResult};
10
/// Indicates how parsing failed.
///
/// Implements `Display` and `std::error::Error` so it can be boxed as a
/// generic error or converted by `?` in callers that return other error types.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum ParseError {
    /// We can't explain how the parsing failed.
    Unspecified,
}

impl ::std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        // Exhaustive match: adding a variant forces a message to be written for it.
        match *self {
            ParseError::Unspecified => f.write_str("failed to parse S-expression"),
        }
    }
}

impl ::std::error::Error for ParseError {}
17
/// A parsed S-expression: either a single atomic value or a list of nested
/// expressions.
///
/// Variant order is significant because `PartialOrd` is derived from it.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum Sexp {
    /// A symbol, stored by name.
    Sym(String),
    /// A string literal, stored as its contents.
    Str(String),
    /// A single character.
    Char(char),
    /// An integer: a number written without a decimal point.
    Int(i64),
    /// A float: intended for numbers written with a decimal point.
    ///
    /// NOTE(review): the `number` rule in this file only ever builds `Int`;
    /// confirm where (or whether) floats are parsed.
    Float(f64),
    /// A list of subexpressions.
    List(Vec<Sexp>),
}
35
36 pub fn parse_one(input: &str) -> Result<Sexp, ParseError> {
37 match do_parse!(input,
38 exp: sexp >>
39 opt!(complete!(multispace)) >>
40 eof!() >>
41 (exp)) {
42 IResult::Done(_, res) => Ok(res),
43 _ => Err(ParseError::Unspecified),
44 }
45 }
46
47 pub fn parse(input: &str) -> Result<Vec<Sexp>, ParseError> {
48 let parse_res: IResult<&str, Vec<Sexp>> =
49 do_parse!(input,
50 exps: many1!(complete!(sexp)) >>
51 opt!(complete!(multispace)) >>
52 eof!() >>
53 (exps));
54 match parse_res {
55 IResult::Done(_, res) => Ok(res),
56 e => {
57 println!("{:#?}", e);
58 Err(ParseError::Unspecified)
59 }
60 }
61 }
62
63 named!(sexp<&str, Sexp>,
64 alt_complete!(
65 list => { |list| Sexp::List(list) }
66 | atom
67 )
68 );
69
70 named!(list<&str, Vec<Sexp> >,
71 do_parse!(
72 opt!(multispace) >>
73 tag_s!("(") >>
74 entries: many0!(sexp) >>
75 opt!(multispace) >>
76 tag_s!(")") >>
77 (entries)
78 )
79 );
80
81 named!(atom<&str, Sexp>, alt_complete!(string | symbol | number | character));
82
83 named!(string<&str, Sexp>,
84 do_parse!(
85 opt!(multispace) >>
86 tag_s!(r#"""#) >>
87 contents: take_until_s!(r#"""#) >>
88 tag_s!(r#"""#) >>
89 (Sexp::Str(contents.into()))
90 )
91 );
92
93 named!(symbol<&str, Sexp>,
94 do_parse!(
95 opt!(multispace) >>
96 peek!(valid_ident_prefix) >>
97 name: take_while1_s!(valid_ident_char) >>
98 (Sexp::Sym(name.into()))
99 )
100 );
101
102 fn valid_ident_prefix(ident: &str) -> IResult<&str, ()> {
103 match ident.chars().next() {
104 Some(c) if c != '#' && !c.is_digit(10) && valid_ident_char(c) =>
105 IResult::Done(&ident[1..], ()),
106 None => IResult::Incomplete(nom::Needed::Unknown),
107 _ => IResult::Error(nom::ErrorKind::Custom(0)),
108 }
109 }
110
/// Reports whether `c` may appear inside an identifier: anything except
/// whitespace, a double quote, or a parenthesis.
fn valid_ident_char(c: char) -> bool {
    match c {
        '"' | '(' | ')' => false,
        other => !other.is_whitespace(),
    }
}
114
115 named!(number<&str, Sexp>,
116 do_parse!(
117 opt!(multispace) >>
118 integral: digit >>
119 peek!(not!(valid_ident_prefix)) >>
120 (Sexp::Int(integral.chars().fold(0, |i, c| i * 10 + c as i64 - '0' as i64)))
121 )
122 );
123
124 named!(character<&str, Sexp>,
125 do_parse!(
126 opt!(multispace) >>
127 tag_s!(r#"#\"#) >>
128 character: take_s!(1) >>
129 (Sexp::Char(character.chars().next().unwrap()))
130 )
131 );
132
#[cfg(test)]
#[test]
fn test_parse_number() {
    // Digit runs, optionally preceded by whitespace, are consumed in full.
    let fully_consumed = [("0", 0), ("123", 123), ("0123456789", 123456789), (" 42", 42)];
    for &(src, value) in fully_consumed.iter() {
        assert_eq!(number(src), IResult::Done("", Sexp::Int(value)));
    }

    // Digits running straight into an identifier character are not a number,
    // while a delimiter ends the number and is left unconsumed.
    assert!(number(" 42a").is_err());
    assert_eq!(number("13()"), IResult::Done("()", Sexp::Int(13)));

    // Inputs that do not start with a digit at all.
    for &src in ["abc", "()"].iter() {
        assert!(number(src).is_err());
    }
    assert!(number("").is_incomplete());
}
148
#[cfg(test)]
#[test]
fn test_parse_ident() {
    // (input, unconsumed remainder, expected symbol name)
    let accepted = [
        ("+", "", "+"),
        (" nil?", "", "nil?"),
        (" ->socket", "", "->socket"),
        ("fib(", "(", "fib"),
    ];
    for &(src, rest, name) in accepted.iter() {
        assert_eq!(symbol(src), IResult::Done(rest, Sexp::Sym(name.into())));
    }

    // We reserve #foo for the implementation to do as it wishes, and a
    // symbol can start with neither a digit nor a parenthesis.
    for &src in ["#hi", "0", "()"].iter() {
        assert!(symbol(src).is_err());
    }
    assert!(symbol("").is_incomplete());
}
164
#[cfg(test)]
#[test]
fn test_parse_string() {
    let simple = string("\"hello\"");
    assert_eq!(simple, IResult::Done("", Sexp::Str("hello".into())));

    // Leading whitespace is skipped and an embedded newline is kept as-is.
    let multiline = string(" \"this is a nice string\nwith 0123 things in it\"");
    let expected = Sexp::Str("this is a nice string\nwith 0123 things in it".into());
    assert_eq!(multiline, IResult::Done("", expected));

    // A string with no closing quote is rejected.
    assert!(string("\"hi").is_err());
}
175
#[cfg(test)]
#[test]
fn test_parse_char() {
    // Whatever single character follows `#\` is the value, including a quote,
    // a space, or a backslash.
    let accepted = [("#\\\"", '"'), ("#\\ ", ' '), (" #\\\\", '\\')];
    for &(src, value) in accepted.iter() {
        assert_eq!(character(src), IResult::Done("", Sexp::Char(value)));
    }

    assert!(character("#").is_incomplete());
    assert!(character("a").is_err());
}
186
#[cfg(test)]
#[test]
fn test_parse_list() {
    let empty: Vec<Sexp> = Vec::new();
    assert_eq!(list("()"), IResult::Done("", empty));

    let single = vec![Sexp::Int(1)];
    assert_eq!(list("(1)"), IResult::Done("", single));

    // Whitespace around the parentheses and between the elements is ignored.
    let mixed = vec![Sexp::Int(1), Sexp::Int(2), Sexp::Int(3), Sexp::Sym("a".into())];
    assert_eq!(list(" ( 1 2 3 a )"), IResult::Done("", mixed));
}
199
#[cfg(test)]
#[test]
fn test_parse_only_one() {
    // `parse_one` must reject input holding more than a single expression.
    let outcome = parse_one("1 2");
    assert!(outcome.is_err());
}
205
#[cfg(test)]
#[test]
fn test_parse_expression() {
    let source = r#"
(def (main)
  (print (str "say " #\" "Hello, World" #\" " today!")))
"#;

    // Build the expected tree from the innermost call outwards.
    let str_call = Sexp::List(vec![
        Sexp::Sym("str".into()),
        Sexp::Str("say ".into()),
        Sexp::Char('"'),
        Sexp::Str("Hello, World".into()),
        Sexp::Char('"'),
        Sexp::Str(" today!".into()),
    ]);
    let print_call = Sexp::List(vec![Sexp::Sym("print".into()), str_call]);
    let signature = Sexp::List(vec![Sexp::Sym("main".into())]);
    let expected = Sexp::List(vec![Sexp::Sym("def".into()), signature, print_call]);

    assert_eq!(parse_one(source), Ok(expected));
}
231
#[cfg(test)]
#[test]
fn test_parse_multi() {
    // Several whitespace-separated expressions come back in source order.
    let expected: Vec<Sexp> = (1..4).map(Sexp::Int).collect();
    assert_eq!(parse(" 1 2 3 "), Ok(expected));
}
237 }