Witch of Git - ess/blob - src/lib.rs
Add parsing for characters
[ess] / src / lib.rs
1 //! A lightweight S-expression parser intended for language implementation.
2
3 #![warn(missing_docs)]
4 #![deny(unsafe_code)]
5
6 #[macro_use]
7 extern crate nom;
8
9 use nom::{digit, multispace, IResult};
10
/// An atomic (non-list) value of an S-expression.
///
/// `PartialOrd` is derived, so values of different variants order by the
/// position of their variant in this declaration.
#[derive(Debug, PartialEq, Clone, PartialOrd)]
pub enum Atom {
    /// A value representing a symbol: a bare identifier such as `+` or `nil?`.
    Sym(String),
    /// A value representing a string literal.
    Str(String),
    /// A value representing a single character.
    Char(char),
    /// A value representing an integer. Any number containing no decimal point
    /// will be parsed as an `Int`.
    Int(i64),
    /// A value representing a float. Any number containing a decimal point will
    /// be parsed as a `Float`.
    Float(f64),
}
26
27 #[derive(Debug, PartialEq, Clone, PartialOrd)]
28 pub enum Sexp {
29 /// A wrapper around the atom type
30 Atom {
31 atom: Atom,
32 },
33 /// A list of subexpressions
34 List {
35 list: Vec<Sexp>,
36 }
37 }
38
39 pub fn parse(input: &str) -> Result<Sexp, ()> {
40 match sexp(input) {
41 IResult::Done(_, res) => Ok(res),
42 _ => Err(()),
43 }
44 }
45
46 named!(sexp<&str, Sexp>,
47 alt!(
48 list => { |list| Sexp::List { list: list } }
49 | atom => { |atom| Sexp::Atom { atom: atom } }
50 )
51 );
52
53 named!(list<&str, Vec<Sexp> >,
54 do_parse!(
55 opt!(multispace) >>
56 tag_s!("(") >>
57 entries: many0!(sexp) >>
58 opt!(multispace) >>
59 tag_s!(")") >>
60 (entries)
61 )
62 );
63
64 named!(atom<&str, Atom>, alt!(string | symbol | number | character));
65
66 named!(string<&str, Atom>,
67 do_parse!(
68 opt!(multispace) >>
69 tag_s!("\"") >>
70 contents: take_until_s!("\"") >>
71 tag_s!("\"") >>
72 opt!(multispace) >>
73 (Atom::Str(contents.into()))
74 )
75 );
76
77 named!(symbol<&str, Atom>,
78 do_parse!(
79 opt!(multispace) >>
80 peek!(valid_ident_prefix) >>
81 name: take_while1_s!(valid_ident_char) >>
82 (Atom::Sym(name.into()))
83 )
84 );
85
86 fn valid_ident_prefix(ident: &str) -> IResult<&str, ()> {
87 match ident.chars().next() {
88 Some(c) if c != '#' && !c.is_digit(10) && valid_ident_char(c) =>
89 IResult::Done(&ident[1..], ()),
90 None => IResult::Incomplete(nom::Needed::Unknown),
91 _ => IResult::Error(nom::ErrorKind::Custom(0)),
92 }
93 }
94
/// Returns `true` when `c` may appear inside a symbol: anything except
/// whitespace, a double quote, or a parenthesis.
fn valid_ident_char(c: char) -> bool {
    match c {
        '"' | '(' | ')' => false,
        other => !other.is_whitespace(),
    }
}
98
99 named!(number<&str, Atom>,
100 do_parse!(
101 opt!(multispace) >>
102 integral: digit >>
103 peek!(not!(valid_ident_prefix)) >>
104 (Atom::Int(integral.chars().fold(0, |i, c| i * 10 + c as i64 - '0' as i64)))
105 )
106 );
107
108 named!(character<&str, Atom>,
109 do_parse!(
110 opt!(multispace) >>
111 tag_s!("#\\") >>
112 character: take_s!(1) >>
113 (Atom::Char(character.chars().next().unwrap()))
114 )
115 );
116
#[cfg(test)]
#[test]
fn test_parse_number() {
    // (input, unconsumed remainder, expected integer)
    let accepted: [(&str, &str, i64); 5] = [
        ("0", "", 0),
        ("123", "", 123),
        ("0123456789", "", 123456789),
        (" 42", "", 42),
        ("13()", "()", 13),
    ];
    for &(input, rest, value) in accepted.iter() {
        assert_eq!(number(input), IResult::Done(rest, Atom::Int(value)));
    }

    // Digits glued to an identifier character, and non-numeric input, fail.
    for &input in [" 42a", "abc", "()"].iter() {
        assert!(number(input).is_err());
    }

    assert!(number("").is_incomplete());
}
132
#[cfg(test)]
#[test]
fn test_parse_ident() {
    // (input, unconsumed remainder, expected symbol name)
    let accepted: [(&str, &str, &str); 4] = [
        ("+", "", "+"),
        (" nil?", "", "nil?"),
        (" ->socket", "", "->socket"),
        ("fib(", "(", "fib"),
    ];
    for &(input, rest, name) in accepted.iter() {
        assert_eq!(symbol(input), IResult::Done(rest, Atom::Sym(name.into())));
    }

    // We reserve #foo for the implementation to do as it wishes; digits and
    // parentheses cannot start a symbol either.
    for &input in ["#hi", "0", "()"].iter() {
        assert!(symbol(input).is_err());
    }

    assert!(symbol("").is_incomplete());
}
148
#[cfg(test)]
#[test]
fn test_parse_char() {
    // (input, expected character); every case consumes the whole input.
    let accepted: [(&str, char); 3] = [
        ("#\\\"", '"'),
        ("#\\ ", ' '),
        (" #\\\\", '\\'),
    ];
    for &(input, value) in accepted.iter() {
        assert_eq!(character(input), IResult::Done("", Atom::Char(value)));
    }

    assert!(character("#").is_incomplete());
    assert!(character("a").is_err());
}