From 80f4d7a54a81655c0df7e534e988ef11205a7035 Mon Sep 17 00:00:00 2001 From: Cassie Jones Date: Thu, 27 Feb 2020 17:58:19 +0100 Subject: [PATCH] Parse S-expressions into the abstract syntax tree While doing this, we handle name resolution and "upvar" captures. The name resolution context "Env" forms a stack that has a reference to the outer environment at each level. A new level is pushed at each lambda that's introduced. The environment includes a list of "upvars" that have to be copied into the closure, which is updated every time a variable lookup finds the variable in an outer environment instead of the local environment. This process has to use a RefCell because you can't make the lifetimes work out with a chain of mutable references. The parsing aside from that environment stack is relatively straightforward parsing. --- program.vy | 4 +- src/ast.rs | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 16 ++++- 3 files changed, 190 insertions(+), 6 deletions(-) create mode 100644 src/ast.rs diff --git a/program.vy b/program.vy index f226258..9a10f88 100644 --- a/program.vy +++ b/program.vy @@ -1,9 +1,7 @@ (let ([nil (lam (c n) (n))] [cons (lam (x y) (lam (c n) (c x y)))] [fix (lam (f) (f f))] - [if (lam (c t f) ((c t f)))] - [true (lam (t f) t)] - [false (lam (t f) f)]) + [if (lam (c t f) ((c t f)))]) ([fix (lam (recur a b l n) (if (<= n 0) [lam () l] diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..597c6de --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,176 @@ +use ess::{self, Sexp}; +use std::{ + cell::RefCell, + collections::HashMap, + sync::atomic::{AtomicU32, Ordering}, +}; + +#[derive(PartialEq, Eq, Clone, Copy, Debug)] +pub struct Name(u32); + +#[derive(PartialEq, Clone, Debug)] +pub enum Ast { + Let(Vec<(Name, Ast)>, Box), + Lam { + binds: Vec, + upvars: Vec, + body: Box, + }, + App(Vec), + Var(Name), + Num(i64), +} + +struct Env<'e> { + fresh: AtomicU32, + vars: HashMap, + upvars: RefCell>, + up: Option<&'e Env<'e>>, +} + +impl Env<'_> { + fn new() -> Env<'static> { + Env { + fresh: AtomicU32::new(0), + vars: HashMap::new(), + upvars: RefCell::new(HashMap::new()), + up: None, + } + } + + fn wrapping<'e, 'r: 'e>(env: &'r Env<'e>) -> Env<'r> { + Env { + fresh: AtomicU32::new(env.fresh.load(Ordering::Relaxed)), + vars: HashMap::new(), + upvars: RefCell::new(HashMap::new()), + up: Some(env), + } + } + + fn fresh(&mut self, text: &str) -> Name { + let name = Name(*self.fresh.get_mut()); + self.vars.insert(text.into(), name); + *self.fresh.get_mut() += 1; + name + } + + fn _get(&self, name: &str) -> Option { + if let Some(&resolved) = self.vars.get(name) { + return Some(resolved); + } + self.up.and_then(|up| up._get(name)).map(|resolved| { + self.upvars.borrow_mut().insert(name.into(), resolved); + resolved + }) + } + + fn lookup(&mut self, name: &str) -> Option { + if let Some(&resolved) = self.vars.get(name) { + return Some(resolved); + } + self.up.and_then(|up| up._get(name)).map(|resolved| { + self.vars.insert(name.into(), resolved); + self.upvars.get_mut().insert(name.into(), resolved); + resolved + }) + } + + fn upvars(&self) -> Vec { + self.upvars.borrow().values().map(|&x| x).collect() + } +} + +impl Drop for Env<'_> { + fn drop(&mut self) { + if let Some(up) = &self.up { + up.fresh.store(*self.fresh.get_mut(), Ordering::Relaxed); + } + } +} + +impl Ast { + pub fn parse(sexp: &Sexp) -> Result { + Ast::from_sexp(&mut Env::new(), sexp) + } + + fn from_sexp<'r, 'e: 'r>(env: &'r mut Env<'e>, sexp: &Sexp) -> Result { + match sexp { + Sexp::List(xs, _) => match xs.first() { + Some(Sexp::Sym(s, _)) if s == "let" => { + if xs.len() > 3 { + return Err("too many body expressions in 'let'"); + } + let binds = match xs.get(1) { + Some(Sexp::List(binds, _)) => binds + .iter() + .map(|bind| match bind { + Sexp::List(bind, _) => { + if bind.len() != 2 { + return Err("binding lists must have two elements"); + } + // We do the body before the name to avoid being "letrec" + let body = Ast::from_sexp(env, &bind[1])?; + let name = match &bind[0] { + Sexp::Sym(name, _) => env.fresh(&name), + _ => { + return Err("binding lists must start with a variable") + } + }; + Ok((name, body)) + } + _ => Err("bindings must be lists"), + }) + .collect::>()?, + Some(_) => return Err("the binding in 'let' is not a list"), + None => return Err("no binding form in 'let'"), + }; + let body = match xs.get(2) { + Some(x) => Box::new(Ast::from_sexp(env, x)?), + None => return Err("no body expression in 'let'"), + }; + Ok(Ast::Let(binds, body)) + } + Some(Sexp::Sym(s, _)) if s == "lam" => { + if xs.len() > 3 { + return Err("too many body expressions in 'lam'"); + } + let mut env = Env::wrapping(env); + let binds = match xs.get(1) { + Some(Sexp::List(names, _)) => names + .iter() + .map(|name| match name { + Sexp::Sym(s, _) => Ok(env.fresh(&s)), + _ => Err("non-symbol in 'lam' arguments"), + }) + .collect::>()?, + Some(_) => return Err("'lam' arguments weren't a list"), + None => return Err("no arguments in 'lam'"), + }; + let body = match xs.get(2) { + Some(x) => Box::new(Ast::from_sexp(&mut env, x)?), + None => return Err("no body expression in 'lam'"), + }; + let upvars = env.upvars(); + Ok(Ast::Lam { + binds, + upvars, + body, + }) + } + Some(_) => Ok(Ast::App( + xs.iter() + .map(|x| Ast::from_sexp(env, x)) + .collect::>()?, + )), + None => Err("expected a non-empty list for an application"), + }, + Sexp::Sym(s, _) if s == "let" => Err("unexpected id 'let'"), + Sexp::Sym(s, _) if s == "lam" => Err("unexpected id 'lam'"), + Sexp::Sym(s, _) => Ok(Ast::Var( + env.lookup(s).ok_or("variable used but not defined")?, + )), + Sexp::Int(i, _) => Ok(Ast::Num(*i)), + _ => Err("unexpected s-expression type"), + } + } +} diff --git a/src/main.rs b/src/main.rs index 4f09f59..3328b00 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,9 @@ -use ess::{self, Sexp, parser::ParseError}; +use ess::{self, parser::ParseError, Sexp}; use std::io::{self, Read}; +mod ast; +use ast::Ast; + fn parse(input: &str) -> Result { let (sexp, rest) = ess::parse_one(input)?; if !rest.trim().is_empty() { @@ -13,13 +16,20 @@ fn main() -> io::Result<()> { let stdin = std::io::stdin(); let mut text = String::new(); stdin.lock().read_to_string(&mut text)?; - let item = match parse(&text) { + let sexp = match parse(&text) { Ok(item) => item, Err(err) => { println!("{:?}", err); std::process::exit(1); } }; - println!("{:?}", item); + let ast = match Ast::parse(&sexp) { + Ok(ast) => ast, + Err(err) => { + println!("{}", err); + std::process::exit(1); + } + }; + println!("{:#?}", ast); Ok(()) } -- 2.43.2