From 9706509595c1ed1e6a0bdbe88b50781953df2b22 Mon Sep 17 00:00:00 2001 From: Cassie Jones Date: Sat, 11 Jan 2020 06:10:33 -0500 Subject: [PATCH] Add an instruction parser --- toolchain/Cargo.lock | 96 +++++++++++++++++++ toolchain/Cargo.toml | 1 + toolchain/src/inst.rs | 3 + toolchain/src/inst/parse.rs | 179 ++++++++++++++++++++++++++++++++++++ toolchain/src/inst/test.rs | 43 +++++++++ 5 files changed, 322 insertions(+) create mode 100644 toolchain/src/inst/parse.rs create mode 100644 toolchain/src/inst/test.rs diff --git a/toolchain/Cargo.lock b/toolchain/Cargo.lock index e24cc9f..130066f 100644 --- a/toolchain/Cargo.lock +++ b/toolchain/Cargo.lock @@ -1,5 +1,101 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "arrayvec" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" +dependencies = [ + "nodrop", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "lexical-core" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304bccb228c4b020f3a4835d247df0a02a7c4686098d4167762cfbbe4c5cb14" +dependencies = [ + "arrayvec", + "cfg-if", + "rustc_version", + "ryu", + "static_assertions", +] + +[[package]] +name = "memchr" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223" + +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + +[[package]] +name = "nom" +version = "5.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c433f4d505fe6ce7ff78523d2fa13a0b9f2690e181fc26168bcbe5ccc5d14e07" +dependencies = [ + "lexical-core", + "memchr", + "version_check", +] + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + +[[package]] +name = "static_assertions" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3" + [[package]] name = "toolchain" version = "0.1.0" +dependencies = [ + "nom", +] + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" diff --git a/toolchain/Cargo.toml b/toolchain/Cargo.toml index e910a73..c06dcab 100644 --- a/toolchain/Cargo.toml +++ b/toolchain/Cargo.toml @@ -7,3 +7,4 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +nom = "5.1.0" diff --git a/toolchain/src/inst.rs b/toolchain/src/inst.rs index cbb5292..15c793a 100644 --- a/toolchain/src/inst.rs +++ 
b/toolchain/src/inst.rs
@@ -1,5 +1,8 @@
 pub mod decode;
 pub mod encode;
+pub mod parse;
+#[cfg(test)]
+pub mod test;
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 #[repr(u8)]
diff --git a/toolchain/src/inst/parse.rs b/toolchain/src/inst/parse.rs
new file mode 100644
index 0000000..74533cf
--- /dev/null
+++ b/toolchain/src/inst/parse.rs
@@ -0,0 +1,209 @@
+use super::{AddImm, AluI, Data, Inst, Op1, OpI3, OpR, Reg, Special, U3};
+use nom::{
+    branch::alt,
+    bytes::complete::tag_no_case,
+    character::complete::{char, digit1, one_of, space1},
+    combinator::{complete, map, map_opt, map_res, opt, recognize, value},
+    sequence::{pair, preceded},
+    IResult,
+};
+use std::convert::TryFrom;
+
+/// Parses one instruction from the start of `s`.
+///
+/// Returns `None` when no instruction pattern matches. Mnemonics are matched
+/// case-insensitively.
+///
+/// NOTE(review): `complete` only maps an incomplete-input result to an error;
+/// nothing here checks that all of `s` was consumed, so text following a valid
+/// instruction appears to be ignored. Confirm whether that is intended.
+pub fn parse_inst(s: &str) -> Option<Inst> {
+    complete(inst())(s).ok().map(|x| x.1)
+}
+
+/// Combines a tuple of parsers with `alt` and boxes the resulting parser.
+///
+/// `inst` uses this to split its alternatives into several smaller groups.
+pub fn box_alt<
+    'a,
+    I: Clone + 'a,
+    O: 'a,
+    E: nom::error::ParseError<I> + 'a,
+    List: nom::branch::Alt<I, O, E> + 'a,
+>(
+    l: List,
+) -> Box<dyn Fn(I) -> IResult<I, O, E> + 'a> {
+    Box::new(alt(l))
+}
+
+/// Builds the parser that recognises every supported instruction form.
+fn inst<'s>() -> impl Fn(&'s str) -> IResult<&'s str, Inst> {
+    alt((
+        box_alt((
+            fixed("trap", Inst::Trap),
+            fixed("JABS", Inst::JAbs),
+            fixed("CABS", Inst::CAbs),
+            fixed("JOFF", Inst::JOff),
+            fixed("COFF", Inst::COff),
+        )),
+        box_alt((
+            with_reg("SWAP", |r| Inst::Swap(r)),
+            with_reg("GETR", |r| Inst::GetR(r)),
+            with_reg("SETR", |r| Inst::SetR(r)),
+        )),
+        box_alt((
+            fixed("GET1", Inst::Get(Special::Data(Data::D1))),
+            fixed("GET2", Inst::Get(Special::Data(Data::D2))),
+            fixed("GETC", Inst::Get(Special::Code)),
+            fixed("SET1", Inst::Set(Special::Data(Data::D1))),
+            fixed("SET2", Inst::Set(Special::Data(Data::D2))),
+            fixed("SETC", Inst::Set(Special::Code)),
+            fixed("ZERO", Inst::Alu1(Op1::Zero)),
+            fixed("LSL1", Inst::Alu1(Op1::Lsl1)),
+            fixed("LSR1", Inst::Alu1(Op1::Lsr1)),
+            fixed("ASR1", Inst::Alu1(Op1::Asr1)),
+            fixed("INCR", Inst::Alu1(Op1::Incr)),
+            fixed("DECR", Inst::Alu1(Op1::Decr)),
+            fixed("COMP", Inst::Alu1(Op1::Comp)),
+            fixed("NEGT", Inst::Alu1(Op1::Negt)),
+        )),
+        box_alt((
+            with_reg("ISLT", |r| Inst::IsLt(r)),
+            with_reg("ADDR", |r| Inst::AluR(OpR::Add, r)),
+            with_reg("SUBR", |r| Inst::AluR(OpR::Sub, r)),
+            with_reg("ANDR", |r| Inst::AluR(OpR::And, r)),
+            with_reg("IORR", |r| Inst::AluR(OpR::Ior, r)),
+            with_reg("XORR", |r| Inst::AluR(OpR::Xor, r)),
+            with_reg("LSLR", |r| Inst::AluR(OpR::Lsl, r)),
+            with_reg("LSRR", |r| Inst::AluR(OpR::Lsr, r)),
+            with_reg("ASRR", |r| Inst::AluR(OpR::Asr, r)),
+        )),
+        box_alt((
+            with_reg_bang("LD1U", |r, b| Inst::LdD(Data::D1, r, b)),
+            with_reg_bang("ST1U", |r, b| Inst::StD(Data::D1, r, b)),
+            with_reg_bang("LD2U", |r, b| Inst::LdD(Data::D2, r, b)),
+            with_reg_bang("ST2U", |r, b| Inst::StD(Data::D2, r, b)),
+        )),
+        box_alt((
+            with_imm8("ANDI", |imm| Inst::AluI(AluI::And(imm))),
+            with_imm8("IORI", |imm| Inst::AluI(AluI::Ior(imm))),
+            with_imm8("XORI", |imm| Inst::AluI(AluI::Xor(imm))),
+        )),
+        with_add_imm("ADDI", |imm| Inst::AluI(AluI::Add(imm))),
+        box_alt((
+            with_imm3("ROLI", |imm| Inst::AluI(AluI::Compact(OpI3::Rol, imm))),
+            with_imm3("LSLI", |imm| Inst::AluI(AluI::Compact(OpI3::Lsl, imm))),
+            with_imm3("LSRI", |imm| Inst::AluI(AluI::Compact(OpI3::Lsr, imm))),
+            with_imm3("ASRI", |imm| Inst::AluI(AluI::Compact(OpI3::Asr, imm))),
+            with_imm3("CLRI", |imm| Inst::AluI(AluI::Compact(OpI3::Clr, imm))),
+            with_imm3("SETI", |imm| Inst::AluI(AluI::Compact(OpI3::Set, imm))),
+            with_imm3("TOGI", |imm| Inst::AluI(AluI::Compact(OpI3::Tog, imm))),
+            with_imm3("EXTI", |imm| Inst::AluI(AluI::Compact(OpI3::Ext, imm))),
+        )),
+        box_alt((
+            with_off("BEZI", |off| Inst::BEzI(off)),
+            with_off("JOFI", |off| Inst::JOfI(off)),
+            with_off("COFI", |off| Inst::COfI(off)),
+        )),
+    ))
+}
+
+/// Parser for an operand-less mnemonic `tag` (case-insensitive) yielding `inst`.
+fn fixed<'s>(tag: &'s str, inst: Inst) -> impl Fn(&'s str) -> IResult<&'s str, Inst> {
+    value(inst, tag_no_case(tag))
+}
+
+/// Parser for a register operand: `r` (either case) followed by decimal digits.
+///
+/// Fails if the number does not parse or `Reg::new` rejects it.
+fn reg<'s>() -> impl Fn(&'s str) -> IResult<&'s str, Reg> {
+    let raw = preceded(tag_no_case("r"), digit1);
+    let num = map_res(raw, |x: &str| x.parse());
+    map_opt(num, Reg::new)
+}
+
+/// Parser for an immediate: `#`, an optional `+`/`-` sign, then decimal digits.
+///
+/// Fails if the value does not fit in `i16`.
+fn imm<'s>() -> impl Fn(&'s str) -> IResult<&'s str, i16> {
+    let text = recognize(pair(opt(one_of("+-")), digit1));
+    map_res(preceded(char('#'), text), |x: &str| x.parse())
+}
+
+/// Parser for an immediate that must fit in `u8`.
+fn imm8<'s>() -> impl Fn(&'s str) -> IResult<&'s str, u8> {
+    map_res(imm(), TryFrom::try_from)
+}
+
+/// Parser for an immediate that converts to `AddImm::new`'s input and is accepted by it.
+fn add_imm<'s>() -> impl Fn(&'s str) -> IResult<&'s str, AddImm> {
+    map_opt(map_res(imm(), TryFrom::try_from), AddImm::new)
+}
+
+/// Parser for an immediate that converts to `U3::new`'s input and is accepted by it.
+fn imm3<'s>() -> impl Fn(&'s str) -> IResult<&'s str, U3> {
+    map_opt(map_res(imm(), TryFrom::try_from), U3::new)
+}
+
+/// Parser for an offset immediate that must fit in `i8`.
+fn imm_off<'s>() -> impl Fn(&'s str) -> IResult<&'s str, i8> {
+    map_res(imm(), TryFrom::try_from)
+}
+
+/// Parser for `tag`, spaces/tabs, then a register; `make_inst` builds the result.
+fn with_reg<'s>(
+    tag: &'s str,
+    make_inst: impl Fn(Reg) -> Inst,
+) -> impl Fn(&'s str) -> IResult<&'s str, Inst> {
+    let reg = preceded(pair(tag_no_case(tag), space1), reg());
+    map(reg, make_inst)
+}
+
+/// Like `with_reg`, but also accepts an optional `!` directly after the
+/// register; `make_inst` receives `true` when it is present.
+fn with_reg_bang<'s>(
+    tag: &'s str,
+    make_inst: impl Fn(Reg, bool) -> Inst,
+) -> impl Fn(&'s str) -> IResult<&'s str, Inst> {
+    let reg_bang = pair(
+        preceded(pair(tag_no_case(tag), space1), reg()),
+        map(opt(char('!')), |x| x.is_some()),
+    );
+    map(reg_bang, move |(r, b)| make_inst(r, b))
+}
+
+/// Parser for `tag`, spaces/tabs, then a `u8` immediate (see `imm8`).
+fn with_imm8<'s>(
+    tag: &'s str,
+    make_inst: impl Fn(u8) -> Inst,
+) -> impl Fn(&'s str) -> IResult<&'s str, Inst> {
+    let imm = preceded(pair(tag_no_case(tag), space1), imm8());
+    map(imm, make_inst)
+}
+
+/// Parser for `tag`, spaces/tabs, then an `AddImm` immediate (see `add_imm`).
+fn with_add_imm<'s>(
+    tag: &'s str,
+    make_inst: impl Fn(AddImm) -> Inst,
+) -> impl Fn(&'s str) -> IResult<&'s str, Inst> {
+    let imm = preceded(pair(tag_no_case(tag), space1), add_imm());
+    map(imm, make_inst)
+}
+
+/// Parser for `tag`, spaces/tabs, then a `U3` immediate (see `imm3`).
+fn with_imm3<'s>(
+    tag: &'s str,
+    make_inst: impl Fn(U3) -> Inst,
+) -> impl Fn(&'s str) -> IResult<&'s str, Inst> {
+    let imm = preceded(pair(tag_no_case(tag), space1), imm3());
+    map(imm, make_inst)
+}
+
+/// Parser for `tag`, spaces/tabs, then an `i8` offset (see `imm_off`).
+fn with_off<'s>(
+    tag: &'s str,
+    make_inst: impl Fn(i8) -> Inst,
+) -> impl Fn(&'s str) -> IResult<&'s str, Inst> {
+    let imm = preceded(pair(tag_no_case(tag), space1), imm_off());
+    map(imm, make_inst)
+}
diff --git a/toolchain/src/inst/test.rs 
b/toolchain/src/inst/test.rs
new file mode 100644
index 0000000..9086637
--- /dev/null
+++ b/toolchain/src/inst/test.rs
@@ -0,0 +1,47 @@
+use super::{parse, encode::Encode};
+use std::io::Cursor;
+
+/// Renders encoded bytes as nibble-grouped binary, e.g. `0000_0100`.
+///
+/// Only one- and two-byte inputs are handled; any other length panics.
+fn display(buf: &[u8]) -> String {
+    match buf.len() {
+        1 => format!("{:04b}_{:04b}", buf[0] >> 4, buf[0] & 0xf),
+        2 => format!("{:04b}_{:04b} {:04b}_{:04b}", buf[0] >> 4, buf[0] & 0xf, buf[1] >> 4, buf[1] & 0xf),
+        n => unreachable!("Shouldn't be instructions of length {}", n),
+    }
+}
+
+// For each `text => bits` pair: parse `text`, encode the result into a
+// two-byte buffer, and compare the bytes written (via `display`) to `bits`.
+macro_rules! test_parse {
+    ($($txt:expr => $res:expr),* $(,)?) => {
+        $(
+            let mut buf = [0, 0];
+            let mut cursor = Cursor::new(&mut buf[..]);
+            let parsed = parse::parse_inst($txt).expect(&format!("parsed {:?}", $txt));
+            parsed.encode(&mut cursor).expect(&format!("encoded {:x?}", parsed));
+            let pos = cursor.position() as usize;
+            assert_eq!($res, &display(&buf[..pos]));
+        )*
+    }
+}
+
+#[test]
+fn trap() {
+    test_parse! {
+        "TRAP" => "0000_0000",
+        "trap" => "0000_0000",
+    }
+}
+
+#[test]
+fn jump_call() {
+    test_parse! {
+        "JABS" => "0000_0100",
+        "CABS" => "0000_0101",
+        "JOFF" => "0000_0110",
+        "COFF" => "0000_0111",
+    }
+}
+
-- 
2.43.2