Browse Source

switch to using a string interner

Getty Ritter 2 years ago
parent
commit
1f9474548a
8 changed files with 86 additions and 23 deletions
  1. 32 0
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 2 1
      build.rs
  4. 26 2
      src/ast.rs
  5. 6 5
      src/grammar.lalrpop
  6. 14 13
      src/interp.rs
  7. 3 1
      src/repl.rs
  8. 2 1
      tools/regenerate.rs

+ 32 - 0
Cargo.lock

@@ -2,6 +2,17 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "ahash"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
+dependencies = [
+ "getrandom",
+ "once_cell",
+ "version_check",
+]
+
 [[package]]
 name = "aho-corasick"
 version = "0.7.18"
@@ -298,6 +309,9 @@ name = "hashbrown"
 version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
+dependencies = [
+ "ahash",
+]
 
 [[package]]
 name = "hermit-abi"
@@ -484,6 +498,7 @@ dependencies = [
  "rand",
  "regex",
  "rustyline",
+ "string-interner",
  "vergen",
 ]
 
@@ -838,6 +853,12 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012"
 
+[[package]]
+name = "serde"
+version = "1.0.130"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913"
+
 [[package]]
 name = "siphasher"
 version = "0.3.7"
@@ -856,6 +877,17 @@ version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d44a3643b4ff9caf57abcee9c2c621d6c03d9135e0d8b589bd9afb5992cb176a"
 
+[[package]]
+name = "string-interner"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e2531d8525b29b514d25e275a43581320d587b86db302b9a7e464bac579648"
+dependencies = [
+ "cfg-if",
+ "hashbrown",
+ "serde",
+]
+
 [[package]]
 name = "string_cache"
 version = "0.8.2"

+ 1 - 0
Cargo.toml

@@ -25,6 +25,7 @@ lalrpop-util = { version = "*", features = ["lexer"] }
 logos = "*"
 rustyline = "*"
 ansi_term = "*"
+string-interner = "*"
 
 [build-dependencies]
 vergen = "*"

+ 2 - 1
build.rs

@@ -30,9 +30,10 @@ fn assert_eq(x: &str, y: &str) {
 const TEST_TEMPLATE: &str = "
 #[test]
 fn test_%PREFIX%() {
+  let mut ast = crate::ast::ASTArena::new();
   let source = include_str!(\"%ROOT%/tests/%PREFIX%.matzo\");
   let lexer = lexer::tokens(source);
-  let ast = grammar::StmtsParser::new().parse(lexer);
+  let ast = grammar::StmtsParser::new().parse(&mut ast, lexer);
   assert!(ast.is_ok());
   let ast = ast.unwrap();
 

+ 26 - 2
src/ast.rs

@@ -1,4 +1,28 @@
-type Name = String;
+pub type Name = string_interner::DefaultSymbol;
+
+pub struct ASTArena {
+    strings: string_interner::StringInterner,
+}
+
+impl ASTArena {
+    pub fn new() -> ASTArena {
+        ASTArena {
+            strings: string_interner::StringInterner::new(),
+        }
+    }
+
+    pub fn add_string(&mut self, s: &str) -> Name {
+        self.strings.get_or_intern(s)
+    }
+}
+
+impl std::ops::Index<string_interner::DefaultSymbol> for ASTArena {
+    type Output = str;
+
+    fn index(&self, sf: string_interner::DefaultSymbol) -> &str {
+        self.strings.resolve(sf).unwrap()
+    }
+}
 
 #[derive(Debug, Clone)]
 pub enum Stmt {
@@ -51,6 +75,6 @@ impl Choice {
 #[derive(Debug, Clone)]
 pub enum Literal {
     Str(String),
-    Atom(String),
+    Atom(Name),
     Num(i64),
 }

+ 6 - 5
src/grammar.lalrpop

@@ -1,7 +1,7 @@
 use crate::ast::*;
 use crate::lexer::*;
 
-grammar<'input>;
+grammar<'input>(ast: &mut ASTArena);
 
 extern {
     type Location = usize;
@@ -47,11 +47,12 @@ pub Stmts: Vec<Stmt> = {
 pub Stmt: Stmt = {
     "puts" <Expr> => Stmt::Puts(<>),
     <Name> ":=" <Expr> => Stmt::Assn(<>),
-    <name:Name> "::=" <strs:(<Name>)*> => Stmt::LitAssn(name, strs),
+    <name:Name> "::=" <strs:(<"var">)*> =>
+        Stmt::LitAssn(name, strs.iter().map(|x| x.to_string()).collect()),
 };
 
-pub Name: String = {
-    "var" => <>.to_owned(),
+pub Name: Name = {
+    "var" => ast.add_string(<>),
 };
 
 pub Expr: Expr = {
@@ -118,5 +119,5 @@ pub Pat: Pat = {
 pub Literal: Literal = {
     "num" => Literal::Num(<>),
     "str" => Literal::Str(<>),
-    "atom" => Literal::Atom(<>.to_owned()),
+    "atom" => Literal::Atom(ast.add_string(<>)),
 };

+ 14 - 13
src/interp.rs

@@ -33,7 +33,7 @@ impl Value {
     fn with_str<U>(&self, f: impl FnOnce(&str) -> U) -> U {
         match self {
             Value::Lit(Literal::Str(s)) => f(s),
-            Value::Lit(Literal::Atom(s)) => f(s),
+            Value::Lit(Literal::Atom(s)) => f(&format!("{:?}", s)),
             Value::Lit(Literal::Num(n)) => f(&format!("{}", n)),
             Value::Tup(values) => {
                 let mut buf = String::new();
@@ -142,7 +142,8 @@ enum NamedItem {
 }
 
 pub struct State {
-    scope: HashMap<String, NamedItem>,
+    ast: ASTArena,
+    scope: HashMap<Name, NamedItem>,
     rand: rand::rngs::ThreadRng,
     parser: crate::grammar::StmtsParser,
 }
@@ -159,17 +160,18 @@ impl State {
             scope: HashMap::new(),
             rand: rand::thread_rng(),
             parser: crate::grammar::StmtsParser::new(),
+            ast: ASTArena::new(),
         };
         for builtin in BUILTINS {
-            s.scope
-                .insert(builtin.name.to_string(), NamedItem::Builtin(builtin));
+            let sym = s.ast.add_string(builtin.name);
+            s.scope.insert(sym, NamedItem::Builtin(builtin));
         }
         s
     }
 
     pub fn run(&mut self, src: &str) -> Result<(), Error> {
         let lexed = crate::lexer::tokens(src);
-        let stmts = self.parser.parse(lexed)?;
+        let stmts = self.parser.parse(&mut self.ast, lexed)?;
         for stmt in stmts {
             self.execute(&stmt)?;
         }
@@ -178,12 +180,12 @@ impl State {
 
     pub fn run_repl(&mut self, src: &str) -> Result<(), Error> {
         let lexed = crate::lexer::tokens(src);
-        let stmts = match self.parser.parse(lexed) {
+        let stmts = match self.parser.parse(&mut self.ast, lexed) {
             Ok(stmts) => stmts,
             Err(err) => {
                 let with_puts = format!("puts {}", src);
                 let lexed = crate::lexer::tokens(&with_puts);
-                if let Ok(stmts) = self.parser.parse(lexed) {
+                if let Ok(stmts) = self.parser.parse(&mut self.ast, lexed) {
                     stmts
                 } else {
                     return Err(err.into());
@@ -199,8 +201,8 @@ impl State {
     pub fn autocomplete(&self, fragment: &str, at_beginning: bool) -> Vec<String> {
         let mut possibilities = Vec::new();
         for name in self.scope.keys() {
-            if name.starts_with(fragment) {
-                possibilities.push(name.clone());
+            if self.ast[*name].starts_with(fragment) {
+                possibilities.push(self.ast[*name].to_string());
             }
         }
         if at_beginning && "puts".starts_with(fragment) {
@@ -216,8 +218,7 @@ impl State {
                 println!("{}", val.to_string());
             }
             Stmt::Assn(name, expr) => {
-                self.scope
-                    .insert(name.to_string(), NamedItem::Expr(expr.clone()));
+                self.scope.insert(*name, NamedItem::Expr(expr.clone()));
             }
             Stmt::LitAssn(name, strs) => {
                 let choices = strs
@@ -228,7 +229,7 @@ impl State {
                     })
                     .collect();
                 self.scope
-                    .insert(name.to_string(), NamedItem::Expr(Expr::Chc(choices)));
+                    .insert(*name, NamedItem::Expr(Expr::Chc(choices)));
             }
             _ => bail!("unimplemented"),
         }
@@ -242,7 +243,7 @@ impl State {
                 let e = match self.scope.get(v) {
                     Some(NamedItem::Expr(e)) => e.clone(),
                     Some(NamedItem::Builtin(b)) => return Ok(Value::Builtin(b)),
-                    None => bail!("no such thing: {}", v),
+                    None => bail!("no such thing: {:?}", v),
                 };
                 self.eval(&e)
             }

+ 3 - 1
src/repl.rs

@@ -79,7 +79,9 @@ impl Hinter for Repl {
                 let autocompletes = self.state.borrow().autocomplete(so_far, str_start == 0);
                 if autocompletes.len() == 1 {
                     let known = autocompletes.first().unwrap();
-                    return known.strip_prefix(so_far).map(|s| ansi_term::Colour::Blue.dimmed().paint(s).to_string())
+                    return known
+                        .strip_prefix(so_far)
+                        .map(|s| ansi_term::Colour::Blue.dimmed().paint(s).to_string());
                 } else {
                     return None;
                 }

+ 2 - 1
tools/regenerate.rs

@@ -16,9 +16,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
                 f
             };
 
+            let mut ast = matzo::ast::ASTArena::new();
             let src = std::fs::read_to_string(&exp)?;
             let tokens = lexer::tokens(&src);
-            if let Ok(ast) = grammar::StmtsParser::new().parse(tokens) {
+            if let Ok(ast) = grammar::StmtsParser::new().parse(&mut ast, tokens) {
                 let mut f = std::fs::File::create(exp_filename("parsed"))?;
                 writeln!(f, "{:#?}", ast)?;
             }