git » jacl.git » commit 61fdc40

Tokenizing

author Alan Dipert
2019-08-14 13:14:49 UTC
committer Alan Dipert
2019-08-14 13:14:49 UTC
parent 90e5942e876284c2446ede7f2d283e70891713a5

Tokenizing

jacl.js +57 -30

diff --git a/jacl.js b/jacl.js
index 4bed07b..dccb9cd 100644
--- a/jacl.js
+++ b/jacl.js
@@ -150,6 +150,9 @@ class BufferedStream {
   writeEach(xs) {
     for (const x of xs) this.write(x);
   }
+  unreadEach(xs) {
+    for (const x of xs) this.unread(x);
+  }
   unread(obj) {
     if (this.resolveQueue.length) {
       this.resolveQueue.shift()(obj)
@@ -193,15 +196,25 @@ class ReadTable {
 
 const READTABLE = Package.get('CL').intern('*READTABLE*');
 
-// TODO
+class Token extends String {
+  // TODO figure out implications of jacl:undefined/jacl:true/jacl:false etc here
+  interpret() {
+    return readInteger(this) || LispSymbol.fromString(this)
+  }
+}
+
 const CLOSE_PAREN = new Object();
-const readList = async rdr => {
-  rdr.depth = (rdr.depth || 0) + 1;
-  const x = await rdr.read();
+
+const readList = async stream => {
+  let x = await (new Tokenizer(stream)).nextToken()
   if (x === CLOSE_PAREN) {
     return new Values(null);
   } else {
-    return new Values(new Cons(x, (await readList(rdr))[0]));
+    if (x instanceof Token) {
+      stream.unreadEach(x);
+      x = await (new Reader(new Tokenizer(stream))).read();
+    }
+    return new Values(new Cons(x, (await readList(stream))[0]));
   }
   //while (true) {
   //  const x = await rdr.read();
@@ -217,31 +230,25 @@ const readList = async rdr => {
 }
 
 READTABLE.value = new ReadTable()
-  .setMacro(';', true, async rdr => {
-    for await(const ch of rdr.stream) {
+  .setMacro(';', true, async stream => {
+    for await(const ch of stream) {
       if (ch === '\n') return new Values();
     }
   })
-  .setMacro('"', true, async rdr => {
+  .setMacro('"', true, async stream => {
     let str = new LispString();
-    for await(const x of rdr.stream) {
+    for await(const x of stream) {
       if (x === '"') {
         return new Values(str);
       } else if(x === '\\') {
-        str.push(await rdr.stream.read());
+        str.push(await stream.read());
       } else {
         str.push(x);
       }
     }
   })
-  .setMacro('(', true, readList)
-  .setMacro(')', true, async rdr => {
-    if (!rdr.depth || rdr.depth-- == 0) {
-      throw new Error(`Unmatched ')'`);
-    } else {
-      return new Values(CLOSE_PAREN);
-    }
-  });
+  .setMacro(')', true, async stream => new Values(CLOSE_PAREN))
+  .setMacro('(', true, readList);
 
 const isWhitespace = ch => ' \t\n\r\b'.indexOf(ch) > -1;
 
@@ -284,13 +291,6 @@ const readInteger = token => {
   }
 };
 
-// TODO figure out implications of jacl:undefined/jacl:true/jacl:false etc here
-const interpretToken = token => {
-  return readInteger(token)
-    || LispSymbol.fromString(token)
-    //[+-]?[0-9]+\.?/.test/|| throw new Error(`Unknown token: '${token}'`);
-};
-
 const readSingleEscaped = async function(stream, token) {
   for await(const y of stream) {
     if (isConstituent(y)) {
@@ -303,7 +303,7 @@ const readSingleEscaped = async function(stream, token) {
       return readMultiEscaped(stream, token);
     } else if (READTABLE.val().isTerminating(y) || isWhitespace(y)) {
       stream.unread(y);
-      return interpretToken(token);
+      return new Token(token);
     } else {
       throw new Error(`Illegal character: '${y}'`);
     }
@@ -311,17 +311,20 @@ const readSingleEscaped = async function(stream, token) {
 };
 
 
-class Reader {
+class Tokenizer {
   constructor(stream) {
     this.stream = stream;
   }
-  async read() {
+  unread(token) {
+    this.stream.writeEach(token);
+  }
+  async nextToken() {
     let macroFun;
     for await(const x of this.stream) {
       if (isWhitespace(x)) {
         continue;
       } else if (macroFun = READTABLE.val().getMacro(x)) {
-        const vals = await macroFun(this);
+        const vals = await macroFun(this.stream);
         if (vals.length) {
           return vals[0];
         } else {
@@ -339,6 +342,29 @@ class Reader {
       }
     }
   }
+  [Symbol.asyncIterator]() {
+    return {
+      next: () => this.nextToken().then(obj => {
+        return { value: obj, done: false };
+      })
+    }
+  }
+}
+
+class Reader {
+  constructor(tokenizer) {
+    this.tokenizer = tokenizer;
+  }
+  async read() {
+    const obj = await this.tokenizer.nextToken();
+    if (obj === CLOSE_PAREN) {
+      throw new Error(`Unmatched ')'`);
+    } else if (obj instanceof Token) {
+      return obj.interpret();
+    } else {
+      return obj;
+    }
+  }
   [Symbol.asyncIterator]() {
     return {
       next: () => this.read().then(obj => {
@@ -349,7 +375,8 @@ class Reader {
 }
 
 var buf = new BufferedStream();
-var rdr = new Reader(buf);
+var tok = new Tokenizer(buf);
+var rdr = new Reader(tok);
 
 (async function() {
   for await(const obj of rdr) {