| author | Alan <alan@minerva.local> 2026-05-01 21:33:33 UTC |
| committer | Alan <alan@minerva.local> 2026-05-01 21:33:33 UTC |
| parent | 57eca72841e58886de3b8845cde62f76c242dbe4 |
| Makefile | +16 | -14 |
| bootstrap/stage1-compiler.jar | +0 | -0 |
| specs/stage0.md | +3 | -3 |
| specs/stage1.md | +21 | -23 |
| src/com/tailrecursion/larquil/stage0/IFunction.java | +0 | -5 |
| src/com/tailrecursion/larquil/stage0/Stage1Main.java | +0 | -12 |
| src/com/tailrecursion/larquil/stage0/Symbol.java | +0 | -35 |
| test/run-stage1.sh | +3 | -3 |
| test/run.sh | +4 | -5 |
diff --git a/Makefile b/Makefile index ccec321..e5060fa 100644 --- a/Makefile +++ b/Makefile @@ -1,21 +1,20 @@ -JAVAC ?= javac JAVA ?= java -SRC_DIR := src BUILD_DIR := build -CLASSES_DIR := $(BUILD_DIR)/classes GEN_DIR := $(BUILD_DIR)/generated STAGE1_DIR := $(BUILD_DIR)/stage1 STAGE2_DIR := $(BUILD_DIR)/stage2 STAGE3_DIR := $(BUILD_DIR)/stage3 BOOTSTRAP_TEST_DIR := $(BUILD_DIR)/bootstrap-test BOOTSTRAP_DIR := bootstrap +BOOTSTRAP_JAR := $(BOOTSTRAP_DIR)/stage1-compiler.jar +MAIN_CLASS := com.tailrecursion.larquil.stage0.Stage1Main +RUNTIME_CLASSES := com/tailrecursion/larquil/stage0/IFunction.class com/tailrecursion/larquil/stage0/Symbol.class com/tailrecursion/larquil/stage0/Stage1Main.class .PHONY: build test clean compile-example run-example stage1-build stage1-test stage1-self-host bootstrap-artifact bootstrap-test build: - mkdir -p $(CLASSES_DIR) - $(JAVAC) -d $(CLASSES_DIR) $$(find $(SRC_DIR) -name '*.java' | sort) + test -f $(BOOTSTRAP_JAR) test: build sh test/run.sh @@ -24,10 +23,10 @@ stage1-build: build rm -rf $(STAGE1_DIR) mkdir -p $(STAGE1_DIR) for f in stage1/core.lql stage1/munge.lql; do \ - $(JAVA) -cp $(BOOTSTRAP_DIR)/stage1-compiler.jar:$(CLASSES_DIR) com.tailrecursion.larquil.stage0.Stage1Main $$f $(STAGE1_DIR); \ + $(JAVA) -cp $(BOOTSTRAP_JAR) $(MAIN_CLASS) $$f $(STAGE1_DIR); \ done cat stage1/core.lql stage1/munge.lql stage1/forms.lql stage1/reader.lql stage1/classfile.lql stage1/emit.lql stage1/instructions.lql stage1/backend.lql stage1/compiler.lql > $(STAGE1_DIR)/compiler.lql - $(JAVA) -cp $(BOOTSTRAP_DIR)/stage1-compiler.jar:$(CLASSES_DIR) com.tailrecursion.larquil.stage0.Stage1Main $(STAGE1_DIR)/compiler.lql $(STAGE1_DIR) + $(JAVA) -cp $(BOOTSTRAP_JAR) $(MAIN_CLASS) $(STAGE1_DIR)/compiler.lql $(STAGE1_DIR) stage1-test: stage1-build sh test/run-stage1.sh @@ -35,30 +34,33 @@ stage1-test: stage1-build stage1-self-host: stage1-build rm -rf $(STAGE2_DIR) $(STAGE3_DIR) mkdir -p $(STAGE2_DIR) $(STAGE3_DIR) - $(JAVA) -cp 
$(CLASSES_DIR):$(STAGE1_DIR) com.tailrecursion.larquil.stage0.Stage1Main $(STAGE1_DIR)/compiler.lql $(STAGE2_DIR) - $(JAVA) -cp $(STAGE2_DIR):$(CLASSES_DIR) com.tailrecursion.larquil.stage0.Stage1Main $(STAGE1_DIR)/compiler.lql $(STAGE3_DIR) + $(JAVA) -cp $(STAGE1_DIR):$(BOOTSTRAP_JAR) $(MAIN_CLASS) $(STAGE1_DIR)/compiler.lql $(STAGE2_DIR) + $(JAVA) -cp $(STAGE2_DIR):$(BOOTSTRAP_JAR) $(MAIN_CLASS) $(STAGE1_DIR)/compiler.lql $(STAGE3_DIR) sh test/run-stage1.sh $(STAGE2_DIR) sh test/run-stage1.sh $(STAGE3_DIR) bootstrap-artifact: stage1-self-host mkdir -p $(BOOTSTRAP_DIR) - rm -f $(BOOTSTRAP_DIR)/stage1-compiler.jar - jar --create --file $(BOOTSTRAP_DIR)/stage1-compiler.jar -C $(STAGE3_DIR) . + tmp=$$(mktemp -d); \ + (cd $$tmp && jar xf $(abspath $(BOOTSTRAP_JAR)) $(RUNTIME_CLASSES)); \ + rm -f $(BOOTSTRAP_JAR); \ + jar --create --file $(BOOTSTRAP_JAR) -C $$tmp . -C $(STAGE3_DIR) .; \ + rm -rf $$tmp bootstrap-test: build rm -rf $(BOOTSTRAP_TEST_DIR) mkdir -p $(BOOTSTRAP_TEST_DIR) cat stage1/core.lql stage1/munge.lql stage1/forms.lql stage1/reader.lql stage1/classfile.lql stage1/emit.lql stage1/instructions.lql stage1/backend.lql stage1/compiler.lql > $(BOOTSTRAP_TEST_DIR)/compiler.lql - $(JAVA) -cp $(BOOTSTRAP_DIR)/stage1-compiler.jar:$(CLASSES_DIR) com.tailrecursion.larquil.stage0.Stage1Main $(BOOTSTRAP_TEST_DIR)/compiler.lql $(BOOTSTRAP_TEST_DIR) + $(JAVA) -cp $(BOOTSTRAP_JAR) $(MAIN_CLASS) $(BOOTSTRAP_TEST_DIR)/compiler.lql $(BOOTSTRAP_TEST_DIR) sh test/run-stage1.sh $(BOOTSTRAP_TEST_DIR) compile-example: build rm -rf $(GEN_DIR) mkdir -p $(GEN_DIR) - $(JAVA) -cp $(BOOTSTRAP_DIR)/stage1-compiler.jar:$(CLASSES_DIR) com.tailrecursion.larquil.stage0.Stage1Main examples/fact_recursive.lql $(GEN_DIR) + $(JAVA) -cp $(BOOTSTRAP_JAR) $(MAIN_CLASS) examples/fact_recursive.lql $(GEN_DIR) run-example: compile-example - $(JAVA) -cp $(CLASSES_DIR):$(GEN_DIR) com.tailrecursion.larquil.stage0.fact_recursive + $(JAVA) -cp $(GEN_DIR):$(BOOTSTRAP_JAR) 
com.tailrecursion.larquil.stage0.fact_recursive clean: rm -rf $(BUILD_DIR) diff --git a/bootstrap/stage1-compiler.jar b/bootstrap/stage1-compiler.jar index c7f5c29..275ff03 100644 Binary files a/bootstrap/stage1-compiler.jar and b/bootstrap/stage1-compiler.jar differ diff --git a/specs/stage0.md b/specs/stage0.md index b74a7cd..3f80046 100644 --- a/specs/stage0.md +++ b/specs/stage0.md @@ -160,10 +160,10 @@ Class files are written under the package path `com/tailrecursion/larquil/stage0 File names and function names are converted to JVM class names with deterministic Clojure-style munging. Path separators are not part of the generated class name. -Stage 0 starts with handwritten runtime support classes: +Stage 0-compatible generated classes depend on these runtime support classes from the bootstrap artifact: -- `IFunction.java` -- `Symbol.java` +- `IFunction.class` +- `Symbol.class` Example loader shape: diff --git a/specs/stage1.md b/specs/stage1.md index e9b1b66..1c44d02 100644 --- a/specs/stage1.md +++ b/specs/stage1.md @@ -1,7 +1,7 @@ # Larquil Stage 1 Bootstrap Plan Stage 1 is the first Larquil implementation of the compiler. -It is compiled by the Java Stage 0 compiler and should remain close enough to Stage 0 that each step can be tested independently. +It is compiled by the checked-in bootstrap compiler artifact and should remain close enough to Stage 0 semantics that each step can be tested independently. The goal is not to design the final Larquil language. The goal is to replace the Java compiler with a Larquil compiler that emits the same Stage 0-compatible class files. @@ -12,12 +12,12 @@ The goal is to replace the Java compiler with a Larquil compiler that emits the - Keep Stage 1 source in `stage1/`. - Port in small executable chunks. -- Every independent chunk must compile with the Stage 0 compiler before another chunk depends on it. +- Every independent chunk must compile with the bootstrap compiler before another chunk depends on it. 
- Until Larquil has modules or includes, the Stage 1 compiler is built by concatenating ordered chunks into one generated `compiler.lql`. - Chunk files are development units; the concatenated file is the compilation unit that allows same-file function resolution. - Prefer ordinary Larquil functions over new Stage 0 compiler intrinsics. - Add a Stage 0 instruction only when it represents a real JVM bytecode operation needed by portable Larquil code. -- Keep Java runtime support limited and explicit while bootstrapping. +- Keep runtime class artifacts limited and explicit while bootstrapping. - Do not introduce parser AST classes or Java-side compiler behavior for Stage 1. - Stage 1 data remains Larquil-shaped: `Long`, `String`, `Boolean`, `Symbol`, `List<Object>`, `IFunction`, and `null`. @@ -25,13 +25,13 @@ The goal is to replace the Java compiler with a Larquil compiler that emits the # Generation Names -- Stage 0 compiler: the current Java compiler in `src/`. +- Bootstrap compiler: checked-in `bootstrap/stage1-compiler.jar`. - Stage 1 source: Larquil compiler source files in `stage1/`. - Stage 1 compiler: class files produced by compiling Stage 1 source with the Stage 0 compiler. - Stage 2 compiler: class files produced by compiling Stage 1 source with the Stage 1 compiler. - Stage 3 compiler: class files produced by compiling Stage 1 source with the Stage 2 compiler. -Bootstrap means the Stage 2 compiler can compile Stage 1 source again, and the resulting compiler passes the same integration tests without depending on the Java Stage 0 compiler. +Bootstrap means the Stage 2 compiler can compile Stage 1 source again, and the resulting compiler passes the same integration tests without depending on Java source in this repository. Generated class bytes do not need to be byte-identical across generations. Observable behavior must match. @@ -259,7 +259,9 @@ Done when: ## 3. Port The Small Byte Emitters -Port stable `MethodCode` byte emission helpers first. 
+Status: done in `stage1/emit.lql`. + +Port stable byte emission helpers first. These are the least semantic compiler functions and the easiest to verify. Source target: @@ -270,7 +272,6 @@ Functions to port: - `emit_u1` - `emit_u2` -- `emit_u4` - `emit_push_int` - `emit_aload` - `emit_astore` @@ -284,13 +285,13 @@ Functions to port: - `emit_invokeinterface` - `emit_branch` - `emit_label` -- `finish_labels` +- label finalization is owned by `stage1/classfile.lql` Done when: -- Stage 0 compiles `stage1/emit.lql` -- generated emit helper classes load -- a Java smoke harness or `.lql` IIFE can call a few emit helpers against a `MethodCode` object +- the bootstrap compiler compiles the concatenated Stage 1 compiler +- generated emit helper classes load as part of self-hosting +- stage2 and stage3 compilers pass the example integration tests ## 4. Port Portable List And Symbol Helpers @@ -313,15 +314,13 @@ Functions to port: - `require_string` - `int_value` -Temporary Java support allowed: +Runtime support allowed: -- `Pair` -- list indexing helpers if direct Java `ArrayList` calls are too noisy -- symbol interning through `Symbol.intern` +- symbol interning through the runtime `Symbol.class` seed until it is Larquil-generated Done when: -- Stage 0 compiles `stage1/core.lql` +- the bootstrap compiler compiles `stage1/core.lql` - positive and negative helper behavior is covered by tiny executable `.lql` tests ## 5. 
Port Name Munging @@ -447,13 +446,12 @@ Source target: - JDK `DataOutputStream` is used for classfile output - JDK `ByteBuffer` is used for mutable method bytecode and branch patching -Functions/data to port: +Functions/data ported: -- `ByteVec.add` -- `ByteVec.u2` -- `ByteVec.u4` -- `ByteVec.patchS2` -- `ByteVec.bytes` +- method byte buffer append +- method byte buffer `u2` +- branch offset patching +- method byte array extraction - `cp_utf8` - `cp_class` - `cp_long` @@ -649,4 +647,4 @@ The classfile backend is now Larquil-owned: Next best chunk: -- generate `IFunction.class` from Larquil classfile helpers and remove `IFunction.java` +- replace the runtime `.class` seeds in `bootstrap/stage1-compiler.jar` with Larquil-generated equivalents diff --git a/src/com/tailrecursion/larquil/stage0/IFunction.java b/src/com/tailrecursion/larquil/stage0/IFunction.java deleted file mode 100644 index 2a7c7b2..0000000 --- a/src/com/tailrecursion/larquil/stage0/IFunction.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.tailrecursion.larquil.stage0; - -public interface IFunction { - Object invoke(Object[] args); -} diff --git a/src/com/tailrecursion/larquil/stage0/Stage1Main.java b/src/com/tailrecursion/larquil/stage0/Stage1Main.java deleted file mode 100644 index c747edd..0000000 --- a/src/com/tailrecursion/larquil/stage0/Stage1Main.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.tailrecursion.larquil.stage0; - -public final class Stage1Main { - public static void main(String[] args) throws Exception { - if (args.length != 2) { - throw new IllegalArgumentException("usage: Stage1Main input.lql outdir"); - } - Class<?> cls = Class.forName("com.tailrecursion.larquil.stage0.compiler__compile_file"); - IFunction fn = (IFunction) cls.getField("INSTANCE").get(null); - fn.invoke(new Object[]{args[0], args[1]}); - } -} diff --git a/src/com/tailrecursion/larquil/stage0/Symbol.java b/src/com/tailrecursion/larquil/stage0/Symbol.java deleted file mode 100644 index ed620ee..0000000 --- 
a/src/com/tailrecursion/larquil/stage0/Symbol.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.tailrecursion.larquil.stage0; - -import java.util.HashMap; - -public final class Symbol { - private static final HashMap<String, Symbol> INTERNED = new HashMap<String, Symbol>(); - - public final String name; - - private Symbol(String name) { - this.name = name; - } - - public static Symbol intern(String name) { - Symbol existing = INTERNED.get(name); - if (existing != null) { - return existing; - } - Symbol created = new Symbol(name); - INTERNED.put(name, created); - return created; - } - - public boolean equals(Object other) { - return other instanceof Symbol && name.equals(((Symbol) other).name); - } - - public int hashCode() { - return name.hashCode(); - } - - public String toString() { - return name; - } -} diff --git a/test/run-stage1.sh b/test/run-stage1.sh index f616d68..c343f65 100644 --- a/test/run-stage1.sh +++ b/test/run-stage1.sh @@ -3,7 +3,7 @@ set -eu ROOT=$(CDPATH= cd -- "$(dirname -- "$0")/.." 
&& pwd) JAVA=${JAVA:-java} -CLASSES="$ROOT/build/classes" +BOOTSTRAP="$ROOT/bootstrap/stage1-compiler.jar" STAGE1=${1:-"$ROOT/build/stage1"} OUT="$ROOT/build/stage1-test-generated" @@ -22,7 +22,7 @@ compile_stage1() { out="$OUT/$name" rm -rf "$out" mkdir -p "$out" - "$JAVA" -cp "$CLASSES:$STAGE1" com.tailrecursion.larquil.stage0.Stage1Main "$input" "$out" >/tmp/larquil-stage1.out 2>/tmp/larquil-stage1.err || { + "$JAVA" -cp "$STAGE1:$BOOTSTRAP" com.tailrecursion.larquil.stage0.Stage1Main "$input" "$out" >/tmp/larquil-stage1.out 2>/tmp/larquil-stage1.err || { cat /tmp/larquil-stage1.err fail "$name compile" } @@ -33,7 +33,7 @@ run_loader() { name=$1 class=$2 out="$OUT/$name" - "$JAVA" -cp "$CLASSES:$STAGE1:$out" "$class" >/tmp/larquil-stage1.out 2>/tmp/larquil-stage1.err || { + "$JAVA" -cp "$out:$STAGE1:$BOOTSTRAP" "$class" >/tmp/larquil-stage1.out 2>/tmp/larquil-stage1.err || { cat /tmp/larquil-stage1.err fail "$name run" } diff --git a/test/run.sh b/test/run.sh index cc45b29..9177041 100755 --- a/test/run.sh +++ b/test/run.sh @@ -3,7 +3,6 @@ set -eu ROOT=$(CDPATH= cd -- "$(dirname -- "$0")/.." 
&& pwd) JAVA=${JAVA:-java} -CLASSES="$ROOT/build/classes" BOOTSTRAP="$ROOT/bootstrap/stage1-compiler.jar" OUT="$ROOT/build/test-generated" @@ -22,7 +21,7 @@ compile_ok() { out="$OUT/$name" rm -rf "$out" mkdir -p "$out" - "$JAVA" -cp "$BOOTSTRAP:$CLASSES" com.tailrecursion.larquil.stage0.Stage1Main "$input" "$out" >/tmp/larquil-test.out 2>/tmp/larquil-test.err || { + "$JAVA" -cp "$BOOTSTRAP" com.tailrecursion.larquil.stage0.Stage1Main "$input" "$out" >/tmp/larquil-test.out 2>/tmp/larquil-test.err || { cat /tmp/larquil-test.err fail "$name compile" } @@ -35,7 +34,7 @@ compile_fail() { out="$OUT/$name" rm -rf "$out" mkdir -p "$out" - if "$JAVA" -cp "$BOOTSTRAP:$CLASSES" com.tailrecursion.larquil.stage0.Stage1Main "$input" "$out" >/tmp/larquil-test.out 2>/tmp/larquil-test.err; then + if "$JAVA" -cp "$BOOTSTRAP" com.tailrecursion.larquil.stage0.Stage1Main "$input" "$out" >/tmp/larquil-test.out 2>/tmp/larquil-test.err; then fail "$name should fail" fi ok "$name fails" @@ -45,7 +44,7 @@ run_loader() { name=$1 class=$2 out="$OUT/$name" - "$JAVA" -cp "$CLASSES:$out" "$class" >/tmp/larquil-test.out 2>/tmp/larquil-test.err || { + "$JAVA" -cp "$out:$BOOTSTRAP" "$class" >/tmp/larquil-test.out 2>/tmp/larquil-test.err || { cat /tmp/larquil-test.err fail "$name run" } @@ -56,7 +55,7 @@ verify_no_main_class() { name=$1 out=$2 class=$3 - if "$JAVA" -Xverify:all -cp "$CLASSES:$out" "$class" >/tmp/larquil-test.out 2>/tmp/larquil-test.err; then + if "$JAVA" -Xverify:all -cp "$out:$BOOTSTRAP" "$class" >/tmp/larquil-test.out 2>/tmp/larquil-test.err; then fail "$name should not have main" fi if ! grep -q 'Main method not found' /tmp/larquil-test.err; then