author | Alan Dipert
<alan@dipert.org> 2020-02-22 07:11:32 UTC |
committer | Alan Dipert
<alan@dipert.org> 2020-02-22 07:11:32 UTC |
parent | 8696390b002d3e41c91f705c4db5253f8dd4ae18 |
paper/Makefile | +1 | -1 |
paper/jacl-els-2020.bib | +26 | -1 |
paper/jacl-els-2020.tex | +175 | -145 |
diff --git a/paper/Makefile b/paper/Makefile index 0438c4e..65d9f90 100644 --- a/paper/Makefile +++ b/paper/Makefile @@ -1,4 +1,4 @@ -.PHONY=all clean +.PHONY=all deploy clean DOC := jacl-els-2020.tex CF_DIST := E6GOTXLS9MCZF diff --git a/paper/jacl-els-2020.bib b/paper/jacl-els-2020.bib index 6d9d3c5..8768202 100644 --- a/paper/jacl-els-2020.bib +++ b/paper/jacl-els-2020.bib @@ -208,4 +208,29 @@ isbn = {978-1449381875}, publisher = {O'Reilly Media}, address = {Sebastopol, CA, USA}, -} \ No newline at end of file +} + +@online{SICLTagbody, + author={Robert Strandh}, + year={2020}, + title="compile-general-purpose-asts.lisp", + url="https://github.com/robert-strandh/SICL/blob/2d322d3c7794eb4e89720b9a2fce42395a787376/Code/Cleavir2/AST-to-HIR/compile-general-purpose-asts.lisp#L200-L303", + lastaccessed="February 21, 2020", +} + +@online{SBCLManual, + author={{SBCL Project Contributors}}, + year={2020}, + title="SBCL 2.0.1 User Manual", + url="http://www.sbcl.org/manual/", + lastaccessed="February 21, 2020", +} + +@online{LispWorksDeliver, + author={{LispWorks Ltd.}}, + year={2017}, + title="deliver", + url="http://www.lispworks.com/documentation/lw71/DV/html/delivery-220.htm", + lastaccessed="February 21, 2020", +} + diff --git a/paper/jacl-els-2020.tex b/paper/jacl-els-2020.tex index 92c81b7..fce4dea 100644 --- a/paper/jacl-els-2020.tex +++ b/paper/jacl-els-2020.tex @@ -493,60 +493,105 @@ recognizes Lisp or JavaScript strings as JavaScript identifiers. \subsubsection{\texttt{TAGBODY} compilation strategy} -JSCL, the existing Lisp closest to JACL, implements \texttt{TAGBODY} -through the combination of: +Consider the following Common Lisp program that decrements the local +variable \texttt{X} 10 times: -\begin{itemize} - \item Generating a unique identifier, the \texttt{tbidx}, for each - \texttt{TAGBODY} encountered during compilation. - \item A local JavaScript \texttt{branch} variable in the generated - code containing the label of the next branch to execute. - \item Generated \texttt{while(true) { ... }} loops each containing a - \texttt{try/catch} statement. The \texttt{catch} arm of the - \texttt{try/catch} inspects the type of thrown exception, and if - it is of the marker type \texttt{TagNLX} and corresponds to the - current \texttt{TAGBODY}, \texttt{branch} is set to the tag - carried by the \texttt{TagNLX} object. The \texttt{try} arm of the - \texttt{try/catch} contains a \texttt{switch} statement that - dispatches on \texttt{branch}. Each labeled block of the - \texttt{switch} corresponds to the code succeeding a \texttt{GO} - tag. The \texttt{switch} falls through to a \texttt{break} to the - enclosing labeled \texttt{while} loop, concluding execution of the - \texttt{TAGBODY}. - \item An implementation of \texttt{GO} that throws a JavaScript - exception of type \texttt{TagNLX} that contains \texttt{tbidx} and - target \texttt{branch} values. -\end{itemize} +\begin{verbatim} +(let ((x 10)) + (tagbody + start + (when (zerop x) (go end)) + (decf x) + (go start) + end)) +\end{verbatim} -This mechanism is ingenious and general, and it's roughly how JACL -also currently implements \texttt{TAGBODY}. Unfortunately, every jump -requires a JavaScript exception be thrown, severely penalizing nearly -every iteration construct. Fortunately, a slight improvement in the -\texttt{GO} compilation strategy can yield a tremendous performance -benefit. The improvement follows from the observation that any -\emph{local jump} --- such as that from a \texttt{GO} to its relevant -enclosing \texttt{TAGBODY}, with no lexically-intervening -\texttt{LAMBDA} forms --- may rely on JavaScript's -\texttt{continue}\cite{MozLabel} operator with a label argument for -control transfer, instead of on throwing an exception. - -JACL's high-level AST will facilitate the identification of usages of -\texttt{GO} that are amenable to the aforementioned optimization. The -availability of a high-level AST will also facilitate other related -optimizations, such as \texttt{GO} tag escape\footnote{Such as by use - within a \texttt{LAMBDA} form that is assigned as the value of a - global variable or passed to a function.} analysis. It should be -possible to identify \texttt{TAGBODY}s that consist only of local -jumps, and to avoid generating \texttt{try/catch} boilerplate for -them, thereby reducing generated code size. - -Code generated for \texttt{TAGBODY} in JACL will still generally -require a \texttt{switch}, and so a slowdown relative to hand-coded -JavaScript loops would not be unexpected. However, preliminary -benchmark \ref{appendix:hypothetical} of a factorial function on a -recent version of Google Chrome revealed only a very small discrepancy -between JACL's proposed approach and a hand-coded JavaScript -\texttt{while} loop-based approach \ref{appendix:baseline}. +JSCL, the existing Lisp closest to JACL, would compile the preceding +code into approximately\footnote{Actual JSCL output is not used + because it includes type checks, generated variable names, and other + code that would obscure the relevant machinery.} the following +JavaScript: + +\begin{verbatim} +function Jump(id, label) { + this.id = id; + this.label = label; +} + +var X = 10; +var id = []; +var label = 0; +LOOP: while (true) { + try { + switch(label) { + case 0: + if (X === 0) throw new Jump(id, 1); + X--; + throw new Jump(id, 0); + case 1: + default: + break LOOP; + } + } catch (e) { + if (e instanceof Jump && e.id === id) { + label = e.label; + } else { + throw e; + } + } +} +\end{verbatim} + +The mechanism is ingenious and general. \texttt{GO} tags became +\texttt{switch} labels, and jumps are performed by throwing an +instance of \texttt{Jump} containing the destination label. + +Unfortunately, every jump requires a JavaScript exception be thrown, +severely penalizing iteration constructs built on +\texttt{TAGBODY}. Fortunately, a slight improvement in the compilation +strategy can yield a tremendous performance benefit. The improvement +follows from the observation that any \emph{local jump} --- such as +that from a \texttt{GO} to its relevant enclosing \texttt{TAGBODY}, +with no lexically-intervening \texttt{LAMBDA} forms --- may rely on +JavaScript's \texttt{continue}\cite{MozLabel} operator with a label +argument for control transfer, instead of on throwing an exception. + +Additionally, \texttt{TAGBODY} forms that are determined statically to +contain \emph{only} local jumps need not generate a \texttt{try/catch} +block, saving on generated code size. + +The following is an example of what the generated code would look like +after applying local jump optimization. Cursory benchmarks run on +Google Chrome \ref{appendix:benchmarks} show it runs several orders of +magnitude faster than the previous, unoptimized version, and is nearly +as fast as the JavaScript statement \texttt{while(X--)}: + +\begin{verbatim} +var X = 10; +var label = 0; +LOOP: while (true) { + switch(label) { + case 0: + if (X === 0) { + label = 1; + continue LOOP; + } + X--; + label = 0; + continue LOOP; + case 1: + default: + break LOOP; + } +} +\end{verbatim} + +JACL's AST will facilitate the identification of usages of +\texttt{TAGBODY} that are amenable to local jump optimization, and the +transformation of such code into a dramatically faster +equivalent. Local \texttt{GO} optimization is a known technique +\cite{SICLTagbody}, but its implementation using JavaScript's +\texttt{continue} is not. \subsection{Delivery} @@ -557,32 +602,33 @@ implemented at all. Fortunately, since JACL development is image-based, JACL should support the traditional approach of specifying a Lisp function entrypoint and dumping the Lisp image to native (JavaScript) -code. SBCL's \texttt{SAVE-LISP-AND-DIE} and LispWork's -\texttt{DELIVER} function are two examples of this in other -implementations. +code. SBCL's \texttt{SAVE-LISP-AND-DIE}\cite{SBCLManual} and +LispWork's \texttt{DELIVER}\cite{LispWorksDeliver} functions are two +examples of this in other implementations. -It is imagined that JACL's delivery function will be called in a REPL -at development time by a developer, or by an automated build +It is imagined that JACL's \texttt{DELIVER} function will be called in +a REPL at development time by a developer, or by an automated build script. When the function is called, a JavaScript deliverable will be produced, and the Web browser will either prompt the user to accept a -download, or the JavaScript code will be written to standard output. +download, or the JavaScript code will be written to standard output by +\texttt{jacl-client}. -JavaScript optimization tools such as Babel or Google Closure Compiler -may then be applied to the executable. Such tools perform -sophisticated optimizations such as dead-code elimination, loop -invariant code motion, and constant folding. +JavaScript whole-program optimization tools such as Babel or Google +Closure Compiler may then be optionally applied to the +executable. Such tools perform sophisticated optimizations such as +dead-code elimination, loop invariant code motion, and constant +folding. It is anticipated that one important high-level optimization must be performed in order for dumped images to be amenable to further -optimization by JavaScript tools: Global symbol and function -references must be converted to static references in order for -JavaScript tools to successfully identify and eliminate dead code. - -This required optimization implies that JACL must retain the ASTs -associated with global definitions, so that statically-linked code for -these definitions can be generated at delivery time. Alternatively, -statically-linked code could be created automatically in addition to -dynamically-linked code. +optimization by JavaScript tools: global symbol and function +references must be converted to static references. In order to perform +this optimization, JACL will need to retain the ASTs associated with +global definitions, so that statically-linked code for these +definitions can be generated at delivery time. Alternatively, +statically-linked code could be created automatically whenever +dynamically-linked code, and stored as an implementation-specific +attribute of the function object. \section{Conclusions and Future Work} @@ -591,7 +637,8 @@ introduced. Four novel aspects of JACL were described with respect to other, comparable Lisp implementations: \begin{enumerate} - \item An asynchronous reader that doesn't necessitate a pre-reader and is suitable for embedded use. + \item An asynchronous reader that doesn't necessitate a pre-reader + and is suitable for embedded use. \item An analyzing compiler and AST to support high-level optimizations, such as those necessary for a high-performance \texttt{TAGBODY} implementation. @@ -644,86 +691,35 @@ beautiful wife, Sandra Dipert, for her skillful editing. \appendix -\section{Loop Performance Benchmarks} - -\subsection{JavaScript Baseline} -\label{appendix:baseline} +\section{Tagbody Performance Benchmarks} +\label{appendix:benchmarks} -This is a simple JavaScript function for computing the factorial of a -positive integer as efficiently as possible. Its performance is -considered baseline for the purposes of benchmarking JACL's -\texttt{TAGBODY}. +The following benchmark code was run on Google Chrome 80.0.3987.116, +on Linux, using a computer with an Intel i7-3520M CPU. \begin{verbatim} -function fact1(n) { - var prod = 1; - while(n) prod *= n--; - return prod; +function Jump(id, label) { + this.id = id; + this.label = label; } -var start = performance.now(); -for (var i = 0; i < 1e8; i++) fact1(20); -console.log("fact1", performance.now() - start); -// fact1 1860.9749999595806 -\end{verbatim} - -\subsection{Equivalent Lisp} -\label{appendix:lisp} - -The following is an analagous Common Lisp factorial -function. \texttt{TAGBODY} is the means of iteration. This function -cannot currently be compiled by JACL because \texttt{SETF}, -\texttt{DECF}, and \texttt{RETURN-FROM} are not yet implemented. - -\begin{verbatim} -(defun fact2 (n &aux (prod 1)) - (tagbody - start - (when (zerop n) (go done)) - (setf prod (* prod n)) - (decf n) - (go start) - done - (return-from fact2 prod))) -\end{verbatim} - -\subsection{Hypothetical Generated Code} -\label{appendix:hypothetical} - -The following is the hypothetical output for a future version of the -JACL compiler that implements a \texttt{continue}-based strategy for -efficient control transfer. Actual generated code will include type -checks by default, but those have been omitted for clarity. - -\texttt{GoTag} is defined for completeness but is never used because -the only jump, to the tag \texttt{START}, is local. - -\begin{verbatim} -function GoTag(tag) { - this.tag = tag; - return this; -} - -function fact2(n) { - var branch = 0; - var prod = 1; - loop1: - while(true) { +function tagbody_unoptimized(X) { + var id = []; + var label = 0; + LOOP: while (true) { try { - switch(branch) { + switch(label) { case 0: - if(n) { - prod *= n--; - branch = 0; - continue loop1; - } + if (X === 0) throw new Jump(id, 1); + X--; + throw new Jump(id, 0); case 1: - return prod; + default: + break LOOP; } } catch (e) { - if (e instanceof GoTag) { - branch = e.tag; - continue loop1; + if (e instanceof Jump && e.id === id) { + label = e.label; } else { throw e; } @@ -731,10 +727,44 @@ function fact2(n) { } } +function tagbody_optimized(X) { + var X = 10; + var label = 0; + LOOP: while (true) { + switch(label) { + case 0: + if (X === 0) { + label = 1; + continue LOOP; + } + X--; + label = 0; + continue LOOP; + case 1: + default: + break LOOP; + } + } +} + +function baseline_js(X) { + while(X--); +} + +var start = performance.now(); +for (var i = 0; i < 1e6; i++) tagbody_unoptimized(10); +console.log("tagbody_unoptimized", performance.now() - start); +// tagbody_unoptimized 61572.78000000224 + +var start = performance.now(); +for (var i = 0; i < 1e6; i++) tagbody_optimized(10); +console.log("tagbody_optimized", performance.now() - start); +// tagbody_optimized 26.714999999967404 + var start = performance.now(); -for (var i = 0; i < 1e8; i++) fact2(20); -console.log("fact2", performance.now() - start); -// fact2 1981.7150000017136 +for (var i = 0; i < 1e6; i++) baseline_js(10); +console.log("baseline_js", performance.now() - start); +// baseline_js 21.240000001853332 \end{verbatim} \end{document}