From 0bc19f1cfd8df00eabb0bab82eca3f38f178e141 Mon Sep 17 00:00:00 2001 Message-Id: <0bc19f1cfd8df00eabb0bab82eca3f38f178e141.1717731264.git.mdw@distorted.org.uk> From: Mark Wooding Date: Thu, 22 Oct 2015 00:46:28 +0100 Subject: [PATCH] doc/syntax.tex: Improve the syntax notation. Organization: Straylight/Edgeware From: Mark Wooding Rather than having a bunch of special rules, define the notational extensions in terms of the parametrized nonterminals we have anyway for declarators, and set that on a more formal footing. I'm not quite sure how powerful this formalism is. I believe it's strictly more powerful than context-free grammars, but I haven't decided whether it's Turing-equivalent. It doesn't matter much here either way, because the actual grammar is indeed context-free even if the formalism is more expressive than that. --- doc/syntax.tex | 67 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/doc/syntax.tex b/doc/syntax.tex index ca011cb..11e80b6 100644 --- a/doc/syntax.tex +++ b/doc/syntax.tex @@ -27,30 +27,45 @@ %%%-------------------------------------------------------------------------- -Fortunately, Sod is syntactically quite simple. I've used a little slightly -unusual notation in order to make the presentation easier to read. For any -nonterminal $x$: +Fortunately, Sod is syntactically quite simple. The notation is slightly +unusual in order to make the presentation shorter and easier to read. + +Anywhere a simple nonterminal name $x$ may appear in the grammar, an +\emph{indexed} nonterminal $x[a_1, \ldots, a_n]$ may also appear. On the +left-hand side of a production rule, the indices $a_1$, \ldots, $a_n$ are +variables which vary over all nonterminal and terminal symbols, and the +variables may also appear on the right-hand side in place of a nonterminal. +Such a rule stands for a family of rules, in which each variable is replaced by +each possible simple nonterminal or terminal symbol. 
+ +The letter $\epsilon$ denotes the empty nonterminal +\begin{quote} + \syntax{$\epsilon$ ::=} +\end{quote} + +The following indexed productions are used throughout the grammar, some often +enough that they deserve special notation. \begin{itemize} -\item $\epsilon$ denotes the empty nonterminal: - \begin{quote} - $\epsilon$ ::= - \end{quote} -\item @[$x$@] means an optional $x$: +\item @[$x$@] abbreviates @$[x]$, denoting an optional occurrence + of $x$: \begin{quote} - \syntax{@[$x$@] ::= $\epsilon$ @! $x$} + \syntax{@[$x$@] ::= $[x]$ ::= $\epsilon$ @! $x$} \end{quote} -\item $x^*$ means a sequence of zero or more $x$s: +\item $x^*$ abbreviates @$[x]$, denoting a sequence of zero or + more occurrences of $x$: \begin{quote} - \syntax{$x^*$ ::= $\epsilon$ @! $x^*$ $x$} + \syntax{$x^*$ ::= $[x]$ ::= + $\epsilon$ @! $[x]$ $x$} \end{quote} -\item $x^+$ means a sequence of one or more $x$s: +\item $x^+$ abbreviates @$[x]$, denoting a sequence of one or + more occurrences of $x$: \begin{quote} - \syntax{$x^+$ ::= $x$ $x^*$} + \syntax{$x^+$ ::= $[x]$ ::= $[x]$ $x$} \end{quote} -\item $x$@<-list> means a sequence of one or more $x$s separated - by commas: +\item @$[x]$ denotes a sequence of one or more occurrences of $x$ + separated by commas: \begin{quote} - \syntax{$x$<-list> ::= $x$ @! $x$<-list> "," $x$} + \syntax{$[x]$ ::= $x$ @! $[x]$ "," $x$} \end{quote} \end{itemize} @@ -323,7 +338,7 @@ The @ is evaluated immediately. It can do anything it likes. \begin{grammar} ::= - "typename" ";" + "typename" @[@] ";" \end{grammar} Each @ is declared as naming a C type. This is important because @@ -340,7 +355,7 @@ declarations instead. "code" ":" @[@] "{" "}" - ::= "[" "]" + ::= "[" @[@] "]" ::= @^+ \end{grammar} @@ -372,7 +387,7 @@ elsewhere in the generated output files. \subsection{Property sets} \label{sec:syntax.propset} \begin{grammar} - ::= "[" "]" + ::= "[" @[@] "]" ::= "=" \end{grammar} @@ -474,7 +489,7 @@ All of these have their usual C meanings. 
\alt "(" ")" ::= $\epsilon$ | "..." -\alt @["," "..."@] +\alt @[@] @["," "..."@] ::= @^+ @@ -532,13 +547,13 @@ class Sub : Super { \begin{grammar} ::= @[@] - "class" ":" + "class" ":" @[@] "{" @^* "}" ::= ";" +\alt ";" \alt \alt -\alt ";" \end{grammar} A full class definition provides a complete description of a class. @@ -548,7 +563,7 @@ give the name of an existing class (other than a forward-referenced class), or an existing type name. It is conventional to give classes `MixedCase' names, to distinguish them from other kinds of identifiers. -The @ names the direct superclasses for the new class. It +The @@[@] names the direct superclasses for the new class. It is an error if any of these @s does not name a defined class. The @ provide additional information. The standard class @@ -579,9 +594,9 @@ These items are discussed on the following sections. \begin{grammar} ::= @[@] - @^+ + @^+ @[@] - ::= @["=" @] + ::= @["=" @] \end{grammar} A @ defines one or more slots. All instances of the class and any @@ -614,7 +629,7 @@ class Example : Super { \subsubsection{Initializer items} \label{sec:syntax.class.init} \begin{grammar} - ::= @["class"@] + ::= @["class"@] @[@] ::= "=" -- [mdw]