chiark - git - mdw - sod/blob - doc/sod.tex

   1 \documentclass[noarticle]{strayman}
   2
   3 \usepackage[T1]{fontenc}
   4 \usepackage[utf8]{inputenc}
   5 \usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts}
   6 \usepackage{syntax}
   7 \usepackage{sverb}
   8 \usepackage{mdwtab}
   9 \usepackage{footnote}
  10 \usepackage{at}
  11 \usepackage{mdwref}
  12
  13 \title{A Sensible Object Design for C}
  14 \author{Mark Wooding}
  15
  16 \makeatletter
     % From here on `@' counts as a letter, so the private macros below can
     % use it in their names.
  17
     % Show plenty of macro-expansion context whenever TeX reports an error.
  18 \errorcontextlines999
  19
     % \syntleft switches to the normal font in its italic shape; \syntright
     % does nothing.  (Presumably these are the hooks which the `syntax'
     % package puts around nonterminals -- TODO confirm.)
  20 \def\syntleft{\normalfont\itshape}
  21 \let\syntright\empty
  22
     % \codeface is the font for program text: sans serif.  It is used by
     % \ulitleft, the `prog' environment and the `describe' environment.
  23 \let\codeface\sffamily
  24
     % \ulitleft switches to the normal font and then \codeface; \ulitright
     % does nothing.  (Presumably the `syntax' package's hooks for literal
     % text -- TODO confirm.)
  25 \def\ulitleft{\normalfont\codeface}
  26 \let\ulitright\empty
  27
     % Make \listingsize a no-op.  (Presumably `sverb' uses it to choose the
     % type size of listings -- TODO confirm.)
  28 \let\listingsize\relax
  29
     % Use the \varepsilon glyph wherever the text says \epsilon.
  30 \let\epsilon\varepsilon
  31
     % Short `@'-commands for inline markup.  (\atdef presumably comes from
     % the `at' package loaded above -- TODO confirm.)  Most of them finish
     % by calling \@scripts, which picks up any subscripts or superscripts
     % that follow.
     %
     %   @<name>   nonterminal, set with \synt
     %   @"text"   literal, set with \lit*
     %   @`text'   literal, set with \lit
     %   @|text|   sans-serif text
  32 \atdef <#1>{\synt{#1}\@scripts}
  33 \atdef "#1"{\lit*{#1}\@scripts}
  34 \atdef `#1'{\lit{#1}\@scripts}
  35 \atdef |#1|{\textsf{#1}\@scripts}
     % \dbl@maybe<bracket>: remember the bracket in \@tempa, then peek at the
     % next token (into \@ch) without consuming it.
  36 \def\dbl@maybe#1{\let\@tempa#1\futurelet\@ch\dbl@maybe@i}
     % \dbl@maybe@i: enter maths mode if necessary.  If the peeked token is
     % the same bracket again, print two brackets with a negative thin space
     % (\!) between them; the \expandafter chain closes the conditional first,
     % and \@firstoftwo then keeps \@scripts and throws away the second
     % bracket from the input.  Otherwise print one bracket and call
     % \@scripts.
  37 \def\dbl@maybe@i{\m@maybe\ifx\@ch\@tempa\@tempa\!\@tempa%
  38   \expandafter\@firstoftwo\expandafter\@scripts%
  39   \else\@tempa\expandafter\@scripts\fi}
     % @[ and @] give a bracket, doubled when typed twice.  @{ @} @( @) give
     % the corresponding delimiter and @! gives a vertical bar, each set in
     % maths mode.
  40 \atdef [{\dbl@maybe[}
  41 \atdef ]{\dbl@maybe]}
  42 \atdef {{\m@maybe\{\@scripts}
  43 \atdef }{\m@maybe\}\@scripts}
  44 \atdef ({\m@maybe(\@scripts}
  45 \atdef ){\m@maybe)\@scripts}
  46 \atdef !{\m@maybe|\@scripts}
     % @to: remove any preceding space and print a long right arrow with a
     % quad of space on either side.
  47 \atdef to{\leavevmode\unskip\quad\m@maybe\longrightarrow\m@maybe@end\quad}
     % \m@maybe: if we are not already in maths mode, open it with `$' and
     % make \m@maybe@end the matching `$'.  Otherwise \m@maybe@end keeps its
     % default meaning of \relax, so calling it does nothing.
  48 \let\m@maybe@end\relax
  49 \def\m@maybe{\ifmmode\else$\let\m@maybe@end$\fi}
     % \@scripts: peek at the next token (into \@ch) and let \@scripts@i
     % decide what to do about it.
  50 \def\@scripts{\futurelet\@ch\@scripts@i}
  51
     % @;...\\ sets everything from the semicolon up to the next `\\' in
     % italics; both the `;' and the `\\' are put back.
  52 \atdef ;#1\\{\normalfont\itshape;#1\\}
  53
     % \@scripts@i: if the peeked token is `_' or `^', hand over to
     % \@scripts@ii; otherwise finish by calling \m@maybe@end.  The
     % \lccode/\lowercase trick turns the `~' written in the definition into
     % a `_' token carrying the category code of `~'.  (Presumably `_' is an
     % active character in the document body -- TODO confirm.)
  54 \begingroup\lccode`\~=`\_\lowercase{\endgroup
  55 \def\@scripts@i{\if1\ifx\@ch~1\else\ifx\@ch^1\else0\fi\fi%
  56   \expandafter\@scripts@ii\else\expandafter\m@maybe@end\fi}}
     % \@scripts@ii<script character><argument>: set the script, entering
     % maths mode first if necessary, and go round again for more scripts.
  57 \def\@scripts@ii#1#2{\m@maybe#1{#2}\@scripts}
  58
     % \Cplusplus prints `C++' with the plus signs pulled towards the `C' by
     % \p@ (one point in the LaTeX kernel).
     % \Csharp prints `C#'.
     % \man{<name>}{<section>} prints a manual-page reference: the name in
     % bold, followed by the section in parentheses.
  59 \def\Cplusplus{C\kern-\p@++}
  60 \def\Csharp{C\#}
  61 \def\man#1#2{\textbf{#1}(#2)}
  62
     % The `prog' environment: a program display in \codeface, set as a
     % `tabbing' inside a `quote', with \obeylines in force.  Inside it `\\'
     % prints a backslash.
     %
     % The \lccode line below must end with its backslash: the backslash and
     % the line end together name the end-of-line character, so \lowercase
     % turns the `~' written in \prog into that character.  \prog can then
     % redefine what a line end does once \obeylines is in force.
     %
     % NOTE: the global redefinition of the line-end character is not undone
     % by \endprog; only `\\' is restored.
  63 \begingroup\lccode`\~=`\
  64 \lowercase{
  65 \endgroup
  66 \def\prog{%
  67   \codeface%
  68   \quote%
  69   \let\old@nl\\% remember what \\ meant on entry
  70   \obeylines%
  71   \tabbing%
  72   \global\let~\\% line end := the meaning \\ has at this point
  73   \global\let\\\textbackslash% \\ now prints a backslash
  74 }
  75 \def\endprog{%
  76   \endtabbing%
  77   \global\let\\\old@nl% put back the saved meaning of \\
  78   \endquote%
  79 }}
  80
     % The `boxy' environment: sets its body in a minipage surrounded by a
     % ruled frame, with 1ex of space between frame and text and a \medskip
     % above and below.
     %
     % The optional argument is a heading, printed at the start of the body
     % in {\headfam\bfseries} and followed by a quad.  (\headfam is not
     % defined in this file; presumably the document class provides it --
     % TODO confirm.)  The default value \q@ means `no heading'.
     %
     % Nesting, outside in: \vbox{ \hrule \hbox{ \vrule \vbox{ \vskip
     % \hbox{ \hskip minipage \hskip } \vskip } \vrule } \hrule }.
  81 \newenvironment{boxy}[1][\q@]{%
  82   \dimen@\linewidth\advance\dimen@-1.2pt\advance\dimen@-2ex% minipage width
  83   \medskip%
  84   \vbox\bgroup\hrule\hbox\bgroup\vrule%
  85   \vbox\bgroup\vskip1ex\hbox\bgroup\hskip1ex\minipage\dimen@%
  86   \def\@temp{#1}\ifx\@temp\q@\else\leavevmode{\headfam\bfseries#1\quad}\fi%
  87 }{%
  88   \endminipage\hskip1ex\egroup\vskip1ex\egroup%
  89   \vrule\egroup\hrule\egroup%
  90   \medskip%
  91 }
  92
     % Categories of thing which the `describe' environment can describe.
     %
     % \definedescribecategory{<key>}{<text>} stores <text> in the macro named
     % `cat!<key>'.
  93 \def\definedescribecategory#1#2{\@namedef{cat!#1}{#2}}
     % \describecategoryname{<key>} prints the text stored for <key>, or <key>
     % itself if no such category has been defined.  It overwrites \@tempa.
  94 \def\describecategoryname#1{%
  95   \expandafter\let\expandafter\@tempa\csname cat!#1\endcsname%
  96   \ifx\@tempa\relax#1\else\@tempa\fi}
     % The predefined categories.
  97 \definedescribecategory{fun}{function}
  98 \definedescribecategory{gf}{generic function}
  99 \definedescribecategory{var}{variable}
 100 \definedescribecategory{const}{constant}
 101 \definedescribecategory{meth}{primary method}
 102 \definedescribecategory{ar-meth}{around-method}
 103 \definedescribecategory{be-meth}{before-method}
 104 \definedescribecategory{af-meth}{after-method}
 105 \definedescribecategory{cls}{class}
 106 \definedescribecategory{ty}{type}
 107 \definedescribecategory{mac}{macro}
 108
     % \q@ is a marker macro which expands to itself.  A macro whose body is
     % just `\q@' compares equal to it under \ifx, which is how the
     % environments in this file detect an omitted optional argument.
     %
     % The `describe' environment:
     %
     %   \begin{describe}[<name>]{<category>}{<synopsis>} ... \end{describe}
     %
     % <category> is a key as given to \definedescribecategory; its text is
     % printed in bold square brackets at the right-hand end of the heading
     % line.  <synopsis> is set in \codeface, as a `tabbing', on the left.
     % The body follows as the single item of a list with no right margin.
     %
     % A label `<category>:<name>' is defined, and also written out with
     % \message.  If <name> is omitted, the first space-delimited word of
     % <synopsis> is used instead.
 109 \def\q@{\q@}
 110 \newenvironment{describe}[3][\q@]{%
 111   \normalfont%
 112   \par\goodbreak%
 113   \vspace{\bigskipamount}%
 114   \setbox\z@\hbox{\bfseries[\describecategoryname{#2}]}% category tag
 115   \dimen@\linewidth\advance\dimen@-\wd\z@% width left over for the synopsis
 116   \def\@temp##1 ##2\q@{\message{#2:##1}\label{#2:##1}}%
     % Both calls of \@temp must end with `\q@', the delimiter of its second
     % argument; anything else makes it run away past the \fi.
 117   \def\@tempa{#1}\ifx\@tempa\q@\@temp#3 \q@\else\@temp{#1} \q@\fi%
 118   \edef\@temp{{\the\linewidth}{@{}p{\the\dimen@}%
 119       @{\extracolsep{\fill}}l@{\extracolsep{0pt}}}}%
 120   \noindent\csname tabular*\expandafter\endcsname\@temp%
 121   \tabbing\codeface#3\endtabbing&\unhbox\z@\\\endtabular%
 122 %  \@afterheading%
 123   \list{}{\rightmargin\z@}\item%
 124 }{%
 125   \endlist%
 126 }
 127
     % \push, for use inside `tabbing': set a tab stop one quad in from the
     % current position (\=), move the left margin to it (\+), and discard the
     % line itself (\kill).
 128 \def\push{\quad\=\+\kill}
 129
 130 \begin{document}
 131
 132 \maketitle
 133
 134 \include{sod-tut}
 135
 136 %%%--------------------------------------------------------------------------
 137 \chapter{Internals}
 138
 139 \section{Generated names}
 140
 141 The generated names for functions and objects related to a class are
 142 constructed systematically so as not to interfere with each other.  The rules
 143 on class, slot and message naming exist so as to ensure that the generated
 144 names don't collide with each other.
 145
 146 The following notation is used in this section.
 147 \begin{description}
 148 \item[@<class>] The full name of the `focus' class: the one for which we are
 149   generating names.
 150 \item[@<super-nick>] The nickname of a superclass.
 151 \item[@<head-nick>] The nickname of the chain-head class of the chain
 152   in question.
 153 \end{description}
 154
 155 \subsection{Instance layout}
 156
 157 %%%--------------------------------------------------------------------------
 158 \section{Syntax}
 159 \label{sec:syntax}
 160
 161 Fortunately, Sod is syntactically quite simple.  I've used a little slightly
 162 unusual notation in order to make the presentation easier to read.
 163 \begin{itemize}
 164 \item $\epsilon$ denotes the empty nonterminal:
 165   \begin{quote}
 166     $\epsilon$ ::=
 167   \end{quote}
 168 \item $[$@<item>$]$ means an optional @<item>:
 169   \begin{quote}
 170     \syntax{$[$<item>$]$ ::= $\epsilon$ | <item>}
 171   \end{quote}
 172 \item @<item>$^*$ means a sequence of zero or more @<item>s:
 173   \begin{quote}
 174     \syntax{<item>$^*$ ::= $\epsilon$ | <item>$^*$ <item>}
 175   \end{quote}
 176 \item @<item>$^+$ means a sequence of one or more @<item>s:
 177   \begin{quote}
 178     \syntax{<item>$^+$ ::= <item> <item>$^*$}
 179   \end{quote}
 180 \item @<item-list> means a sequence of one or more @<item>s separated
 181   by commas:
 182   \begin{quote}
 183     \syntax{<item-list> ::= <item> | <item-list> "," <item>}
 184   \end{quote}
 185 \end{itemize}
 186
 187 \subsection{Lexical syntax}
 188 \label{sec:syntax.lex}
 189
 190 Whitespace and comments are discarded.  The remaining characters are
 191 collected into tokens according to the following syntax.
 192
 193 \begin{grammar}
 194 <token> ::= <identifier>
 195 \alt <reserved-word>
 196 \alt <string-literal>
 197 \alt <char-literal>
 198 \alt <integer-literal>
 199 \alt <punctuation>
 200 \end{grammar}
 201
 202 This syntax is slightly ambiguous.  The following two rules serve to
 203 disambiguate:
 204 \begin{enumerate}
 205 \item Reserved words take precedence.  All @<reserved-word>s are
 206   syntactically @<identifier>s; Sod resolves the ambiguity in favour of
 207   @<reserved-word>.
 208 \item `Maximal munch'.  In other cases, at each stage we take the longest
 209   sequence of characters which could be a token.
 210 \end{enumerate}
 211
 212 \subsubsection{Identifiers} \label{sec:syntax.lex.id}
 213
 214 \begin{grammar}
 215 <identifier> ::= <id-start-char> <id-body-char>$^*$
 216
 217 <id-start-char> ::= <alpha-char> $|$ "_"
 218
 219 <id-body-char> ::= <id-start-char> $|$ <digit-char>
 220
 221 <alpha-char> ::= "A" $|$ "B" $|$ \dots\ $|$ "Z"
 222 \alt "a" $|$ "b" $|$ \dots\ $|$ "z"
 223 \alt <extended-alpha-char>
 224
 225 <digit-char> ::= "0" $|$ <nonzero-digit-char>
 226
 227 <nonzero-digit-char> ::= "1" $|$ "2" $| \cdots |$ "9"
 228 \end{grammar}
 229
 230 The precise definition of @<alpha-char> is left to the function
 231 \textsf{alpha-char-p} in the hosting Lisp system.  For portability,
 232 programmers are encouraged to limit themselves to the standard ASCII letters.
 233
 234 \subsubsection{Reserved words} \label{sec:syntax.lex.reserved}
 235
 236 \begin{grammar}
 237 <reserved-word> ::=
 238 "char" $|$ "class" $|$ "code" $|$ "const" $|$ "double" $|$ "enum" $|$
 239 "extern" $|$ "float" $|$ "import" $|$ "int" $|$ "lisp" $|$ "load" $|$ "long"
 240 $|$ "restrict" $|$ "short" $|$ "signed" $|$ "struct" $|$ "typename" $|$
 241 "union" $|$ "unsigned" $|$ "void" $|$ "volatile"
 242 \end{grammar}
 243
 244 Many of these are borrowed from~C; however, some (e.g., @"import" and
 245 @"lisp") are not, and some C reserved words are not reserved (e.g.,
 246 @"static").
 247
 248 \subsubsection{String and character literals} \label{sec:syntax.lex.string}
 249
 250 \begin{grammar}
 251 <string-literal> ::= "\"" <string-literal-char>$^*$ "\""
 252
 253 <char-literal> ::= "'" <char-literal-char> "'"
 254
 255 <string-literal-char> ::= any character other than "\\" or "\""
 256 \alt "\\" <char>
 257
 258 <char-literal-char> ::= any character other than "\\" or "'"
 259 \alt "\\" <char>
 260
 261 <char> ::= any single character
 262 \end{grammar}
 263
 264 The syntax for string and character literals differs from~C.  In particular,
 265 escape sequences such as @`\textbackslash n' are not recognized.  The use
 266 of string and character literals in Sod, outside of C~fragments, is limited,
 267 and the simple syntax seems adequate.  For the sake of future compatibility,
 268 the use of character sequences which resemble C escape sequences is
 269 discouraged.
 270
 271 \subsubsection{Integer literals} \label{sec:syntax.lex.int}
 272
 273 \begin{grammar}
 274 <integer-literal> ::= <decimal-integer>
 275 \alt <binary-integer>
 276 \alt <octal-integer>
 277 \alt <hex-integer>
 278
 279 <decimal-integer> ::= <nonzero-digit-char> <digit-char>$^*$
 280
 281 <binary-integer> ::= "0" $($"b"$|$"B"$)$ <binary-digit-char>$^+$
 282
 283 <binary-digit-char> ::= "0" $|$ "1"
 284
 285 <octal-integer> ::= "0" $[$"o"$|$"O"$]$ <octal-digit-char>$^+$
 286
 287 <octal-digit-char> ::= "0" $|$ "1" $| \cdots |$ "7"
 288
 289 <hex-integer> ::= "0" $($"x"$|$"X"$)$ <hex-digit-char>$^+$
 290
 291 <hex-digit-char> ::= <digit-char>
 292 \alt "A" $|$ "B" $|$ "C" $|$ "D" $|$ "E" $|$ "F"
 293 \alt "a" $|$ "b" $|$ "c" $|$ "d" $|$ "e" $|$ "f"
 294 \end{grammar}
 295
 296 Sod understands only integers, not floating-point numbers; its integer syntax
 297 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
 298 binary.  However, length and signedness indicators are not permitted.
 299
 300 \subsubsection{Punctuation} \label{sec:syntax.lex.punct}
 301
 302 \begin{grammar}
 303 <punctuation> ::= any character other than "\"" or "'"
 304 \end{grammar}
 305
 306 Due to the `maximal munch' rule, @<punctuation> tokens cannot be
 307 alphanumeric.
 308
 309 \subsubsection{Comments} \label{sec:lex-comment}
 310
 311 \begin{grammar}
 312 <comment> ::= <block-comment>
 313 \alt <line-comment>
 314
 315 <block-comment> ::=
 316   "/*"
 317   <not-star>$^*$ $($<star>$^+$ <not-star-or-slash> <not-star>$^*)^*$
 318   <star>$^*$
 319   "*/"
 320
 321 <star> ::= "*"
 322
 323 <not-star> ::= any character other than "*"
 324
 325 <not-star-or-slash> ::= any character other than "*" or  "/"
 326
 327 <line-comment> ::= "//" <not-newline>$^*$ <newline>
 328
 329 <newline> ::= a newline character
 330
 331 <not-newline> ::= any character other than newline
 332 \end{grammar}
 333
 334 Comments are exactly as in C99: both traditional block comments `\texttt{/*}
 335 \dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are
 336 permitted and ignored.
 337
 338 \subsection{Special nonterminals}
 339 \label{sec:special-nonterminals}
 340
 341 Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
 342 two special nonterminals occur in the module syntax.
 343
 344 \subsubsection{S-expressions} \label{sec:syntax-sexp}
 345
 346 \begin{grammar}
 347 <s-expression> ::= an S-expression, as parsed by the Lisp reader
 348 \end{grammar}
 349
 350 When an S-expression is expected, the Sod parser simply calls the host Lisp
 351 system's \textsf{read} function.  Sod modules are permitted to modify the
 352 read table to extend the S-expression syntax.
 353
 354 S-expressions are self-delimiting, so no end-marker is needed.
 355
 356 \subsubsection{C fragments} \label{sec:syntax.lex.cfrag}
 357
 358 \begin{grammar}
 359 <c-fragment> ::= a sequence of C tokens, with matching brackets
 360 \end{grammar}
 361
 362 Sequences of C code are simply stored and written to the output unchanged
 363 during translation.  They are read using a simple scanner which nonetheless
 364 understands C comments and string and character literals.
 365
 366 A C fragment is terminated by one of a small number of delimiter characters
 367 determined by the immediately surrounding context -- usually a closing brace
 368 or bracket.  The first such delimiter character which is not enclosed in
 369 brackets, braces or parentheses ends the fragment.
 370
 371 \subsection{Module syntax} \label{sec:syntax-module}
 372
 373 \begin{grammar}
 374 <module> ::= <definition>$^*$
 375
 376 <definition> ::= <import-definition>
 377 \alt <load-definition>
 378 \alt <lisp-definition>
 379 \alt <code-definition>
 380 \alt <typename-definition>
 381 \alt <class-definition>
 382 \end{grammar}
 383
 384 A module is the top-level syntactic item.  A module consists of a sequence of
 385 definitions.
 386
 387 \subsection{Simple definitions} \label{sec:syntax.defs}
 388
 389 \subsubsection{Importing modules} \label{sec:syntax.defs.import}
 390
 391 \begin{grammar}
 392 <import-definition> ::= "import" <string> ";"
 393 \end{grammar}
 394
 395 The module named @<string> is processed and its definitions made available.
 396
 397 A search is made for a module source file as follows.
 398 \begin{itemize}
 399 \item The module name @<string> is converted into a filename by appending
 400   @`.sod', if it has no extension already.\footnote{%
 401     Technically, what happens is \textsf{(merge-pathnames name (make-pathname
 402     :type "SOD" :case :common))}, so exactly what this means varies
 403     according to the host system.} %
 404 \item The file is looked for relative to the directory containing the
 405   importing module.
 406 \item If that fails, then the file is looked for in each directory on the
 407   module search path in turn.
 408 \item If the file still isn't found, an error is reported and the import
 409   fails.
 410 \end{itemize}
 411 At this point, if the file has previously been imported, nothing further
 412 happens.\footnote{%
 413   This check is done using \textsf{truename}, so it should see through simple
 414   tricks like symbolic links.  However, it may be confused by fancy things
 415   like bind mounts and so on.} %
 416
 417 Recursive imports, either direct or indirect, are an error.
 418
 419 \subsubsection{Loading extensions} \label{sec:syntax.defs.load}
 420
 421 \begin{grammar}
 422 <load-definition> ::= "load" <string> ";"
 423 \end{grammar}
 424
 425 The Lisp file named @<string> is loaded and evaluated.
 426
 427 A search is made for a Lisp source file as follows.
 428 \begin{itemize}
 429 \item The name @<string> is converted into a filename by appending @`.lisp',
 430   if it has no extension already.\footnote{%
 431     Technically, what happens is \textsf{(merge-pathnames name (make-pathname
 432     :type "LISP" :case :common))}, so exactly what this means varies
 433     according to the host system.} %
 434 \item A search is then made in the same manner as for module imports
 435   (\xref{sec:syntax.defs.import}).
 436 \end{itemize}
 437 If the file is found, it is loaded using the host Lisp's \textsf{load}
 438 function.
 439
 440 Note that Sod doesn't attempt to compile Lisp files, or even to look for
 441 existing compiled files.  The right way to package a substantial extension to
 442 the Sod translator is to provide the extension as a standard ASDF system (or
 443 similar) and leave a dropping @"foo-extension.lisp" in the module path saying
 444 something like
 445 \begin{listing}
 446 (asdf:operate 'asdf:load-op :foo-extension)
 447 \end{listing}
 448 which will arrange for the extension to be compiled if necessary.
 449
 450 (This approach means that the language doesn't need to depend on any
 451 particular system definition facility.  It's bad enough already that it
 452 depends on Common Lisp.)
 453
 454 \subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp}
 455
 456 \begin{grammar}
 457 <lisp-definition> ::= "lisp" <s-expression> ";"
 458 \end{grammar}
 459
 460 The @<s-expression> is evaluated immediately.  It can do anything it likes.
 461
 462 \textbf{Warning!}  This means that hostile Sod modules are a security hazard.
 463 Lisp code can read and write files, start other programs, and make network
 464 connections.  Don't install Sod modules from sources that you don't
 465 trust.\footnote{%
 466   Presumably you were going to run the corresponding code at some point, so
 467   this isn't as unusually scary as it sounds.  But please be careful.} %
 468
 469 \subsubsection{Declaring type names} \label{sec:syntax.defs.typename}
 470
 471 \begin{grammar}
 472 <typename-definition> ::=
 473   "typename" <identifier-list> ";"
 474 \end{grammar}
 475
 476 Each @<identifier> is declared as naming a C type.  This is important because
 477 the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
 478 done by distinguishing type names from other identifiers.
 479
 480 Don't declare class names using @"typename"; use @"class" forward
 481 declarations instead.
 482
 483 \subsection{Literal code} \label{sec:syntax-code}
 484
 485 \begin{grammar}
 486 <code-definition> ::=
 487   "code" <identifier> ":" <identifier> $[$<constraints>$]$
 488   "{" <c-fragment> "}"
 489
 490 <constraints> ::= "[" <constraint-list> "]"
 491
 492 <constraint> ::= <identifier>$^+$
 493 \end{grammar}
 494
 495 The @<c-fragment> will be output unchanged to one of the output files.
 496
 497 The first @<identifier> is the symbolic name of an output file.  Predefined
 498 output file names are @"c" and @"h", which are the implementation code and
 499 header file respectively; other output files can be defined by extensions.
 500
 501 The second @<identifier> provides a name for the output item.  Several C
 502 fragments can have the same name: they will be concatenated together in the
 503 order in which they were encountered.
 504
 505 The @<constraints> provide a means for specifying where in the output file
 506 the output item should appear.  (Note the two kinds of square brackets shown
 507 in the syntax: square brackets must appear around the constraints if they are
 508 present, but the constraints may be omitted.)  Each comma-separated @<constraint>
 509 is a sequence of identifiers naming output items, and indicates that the
 510 output items must appear in the order given -- though the translator is free
 511 to insert additional items in between them.  (The particular output items
 512 needn't be defined already -- indeed, they needn't be defined ever.)
 513
 514 There is a predefined output item @"includes" in both the @"c" and @"h"
 515 output files which is a suitable place for inserting @"\#include"
 516 preprocessor directives in order to declare types and functions for use
 517 elsewhere in the generated output files.
 518
 519 \subsection{Property sets} \label{sec:syntax.propset}
 520
 521 \begin{grammar}
 522 <properties> ::= "[" <property-list> "]"
 523
 524 <property> ::= <identifier> "=" <expression>
 525 \end{grammar}
 526
 527 Property sets are a means for associating miscellaneous information with
 528 classes and related items.  By using property sets, additional information
 529 can be passed to extensions without the need to introduce idiosyncratic
 530 syntax.
 531
 532 A property has a name, given as an @<identifier>, and a value computed by
 533 evaluating an @<expression>.  The value can be one of a number of types,
 534 though the only operators currently defined act on integer values only.
 535
 536 \subsubsection{The expression evaluator} \label{sec:syntax.propset.expr}
 537
 538 \begin{grammar}
 539 <expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term>
 540
 541 <term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
 542
 543 <factor> ::= <primary> | "+" <factor> | "-" <factor>
 544
 545 <primary> ::=
 546      <integer-literal> | <string-literal> | <char-literal> | <identifier>
 547 \alt "?" <s-expression>
 548 \alt "(" <expression> ")"
 549 \end{grammar}
 550
 551 The arithmetic expression syntax is simple and standard; there are currently
 552 no bitwise, logical, or comparison operators.
 553
 554 A @<primary> expression may be a literal or an identifier.  Note that
 555 identifiers stand for themselves: they \emph{do not} denote values.  For more
 556 fancy expressions, the syntax
 557 \begin{quote}
 558   @"?" @<s-expression>
 559 \end{quote}
 560 causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
 561 function.
 562 %%% FIXME crossref to extension docs
 563
 564 \subsection{C types} \label{sec:syntax.c-types}
 565
 566 Sod's syntax for C types closely mirrors the standard C syntax.  A C type has
 567 two parts: a sequence of @<declaration-specifier>s and a @<declarator>.  In
 568 Sod, a type must contain at least one @<declaration-specifier> (i.e.,
 569 `implicit @"int"' is forbidden), and storage-class specifiers are not
 570 recognized.
 571
 572 \subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec}
 573
 574 \begin{grammar}
 575 <declaration-specifier> ::= <type-name>
 576 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
 577 \alt "void" | "char" | "int" | "float" | "double"
 578 \alt "short" | "long"
 579 \alt "signed" | "unsigned"
 580 \alt <qualifier>
 581
 582 <qualifier> ::= "const" | "volatile" | "restrict"
 583
 584 <type-name> ::= <identifier>
 585 \end{grammar}
 586
 587 A @<type-name> is an identifier which has been declared as being a type name,
 588 using the @"typename" or @"class" definitions.
 589
 590 Declaration specifiers may appear in any order.  However, not all
 591 combinations are permitted.  The declaration specifiers in a type must consist
 592 of zero or more @<qualifier>s, and one of the following, up to reordering.
 593 \begin{itemize}
 594 \item @<type-name>
 595 \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
 596 \item @"void"
 597 \item @"char", @"unsigned char", @"signed char"
 598 \item @"short", @"unsigned short", @"signed short"
 599 \item @"short int", @"unsigned short int", @"signed short int"
 600 \item @"int", @"unsigned int", @"signed int", @"unsigned", @"signed"
 601 \item @"long", @"unsigned long", @"signed long"
 602 \item @"long int", @"unsigned long int", @"signed long int"
 603 \item @"long long", @"unsigned long long", @"signed long long"
 604 \item @"long long int", @"unsigned long long int", @"signed long long int"
 605 \item @"float", @"double", @"long double"
 606 \end{itemize}
 607 All of these have their usual C meanings.
 608
 609 \subsubsection{Declarators} \label{sec:syntax.c-types.declarator}
 610
 611 \begin{grammar}
 612 <declarator> ::=
 613   <pointer>$^*$ <inner-declarator> <declarator-suffix>$^*$
 614
 615 <inner-declarator> ::= <identifier> | <qualified-identifier>
 616 \alt "(" <declarator> ")"
 617
 618 <qualified-identifier> ::= <identifier> "." <identifier>
 619
 620 <pointer> ::= "*" <qualifier>$^*$
 621
 622 <declarator-suffix> ::= "[" <c-fragment> "]"
 623 \alt "(" <arguments> ")"
 624
 625 <arguments> ::= $\epsilon$ | "..."
 626 \alt <argument-list> $[$"," "..."$]$
 627
 628 <argument> ::= <declaration-specifier>$^+$ <argument-declarator>
 629
 630 <argument-declarator> ::= <declarator> | $[$<abstract-declarator>$]$
 631
 632 <abstract-declarator> ::=
 633   <pointer>$^+$ | <pointer>$^*$ <inner-abstract-declarator>
 634
 635 <inner-abstract-declarator> ::= "(" <abstract-declarator> ")"
 636 \alt $[$<inner-abstract-declarator>$]$ <declarator-suffix>$^+$
 637 \end{grammar}
 638
 639 The declarator syntax is taken from C, but with some differences.
 640 \begin{itemize}
 641 \item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
 642   closing square bracket.  This allows array dimensions to contain arbitrary
 643   constant expressions.
 644 \item A declarator may have either a single @<identifier> at its centre or a
 645   pair of @<identifier>s separated by a @`.'; this is used to refer to
 646   slots or messages defined in superclasses.
 647 \end{itemize}
 648 The remaining differences are (I hope) a matter of presentation rather than
 649 substance.
 650
 651 \subsection{Defining classes} \label{sec:syntax.class}
 652
 653 \begin{grammar}
 654 <class-definition> ::= <class-forward-declaration>
 655 \alt <full-class-definition>
 656 \end{grammar}
 657
 658 \subsubsection{Forward declarations} \label{sec:class.class.forward}
 659
 660 \begin{grammar}
 661 <class-forward-declaration> ::= "class" <identifier> ";"
 662 \end{grammar}
 663
 664 A @<class-forward-declaration> informs Sod that an @<identifier> will be used
 665 to name a class which is currently undefined.  Forward declarations are
 666 necessary in order to resolve certain kinds of circularity.  For example,
 667 \begin{listing}
 668 class Sub;
 669
 670 class Super : SodObject {
 671   Sub *sub;
 672 };
 673
 674 class Sub : Super {
 675   /* ... */
 676 };
 677 \end{listing}
 678
 679 \subsubsection{Full class definitions} \label{sec:class.class.full}
 680
 681 \begin{grammar}
 682 <full-class-definition> ::=
 683   $[$<properties>$]$
 684   "class" <identifier> ":" <identifier-list>
 685   "{" <class-item>$^*$ "}"
 686
 687 <class-item> ::= <slot-item> ";"
 688 \alt <message-item>
 689 \alt <method-item>
 690 \alt  <initializer-item> ";"
 691 \end{grammar}
 692
 693 A full class definition provides a complete description of a class.
 694
 695 The first @<identifier> gives the name of the class.  It is an error to
 696 give the name of an existing class (other than a forward-referenced class),
 697 or an existing type name.  It is conventional to give classes `MixedCase'
 698 names, to distinguish them from other kinds of identifiers.
 699
 700 The @<identifier-list> names the direct superclasses for the new class.  It
 701 is an error if any of these @<identifier>s does not name a defined class.
 702
 703 The @<properties> provide additional information.  The standard class
 704 properties are as follows.
 705 \begin{description}
 706 \item[@"lisp_class"] The name of the Lisp class to use within the translator
 707   to represent this class.  The property value must be an identifier; the
 708   default is @"sod_class".  Extensions may define classes with additional
 709   behaviour, and may recognize additional class properties.
 710 \item[@"metaclass"] The name of the Sod metaclass for this class.  In the
 711   generated code, a class is itself an instance of another class -- its
 712   \emph{metaclass}.  The metaclass defines which slots the class will have,
 713   which messages it will respond to, and what its behaviour will be when it
 714   receives them.  The property value must be an identifier naming a defined
 715   subclass of @"SodClass".  The default metaclass is @"SodClass".
 716   %%% FIXME xref to theory
 717 \item[@"nick"] A nickname for the class, to be used to distinguish it from
 718   other classes in various limited contexts.  The property value must be an
 719   identifier; the default is constructed by forcing the class name to
 720   lower-case.
 721 \end{description}
 722
 723 The class body consists of a sequence of @<class-item>s enclosed in braces.
 724 These items are discussed in the following sections.
 725
 726 \subsubsection{Slot items} \label{sec:syntax.class.slot}
 727
 728 \begin{grammar}
 729 <slot-item> ::=
 730   $[$<properties>$]$
 731   <declaration-specifier>$^+$ <init-declarator-list>
 732
 733 <init-declarator> ::= <declarator> $[$"=" <initializer>$]$
 734 \end{grammar}
 735
 736 A @<slot-item> defines one or more slots.  All instances of the class and any
 737 subclass will contain these slot, with the names and types given by the
 738 @<declaration-specifiers> and the @<declarators>.  Slot declarators may not
 739 contain qualified identifiers.
 740
 741 It is not possible to declare a slot with function type: such an item is
 742 interpreted as being a @<message-item> or @<method-item>.  Pointers to
 743 functions are fine.
 744
 745 An @<initializer>, if present, is treated as if a separate
 746 @<initializer-item> containing the slot name and initializer were present.
 747 For example,
 748 \begin{listing}
 749 [nick = eg]
 750 class Example : Super {
 751   int foo = 17;
 752 };
 753 \end{listing}
 754 means the same as
 755 \begin{listing}
 756 [nick = eg]
 757 class Example : Super {
 758   int foo;
 759   eg.foo = 17;
 760 };
 761 \end{listing}
 762
 763 \subsubsection{Initializer items} \label{sec:syntax.class.init}
 764
 765 \begin{grammar}
 766 <initializer-item> ::= $[$"class"$]$ <slot-initializer-list>
 767
 768 <slot-initializer> ::= <qualified-identifier> "=" <initializer>
 769
 770 <initializer> ::= "{" <c-fragment> "}" | <c-fragment>
 771 \end{grammar}
 772
 773 An @<initializer-item> provides an initial value for one or more slots.  If
 774 prefixed by @"class", then the initial values are for class slots (i.e.,
 775 slots of the class object itself); otherwise they are for instance slots.
 776
 777 The first component of the @<qualified-identifier> must be the nickname of
 778 one of the class's superclasses (including itself); the second must be the
 779 name of a slot defined in that superclass.
 780
 781 The initializer has one of two forms.
 782 \begin{itemize}
 783 \item A @<c-fragment> enclosed in braces denotes an aggregate initializer.
 784   This is suitable for initializing structure, union or array slots.
 785 \item A @<c-fragment> \emph{not} beginning with an open brace is a `bare'
 786   initializer, and continues until the next @`,' or @`;' which is not within
 787   nested brackets.  Bare initializers are suitable for initializing scalar
 788   slots, such as pointers or integers, and strings.
 789 \end{itemize}
 790
 791 \subsubsection{Message items} \label{sec:syntax.class.message}
 792
 793 \begin{grammar}
 794 <message-item> ::=
 795   $[$<properties>$]$
 796   <declaration-specifier>$^+$ <declarator> $[$<method-body>$]$
 797 \end{grammar}
 798
 799 \subsubsection{Method items} \label{sec:syntax.class.method}
 800
 801 \begin{grammar}
 802 <method-item> ::=
 803   $[$<properties>$]$
 804   <declaration-specifier>$^+$ <declarator> <method-body>
 805
 806 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
 807 \end{grammar}
 808
 809 %%%--------------------------------------------------------------------------
 810 \section{Class objects}
 811
 812 \begin{listing}
 813 typedef struct SodClass__ichain_obj SodClass;
 814
 815 struct sod_chain {
 816   size_t n_classes;                     /* Number of classes in chain */
 817   const SodClass *const *classes;       /* Vector of classes, head first */
 818   size_t off_ichain;                    /* Offset of ichain from instance base */
 819   const struct sod_vtable *vt;          /* Vtable pointer for chain */
 820   size_t ichainsz;                      /* Size of the ichain structure */
 821 };
 822
 823 struct sod_vtable {
 824   SodClass *_class;                     /* Pointer to instance's class */
 825   size_t _base;                         /* Offset to instance base */
 826 };
 827
 828 struct SodClass__islots {
 829
 830   /* Basic information */
 831   const char *name;                     /* The class's name as a string */
 832   const char *nick;                     /* The nickname as a string */
 833
 834   /* Instance allocation and initialization */
 835   size_t instsz;                        /* Instance layout size in bytes */
 836   void *(*imprint)(void *);             /* Stamp instance with vtable ptrs */
 837   void *(*init)(void *);                /* Initialize instance */
 838
 839   /* Superclass structure */
 840   size_t n_supers;                      /* Number of direct superclasses */
 841   const SodClass *const *supers;        /* Vector of direct superclasses */
 842   size_t n_cpl;                         /* Length of class precedence list */
 843   const SodClass *const *cpl;           /* Vector for class precedence list */
 844
 845   /* Chain structure */
 846   const SodClass *link;                 /* Link to next class in chain */
 847   const SodClass *head;                 /* Pointer to head of chain */
 848   size_t level;                         /* Index of class in its chain */
 849   size_t n_chains;                      /* Number of superclass chains */
 850   const struct sod_chain *chains;       /* Vector of chain structures */
 851
 852   /* Layout */
 853   size_t off_islots;                    /* Offset of islots from ichain base */
 854   size_t islotsz;                       /* Size of instance slots */
 855 };
 856
 857 struct SodClass__ichain_obj {
 858   const struct SodClass__vt_obj *_vt;   /* Vtable pointer */
 859   struct SodClass__islots cls;          /* The slots defined by SodClass */
 860 };
 861
 862 struct sod_instance {
 863   struct sod_vtable *_vt;
 864 };
 865 \end{listing}
 866
 867 \begin{listing}
 868 void *sod_convert(const SodClass *cls, const void *obj)
 869 {
 870   const struct sod_instance *inst = obj;
 871   const SodClass *real = inst->_vt->_class;
 872   const struct sod_chain *chain;
 873   size_t i, index;
 874   /* Locate cls in the chains of obj's class; return null if absent. */
 875   for (i = 0; i < real->cls.n_chains; i++) {
 876     chain = &real->cls.chains[i];
 877     if (chain->classes[0] == cls->cls.head) {
 878       index = cls->cls.level;           /* Position of cls in its chain */
 879       if (index < chain->n_classes && chain->classes[index] == cls)
 880         return ((char *)obj - inst->_vt->_base + chain->off_ichain);
 881       else
 882         return (0);
 883     }
 884   }
 885   return (0);
 886 }
 887 \end{listing}
 888
 889 %%%--------------------------------------------------------------------------
 890 \section{Classes}
 891
 892 \subsection{Classes and superclasses}
 893
 894 A @<full-class-definition> must list one or more existing classes to be the
 895 \emph{direct superclasses} for the new class being defined.  We make the
 896 following definitions.
 897 \begin{itemize}
 898 \item The \emph{superclasses} of a class consist of the class itself together
 899   with the superclasses of its direct superclasses.
 900 \item The \emph{proper superclasses} of a class are its superclasses other
 901   than itself.
 902 \item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
 903   \emph{subclass} of $C$.
 904 \end{itemize}
 905 The predefined class @|SodObject| has no direct superclasses; it is unique in
 906 this respect.  All classes are subclasses of @|SodObject|.
 907
 908 \subsection{The class precedence list}
 909
 910 Let $C$ be a class.  The superclasses of $C$ form a directed graph, with an
 911 edge from each class to each of its direct superclasses.  This is the
 912 \emph{superclass graph of $C$}.
 913
 914 In order to resolve inheritance of items, we define a \emph{class precedence
 915   list} (or CPL) for each class, which imposes a total order on that class's
 916 superclasses.  The default algorithm for computing the CPL is the \emph{C3}
 917 algorithm~\cite{fixme-c3}, though extensions may implement other algorithms.
 918
 919 The default algorithm works as follows.  Let $C$ be the class whose CPL we
 920 are to compute.  Let $X$ and $Y$ be two of $C$'s superclasses.
 921 \begin{itemize}
 922 \item $C$ must appear first in the CPL.
 923 \item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
 924   superclasses, then $X$ appears before $Y$ in $C$'s CPL.
 925 \item If the above rules don't suffice to order $X$ and $Y$, then whichever
 926   of $X$ and $Y$ has a subclass which appears further left in the list of
 927   $C$'s direct superclasses will appear earlier in the CPL.
 928 \end{itemize}
 929 This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
 930 superclasses of the same direct superclass of $C$ then that direct
 931 superclass's CPL will order $X$ and $Y$.
 932
 933 We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
 934 $X$ is earlier than $Y$ in $C$'s class precedence list.  If $C$ is clear from
 935 context then we omit it, saying simply that $X$ is more specific than $Y$.
 936
 937 \subsection{Instances and metaclasses}
 938
 939 A class defines the structure and behaviour of its \emph{instances}: run-time
 940 objects created (possibly) dynamically.  An instance is an instance of only
 941 one class, though structurally it may be used in place of an instance of any
 942 of that class's superclasses.  It is possible, with care, to change the class
 943 of an instance at run-time.
 944
 945 Classes are themselves represented as instances -- called \emph{class
 946   objects} -- in the running program.  Being instances, they have a class,
 947 called the \emph{metaclass}.  The metaclass defines the structure and
 948 behaviour of the class object.
 949
 950 The predefined class @|SodClass| is the default metaclass for new classes.
 951 @|SodClass| has @|SodObject| as its only direct superclass.  @|SodClass| is
 952 its own metaclass.
 953
 954 \subsection{Items and inheritance}
 955
 956 A class definition also declares \emph{slots}, \emph{messages},
 957 \emph{initializers} and \emph{methods} -- collectively referred to as
 958 \emph{items}.  In addition to the items declared in the class definition --
 959 the class's \emph{direct items} -- a class also \emph{inherits} items from
 960 its superclasses.
 961
 962 The precise rules for item inheritance vary according to the kinds of items
 963 involved.
 964
 965 Some object systems have a notion of `repeated inheritance': if there are
 966 multiple paths in the superclass graph from a class to one of its
 967 superclasses then items defined in that superclass may appear duplicated in
 968 the subclass.  Sod does not have this notion.
 969
 970 \subsubsection{Slots}
 971 A \emph{slot} is a unit of state.  In other object systems, slots may be
 972 called `fields', `member variables', or `instance variables'.
 973
 974 A slot has a \emph{name} and a \emph{type}.  The name serves only to
 975 distinguish the slot from other direct slots defined by the same class.  A
 976 class inherits all of its proper superclasses' slots.  Slots inherited from
 977 superclasses do not conflict with each other or with direct slots, even if
 978 they have the same names.
 979
 980 At run-time, each instance of the class holds a separate value for each slot,
 981 whether direct or inherited.  Changing the value of an instance's slot
 982 doesn't affect other instances.
 983
 984 \subsubsection{Initializers}
 985 Mumble.
 986
 987 \subsubsection{Messages}
 988 A \emph{message} is the stimulus for behaviour.  In Sod, a class must define,
 989 statically, the name and format of the messages it is able to receive and the
 990 values it will return in reply.  In this respect, messages are similar to
 991 `abstract member functions' or `interface member functions' in other object
 992 systems.
 993
 994 Like a slot, a message has a \emph{name} and a \emph{type}.  Again, the name
 995 serves only to distinguish the message from other direct messages defined by
 996 the same class.  Messages inherited from superclasses do not conflict with
 997 each other or with direct messages, even if they have the same name.
 998
 999 At run-time, one sends a message to an instance by invoking a function
1000 obtained from the instance's \emph{vtable}; see \xref{sec:fixme-vtable}.
1001
1002 \subsubsection{Methods}
1003 A \emph{method} is a unit of behaviour.  In other object systems, methods may
1004 be called `member functions'.
1005
1006 A method is associated with a message.  When a message is received by an
1007 instance, all of the methods associated with that message on the instance's
1008 class or any of its superclasses are \emph{applicable}.  The details of how
1009 the applicable methods are invoked are described fully in
1010 \xref{sec:fixme-method-combination}.
1011
1012 \subsection{Chains and instance layout}
1013
1014 \include{sod-backg}
1015 \include{sod-protocol}
1016
1017 \end{document}
1018 \f
1019 %%% Local variables:
1020 %%% mode: LaTeX
1021 %%% TeX-PDF-mode: t
1022 %%% End: