chiark - git - mdw - sod/blob - doc/sod.tex

   1 \documentclass[noarticle]{strayman}
   2
   3 \usepackage[T1]{fontenc}
   4 \usepackage[utf8]{inputenc}
   5 \usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts}
   6 \usepackage{syntax}
   7 \usepackage{sverb}
   8 \usepackage{mdwtab}
   9 \usepackage{footnote}
  10 \usepackage{at}
  11 \usepackage{mdwref}
  12
  13 \title{A Sensible Object Design for C}
  14 \author{Mark Wooding}
  15
  16 \makeatletter
  17
  %% Show (practically) unlimited macro context in TeX error messages.
  18 \errorcontextlines999
  19
  %% Fonts for the grammar notation: nonterminals in italic, literals in the
  %% code face.  NOTE(review): \syntleft/\syntright and \ulitleft/\ulitright
  %% are presumably the delimiter hooks of the `syntax' package -- confirm.
  20 \def\syntleft{\normalfont\itshape}
  21 \let\syntright\empty
  22
  %% The face used for code; `prog' and `describe' below select it too.
  23 \let\codeface\sffamily
  24
  25 \def\ulitleft{\normalfont\codeface}
  26 \let\ulitright\empty
  27
  %% NOTE(review): presumably the size hook of the `listing' environment
  %% (sverb) -- confirm.  Here it is made a no-op.
  28 \let\listingsize\relax
  29
  %% Wherever \epsilon is written, typeset \varepsilon instead.
  30 \let\epsilon\varepsilon
  31
  %% `@'-shorthands for the grammar notation.  \atdef comes from the `at'
  %% package loaded above; the uses later in this file show the resulting
  %% input syntax (@<item>, @"import", @`.sod', @[...@], ...).  Each shorthand
  %% ends with \@scripts so that a following `^' or `_' script is picked up.
  %%
  %%   @<nt>    nonterminal, via \synt         @"lit"   literal, via \lit*
  %%   @`lit'   literal, via \lit              @|text|  sans-serif text
  32 \atdef <#1>{\synt{#1}\@scripts}
  33 \atdef "#1"{\lit*{#1}\@scripts}
  34 \atdef `#1'{\lit{#1}\@scripts}
  35 \atdef |#1|{\textsf{#1}\@scripts}
  %% \dbl@maybe<bracket>: typeset the bracket in maths.  If the very next
  %% token is the same bracket, typeset a closed-up pair (with \! between the
  %% two) and swallow that token.  Either way, continue with \@scripts.
  36 \def\dbl@maybe#1{\let\@tempa#1\futurelet\@ch\dbl@maybe@i}
  37 \def\dbl@maybe@i{\m@maybe\ifx\@ch\@tempa\@tempa\!\@tempa%
  38   \expandafter\@firstoftwo\expandafter\@scripts%
  39   \else\@tempa\expandafter\@scripts\fi}
  40 \atdef [{\dbl@maybe[}
  41 \atdef ]{\dbl@maybe]}
  %% Braces, parentheses and the alternation bar (@!), set in maths.
  42 \atdef {{\m@maybe\{\@scripts}
  43 \atdef }{\m@maybe\}\@scripts}
  44 \atdef ({\m@maybe(\@scripts}
  45 \atdef ){\m@maybe)\@scripts}
  46 \atdef !{\m@maybe|\@scripts}
  %% @to: drop any preceding space and set a long right arrow with a quad of
  %% space on either side.
  47 \atdef to{\leavevmode\unskip\quad\m@maybe\longrightarrow\m@maybe@end\quad}
  %% \m@maybe enters maths if we are not already there, and in that case makes
  %% \m@maybe@end the closing `$'; otherwise \m@maybe@end stays \relax.
  48 \let\m@maybe@end\relax
  49 \def\m@maybe{\ifmmode\else$\let\m@maybe@end$\fi}
  %% \@scripts peeks at the next token and hands over to \@scripts@i.
  50 \def\@scripts{\futurelet\@ch\@scripts@i}
  51
  %% @;...\\ : switch to italics for the `;' and the text following it, up to
  %% the `\\' which ends the line.
  52 \atdef ;#1\\{\normalfont\itshape;#1\\}
  %% On entry to `grammar', redefine \textbar as a maths vertical bar in a box.
  53 \let\@@grammar\grammar
  54 \def\grammar{\def\textbar{\hbox{$|$}}\@@grammar}
  55
  %% \@scripts@i: if the peeked token is `_' or `^' go on to \@scripts@ii;
  %% otherwise finish with \m@maybe@end.  The \lccode/\lowercase wrapper
  %% turns the `~' in the definition into a `_' with `~''s category code
  %% (normally active) for the \ifx comparison.
  56 \begingroup\lccode`\~=`\_\lowercase{\endgroup
  57 \def\@scripts@i{\if1\ifx\@ch~1\else\ifx\@ch^1\else0\fi\fi%
  58   \expandafter\@scripts@ii\else\expandafter\m@maybe@end\fi}}
  %% \@scripts@ii<^ or _>{script}: typeset the script in maths, then look for
  %% a further script.
  59 \def\@scripts@ii#1#2{\m@maybe#1{#2}\@scripts}
  60
  %% Language names.  In \Cplusplus the plus signs are pulled 1pt (\p@)
  %% towards the `C'.
  61 \def\Cplusplus{C\kern-\p@++}
  62 \def\Csharp{C\#}
  %% \man{NAME}{SECTION}: typeset `NAME(SECTION)' with NAME in bold.
  63 \def\man#1#2{\textbf{#1}(#2)}
  64
  %% The `prog' environment: a quoted, line-by-line program display set in
  %% \codeface using `tabbing'.
  %%
  %% The \lccode/\lowercase wrapper turns each `~' in the definitions into an
  %% active end-of-line character.  For this to work the backslash ending the
  %% first line below must remain the last character on that line: do not add
  %% a trailing comment there.
  %%
  %% \prog saves the current `\\' in \old@nl, turns on \obeylines and starts
  %% `tabbing'; it then makes every line end behave like the `\\' then in
  %% force, and makes `\\' itself print a backslash.  \endprog ends the
  %% `tabbing', restores `\\' from \old@nl and ends the quote.  Note that the
  %% assignments to the line end and to `\\' are \global.
  65 \begingroup\lccode`\~=`\
  66 \lowercase{
  67 \endgroup
  68 \def\prog{%
  69   \codeface%
  70   \quote%
  71   \let\old@nl\\%
  72   \obeylines%
  73   \tabbing%
  74   \global\let~\\%
  75   \global\let\\\textbackslash%
  76 }
  77 \def\endprog{%
  78   \endtabbing%
  79   \global\let\\\old@nl%
  80   \endquote%
  81 }}
  82
  %% \begin{boxy}[TITLE] ... \end{boxy}: set the body in a minipage inside a
  %% ruled frame with 1ex of padding, with a \medskip above and below.  The
  %% minipage is \linewidth less 1.2pt and 2ex wide.  If TITLE is given it is
  %% set at the start of the body in \headfam\bfseries, followed by a \quad;
  %% the default argument \q@ (tested with \ifx) marks `no title'.
  83 \newenvironment{boxy}[1][\q@]{%
  84   \dimen@\linewidth\advance\dimen@-1.2pt\advance\dimen@-2ex%
  85   \medskip%
  86   \vbox\bgroup\hrule\hbox\bgroup\vrule%
  87   \vbox\bgroup\vskip1ex\hbox\bgroup\hskip1ex\minipage\dimen@%
  88   \def\@temp{#1}\ifx\@temp\q@\else\leavevmode{\headfam\bfseries#1\quad}\fi%
  89 }{%
  90   \endminipage\hskip1ex\egroup\vskip1ex\egroup%
  91   \vrule\egroup\hrule\egroup%
  92   \medskip%
  93 }
  94
  %% Categories for the `describe' environment.
  %% \definedescribecategory{KEY}{NAME} stores NAME in the macro `cat!KEY'.
  %% \describecategoryname{KEY} yields the stored NAME, or KEY itself if no
  %% such category has been defined.
  95 \def\definedescribecategory#1#2{\@namedef{cat!#1}{#2}}
  96 \def\describecategoryname#1{%
  97   \expandafter\let\expandafter\@tempa\csname cat!#1\endcsname%
  98   \ifx\@tempa\relax#1\else\@tempa\fi}
  %% The predefined categories.
  99 \definedescribecategory{fun}{function}
 100 \definedescribecategory{gf}{generic function}
 101 \definedescribecategory{var}{variable}
 102 \definedescribecategory{const}{constant}
 103 \definedescribecategory{meth}{primary method}
 104 \definedescribecategory{ar-meth}{around-method}
 105 \definedescribecategory{be-meth}{before-method}
 106 \definedescribecategory{af-meth}{after-method}
 107 \definedescribecategory{cls}{class}
 108 \definedescribecategory{ty}{type}
 109 \definedescribecategory{mac}{macro}
 110
 %% \q@ is a `quark': a macro expanding to itself, used only as a default
 %% argument marker (tested with \ifx) and as an argument delimiter.  It must
 %% never actually be expanded.
 111 \def\q@{\q@}
 %% \begin{describe}[LABEL]{CATEGORY}{SYNOPSIS} ... \end{describe}
 %%
 %% Start a description of some programming-interface item.  A \label named
 %% `CATEGORY:LABEL' is written (and echoed with \message); if LABEL is
 %% omitted, the first space-delimited word of SYNOPSIS is used in its place.
 %% The heading is a full-width row with SYNOPSIS on the left, set in
 %% \codeface within `tabbing', and the category name (see
 %% \describecategoryname) in bold square brackets on the right.  The body is
 %% set as the single item of a list with no right margin.
 112 \newenvironment{describe}[3][\q@]{%
 113   \normalfont%
 114   \par\goodbreak%
 115   \vspace{\bigskipamount}%
 116   \setbox\z@\hbox{\bfseries[\describecategoryname{#2}]}% the category tag
 117   \dimen@\linewidth\advance\dimen@-\wd\z@% width left over for the synopsis
 118   \def\@temp##1 ##2\q@{\message{#2:##1}\label{#2:##1}}% label from first word
 119   \def\@tempa{#1}\ifx\@tempa\q@\@temp#3 \q@\else\@temp{#1} \q@\fi%
 120   \edef\@temp{{\the\linewidth}{@{}p{\the\dimen@}%
 121       @{\extracolsep{\fill}}l@{\extracolsep{0pt}}}}%
 122   \noindent\csname tabular*\expandafter\endcsname\@temp%
 123   \tabbing\codeface#3\endtabbing&\unhbox\z@\\\endtabular%
 124 %  \@afterheading%
 125   \list{}{\rightmargin\z@}\item%
 126 }{%
 127   \endlist%
 128 }
 129
 %% \push: for use as a line of its own within `tabbing' (presumably inside
 %% `prog' -- confirm against the users).  Sets a tab stop one quad in, moves
 %% the left margin of the following lines to it, and discards the line.
 130 \def\push{\quad\=\+\kill}
 131
 132 \begin{document}
 133
 134 \maketitle
 135
 136 \include{sod-tut}
 137
 138 %%%--------------------------------------------------------------------------
 139 \chapter{Internals}
 140
 141 \section{Generated names}
 142
 143 The generated names for functions and objects related to a class are
 144 constructed systematically so as not to interfere with each other.  The rules
 145 on class, slot and message naming exist so as to ensure that the generated
 146 names don't collide with each other.
 147
 148 The following notation is used in this section.
 149 \begin{description}
 150 \item[@<class>] The full name of the `focus' class: the one for which we are
 151   generating names.
 152 \item[@<super-nick>] The nickname of a superclass.
 153 \item[@<head-nick>] The nickname of the chain-head class of the chain
 154   in question.
 155 \end{description}
 156
 157 \subsection{Instance layout}
 158
 159 %%%--------------------------------------------------------------------------
 160 \section{Syntax}
 161 \label{sec:syntax}
 162
 163 Fortunately, Sod is syntactically quite simple.  I've used a little slightly
 164 unusual notation in order to make the presentation easier to read.
 165 \begin{itemize}
 166 \item $\epsilon$ denotes the empty nonterminal:
 167   \begin{quote}
 168     $\epsilon$ ::=
 169   \end{quote}
 170 \item @[@<item>@] means an optional @<item>:
 171   \begin{quote}
 172     \syntax{@[<item>@] ::= $\epsilon$ | <item>}
 173   \end{quote}
 174 \item @<item>^* means a sequence of zero or more @<item>s:
 175   \begin{quote}
 176     \syntax{@<item>^* ::= $\epsilon$ | @<item>^* <item>}
 177   \end{quote}
 178 \item @<item>^+ means a sequence of one or more @<item>s:
 179   \begin{quote}
 180     \syntax{@<item>^+ ::= <item> @<item>^*}
 181   \end{quote}
 182 \item @<item-list> means a sequence of one or more @<item>s separated
 183   by commas:
 184   \begin{quote}
 185     \syntax{<item-list> ::= <item> | <item-list> "," <item>}
 186   \end{quote}
 187 \end{itemize}
 188
 189 \subsection{Lexical syntax}
 190 \label{sec:syntax.lex}
 191
 192 Whitespace and comments are discarded.  The remaining characters are
 193 collected into tokens according to the following syntax.
 194
 195 \begin{grammar}
 196 <token> ::= <identifier>
 197 \alt <reserved-word>
 198 \alt <string-literal>
 199 \alt <char-literal>
 200 \alt <integer-literal>
 201 \alt <punctuation>
 202 \end{grammar}
 203
 204 This syntax is slightly ambiguous.  The following two rules serve to
 205 disambiguate:
 206 \begin{enumerate}
 207 \item Reserved words take precedence.  All @<reserved-word>s are
 208   syntactically @<identifier>s; Sod resolves the ambiguity in favour of
 209   @<reserved-word>.
 210 \item `Maximal munch'.  In other cases, at each stage we take the longest
 211   sequence of characters which could be a token.
 212 \end{enumerate}
 213
 214 \subsubsection{Identifiers} \label{sec:syntax.lex.id}
 215
 216 \begin{grammar}
 217 <identifier> ::= <id-start-char> @<id-body-char>^*
 218
 219 <id-start-char> ::= <alpha-char> | "_"
 220
 221 <id-body-char> ::= <id-start-char> @! <digit-char>
 222
 223 <alpha-char> ::= "A" | "B" | \dots\ | "Z"
 224 \alt "a" | "b" | \dots\ | "z"
 225 \alt <extended-alpha-char>
 226
 227 <digit-char> ::= "0" | <nonzero-digit-char>
 228
 229 <nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9"
 230 \end{grammar}
 231
 232 The precise definition of @<alpha-char> is left to the function
 233 \textsf{alpha-char-p} in the hosting Lisp system.  For portability,
 234 programmers are encouraged to limit themselves to the standard ASCII letters.
 235
 236 \subsubsection{Reserved words} \label{sec:syntax.lex.reserved}
 237
 238 \begin{grammar}
 239 <reserved-word> ::=
 240 "char" | "class" | "code" | "const" | "double" | "enum" |
 241 "extern" | "float" | "import" | "int" | "lisp" | "load" | "long"
 242 | "restrict" | "short" | "signed" | "struct" | "typename" |
 243 "union" | "unsigned" | "void" | "volatile"
 244 \end{grammar}
 245
 246 Many of these are borrowed from~C; however, some (e.g., @"import" and
 247 @"lisp") are not, and some C reserved words are not reserved (e.g.,
 248 @"static").
 249
 250 \subsubsection{String and character literals} \label{sec:syntax.lex.string}
 251
 252 \begin{grammar}
 253 <string-literal> ::= "\"" @<string-literal-char>^* "\""
 254
 255 <char-literal> ::= "'" <char-literal-char> "'"
 256
 257 <string-literal-char> ::= any character other than "\\" or "\""
 258 \alt "\\" <char>
 259
 260 <char-literal-char> ::= any character other than "\\" or "'"
 261 \alt "\\" <char>
 262
 263 <char> ::= any single character
 264 \end{grammar}
 265
 266 The syntax for string and character literals differs from~C.  In particular,
 267 escape sequences such as @`\textbackslash n' are not recognized.  The use
 268 of string and character literals in Sod, outside of C~fragments, is limited,
 269 and the simple syntax seems adequate.  For the sake of future compatibility,
 270 the use of character sequences which resemble C escape sequences is
 271 discouraged.
 272
 273 \subsubsection{Integer literals} \label{sec:syntax.lex.int}
 274
 275 \begin{grammar}
 276 <integer-literal> ::= <decimal-integer>
 277 \alt <binary-integer>
 278 \alt <octal-integer>
 279 \alt <hex-integer>
 280
 281 <decimal-integer> ::= <nonzero-digit-char> @<digit-char>^*
 282
 283 <binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+
 284
 285 <binary-digit-char> ::= "0" | "1"
 286
 287 <octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+
 288
 289 <octal-digit-char> ::= "0" | "1" $| \cdots |$ "7"
 290
 291 <hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+
 292
 293 <hex-digit-char> ::= <digit-char>
 294 \alt "A" | "B" | "C" | "D" | "E" | "F"
 295 \alt "a" | "b" | "c" | "d" | "e" | "f"
 296 \end{grammar}
 297
 298 Sod understands only integers, not floating-point numbers; its integer syntax
 299 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
 300 binary.  However, length and signedness indicators are not permitted.
 301
 302 \subsubsection{Punctuation} \label{sec:syntax.lex.punct}
 303
 304 \begin{grammar}
 305 <punctuation> ::= any character other than "\"" or "'"
 306 \end{grammar}
 307
 308 Due to the `maximal munch' rule, @<punctuation> tokens cannot be
 309 alphanumeric.
 310
 311 \subsubsection{Comments} \label{sec:lex-comment}
 312
 313 \begin{grammar}
 314 <comment> ::= <block-comment>
 315 \alt <line-comment>
 316
 317 <block-comment> ::=
 318   "/*"
 319   @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
 320   @<star>^*
 321   "*/"
 322
 323 <star> ::= "*"
 324
 325 <not-star> ::= any character other than "*"
 326
 327 <not-star-or-slash> ::= any character other than "*" or  "/"
 328
 329 <line-comment> ::= "//" @<not-newline>^* <newline>
 330
 331 <newline> ::= a newline character
 332
 333 <not-newline> ::= any character other than newline
 334 \end{grammar}
 335
 336 Comments are exactly as in C99: both traditional block comments `\texttt{/*}
 337 \dots\ \texttt{*/}' and \Cplusplus-style `\texttt{//} \dots' comments are
 338 permitted and ignored.
 339
 340 \subsection{Special nonterminals}
 341 \label{sec:special-nonterminals}
 342
 343 Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
 344 two special nonterminals occur in the module syntax.
 345
 346 \subsubsection{S-expressions} \label{sec:syntax-sexp}
 347
 348 \begin{grammar}
 349 <s-expression> ::= an S-expression, as parsed by the Lisp reader
 350 \end{grammar}
 351
 352 When an S-expression is expected, the Sod parser simply calls the host Lisp
 353 system's \textsf{read} function.  Sod modules are permitted to modify the
 354 read table to extend the S-expression syntax.
 355
 356 S-expressions are self-delimiting, so no end-marker is needed.
 357
 358 \subsubsection{C fragments} \label{sec:syntax.lex.cfrag}
 359
 360 \begin{grammar}
 361 <c-fragment> ::= a sequence of C tokens, with matching brackets
 362 \end{grammar}
 363
 364 Sequences of C code are simply stored and written to the output unchanged
 365 during translation.  They are read using a simple scanner which nonetheless
 366 understands C comments and string and character literals.
 367
 368 A C fragment is terminated by one of a small number of delimiter characters
 369 determined by the immediately surrounding context -- usually a closing brace
 370 or bracket.  The first such delimiter character which is not enclosed in
 371 brackets, braces or parentheses ends the fragment.
 372
 373 \subsection{Module syntax} \label{sec:syntax-module}
 374
 375 \begin{grammar}
 376 <module> ::= @<definition>^*
 377
 378 <definition> ::= <import-definition>
 379 \alt <load-definition>
 380 \alt <lisp-definition>
 381 \alt <code-definition>
 382 \alt <typename-definition>
 383 \alt <class-definition>
 384 \end{grammar}
 385
 386 A module is the top-level syntactic item.  A module consists of a sequence of
 387 definitions.
 388
 389 \subsection{Simple definitions} \label{sec:syntax.defs}
 390
 391 \subsubsection{Importing modules} \label{sec:syntax.defs.import}
 392
 393 \begin{grammar}
 394 <import-definition> ::= "import" <string> ";"
 395 \end{grammar}
 396
 397 The module named @<string> is processed and its definitions made available.
 398
 399 A search is made for a module source file as follows.
 400 \begin{itemize}
 401 \item The module name @<string> is converted into a filename by appending
 402   @`.sod', if it has no extension already.\footnote{%
 403     Technically, what happens is \textsf{(merge-pathnames name (make-pathname
 404     :type "SOD" :case :common))}, so exactly what this means varies
 405     according to the host system.} %
 406 \item The file is looked for relative to the directory containing the
 407   importing module.
 408 \item If that fails, then the file is looked for in each directory on the
 409   module search path in turn.
 410 \item If the file still isn't found, an error is reported and the import
 411   fails.
 412 \end{itemize}
 413 At this point, if the file has previously been imported, nothing further
 414 happens.\footnote{%
 415   This check is done using \textsf{truename}, so it should see through simple
 416   tricks like symbolic links.  However, it may be confused by fancy things
 417   like bind mounts and so on.} %
 418
 419 Recursive imports, either direct or indirect, are an error.
 420
 421 \subsubsection{Loading extensions} \label{sec:syntax.defs.load}
 422
 423 \begin{grammar}
 424 <load-definition> ::= "load" <string> ";"
 425 \end{grammar}
 426
 427 The Lisp file named @<string> is loaded and evaluated.
 428
 429 A search is made for a Lisp source file as follows.
 430 \begin{itemize}
 431 \item The name @<string> is converted into a filename by appending @`.lisp',
 432   if it has no extension already.\footnote{%
 433     Technically, what happens is \textsf{(merge-pathnames name (make-pathname
 434     :type "LISP" :case :common))}, so exactly what this means varies
 435     according to the host system.} %
 436 \item A search is then made in the same manner as for module imports
 437   (\xref{sec:syntax.defs.import}).
 438 \end{itemize}
 439 If the file is found, it is loaded using the host Lisp's \textsf{load}
 440 function.
 441
 442 Note that Sod doesn't attempt to compile Lisp files, or even to look for
 443 existing compiled files.  The right way to package a substantial extension to
 444 the Sod translator is to provide the extension as a standard ASDF system (or
 445 similar) and leave a dropping @"foo-extension.lisp" in the module path saying
 446 something like
 447 \begin{listing}
 448 (asdf:operate 'asdf:load-op :foo-extension)
 449 \end{listing}
 450 which will arrange for the extension to be compiled if necessary.
 451
 452 (This approach means that the language doesn't need to depend on any
 453 particular system definition facility.  It's bad enough already that it
 454 depends on Common Lisp.)
 455
 456 \subsubsection{Lisp escapes} \label{sec:syntax.defs.lisp}
 457
 458 \begin{grammar}
 459 <lisp-definition> ::= "lisp" <s-expression> ";"
 460 \end{grammar}
 461
 462 The @<s-expression> is evaluated immediately.  It can do anything it likes.
 463
 464 \textbf{Warning!}  This means that hostile Sod modules are a security hazard.
 465 Lisp code can read and write files, start other programs, and make network
 466 connections.  Don't install Sod modules from sources that you don't
 467 trust.\footnote{%
 468   Presumably you were going to run the corresponding code at some point, so
 469   this isn't as unusually scary as it sounds.  But please be careful.} %
 470
 471 \subsubsection{Declaring type names} \label{sec:syntax.defs.typename}
 472
 473 \begin{grammar}
 474 <typename-definition> ::=
 475   "typename" <identifier-list> ";"
 476 \end{grammar}
 477
 478 Each @<identifier> is declared as naming a C type.  This is important because
 479 the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
 480 done by distinguishing type names from other identifiers.
 481
 482 Don't declare class names using @"typename"; use @"class" forward
 483 declarations instead.
 484
 485 \subsection{Literal code} \label{sec:syntax-code}
 486
 487 \begin{grammar}
 488 <code-definition> ::=
 489   "code" <identifier> ":" <identifier> @[<constraints>@]
 490   "{" <c-fragment> "}"
 491
 492 <constraints> ::= "[" <constraint-list> "]"
 493
 494 <constraint> ::= @<identifier>^+
 495 \end{grammar}
 496
 497 The @<c-fragment> will be output unchanged to one of the output files.
 498
 499 The first @<identifier> is the symbolic name of an output file.  Predefined
 500 output file names are @"c" and @"h", which are the implementation code and
 501 header file respectively; other output files can be defined by extensions.
 502
 503 The second @<identifier> provides a name for the output item.  Several C
 504 fragments can have the same name: they will be concatenated together in the
 505 order in which they were encountered.
 506
 507 The @<constraints> provide a means for specifying where in the output file
 508 the output item should appear.  (Note the two kinds of square brackets shown
 509 in the syntax: square brackets must appear around the constraints if they are
 510 present, but the constraints may be omitted altogether.)  Each comma-separated @<constraint>
 511 is a sequence of identifiers naming output items, and indicates that the
 512 output items must appear in the order given -- though the translator is free
 513 to insert additional items in between them.  (The particular output items
 514 needn't be defined already -- indeed, they needn't be defined ever.)
 515
 516 There is a predefined output item @"includes" in both the @"c" and @"h"
 517 output files which is a suitable place for inserting @"\#include"
 518 preprocessor directives in order to declare types and functions for use
 519 elsewhere in the generated output files.
 520
 521 \subsection{Property sets} \label{sec:syntax.propset}
 522
 523 \begin{grammar}
 524 <properties> ::= "[" <property-list> "]"
 525
 526 <property> ::= <identifier> "=" <expression>
 527 \end{grammar}
 528
 529 Property sets are a means for associating miscellaneous information with
 530 classes and related items.  By using property sets, additional information
 531 can be passed to extensions without the need to introduce idiosyncratic
 532 syntax.
 533
 534 A property has a name, given as an @<identifier>, and a value computed by
 535 evaluating an @<expression>.  The value can be one of a number of types,
 536 though the only operators currently defined act on integer values only.
 537
 538 \subsubsection{The expression evaluator} \label{sec:syntax.propset.expr}
 539
 540 \begin{grammar}
 541 <expression> ::= <term> | <expression> "+" <term> | <expression> "-" <term>
 542
 543 <term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
 544
 545 <factor> ::= <primary> | "+" <factor> | "-" <factor>
 546
 547 <primary> ::=
 548      <integer-literal> | <string-literal> | <char-literal> | <identifier>
 549 \alt "?" <s-expression>
 550 \alt "(" <expression> ")"
 551 \end{grammar}
 552
 553 The arithmetic expression syntax is simple and standard; there are currently
 554 no bitwise, logical, or comparison operators.
 555
 556 A @<primary> expression may be a literal or an identifier.  Note that
 557 identifiers stand for themselves: they \emph{do not} denote values.  For more
 558 fancy expressions, the syntax
 559 \begin{quote}
 560   @"?" @<s-expression>
 561 \end{quote}
 562 causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
 563 function.
 564 %%% FIXME crossref to extension docs
 565
 566 \subsection{C types} \label{sec:syntax.c-types}
 567
 568 Sod's syntax for C types closely mirrors the standard C syntax.  A C type has
 569 two parts: a sequence of @<declaration-specifier>s and a @<declarator>.  In
 570 Sod, a type must contain at least one @<declaration-specifier> (i.e.,
 571 `implicit @"int"' is forbidden), and storage-class specifiers are not
 572 recognized.
 573
 574 \subsubsection{Declaration specifiers} \label{sec:syntax.c-types.declspec}
 575
 576 \begin{grammar}
 577 <declaration-specifier> ::= <type-name>
 578 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
 579 \alt "void" | "char" | "int" | "float" | "double"
 580 \alt "short" | "long"
 581 \alt "signed" | "unsigned"
 582 \alt <qualifier>
 583
 584 <qualifier> ::= "const" | "volatile" | "restrict"
 585
 586 <type-name> ::= <identifier>
 587 \end{grammar}
 588
 589 A @<type-name> is an identifier which has been declared as being a type name,
 590 using the @"typename" or @"class" definitions.
 591
 592 Declaration specifiers may appear in any order.  However, not all
 593 combinations are permitted.  The declaration specifiers must consist of zero
 594 or more @<qualifier>s, and one of the following, up to reordering.
 595 \begin{itemize}
 596 \item @<type-name>
 597 \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
 598 \item @"void"
 599 \item @"char", @"unsigned char", @"signed char"
 600 \item @"short", @"unsigned short", @"signed short"
 601 \item @"short int", @"unsigned short int", @"signed short int"
 602 \item @"int", @"unsigned int", @"signed int", @"unsigned", @"signed"
 603 \item @"long", @"unsigned long", @"signed long"
 604 \item @"long int", @"unsigned long int", @"signed long int"
 605 \item @"long long", @"unsigned long long", @"signed long long"
 606 \item @"long long int", @"unsigned long long int", @"signed long long int"
 607 \item @"float", @"double", @"long double"
 608 \end{itemize}
 609 All of these have their usual C meanings.
 610
 611 \subsubsection{Declarators} \label{sec:syntax.c-types.declarator}
 612
 613 \begin{grammar}
 614 <declarator> ::=
 615   @<pointer>^* <inner-declarator> @<declarator-suffix>^*
 616
 617 <inner-declarator> ::= <identifier> | <qualified-identifier>
 618 \alt "(" <declarator> ")"
 619
 620 <qualified-identifier> ::= <identifier> "." <identifier>
 621
 622 <pointer> ::= "*" @<qualifier>^*
 623
 624 <declarator-suffix> ::= "[" <c-fragment> "]"
 625 \alt "(" <arguments> ")"
 626
 627 <arguments> ::= <empty> | "..."
 628 \alt <argument-list> @["," "..."@]
 629
 630 <argument> ::= @<declaration-specifier>^+ <argument-declarator>
 631
 632 <argument-declarator> ::= <declarator> | @[<abstract-declarator>@]
 633
 634 <abstract-declarator> ::=
 635   @<pointer>^+ | @<pointer>^* <inner-abstract-declarator>
 636
 637 <inner-abstract-declarator> ::= "(" <abstract-declarator> ")"
 638 \alt @[<inner-abstract-declarator>@] @<declarator-suffix>^+
 639 \end{grammar}
 640
 641 The declarator syntax is taken from C, but with some differences.
 642 \begin{itemize}
 643 \item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
 644   closing square bracket.  This allows array dimensions to contain arbitrary
 645   constant expressions.
 646 \item A declarator may have either a single @<identifier> at its centre or a
 647   pair of @<identifier>s separated by a @`.'; this is used to refer to
 648   slots or messages defined in superclasses.
 649 \end{itemize}
 650 The remaining differences are (I hope) a matter of presentation rather than
 651 substance.
 652
 653 \subsection{Defining classes} \label{sec:syntax.class}
 654
 655 \begin{grammar}
 656 <class-definition> ::= <class-forward-declaration>
 657 \alt <full-class-definition>
 658 \end{grammar}
 659
 660 \subsubsection{Forward declarations} \label{sec:class.class.forward}
 661
 662 \begin{grammar}
 663 <class-forward-declaration> ::= "class" <identifier> ";"
 664 \end{grammar}
 665
 666 A @<class-forward-declaration> informs Sod that an @<identifier> will be used
 667 to name a class which is currently undefined.  Forward declarations are
 668 necessary in order to resolve certain kinds of circularity.  For example,
 669 \begin{listing}
 670 class Sub;
 671
 672 class Super : SodObject {
 673   Sub *sub;
 674 };
 675
 676 class Sub : Super {
 677   /* ... */
 678 };
 679 \end{listing}
 680
 681 \subsubsection{Full class definitions} \label{sec:class.class.full}
 682
 683 \begin{grammar}
 684 <full-class-definition> ::=
 685   @[<properties>@]
 686   "class" <identifier> ":" <identifier-list>
 687   "{" @<class-item>^* "}"
 688
 689 <class-item> ::= <slot-item> ";"
 690 \alt <message-item>
 691 \alt <method-item>
 692 \alt  <initializer-item> ";"
 693 \end{grammar}
 694
 695 A full class definition provides a complete description of a class.
 696
 697 The first @<identifier> gives the name of the class.  It is an error to
 698 give the name of an existing class (other than a forward-referenced class),
 699 or an existing type name.  It is conventional to give classes `MixedCase'
 700 names, to distinguish them from other kinds of identifiers.
 701
 702 The @<identifier-list> names the direct superclasses for the new class.  It
 703 is an error if any of these @<identifier>s does not name a defined class.
 704
 705 The @<properties> provide additional information.  The standard class
 706 properties are as follows.
 707 \begin{description}
 708 \item[@"lisp_class"] The name of the Lisp class to use within the translator
 709   to represent this class.  The property value must be an identifier; the
 710   default is @"sod_class".  Extensions may define classes with additional
 711   behaviour, and may recognize additional class properties.
 712 \item[@"metaclass"] The name of the Sod metaclass for this class.  In the
 713   generated code, a class is itself an instance of another class -- its
 714   \emph{metaclass}.  The metaclass defines which slots the class will have,
 715   which messages it will respond to, and what its behaviour will be when it
 716   receives them.  The property value must be an identifier naming a defined
 717   subclass of @"SodClass".  The default metaclass is @"SodClass".
 718   %%% FIXME xref to theory
 719 \item[@"nick"] A nickname for the class, to be used to distinguish it from
 720   other classes in various limited contexts.  The property value must be an
 721   identifier; the default is constructed by forcing the class name to
 722   lower-case.
 723 \end{description}
 724
 725 The class body consists of a sequence of @<class-item>s enclosed in braces.
 726 These items are discussed in the following sections.
 727
 728 \subsubsection{Slot items} \label{sec:sntax.class.slot}
 729
 730 \begin{grammar}
 731 <slot-item> ::=
 732   @[<properties>@]
 733   @<declaration-specifier>^+ <init-declarator-list>
 734
 735 <init-declarator> ::= <declarator> @["=" <initializer>@]
 736 \end{grammar}
 737
 738 A @<slot-item> defines one or more slots.  All instances of the class and any
 739 subclass will contain these slots, with the names and types given by the
 740 @<declaration-specifier>s and the @<declarator>s.  Slot declarators may not
 741 contain qualified identifiers.
 742
 743 It is not possible to declare a slot with function type: such an item is
 744 interpreted as being a @<message-item> or @<method-item>.  Pointers to
 745 functions are fine.
 746
 747 An @<initializer>, if present, is treated as if a separate
 748 @<initializer-item> containing the slot name and initializer were present.
 749 For example,
 750 \begin{listing}
 751 [nick = eg]
 752 class Example : Super {
 753   int foo = 17;
 754 };
 755 \end{listing}
 756 means the same as
 757 \begin{listing}
 758 [nick = eg]
 759 class Example : Super {
 760   int foo;
 761   eg.foo = 17;
 762 };
 763 \end{listing}
 764
 765 \subsubsection{Initializer items} \label{sec:syntax.class.init}
 766
 767 \begin{grammar}
 768 <initializer-item> ::= @["class"@] <slot-initializer-list>
 769
 770 <slot-initializer> ::= <qualified-identifier> "=" <initializer>
 771
 772 <initializer> ::= "{" <c-fragment> "}" | <c-fragment>
 773 \end{grammar}
 774
 775 An @<initializer-item> provides an initial value for one or more slots.  If
 776 prefixed by @"class", then the initial values are for class slots (i.e.,
 777 slots of the class object itself); otherwise they are for instance slots.
 778
 779 The first component of the @<qualified-identifier> must be the nickname of
 780 one of the class's superclasses (including itself); the second must be the
 781 name of a slot defined in that superclass.
 782
 783 The initializer has one of two forms.
 784 \begin{itemize}
 785 \item A @<c-fragment> enclosed in braces denotes an aggregate initializer.
 786   This is suitable for initializing structure, union or array slots.
 787 \item A @<c-fragment> \emph{not} beginning with an open brace is a `bare'
 788   initializer, and continues until the next @`,' or @`;' which is not within
 789   nested brackets.  Bare initializers are suitable for initializing scalar
 790   slots, such as pointers or integers, and strings.
 791 \end{itemize}
 792
 793 \subsubsection{Message items} \label{sec:syntax.class.message}
 794
 795 \begin{grammar}
 796 <message-item> ::=
 797   @[<properties>@]
 798   @<declaration-specifier>^+ <declarator> @[<method-body>@]
 799 \end{grammar}
 800
 801 \subsubsection{Method items} \label{sec:syntax.class.method}
 802
 803 \begin{grammar}
 804 <method-item> ::=
 805   @[<properties>@]
 806   @<declaration-specifier>^+ <declarator> <method-body>
 807
 808 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
 809 \end{grammar}
 810
 811 %%%--------------------------------------------------------------------------
 812 \section{Class objects}
 813
 814 \begin{listing}
 815 typedef struct SodClass__ichain_obj SodClass;
 816
 817 struct sod_chain {                      /* Describes one chain of a class */
 818   size_t n_classes;                     /* Number of classes in chain */
 819   const SodClass *const *classes;       /* Vector of classes, head first */
 820   size_t off_ichain;                    /* Offset of ichain from instance base */
 821   const struct sod_vtable *vt;          /* Vtable pointer for chain */
 822   size_t ichainsz;                      /* Size of the ichain structure */
 823 };
 824
 825 struct sod_vtable {                     /* Leading part of every vtable */
 826   SodClass *_class;                     /* Pointer to instance's class */
 827   size_t _base;                         /* Offset to instance base */
 828 };
 829
 830 struct SodClass__islots {
 831
 832   /* Basic information */
 833   const char *name;                     /* The class's name as a string */
 834   const char *nick;                     /* The nickname as a string */
 835
 836   /* Instance allocation and initialization */
 837   size_t instsz;                        /* Instance layout size in bytes */
 838   void *(*imprint)(void *);             /* Stamp instance with vtable ptrs */
 839   void *(*init)(void *);                /* Initialize instance */
 840
 841   /* Superclass structure */
 842   size_t n_supers;                      /* Number of direct superclasses */
 843   const SodClass *const *supers;        /* Vector of direct superclasses */
 844   size_t n_cpl;                         /* Length of class precedence list */
 845   const SodClass *const *cpl;           /* Vector for class precedence list */
 846
 847   /* Chain structure */
 848   const SodClass *link;                 /* Link to next class in chain */
 849   const SodClass *head;                 /* Pointer to head of chain */
 850   size_t level;                         /* Index of class in its chain */
 851   size_t n_chains;                      /* Number of superclass chains */
 852   const struct sod_chain *chains;       /* Vector of chain structures */
 853
 854   /* Layout */
 855   size_t off_islots;                    /* Offset of islots from ichain base */
 856   size_t islotsz;                       /* Size of instance slots */
 857 };
 858
 859 struct SodClass__ichain_obj {
 860   const struct SodClass__vt_obj *_vt;   /* NOTE(review): vt_obj not shown here */
 861   struct SodClass__islots cls;          /* The slots declared above */
 862 };
 863
 864 struct sod_instance {
 865   struct sod_vtable *_vt;               /* Vtable pointer */
 866 };
 867 \end{listing}
 868
 869 \begin{listing}
 870 void *sod_convert(const SodClass *cls, const void *obj)
 871 {
 872   const struct sod_instance *inst = obj;
 873   const SodClass *real = inst->_vt->_cls;
 874   const struct sod_chain *chain;
 875   size_t i, index;
 876
 877   for (i = 0; i < real->cls.n_chains; i++) {
 878     chain = &real->cls.chains[i];
 879     if (chain->classes[0] == cls->cls.head) {
 880       index = cls->cls.index;
 881       if (index < chain->n_classes && chain->classes[index] == cls)
 882         return ((char *)cls - inst->_vt._base + chain->off_ichain);
 883       else
 884         return (0);
 885     }
 886   }
 887   return (0);
 888 }
 889 \end{listing}
 890
 891 %%%--------------------------------------------------------------------------
 892 \section{Classes}
 893
 894 \subsection{Classes and superclasses}
 895
 896 A @<full-class-definition> must list one or more existing classes to be the
 897 \emph{direct superclasses} for the new class being defined.  We make the
 898 following definitions.
 899 \begin{itemize}
 900 \item The \emph{superclasses} of a class consist of the class itself together
 901   with the superclasses of its direct superclasses.
 902 \item The \emph{proper superclasses} of a class are its superclasses other
 903   than itself.
 904 \item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
 905   \emph{subclass} of $C$.
 906 \end{itemize}
 907 The predefined class @|SodObject| has no direct superclasses; it is unique in
 908 this respect.  All classes are subclasses of @|SodObject|.
 909
 910 \subsection{The class precedence list}
 911
 912 Let $C$ be a class.  The superclasses of $C$ form a directed graph, with an
 913 edge from each class to each of its direct superclasses.  This is the
 914 \emph{superclass graph of $C$}.
 915
 916 In order to resolve inheritance of items, we define a \emph{class precedence
 917   list} (or CPL) for each class, which imposes a total order on that class's
 918 superclasses.  The default algorithm for computing the CPL is the \emph{C3}
  919 algorithm~\cite{fixme-c3}, though extensions may implement other algorithms.
 920
 921 The default algorithm works as follows.  Let $C$ be the class whose CPL we
 922 are to compute.  Let $X$ and $Y$ be two of $C$'s superclasses.
 923 \begin{itemize}
 924 \item $C$ must appear first in the CPL.
 925 \item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
  926   superclasses, then $X$ appears before $Y$ in $C$'s CPL.
 927 \item If the above rules don't suffice to order $X$ and $Y$, then whichever
 928   of $X$ and $Y$ has a subclass which appears further left in the list of
 929   $C$'s direct superclasses will appear earlier in the CPL.
 930 \end{itemize}
 931 This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
 932 superclasses of the same direct superclass of $C$ then that direct
 933 superclass's CPL will order $X$ and $Y$.
 934
 935 We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
 936 $X$ is earlier than $Y$ in $C$'s class precedence list.  If $C$ is clear from
 937 context then we omit it, saying simply that $X$ is more specific than $Y$.
 938
 939 \subsection{Instances and metaclasses}
 940
 941 A class defines the structure and behaviour of its \emph{instances}: run-time
 942 objects created (possibly) dynamically.  An instance is an instance of only
 943 one class, though structurally it may be used in place of an instance of any
 944 of that class's superclasses.  It is possible, with care, to change the class
 945 of an instance at run-time.
 946
 947 Classes are themselves represented as instances -- called \emph{class
 948   objects} -- in the running program.  Being instances, they have a class,
 949 called the \emph{metaclass}.  The metaclass defines the structure and
 950 behaviour of the class object.
 951
 952 The predefined class @|SodClass| is the default metaclass for new classes.
 953 @|SodClass| has @|SodObject| as its only direct superclass.  @|SodClass| is
 954 its own metaclass.
 955
 956 \subsection{Items and inheritance}
 957
 958 A class definition also declares \emph{slots}, \emph{messages},
 959 \emph{initializers} and \emph{methods} -- collectively referred to as
 960 \emph{items}.  In addition to the items declared in the class definition --
 961 the class's \emph{direct items} -- a class also \emph{inherits} items from
 962 its superclasses.
 963
 964 The precise rules for item inheritance vary according to the kinds of items
 965 involved.
 966
 967 Some object systems have a notion of `repeated inheritance': if there are
 968 multiple paths in the superclass graph from a class to one of its
 969 superclasses then items defined in that superclass may appear duplicated in
 970 the subclass.  Sod does not have this notion.
 971
 972 \subsubsection{Slots}
 973 A \emph{slot} is a unit of state.  In other object systems, slots may be
 974 called `fields', `member variables', or `instance variables'.
 975
 976 A slot has a \emph{name} and a \emph{type}.  The name serves only to
 977 distinguish the slot from other direct slots defined by the same class.  A
 978 class inherits all of its proper superclasses' slots.  Slots inherited from
 979 superclasses do not conflict with each other or with direct slots, even if
 980 they have the same names.
 981
 982 At run-time, each instance of the class holds a separate value for each slot,
 983 whether direct or inherited.  Changing the value of an instance's slot
 984 doesn't affect other instances.
 985
 986 \subsubsection{Initializers}
 987 Mumble.
 988
 989 \subsubsection{Messages}
 990 A \emph{message} is the stimulus for behaviour.  In Sod, a class must define,
 991 statically, the name and format of the messages it is able to receive and the
 992 values it will return in reply.  In this respect, a message is similar to
 993 `abstract member functions' or `interface member functions' in other object
 994 systems.
 995
 996 Like slots, a message has a \emph{name} and a \emph{type}.  Again, the name
 997 serves only to distinguish the message from other direct messages defined by
 998 the same class.  Messages inherited from superclasses do not conflict with
 999 each other or with direct messages, even if they have the same name.
1000
1001 At run-time, one sends a message to an instance by invoking a function
1002 obtained from the instance's \emph{vtable}: \xref{sec:fixme-vtable}.
1003
1004 \subsubsection{Methods}
1005 A \emph{method} is a unit of behaviour.  In other object systems, methods may
1006 be called `member functions'.
1007
1008 A method is associated with a message.  When a message is received by an
1009 instance, all of the methods associated with that message on the instance's
1010 class or any of its superclasses are \emph{applicable}.  The details of how
1011 the applicable methods are invoked are described fully in
1012 \xref{sec:fixme-method-combination}.
1013
1014 \subsection{Chains and instance layout}
1015
1016 \include{sod-backg}
1017 \include{sod-protocol}
1018
1019 \end{document}
1020 \f
1021 %%% Local variables:
1022 %%% mode: LaTeX
1023 %%% TeX-PDF-mode: t
1024 %%% End: