chiark - git - mdw - sod/blob - doc/syntax.tex

   1 %%% -*-latex-*-
   2 %%%
   3 %%% Module syntax
   4 %%%
   5 %%% (c) 2015 Straylight/Edgeware
   6 %%%
   7
   8 %%%----- Licensing notice ---------------------------------------------------
   9 %%%
  10 %%% This file is part of the Sensible Object Design, an object system for C.
  11 %%%
  12 %%% SOD is free software; you can redistribute it and/or modify
  13 %%% it under the terms of the GNU General Public License as published by
  14 %%% the Free Software Foundation; either version 2 of the License, or
  15 %%% (at your option) any later version.
  16 %%%
  17 %%% SOD is distributed in the hope that it will be useful,
  18 %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 %%% GNU General Public License for more details.
  21 %%%
  22 %%% You should have received a copy of the GNU General Public License
  23 %%% along with SOD; if not, write to the Free Software Foundation,
  24 %%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25
  26 \chapter{Module syntax} \label{ch:syntax}
  27
  28 %%%--------------------------------------------------------------------------
  29 \section{Notation} \label{sec:syntax.notation}
  30
  31 Fortunately, Sod is syntactically quite simple.  The notation is slightly
  32 unusual in order to make the presentation shorter and easier to read.
  33
  34 Anywhere a simple nonterminal name $x$ may appear in the grammar, an
  35 \emph{indexed} nonterminal $x[a_1, \ldots, a_n]$ may also appear.  On the
  36 left-hand side of a production rule, the indices $a_1$, \ldots, $a_n$ are
  37 variables which vary over all nonterminal and terminal symbols, and the
  38 variables may also appear on the right-hand side in place of a nonterminal.
  39 Such a rule stands for a family of rules, in which each variable is replaced
  40 by each possible simple nonterminal or terminal symbol.
  41
  42 The letter $\epsilon$ denotes the empty nonterminal
  43 \begin{quote}
  44   \syntax{$\epsilon$ ::=}
  45 \end{quote}
  46
  47 The following indexed productions are used throughout the grammar, some often
  48 enough that they deserve special notation.
  49 \begin{itemize}
  50 \item @[$x$@] abbreviates @<optional>$[x]$, denoting an optional occurrence
  51   of $x$:
  52   \begin{quote}
  53     \syntax{@[$x$@] ::= <optional>$[x]$ ::= $\epsilon$ @! $x$}
  54   \end{quote}
  55 \item $x^*$ abbreviates @<zero-or-more>$[x]$, denoting a sequence of zero or
  56   more occurrences of $x$:
  57   \begin{quote}
  58     \syntax{$x^*$ ::= <zero-or-more>$[x]$ ::=
  59       $\epsilon$ @! <zero-or-more>$[x]$ $x$}
  60   \end{quote}
  61 \item $x^+$ abbreviates @<one-or-more>$[x]$, denoting a sequence of one or
  62   more occurrences of $x$:
  63   \begin{quote}
  64     \syntax{$x^+$ ::= <one-or-more>$[x]$ ::= <zero-or-more>$[x]$ $x$}
  65   \end{quote}
  66 \item @<list>$[x]$ denotes a sequence of one or more occurrences of $x$
  67   separated by commas:
  68   \begin{quote}
  69     \syntax{<list>$[x]$ ::= $x$ @! <list>$[x]$ "," $x$}
  70   \end{quote}
  71 \end{itemize}
  72
  73 %%%--------------------------------------------------------------------------
  74 \section{Lexical syntax} \label{sec:syntax.lex}
  75
  76 Whitespace and comments are discarded.  The remaining characters are
  77 collected into tokens according to the following syntax.
  78
  79 \begin{grammar}
  80 <token> ::= <identifier>
  81 \alt <string-literal>
  82 \alt <char-literal>
  83 \alt <integer-literal>
  84 \alt <punctuation>
  85 \end{grammar}
  86
  87 This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
  88 munch} rule: at each stage we take the longest sequence of characters which
  89 could be a token.
  90
  91
  92 \subsection{Identifiers} \label{sec:syntax.lex.id}
  93
  94 \begin{grammar}
  95 <identifier> ::= <id-start-char> @<id-body-char>^*
  96
  97 <id-start-char> ::= <alpha-char> | "_"
  98
  99 <id-body-char> ::= <id-start-char> @! <digit-char>
 100
 101 <alpha-char> ::= "A" | "B" | \dots\ | "Z"
 102 \alt "a" | "b" | \dots\ | "z"
 103 \alt <extended-alpha-char>
 104
 105 <digit-char> ::= "0" | <nonzero-digit-char>
 106
 107 <nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9"
 108 \end{grammar}
 109
 110 The precise definition of @<alpha-char> is left to the function
 111 \textsf{alpha-char-p} in the hosting Lisp system.  For portability,
 112 programmers are encouraged to limit themselves to the standard ASCII letters.
 113
 114 There are no reserved words at the lexical level, but the higher-level syntax
 115 recognizes certain identifiers as \emph{keywords} in some contexts.  There is
 116 also an ambiguity (inherited from C) in the declaration syntax which is
 117 settled by distinguishing type names from other identifiers at a lexical
 118 level.
 119
 120
 121 \subsection{String and character literals} \label{sec:syntax.lex.string}
 122
 123 \begin{grammar}
 124 <string-literal> ::= "\"" @<string-literal-char>^* "\""
 125
 126 <char-literal> ::= "'" <char-literal-char> "'"
 127
 128 <string-literal-char> ::= any character other than "\\" or "\""
 129 \alt "\\" <char>
 130
 131 <char-literal-char> ::= any character other than "\\" or "'"
 132 \alt "\\" <char>
 133
 134 <char> ::= any single character
 135 \end{grammar}
 136
 137 The syntax for string and character literals differs from~C.  In particular,
 138 escape sequences such as @`\textbackslash n' are not recognized.  The use
 139 of string and character literals in Sod, outside of C~fragments, is limited,
 140 and the simple syntax seems adequate.  For the sake of future compatibility,
 141 the use of character sequences which resemble C escape sequences is
 142 discouraged.
 143
 144 \subsection{Integer literals} \label{sec:syntax.lex.int}
 145
 146 \begin{grammar}
 147 <integer-literal> ::= <decimal-integer>
 148 \alt <binary-integer>
 149 \alt <octal-integer>
 150 \alt <hex-integer>
 151
 152 <decimal-integer> ::= "0" | <nonzero-digit-char> @<digit-char>^*
 153
 154 <binary-integer> ::= "0" @("b"|"B"@) @<binary-digit-char>^+
 155
 156 <binary-digit-char> ::= "0" | "1"
 157
 158 <octal-integer> ::= "0" @["o"|"O"@] @<octal-digit-char>^+
 159
 160 <octal-digit-char> ::= "0" | "1" $| \cdots |$ "7"
 161
 162 <hex-integer> ::= "0" @("x"|"X"@) @<hex-digit-char>^+
 163
 164 <hex-digit-char> ::= <digit-char>
 165 \alt "A" | "B" | "C" | "D" | "E" | "F"
 166 \alt "a" | "b" | "c" | "d" | "e" | "f"
 167 \end{grammar}
 168
 169 Sod understands only integers, not floating-point numbers; its integer syntax
 170 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
 171 binary.  However, length and signedness indicators are not permitted.
 172
 173
 174 \subsection{Punctuation} \label{sec:syntax.lex.punct}
 175
 176 \begin{grammar}
 177 <punctuation> ::= any nonalphanumeric character other than "_", "\"" or "'"
 178 \end{grammar}
 179
 180
 181 \subsection{Comments} \label{sec:syntax.lex.comment}
 182
 183 \begin{grammar}
 184 <comment> ::= <block-comment>
 185 \alt <line-comment>
 186
 187 <block-comment> ::=
 188   "/*"
 189   @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
 190   @<star>^*
 191   "*/"
 192
 193 <star> ::= "*"
 194
 195 <not-star> ::= any character other than "*"
 196
 197 <not-star-or-slash> ::= any character other than "*" or  "/"
 198
 199 <line-comment> ::= "/\,/" @<not-newline>^* <newline>
 200
 201 <newline> ::= a newline character
 202
 203 <not-newline> ::= any character other than newline
 204 \end{grammar}
 205
 206 Comments are exactly as in C99: both traditional block comments `@|/*| \dots\
 207 @|*/|' and \Cplusplus-style `@|/\,/| \dots' comments are permitted and
 208 ignored.
 209
 210
 211 \subsection{Special nonterminals} \label{sec:syntax.lex.special}
 212
 213 Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
 214 two special nonterminals occur in the module syntax.
 215
 216 \subsubsection{S-expressions}
 217 \begin{grammar}
 218 <s-expression> ::= an S-expression, as parsed by the Lisp reader
 219 \end{grammar}
 220
 221 When an S-expression is expected, the Sod parser simply calls the host Lisp
 222 system's @|read| function.  Sod modules are permitted to modify the read
 223 table to extend the S-expression syntax.
 224
 225 S-expressions are self-delimiting, so no end-marker is needed.
 226
 227 \subsubsection{C fragments}
 228 \begin{grammar}
 229 <c-fragment> ::= a sequence of C tokens, with matching brackets
 230 \end{grammar}
 231
 232 Sequences of C code are simply stored and written to the output unchanged
 233 during translation.  They are read using a simple scanner which nonetheless
 234 understands C comments and string and character literals.
 235
 236 A C fragment is terminated by one of a small number of delimiter characters
 237 determined by the immediately surrounding context -- usually a closing brace
 238 or bracket.  The first such delimiter character which is not enclosed in
 239 brackets, braces or parentheses ends the fragment.
 240
 241 %%%--------------------------------------------------------------------------
 242 \section{Module syntax} \label{sec:syntax.module}
 243
 244 \begin{grammar}
 245 <module> ::= @<definition>^*
 246
 247 <definition> ::= <import-definition>
 248 \alt <load-definition>
 249 \alt <lisp-definition>
 250 \alt <code-definition>
 251 \alt <typename-definition>
 252 \alt <class-definition>
 253 \end{grammar}
 254
 255 A @<module> is the top-level syntactic item.  A module consists of a sequence
 256 of definitions.
 257
 258 \subsection{Simple definitions} \label{sec:syntax.module.simple}
 259
 260 \subsubsection{Importing modules}
 261 \begin{grammar}
 262 <import-definition> ::= "import" <string> ";"
 263 \end{grammar}
 264
 265 The module named @<string> is processed and its definitions made available.
 266
 267 A search is made for a module source file as follows.
 268 \begin{itemize}
 269 \item The module name @<string> is converted into a filename by appending
 270   @`.sod', if it has no extension already.\footnote{%
 271     Technically, what happens is \textsf{(merge-pathnames name (make-pathname
 272     :type "SOD" :case :common))}, so exactly what this means varies
 273     according to the host system.} %
 274 \item The file is looked for relative to the directory containing the
 275   importing module.
 276 \item If that fails, then the file is looked for in each directory on the
 277   module search path in turn.
 278 \item If the file still isn't found, an error is reported and the import
 279   fails.
 280 \end{itemize}
 281 At this point, if the file has previously been imported, nothing further
 282 happens.\footnote{%
 283   This check is done using \textsf{truename}, so it should see through simple
 284   tricks like symbolic links.  However, it may be confused by fancy things
 285   like bind mounts and so on.} %
 286
 287 Recursive imports, either direct or indirect, are an error.
 288
 289 \subsubsection{Loading extensions}
 290 \begin{grammar}
 291 <load-definition> ::= "load" <string> ";"
 292 \end{grammar}
 293
 294 The Lisp file named @<string> is loaded and evaluated.
 295
 296 A search is made for a Lisp source file as follows.
 297 \begin{itemize}
 298 \item The name @<string> is converted into a filename by appending @`.lisp',
 299   if it has no extension already.\footnote{%
 300     Technically, what happens is \textsf{(merge-pathnames name (make-pathname
 301     :type "LISP" :case :common))}, so exactly what this means varies
 302     according to the host system.} %
 303 \item A search is then made in the same manner as for module imports
 304   (\xref{sec:syntax.module.simple}).
 305 \end{itemize}
 306 If the file is found, it is loaded using the host Lisp's \textsf{load}
 307 function.
 308
 309 Note that Sod doesn't attempt to compile Lisp files, or even to look for
 310 existing compiled files.  The right way to package a substantial extension to
 311 the Sod translator is to provide the extension as a standard ASDF system (or
 312 similar) and leave a dropping @"foo-extension.lisp" in the module path saying
 313 something like
 314 \begin{quote}
 315   \textsf{(asdf:load-system :foo-extension)}
 316 \end{quote}
 317 which will arrange for the extension to be compiled if necessary.
 318
 319 (This approach means that the language doesn't need to depend on any
 320 particular system definition facility.  It's bad enough already that it
 321 depends on Common Lisp.)
 322
 323 \subsubsection{Lisp escapes}
 324 \begin{grammar}
 325 <lisp-definition> ::= "lisp" <s-expression> ";"
 326 \end{grammar}
 327
 328 The @<s-expression> is evaluated immediately.  It can do anything it likes.
 329
 330 \begin{boxy}[Warning!]
 331   This means that hostile Sod modules are a security hazard.  Lisp code can
 332   read and write files, start other programs, and make network connections.
 333   Don't install Sod modules from sources that you don't trust.\footnote{%
 334     Presumably you were going to run the corresponding code at some point, so
 335     this isn't as unusually scary as it sounds.  But please be careful.} %
 336 \end{boxy}
 337
 338 \subsubsection{Declaring type names}
 339 \begin{grammar}
 340 <typename-definition> ::=
 341   "typename" <list>$[\mbox{@<identifier>}]$ ";"
 342 \end{grammar}
 343
 344 Each @<identifier> is declared as naming a C type.  This is important because
 345 the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
 346 done by distinguishing type names from other identifiers.
 347
 348 Don't declare class names using @"typename"; use @"class" forward
 349 declarations instead.
 350
 351
 352 \subsection{Literal code} \label{sec:syntax.module.literal}
 353
 354 \begin{grammar}
 355 <code-definition> ::=
 356   "code" <identifier> ":" <item-name> @[<constraints>@]
 357   "{" <c-fragment> "}"
 358
 359 <constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
 360
 361 <constraint> ::= @<item-name>^+
 362
 363 <item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
 364 \end{grammar}
 365
 366 The @<c-fragment> will be output unchanged to one of the output files.
 367
 368 The first @<identifier> is the symbolic name of an output file.  Predefined
 369 output file names are @"c" and @"h", which are the implementation code and
 370 header file respectively; other output files can be defined by extensions.
 371
 372 Output items are named with a sequence of identifiers, separated by
 373 whitespace, and enclosed in parentheses.  As an abbreviation, a name
 374 consisting of a single identifier may be written as just that identifier,
 375 without the parentheses.
 376
 377 The @<constraints> provide a means for specifying where in the output file
 378 the output item should appear.  (Note the two kinds of square brackets shown
 379 in the syntax: square brackets must appear around the constraints if they are
 380 present, but the constraints are optional.)  Each comma-separated @<constraint>
 381 is a sequence of names of output items, and indicates that the output items
 382 must appear in the order given -- though the translator is free to insert
 383 additional items in between them.  (The particular output items needn't be
 384 defined already -- indeed, they needn't be defined ever.)
 385
 386 There is a predefined output item @"includes" in both the @"c" and @"h"
 387 output files which is a suitable place for inserting @"\#include"
 388 preprocessor directives in order to declare types and functions for use
 389 elsewhere in the generated output files.
 390
 391
 392 \subsection{Property sets} \label{sec:syntax.module.properties}
 393 \begin{grammar}
 394 <properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
 395
 396 <property> ::= <identifier> "=" <expression>
 397 \end{grammar}
 398
 399 Property sets are a means for associating miscellaneous information with
 400 classes and related items.  By using property sets, additional information
 401 can be passed to extensions without the need to introduce idiosyncratic
 402 syntax.
 403
 404 A property has a name, given as an @<identifier>, and a value computed by
 405 evaluating an @<expression>.  The value can be one of a number of types,
 406 though the only operators currently defined act on integer values only.
 407
 408 \subsubsection{The expression evaluator}
 409 \begin{grammar}
 410 <expression> ::= <term> | <expression> "+" <term> | <expression> "--" <term>
 411
 412 <term> ::= <factor> | <term> "*" <factor> | <term> "/" <factor>
 413
 414 <factor> ::= <primary> | "+" <factor> | "--" <factor>
 415
 416 <primary> ::=
 417      <integer-literal> | <string-literal> | <char-literal> | <identifier>
 418 \alt "<" <plain-type> ">"
 419 \alt "?" <s-expression>
 420 \alt "(" <expression> ")"
 421 \end{grammar}
 422
 423 The arithmetic expression syntax is simple and standard; there are currently
 424 no bitwise, logical, or comparison operators.
 425
 426 A @<primary> expression may be a literal or an identifier.  Note that
 427 identifiers stand for themselves: they \emph{do not} denote values.  For more
 428 fancy expressions, the syntax
 429 \begin{quote}
 430   @"?" @<s-expression>
 431 \end{quote}
 432 causes the @<s-expression> to be evaluated using the Lisp \textsf{eval}
 433 function.
 434 %%% FIXME crossref to extension docs
 435
 436
 437 \subsection{C types} \label{sec:syntax.module.types}
 438
 439 Sod's syntax for C types closely mirrors the standard C syntax.  A C type has
 440 two parts: a sequence of @<declaration-specifier>s and a @<declarator>.  In
 441 Sod, a type must contain at least one @<declaration-specifier> (i.e.,
 442 `implicit @"int"' is forbidden), and storage-class specifiers are not
 443 recognized.
 444
 445 \subsubsection{Declaration specifiers}
 446 \begin{grammar}
 447 <declaration-specifier> ::= <type-name>
 448 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
 449 \alt "void" | "char" | "int" | "float" | "double"
 450 \alt "short" | "long"
 451 \alt "signed" | "unsigned"
 452 \alt "bool" | "_Bool"
 453 \alt "imaginary" | "_Imaginary" | "complex" | "_Complex"
 454 \alt <qualifier>
 455 \alt <storage-specifier>
 456 \alt <atomic-type>
 457
 458 <qualifier> ::= <atomic> | "const" | "volatile" | "restrict"
 459
 460 <plain-type> ::= @<declaration-specifier>^+ <abstract-declarator>
 461
 462 <atomic-type> ::=
 463   <atomic> "(" <plain-type> ")"
 464
 465 <atomic> ::= "atomic" | "_Atomic"
 466
 467 <storage-specifier> ::= <alignas> "(" <c-fragment> ")"
 468
 469 <alignas> ::= "alignas" | "_Alignas"
 470
 471 <type-name> ::= <identifier>
 472 \end{grammar}
 473
 474 A @<type-name> is an identifier which has been declared as being a type name,
 475 using the @"typename" or @"class" definitions.  The following type names are
 476 defined in the built-in module.
 477 \begin{itemize}
 478 \item @"va_list"
 479 \item @"size_t"
 480 \item @"ptrdiff_t"
 481 \item @"wchar_t"
 482 \end{itemize}
 483
 484 Declaration specifiers may appear in any order.  However, not all
 485 combinations are permitted.  A sequence of declaration specifiers must consist
 486 of zero or more @<qualifier>s, zero or more @<storage-specifier>s, and one of
 487 the following, up to reordering.
 488 \begin{itemize}
 489 \item @<type-name>
 490 \item @<atomic-type>
 491 \item @"struct" @<identifier>, @"union" @<identifier>, @"enum" @<identifier>
 492 \item @"void"
 493 \item @"_Bool", @"bool"
 494 \item @"char", @"unsigned char", @"signed char"
 495 \item @"short", @"unsigned short", @"signed short"
 496 \item @"short int", @"unsigned short int", @"signed short int"
 497 \item @"int", @"unsigned int", @"signed int", @"unsigned", @"signed"
 498 \item @"long", @"unsigned long", @"signed long"
 499 \item @"long int", @"unsigned long int", @"signed long int"
 500 \item @"long long", @"unsigned long long", @"signed long long"
 501 \item @"long long int", @"unsigned long long int", @"signed long long int"
 502 \item @"float", @"double", @"long double"
 503 \item @"float _Imaginary", @"double _Imaginary", @"long double _Imaginary"
 504 \item @"float imaginary", @"double imaginary", @"long double imaginary"
 505 \item @"float _Complex", @"double _Complex", @"long double _Complex"
 506 \item @"float complex", @"double complex", @"long double complex"
 507 \end{itemize}
 508 All of these have their usual C meanings.
 509
 510 \subsubsection{Declarators}
 511 \begin{grammar}
 512 <declarator>$[k, a]$ ::= @<pointer>^* <primary-declarator>$[k, a]$
 513
 514 <primary-declarator>$[k, a]$ ::= $k$
 515 \alt "(" <primary-declarator>$[k, a]$ ")"
 516 \alt <primary-declarator>$[k, a]$ @<declarator-suffix>$[a]$
 517
 518 <pointer> ::= "*" @<qualifier>^*
 519
 520 <declarator-suffix>$[a]$ ::= "[" <c-fragment> "]"
 521 \alt "(" $a$ ")"
 522
 523 <argument-list> ::= $\epsilon$ | "\dots"
 524 \alt <list>$[\mbox{@<argument>}]$ @["," "\dots"@]
 525
 526 <argument> ::= @<declaration-specifier>^+ <argument-declarator>
 527
 528 <abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
 529
 532 <argument-declarator> ::=
 533   <declarator>$[\mbox{@<identifier> @! $\epsilon$}, \mbox{@<argument-list>}]$
 534
 535 <simple-declarator> ::=
 536   <declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
 537 \end{grammar}
 538
 539 The declarator syntax is taken from C, but with some differences.
 540 \begin{itemize}
 541 \item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
 542   closing square bracket.  This allows array dimensions to contain arbitrary
 543   constant expressions.
 544 \item A declarator may have either a single @<identifier> at its centre or a
 545   pair of @<identifier>s separated by a @`.'; this is used to refer to
 546   slots or messages defined in superclasses.
 547 \end{itemize}
 548 The remaining differences are (I hope) a matter of presentation rather than
 549 substance.
 550
 551 There is additional syntax to support messages and methods which accept
 552 keyword arguments.
 553
 554 \begin{grammar}
 555 <keyword-argument> ::= <argument> @["=" <c-fragment>@]
 556
 557 <keyword-argument-list> ::=
 558   @[<list>$[\mbox{@<argument>}]$@]
 559   "?" @[<list>$[\mbox{@<keyword-argument>}]$@]
 560
 561 <method-argument-list> ::= <argument-list> @! <keyword-argument-list>
 562
 563 <dotted-name> ::= <identifier> "." <identifier>
 564
 565 <keyword-declarator>$[k]$ ::=
 566   <declarator>$[k, \mbox{@<method-argument-list>}]$
 567 \end{grammar}
 568
 569
 570 \subsection{Class definitions} \label{sec:syntax.module.class}
 571
 572 \begin{grammar}
 573 <class-definition> ::= <class-forward-declaration>
 574 \alt <full-class-definition>
 575 \end{grammar}
 576
 577 \subsubsection{Forward declarations}
 578 \begin{grammar}
 579 <class-forward-declaration> ::= "class" <identifier> ";"
 580 \end{grammar}
 581
 582 A @<class-forward-declaration> informs Sod that an @<identifier> will be used
 583 to name a class which is currently undefined.  Forward declarations are
 584 necessary in order to resolve certain kinds of circularity.  For example,
 585 \begin{prog}
 586 class Sub;                                                      \\+
 587
 588 class Super : SodObject \{                                      \\ \ind
 589   Sub *sub;                                                   \-\\
 590 \};                                                             \\+
 591
 592 class Sub : Super \{                                            \\ \ind
 593   /* \dots\ */                                                \-\\
 594 \};
 595 \end{prog}
 596
 597 \subsubsection{Full class definitions}
 598 \begin{grammar}
 599 <full-class-definition> ::=
 600   @[<properties>@]
 601   "class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
 602   "{" @<properties-class-item>^* "}"
 603
 604 <properties-class-item> ::= @[<properties>@] <class-item>
 605
 606 <class-item> ::= <slot-item>
 607 \alt <initializer-item>
 608 \alt <initarg-item>
 609 \alt <fragment-item>
 610 \alt <message-item>
 611 \alt <method-item>
 612 \end{grammar}
 613
 614 A full class definition provides a complete description of a class.
 615
 616 The first @<identifier> gives the name of the class.  It is an error to
 617 give the name of an existing class (other than a forward-referenced class),
 618 or an existing type name.  It is conventional to give classes `MixedCase'
 619 names, to distinguish them from other kinds of identifiers.
 620
 621 The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
 622 class.  It is an error if any of these @<identifier>s does not name a defined
 623 class.  The superclass list is required, and must not be empty; listing
 624 @|SodObject| as your class's superclass is a good choice if nothing else
 625 seems suitable.  It's not possible to define a \emph{root class} in the Sod
 626 language: you must use Lisp to do this, and it's quite involved.
 627
 628 The @<properties> provide additional information.  The standard class
 629 properties are as follows.
 630 \begin{description}
 631 \item[@"lisp_class"] The name of the Lisp class to use within the translator
 632   to represent this class.  The property value must be an identifier; the
 633   default is @"sod_class".  Extensions may define classes with additional
 634   behaviour, and may recognize additional class properties.
 635 \item[@"metaclass"] The name of the Sod metaclass for this class.  In the
 636   generated code, a class is itself an instance of another class -- its
 637   \emph{metaclass}.  The metaclass defines which slots the class will have,
 638   which messages it will respond to, and what its behaviour will be when it
 639   receives them.  The property value must be an identifier naming a defined
 640   subclass of @"SodClass".  The default metaclass is @"SodClass".
 641   %%% FIXME xref to theory
 642 \item[@"nick"] A nickname for the class, to be used to distinguish it from
 643   other classes in various limited contexts.  The property value must be an
 644   identifier; the default is constructed by forcing the class name to
 645   lower-case.
 646 \end{description}
 647
 648 The class body consists of a sequence of @<class-item>s enclosed in braces.
 649 These items are discussed in the following sections.
 650
 651 \subsubsection{Slot items}
 652 \begin{grammar}
 653 <slot-item> ::=
 654   @<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
 655
 656 <init-declarator> ::= <simple-declarator> @["=" <initializer>@]
 657 \end{grammar}
 658
 659 A @<slot-item> defines one or more slots.  All instances of the class and any
 660 subclass will contain these slots, with the names and types given by the
 661 @<declaration-specifier>s and the @<declarator>s.  Slot declarators may not
 662 contain dotted names.
 663
 664 It is not possible to declare a slot with function type: such an item is
 665 interpreted as being a @<message-item> or @<method-item>.  Pointers to
 666 functions are fine.
 667
 668 An @<initializer>, if present, is treated as if a separate
 669 @<initializer-item> containing the slot name and initializer were present.
 670 For example,
 671 \begin{prog}
 672 [nick = eg]                                                     \\
 673 class Example : Super \{                                        \\ \ind
 674   int foo = 17;                                               \-\\
 675 \};
 676 \end{prog}
 677 means the same as
 678 \begin{prog}
 679 [nick = eg]                                                     \\
 680 class Example : Super \{                                        \\ \ind
 681   int foo;                                                      \\
 682   eg.foo = 17;                                                \-\\
 683 \};
 684 \end{prog}
 685
 686 \subsubsection{Initializer items}
 687 \begin{grammar}
 688 <initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
 689
 690 <slot-initializer> ::= <dotted-name> @["=" <initializer>@]
 691
 692 <initializer> ::= <c-fragment>
 693 \end{grammar}
 694
 695 An @<initializer-item> provides an initial value for one or more slots.  If
 696 prefixed by @"class", then the initial values are for class slots (i.e.,
 697 slots of the class object itself); otherwise they are for instance slots.
 698
 699 The first component of the @<dotted-name> must be the nickname of one of the
 700 class's superclasses (including itself); the second must be the name of a
 701 slot defined in that superclass.
 702
 703 An @|initarg| property may be set on an instance slot initializer (or a
 704 direct slot definition).  See \xref{sec:concepts.lifecycle.birth} for the
 705 details.  An initializer item must have either an @|initarg| property, or an
 706 initializer expression, or both.
 707
 708 Each class may define at most one initializer item with an explicit
 709 initializer expression for a given slot.
 710
 711 \subsubsection{Initarg items}
 712 \begin{grammar}
 713 <initarg-item> ::=
 714   "initarg"
 715   @<declaration-specifier>^+
 716   <list>$[\mbox{@<init-declarator>}]$ ";"
 717 \end{grammar}
 718
 719 \subsubsection{Fragment items}
 720 \begin{grammar}
 721 <fragment-item> ::= <fragment-kind> "{" <c-fragment> "}"
 722
 723 <fragment-kind> ::= "init" | "teardown"
 724 \end{grammar}
 725
 726 \subsubsection{Message items}
 727 \begin{grammar}
 728 <message-item> ::=
 729   @<declaration-specifier>^+
 730   <keyword-declarator>$[\mbox{@<identifier>}]$
 731   @[<method-body>@]
 732 \end{grammar}
 733
 734 \subsubsection{Method items}
 735 \begin{grammar}
 736 <method-item> ::=
 737   @<declaration-specifier>^+
 738   <keyword-declarator>$[\mbox{@<dotted-name>}]$
 739   <method-body>
 740
 741 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
 742 \end{grammar}
 743
 744 %%%----- That's all, folks --------------------------------------------------
 745
 746 %%% Local variables:
 747 %%% mode: LaTeX
 748 %%% TeX-master: "sod.tex"
 749 %%% TeX-PDF-mode: t
 750 %%% End: