chiark - git - mdw - sod/blob - doc/syntax.tex

   1 %%% -*-latex-*-
   2 %%%
   3 %%% Module syntax
   4 %%%
   5 %%% (c) 2015 Straylight/Edgeware
   6 %%%
   7
   8 %%%----- Licensing notice ---------------------------------------------------
   9 %%%
  10 %%% This file is part of the Sensible Object Design, an object system for C.
  11 %%%
  12 %%% SOD is free software; you can redistribute it and/or modify
  13 %%% it under the terms of the GNU General Public License as published by
  14 %%% the Free Software Foundation; either version 2 of the License, or
  15 %%% (at your option) any later version.
  16 %%%
  17 %%% SOD is distributed in the hope that it will be useful,
  18 %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 %%% GNU General Public License for more details.
  21 %%%
  22 %%% You should have received a copy of the GNU General Public License
  23 %%% along with SOD; if not, write to the Free Software Foundation,
  24 %%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25
  26 \chapter{Module syntax} \label{ch:syntax}
  27
  28 %%%--------------------------------------------------------------------------
  29 \section{Lexical syntax} \label{sec:syntax.lex}
  30
  31 Whitespace and comments are discarded.  The remaining characters are
  32 collected into tokens according to the following syntax.
  33
  34 \begin{grammar}
  35 <token> ::= <identifier>
  36 \alt <string-literal>
  37 \alt <char-literal>
  38 \alt <integer-literal>
  39 \alt <punctuation>
  40 \end{grammar}
  41
  42 This syntax is slightly ambiguous, and is disambiguated by the \emph{maximal
  43 munch} rule: at each stage we take the longest sequence of characters which
  44 could be a token.
  45
  46
  47 \subsection{Identifiers} \label{sec:syntax.lex.id}
  48
  49 \begin{grammar}
  50 <identifier> ::= <id-start-char> @<id-body-char>^*
  51
  52 <id-start-char> ::= <alpha-char> | "_"
  53
  54 <id-body-char> ::= <id-start-char> @! <digit-char>
  55
  56 <alpha-char> ::= "A" | "B" $| \cdots |$ "Z"
  57   | "a" | "b" $| \cdots |$ "z"
  58   | <extended-alpha-char>
  59
  60 <digit-char> ::= "0" | <nonzero-digit-char>
  61
  62 <nonzero-digit-char> ::= "1" | "2" $| \cdots |$ "9"
  63 \end{grammar}
  64
  65 The precise definition of @<alpha-char> is left to the function
  66 @|alpha-char-p| in the hosting Lisp system.  For portability, programmers are
  67 encouraged to limit themselves to the standard ASCII letters.
  68
  69 There are no reserved words at the lexical level, but the higher-level syntax
  70 recognizes certain identifiers as \emph{keywords} in some contexts.  There is
  71 also an ambiguity (inherited from C) in the declaration syntax which is
  72 settled by distinguishing type names from other identifiers at a lexical
  73 level.
  74
  75
  76 \subsection{String and character literals} \label{sec:syntax.lex.string}
  77
  78 \begin{grammar}
  79 <string-literal> ::= "\"" @<string-literal-char>^* "\""
  80
  81 <char-literal> ::= "'" <char-literal-char> "'"
  82
  83 <string-literal-char> ::= "\\" <char>
  84   | any character other than "\\" or "\""
  85
  86 <char-literal-char> ::= "\\" <char>
  87   | any character other than "\\" or "'"
  88
  89 <char> ::= any single character
  90 \end{grammar}
  91
  92 The syntax for string and character literals differs from~C.  In particular,
  93 escape sequences such as @`\textbackslash n' are not recognized.  The use
  94 of string and character literals in Sod, outside of C~fragments, is limited,
  95 and the simple syntax seems adequate.  For the sake of future compatibility,
  96 the use of character sequences which resemble C escape sequences is
  97 discouraged.
  98
  99
 100 \subsection{Integer literals} \label{sec:syntax.lex.int}
 101
 102 \begin{grammar}
 103 <integer-literal> ::= <decimal-integer>
 104   | <binary-integer>
 105   | <octal-integer>
 106   | <hex-integer>
 107
 108 <decimal-integer> ::= "0" | <nonzero-digit-char> @<digit-char>^*
 109
 110 <binary-integer> ::= "0" @("b" @! "B"@) @<binary-digit-char>^+
 111
 112 <binary-digit-char> ::= "0" | "1"
 113
 114 <octal-integer> ::= "0" @["o" @! "O"@] @<octal-digit-char>^+
 115
 116 <octal-digit-char> ::= "0" | "1" $| \cdots |$ "7"
 117
 118 <hex-integer> ::= "0" @("x" @! "X"@) @<hex-digit-char>^+
 119
 120 <hex-digit-char> ::= <digit-char>
 121   | "A" | "B" | "C" | "D" | "E" | "F"
 122   | "a" | "b" | "c" | "d" | "e" | "f"
 123 \end{grammar}
 124
 125 Sod understands only integers, not floating-point numbers; its integer syntax
 126 goes slightly beyond C in allowing a @`0o' prefix for octal and @`0b' for
 127 binary.  However, length and signedness indicators are not permitted.
 128
 129
 130 \subsection{Punctuation} \label{sec:syntax.lex.punct}
 131
 132 \begin{grammar}
 133 <punctuation> ::= "<<" | ">>" | "||" | "&&"
 134   | "<=" | ">=" | "==" | "!=" | "\dots"
 135 \alt any nonalphanumeric character other than "_", "\"", or "'"
 136 \end{grammar}
 137
 138
 139 \subsection{Comments} \label{sec:syntax.lex.comment}
 140
 141 \begin{grammar}
 142 <comment> ::= <block-comment> | <line-comment>
 143
 144 <block-comment> ::=
 145   "/*"
 146   @<not-star>^* @(@<star>^+ <not-star-or-slash> @<not-star>^*@)^*
 147   @<star>^*
 148   "*/"
 149
 150 <star> ::= "*"
 151
 152 <not-star> ::= any character other than "*"
 153
 154 <not-star-or-slash> ::= any character other than "*" or "/"
 155
 156 <line-comment> ::= "/\,/" @<not-newline>^* <newline>
 157
 158 <newline> ::= a newline character
 159
 160 <not-newline> ::= any character other than newline
 161 \end{grammar}
 162
 163 Comments are exactly as in C99: both traditional block comments `@|/*| \dots\
 164 @|*/|' and \Cplusplus-style `@|/\,/| \dots' comments are permitted and
 165 ignored.
 166
 167
 168 \subsection{Special nonterminals} \label{sec:syntax.lex.special}
 169
 170 Aside from the lexical syntax presented above (\xref{sec:syntax.lex}),
 171 two special nonterminals occur in the module syntax.
 172
 173 \subsubsection{S-expressions}
 174 \begin{grammar}
 175 <s-expression> ::= an S-expression, as parsed by the Lisp reader
 176 \end{grammar}
 177
 178 When an S-expression is expected, the Sod parser simply calls the host Lisp
 179 system's @|read| function.  Sod modules are permitted to modify the read
 180 table to extend the S-expression syntax.
 181
 182 S-expressions are self-delimiting, so no end-marker is needed.
 183
 184 \subsubsection{C fragments}
 185 \begin{grammar}
 186 <c-fragment> ::= a sequence of C tokens, with matching brackets
 187 \end{grammar}
 188
 189 Sequences of C code are simply stored and written to the output unchanged
 190 during translation.  They are read using a simple scanner which nonetheless
 191 understands C comments and string and character literals.
 192
 193 A C fragment is terminated by one of a small number of delimiter characters
 194 determined by the immediately surrounding context -- usually some kind of
 195 bracket.  The first such delimiter character which is not enclosed in
 196 brackets, braces or parentheses ends the fragment.
 197
 198 %%%--------------------------------------------------------------------------
 199 \section{C types} \label{sec:syntax.type}
 200
 201 Sod's syntax for C types closely mirrors the standard C syntax.  A C type has
 202 two parts: a sequence of @<declaration-specifier>s and a @<declarator>.  In
 203 Sod, a type must contain at least one @<declaration-specifier> (i.e.,
 204 `implicit @|int|' is forbidden), and storage-class specifiers are not
 205 recognized.
 206
 207
 208 \subsection{Declaration specifiers} \label{sec:syntax.type.declspec}
 209
 210 \begin{grammar}
 211 <declaration-specifier> ::= <type-name>
 212 \alt "struct" <identifier> | "union" <identifier> | "enum" <identifier>
 213 \alt "void" | "char" | "int" | "float" | "double"
 214 \alt "short" | "long"
 215 \alt "signed" | "unsigned"
 216 \alt "bool" | "_Bool"
 217 \alt "imaginary" | "_Imaginary" | "complex" | "_Complex"
 218 \alt <qualifier>
 219 \alt <storage-specifier>
 220 \alt <atomic-type>
 221 \alt <other-declspec>
 222
 223 <qualifier> ::= <atomic> | "const" | "volatile" | "restrict"
 224
 225 <plain-type> ::= @<declaration-specifier>^+ <abstract-declarator>
 226
 227 <atomic-type> ::= <atomic> "(" <plain-type> ")"
 228
 229 <atomic> ::= "atomic" | "_Atomic"
 230
 231 <storage-specifier> ::= <alignas> "(" <c-fragment> ")"
 232
 233 <alignas> ::= "alignas" | "_Alignas"
 234
 235 <type-name> ::= <identifier>
 236 \end{grammar}
 237
 238 Declaration specifiers may appear in any order.  However, not all
 239 combinations are permitted.  A sequence of declaration specifiers must
 240 consist of zero or more @<qualifier>s, zero or more @<storage-specifier>s,
 241 and one of the following, up to reordering:
 242 \begin{itemize}
 243 \item @<type-name>;
 244 \item @<atomic-type>;
 245 \item @"struct" @<identifier>; @"union" @<identifier>; @"enum" @<identifier>;
 246 \item @"void";
 247 \item @"_Bool", @"bool";
 248 \item @"char"; @"unsigned char"; @"signed char";
 249 \item @"short", @"signed short", @"short int", @"signed short int";
 250   @"unsigned short", @"unsigned short int";
 251 \item @"int", @"signed", @"signed int"; @"unsigned", @"unsigned int";
 252 \item @"long", @"signed long", @"long int", @"signed long int"; @"unsigned
 253   long", @"unsigned long int";
 254 \item @"long long", @"signed long long", @"long long int", @"signed long long
 255   int"; @"unsigned long long", @"unsigned long long int";
 256 \item @"float"; @"double"; @"long double";
 257 \item @"float _Imaginary", @"float imaginary"; @"double _Imaginary", @"double
 258   imaginary"; @"long double _Imaginary", @"long double imaginary";
 259 \item @"float _Complex", @"float complex"; @"double _Complex", @"double
 260   complex"; @"long double _Complex", @"long double complex".
 261 \end{itemize}
 262 All of these have their usual C meanings.  Groups separated by commas mean
 263 the same thing, and Sod will not preserve the distinction.
 264
 265 Almost all of these mean the same as they do in C.  There are some minor
 266 differences:
 267 \begin{itemize}
 268 \item In C, the `tag' namespace is shared between @|struct|, @|union|, and
 269   @|enum|; Sod has three distinct namespaces for tags.  This may be fixed in
 270   the future.
 271 \item The @<other-declspec> production is a syntactic extension point, where
 272   extensions can introduce their own additions to the type system.
 273 \end{itemize}
 274
 275 C standards from C99 onwards have tended to introduce new keywords beginning
 276 with an underscore followed by an uppercase letter, so as to avoid conflicts
 277 with existing code.  More conventional spellings are then provided by macros
 278 in new header files.  For example, C99 introduced @"_Bool", and a header file
 279 @|<stdbool.h>| which defines the macro @|bool|.  Sod recognizes both the ugly
 280 underscore names and the more conventional macro names on input, but always
 281 emits the ugly names.  This doesn't cause a compatibility problem in Sod,
 282 because Sod's parser recognizes keywords only in the appropriate context.
 283 For example, the (ill-advised) slot declaration
 284 \begin{prog}
 285   bool bool;
 286 \end{prog}
 287 is completely acceptable, and will cause the C structure member
 288 \begin{prog}
 289   \_Bool bool;
 290 \end{prog}
 291 to be emitted on output, which will be acceptable to C as long as
 292 @|<stdbool.h>| is not included.
 293
 294 A @<type-name> is an identifier which has been declared as being a type name,
 295 using the @"typename" or @"class" definitions.  The following type names are
 296 defined in the built-in module.
 297 \begin{itemize}
 298 \item @|va_list|
 299 \item @|size_t|
 300 \item @|ptrdiff_t|
 301 \item @|wchar_t|
 302 \end{itemize}
 303
 304
 305 \subsection{Declarators} \label{sec:syntax.type.declarator}
 306
 307 \begin{grammar}
 308 <declarator>$[k, a]$ ::= @<pointer>^* <primary-declarator>$[k, a]$
 309
 310 <primary-declarator>$[k, a]$ ::= $k$
 311 \alt "(" <primary-declarator>$[k, a]$ ")"
 312 \alt <primary-declarator>$[k, a]$ @<declarator-suffix>$[a]$
 313
 314 <pointer> ::= "*" @<qualifier>^*
 315
 316 <declarator-suffix>$[a]$ ::= "[" <c-fragment> "]"
 317 \alt "(" $a$ ")"
 318
 319 <argument-list> ::= $\epsilon$ | "\dots"
 320 \alt <list>$[\mbox{@<argument>}]$ @["," "\dots"@]
 321
 322 <argument> ::= @<declaration-specifier>^+ <argument-declarator>
 323
 324 <abstract-declarator> ::= <declarator>$[\epsilon, \mbox{@<argument-list>}]$
 325
 326 <argument-declarator> ::=
 327   <declarator>$[\mbox{@<identifier> @! $\epsilon$}, \mbox{@<argument-list>}]$
 328
 329 <simple-declarator> ::=
 330   <declarator>$[\mbox{@<identifier>}, \mbox{@<argument-list>}]$
 331 \end{grammar}
 332
 333 The declarator syntax is taken from C, but with some differences.
 334 \begin{itemize}
 335 \item Array dimensions are uninterpreted @<c-fragment>s, terminated by a
 336   closing square bracket.  This allows array dimensions to contain arbitrary
 337   constant expressions.
 338 \item A declarator may have either a single @<identifier> at its centre or a
 339   pair of @<identifier>s separated by a @`.'; this is used to refer to
 340   slots or messages defined in superclasses.
 341 \end{itemize}
 342 The remaining differences are (I hope) a matter of presentation rather than
 343 substance.
 344
 345 There is additional syntax to support messages and methods which accept
 346 keyword arguments.
 347
 348 \begin{grammar}
 349 <keyword-argument> ::= <argument> @["=" <c-fragment>@]
 350
 351 <keyword-argument-list> ::=
 352   @[<list>$[\mbox{@<argument>}]$@]
 353   "?" @[<list>$[\mbox{@<keyword-argument>}]$@]
 354
 355 <method-argument-list> ::= <argument-list> @! <keyword-argument-list>
 356
 357 <dotted-name> ::= <identifier> "." <identifier>
 358
 359 <keyword-declarator>$[k]$ ::=
 360   <declarator>$[k, \mbox{@<method-argument-list>}]$
 361 \end{grammar}
 362
 363 %%%--------------------------------------------------------------------------
 364 \section{Properties} \label{sec:syntax.prop}
 365
 366 \begin{grammar}
 367 <properties> ::= "[" <list>$[\mbox{@<property>}]$ "]"
 368
 369 <property> ::= <identifier> "=" <expression>
 370
 371 <expression> ::= <logical-or>
 372
 373 <logical-or> ::= <logical-and>
 374   | <logical-or> "||" <logical-and>
 375
 376 <logical-and> ::= <bitwise-or>
 377   | <logical-and> "&&" <bitwise-or>
 378
 379 <bitwise-or> ::= <bitwise-xor>
 380   | <bitwise-or> "|" <bitwise-xor>
 381
 382 <bitwise-xor> ::= <bitwise-and>
 383   | <bitwise-xor> "^" <bitwise-and>
 384
 385 <bitwise-and> ::= <equality>
 386   | <bitwise-and> "&" <equality>
 387
 388 <equality> ::= <ordering>
 389   | <equality> "==" <ordering>
 390   | <equality> "!=" <ordering>
 391
 392 <ordering> ::= <shift>
 393   | <ordering> "<" <shift>
 394   | <ordering> "<=" <shift>
 395   | <ordering> ">=" <shift>
 396   | <ordering> ">" <shift>
 397
 398 <shift> ::= <additive>
 399   | <shift> "<<" <additive>
 400   | <shift> ">>" <additive>
 401
 402 <additive> ::= <term>
 403   | <additive> "+" <term>
 404   | <additive> "--" <term>
 405
 406 <term> ::= <factor>
 407   | <term> "*" <factor>
 408   | <term> "/" <factor>
 409
 410 <factor> ::= <primary>
 411   | "!" <factor> | "~" <factor>
 412   | "+" <factor> | "--" <factor>
 413
 414 <primary> ::=
 415      <integer-literal> | <string-literal> | <char-literal> | <identifier>
 416 \alt "<" <plain-type> ">" | "{" <c-fragment> "}" | "?" <s-expression>
 417   | "(" <expression> ")"
 418 \end{grammar}
 419
 420 \emph{Property sets} are a means for associating miscellaneous information
 421 with compile-time metaobjects such as modules, classes, messages, methods,
 422 slots, and initializers.  By using property sets, additional information can
 423 be passed to extensions without the need to introduce idiosyncratic syntax.
 424 (That said, extensions can add additional first-class syntax, if necessary.)
 425
 426 An error is reported if an unrecognized property is associated with an
 427 object.
 428
 429
 430 \subsection{Property values} \label{sec:syntax.prop.value}
 431
 432 A property has a name, given as an @<identifier>, and a value computed by
 433 evaluating an @<expression>.  The value can be one of a number of types.
 434
 435 \begin{itemize}
 436
 437 \item An @<integer-literal> denotes a value of type @|int|.
 438
 439 \item Similarly @<string-literal> and @<char-literal> denote @|string| and
 440   @|char| values respectively.  Note that, as properties, characters are
 441   quite distinct from integers, whereas in C, a character literal denotes a
 442   value of type @|int|.
 443
 444 \item There are no variables in the property-value syntax.  Rather, an
 445   @<identifier> denotes that identifier, as a value of type @|id|.
 446
 447 \item A C type (a @<plain-type>, as described in \xref{sec:syntax.type})
 448   between angle brackets, e.g., @|<int>|, or @|<char *>|, or @|<void (*(int,
 449   void (*)(int)))(int)>|, denotes that C type, as a value of type @|type|.
 450
 451 \item A @<c-fragment> within braces denotes the tokens between (and not
 452   including) the braces, as a value of type @|c-fragment|.
 453
 454 \end{itemize}
 455
 456 As shown in the grammar, there are four binary operators, @"+" (addition),
 457 @"--" (subtraction), @"*" (multiplication), and @"/" (division);
 458 multiplication and division have higher precedence than addition and
 459 subtraction, and operators of the same precedence associate left-to-right.
 460 There are also unary @"+" (no effect) and @"--" (negation) operators, with
 461 higher precedence.  All of the above operators act only on integer operands
 462 and yield integer results.  (Although the unary @"+" operator yields its
 463 operand unchanged, an error is still reported if it is applied to a
 464 non-integer value.)  There are currently no bitwise, logical, or comparison
 465 operators.
 466
 467 Finally, an S-expression preceded by @|?| causes the expression to be read in
 468 the current package (which is always @|sod-user| at the start of a module)
 469 and immediately evaluated (using @|eval|); the resulting value is converted
 470 into a property value using the \descref{gf}{decode-property}[generic
 471 function].
 472
 473
 474 \subsection{Property output types and coercions}
 475 \label{sec:syntax.prop.coerce}
 476
 477 When a property value is inspected by the Sod translator, or an extension, it
 478 is \emph{coerced} so as to conform to a requested output type.  This coercion
 479 process is performed by the \descref{gf}{coerce-property-value}[generic
 480 function], and additional output types and coercions can be defined by
 481 extensions.  The built-in output types and coercions, from the value types listed
 482 above, are as follows.
 483
 484 \begin{itemize}
 485
 486 \item The output types @|int|, @|string|, @|char|, @|id|, and @|c-fragment|
 487   correspond to the like-named value types described above.  No coercions to
 488   these output types are defined for the described value types.\footnote{%
 489     There is a coercion to @|id| from the value type @|symbol|, but it is
 490     only possible to generate a property value of type @|symbol| using Lisp.}
 491
 492 \item The output type @|type| denotes a C type, as does the value type
 493   @|type|.  In addition, a value of type @|id| can be coerced to a C type if
 494   it is the name of a class, a type name explicitly declared by @|typename|,
 495   or it is one of: @|bool|, @|_Bool|, @|void|, @|char|, @|short|, @|int|,
 496   @|signed|, @|unsigned|, @|long|, @|size_t|, @|ptrdiff_t|, @|wchar_t|,
 497   or @|va_list|.
 498
 499 \item The @|boolean| output type denotes a boolean value, which may be either
 500   true or false.  A value of type @|id| is considered true if it is @|true|,
 501   @|t|, @|yes|, @|on|, @|yup|, or @|verily|; or false if it is @|false|,
 502   @|nil|, @|no|, @|off|, @|nope|, or @|nowise|; it is erroneous to provide
 503   any other identifier where a boolean value is wanted.  A value of type
 504   @|int| is considered true if it is nonzero, or false if it is zero.
 505
 506 \item The @|symbol| output type denotes a Lisp symbol.
 507
 508   A value of type @|id| is coerced to a symbol as follows.  First, the
 509   identifier name is subjected to \emph{case inversion}: if all of the
 510   letters in the name have the same case, either upper or lower, then they
 511   are replaced with the corresponding letters in the opposite case, lower or
 512   upper; if the name contains letters of both cases, then it is not changed.
 513   For example, @|foo45| becomes @|FOO45|, or \emph{vice-versa}; but @|Splat|
 514   remains as it is.  Second, the name is subjected to \emph{separator
 515   switching}: all underscores in the name are replaced with hyphens (and
 516   \emph{vice-versa}, though hyphens aren't permitted in identifiers in the
 517   first place).  Finally, the resulting name is interned in the current
 518   package, which will usually be @|sod-user| unless changed explicitly by the
 519   module.
 520
 521   A value of type @|string| is coerced to a symbol as follows.  If the string
 522   contains no colons, then it is case-inverted (but not separator-switched)
 523   and interned in the current package.  Otherwise, the string either has the
 524   form $p @|:| q$, where $q$ does not begin with a colon (the
 525   \emph{single-colon} case) or $p @|::| q$ (the \emph{double-colon} case);
 526   where $p$ does not contain a colon.  Both $p$ and $q$ are case-inverted
 527   (but not separator-switched).  If $p$ does not name a package, then an
 528   error is reported; as a special case, if $p$ is empty, then it is
 529   considered to name the @|keyword| package.  Otherwise, $q$ is looked up as
 530   a symbol name in package~$p$; in the single-colon case, if the symbol is
 531   not an exported symbol in package~$p$, then an error is reported; in the
 532   double-colon case, $q$ is interned in package~$p$ (and so there needn't be
 533   an exported symbol -- or, indeed, any symbol at all -- named $q$
 534   beforehand).
 535
 536 \item The @|keyword| output type denotes symbols within the @|keyword|
 537   package.  Values of type @|id| or @|string| can be coerced to a @|keyword|
 538   in the same way as to a @|symbol|, as described above, only the converted
 539   name is looked up in the @|keyword| package rather than the current
 540   package.  (A @|string| can override this by specifying an explicit package
 541   name, but this is unlikely to be very helpful.)
 542
 543 \end{itemize}
 544
 545 %%%--------------------------------------------------------------------------
 546 \section{Module syntax} \label{sec:syntax.module}
 547
 548 \begin{grammar}
 549 <module> ::= @<definition>^*
 550
 551 <definition> ::= <property-definition> \fixme{undefined}
 552 \alt <import-definition>
 553 \alt <load-definition>
 554 \alt <lisp-definition>
 555 \alt <code-definition>
 556 \alt <typename-definition>
 557 \alt <class-definition>
 558 \alt <other-definition> \fixme{undefined}
 559 \end{grammar}
 560
 561 A @<module> is the top-level syntactic item: a source file presented to Sod
 562 is expected to conform with the @<module> syntax.
 563
 564 A module consists of a sequence of definitions.
 565
 566 \fixme{describe syntax; expand}
 567 Properties:
 568 \begin{description}
 569 \item[@|module_class|] A symbol naming the Lisp class to use to
 570   represent the module.
 571 \item[@|guard|] An identifier to use as the guard symbol used to prevent
 572   multiple inclusion in the header file.
 573 \end{description}
 574
 575
 576 \subsection{Simple definitions} \label{sec:syntax.module.simple}
 577
 578 \subsubsection{Importing modules}
 579 \begin{grammar}
 580 <import-definition> ::= "import" <string> ";"
 581 \end{grammar}
 582
 583 The module named @<string> is processed and its definitions made available.
 584
 585 A search is made for a module source file as follows.
 586 \begin{itemize}
 587 \item The module name @<string> is converted into a filename by appending
 588   @`.sod', if it has no extension already.\footnote{%
 589     Technically, what happens is @|(merge-pathnames name (make-pathname :type
 590     "SOD" :case :common))|, so exactly what this means varies according to
 591     the host system.} %
 592 \item The file is looked for relative to the directory containing the
 593   importing module.
 594 \item If that fails, then the file is looked for in each directory on the
 595   module search path in turn.
 596 \item If the file still isn't found, an error is reported and the import
 597   fails.
 598 \end{itemize}
 599 At this point, if the file has previously been imported, nothing further
 600 happens.\footnote{%
 601   This check is done using @|truename|, so it should see through simple
 602   tricks like symbolic links.  However, it may be confused by fancy things
 603   like bind mounts and so on.} %
 604
 605 Recursive imports, either direct or indirect, are an error.
 606
 607 \subsubsection{Loading extensions}
 608 \begin{grammar}
 609 <load-definition> ::= "load" <string> ";"
 610 \end{grammar}
 611
 612 The Lisp file named @<string> is loaded and evaluated.
 613
 614 A search is made for a Lisp source file as follows.
 615 \begin{itemize}
 616 \item The name @<string> is converted into a filename by appending @`.lisp',
 617   if it has no extension already.\footnote{%
 618     Technically, what happens is @|(merge-pathnames name (make-pathname :type
 619     "LISP" :case :common))|, so exactly what this means varies according to
 620     the host system.} %
 621 \item A search is then made in the same manner as for module imports
 622   (\xref{sec:syntax.module.simple}).
 623 \end{itemize}
 624 If the file is found, it is loaded using the host Lisp's @|load| function.
 625
 626 Note that Sod doesn't attempt to compile Lisp files, or even to look for
 627 existing compiled files.  The right way to package a substantial extension to
 628 the Sod translator is to provide the extension as a standard ASDF system (or
 629 similar) and leave a dropping @|foo-extension.lisp| in the module path saying
 630 something like
 631 \begin{prog}
 632   (asdf:load-system :foo-extension)
 633 \end{prog}
 634 which will arrange for the extension to be compiled if necessary.
 635
 636 (This approach means that the language doesn't need to depend on any
 637 particular system definition facility.  It's bad enough already that it
 638 depends on Common Lisp.)
 639
 640 \subsubsection{Lisp escapes}
 641 \begin{grammar}
 642 <lisp-definition> ::= "lisp" <s-expression> ";"
 643 \end{grammar}
 644
 645 The @<s-expression> is evaluated immediately.  It can do anything it likes.
 646
 647 \begin{boxy}[Warning!]
 648   This means that hostile Sod modules are a security hazard.  Lisp code can
 649   read and write files, start other programs, and make network connections.
 650   Don't install Sod modules from sources that you don't trust.\footnote{%
 651     Presumably you were going to run the corresponding code at some point, so
 652     this isn't as unusually scary as it sounds.  But please be careful.} %
 653 \end{boxy}
 654
 655 \subsubsection{Declaring type names}
 656 \begin{grammar}
 657 <typename-definition> ::=
 658   "typename" <list>$[\mbox{@<identifier>}]$ ";"
 659 \end{grammar}
 660
 661 Each @<identifier> is declared as naming a C type.  This is important because
 662 the C type syntax -- which Sod uses -- is ambiguous, and disambiguation is
 663 done by distinguishing type names from other identifiers.
 664
 665 Don't declare class names using @"typename"; use @"class" forward
 666 declarations instead.
 667
 668
 669 \subsection{Literal code} \label{sec:syntax.module.literal}
 670
 671 \begin{grammar}
 672 <code-definition> ::=
 673   "code" <reason> ":" <item-name> @[<constraints>@]
 674   "{" <c-fragment> "}"
 675 \alt
 676   "code" <reason> ":" <constraints> ";"
 677
 678 <reason> ::= <identifier>
 679
 680 <constraints> ::= "[" <list>$[\mbox{@<constraint>}]$ "]"
 681
 682 <constraint> ::= @<item-name>^+
 683
 684 <item-name> ::= <identifier> @! "(" @<identifier>^+ ")"
 685 \end{grammar}
 686
 687 The @<c-fragment> will be output unchanged to one of the output files.
 688
 689 The first @<identifier> is the symbolic name of an output file.  Predefined
 690 output file names are @|c| and @|h|, which are the implementation code and
 691 header file respectively; other output files can be defined by extensions.
 692
 693 Output items are named with a sequence of identifiers, separated by
 694 whitespace, and enclosed in parentheses.  As an abbreviation, a name
 695 consisting of a single identifier may be written as just that identifier,
 696 without the parentheses.
 697
 698 The @<constraints> provide a means for specifying where in the output file
 699 the output item should appear.  (Note the two kinds of square brackets shown
 700 in the syntax: square brackets must appear around the constraints if they are
 701 present, but the constraints may be omitted entirely.)  Each comma-separated @<constraint>
 702 is a sequence of names of output items, and indicates that the output items
 703 must appear in the order given -- though the translator is free to insert
 704 additional items in between them.  (The particular output items needn't be
 705 defined already -- indeed, they needn't be defined ever.)
 706
 707 There is a predefined output item @|includes| in both the @|c| and @|h|
 708 output files which is a suitable place for inserting @|\#include|
 709 preprocessor directives in order to declare types and functions for use
 710 elsewhere in the generated output files.
 711
 712
 713 \subsection{Class definitions} \label{sec:syntax.module.class}
 714
 715 \begin{grammar}
 716 <class-definition> ::= <class-forward-declaration>
 717 \alt <full-class-definition>
 718 \end{grammar}
 719
 720 \subsubsection{Forward declarations}
 721 \begin{grammar}
 722 <class-forward-declaration> ::= "class" <identifier> ";"
 723 \end{grammar}
 724
 725 A @<class-forward-declaration> informs Sod that an @<identifier> will be used
 726 to name a class which is currently undefined.  Forward declarations are
 727 necessary in order to resolve certain kinds of circularity.  For example,
 728 \begin{prog}
 729 class Sub;                                                      \\+
 730
 731 class Super: SodObject \{                                       \\ \ind
 732   Sub *sub;                                                   \-\\
 733 \};                                                             \\+
 734
 735 class Sub: Super \{                                             \\ \ind
 736   /* \dots\ */                                                \-\\
 737 \};
 738 \end{prog}
 739
 740 \subsubsection{Full class definitions}
 741 \begin{grammar}
 742 <full-class-definition> ::=
 743   @[<properties>@]
 744   "class" <identifier> ":" <list>$[\mbox{@<identifier>}]$
 745   "{" @<properties-class-item>^* "}"
 746
 747 <properties-class-item> ::= @[<properties>@] <class-item>
 748
 749 <class-item> ::= <slot-item>
 750 \alt <initializer-item>
 751 \alt <initarg-item>
 752 \alt <fragment-item>
 753 \alt <message-item>
 754 \alt <method-item>
 755 \alt <other-item> \fixme{undefined}
 756 \end{grammar}
 757
 758 A full class definition provides a complete description of a class.
 759
 760 The first @<identifier> gives the name of the class.  It is an error to
 761 give the name of an existing class (other than a forward-referenced class),
 762 or an existing type name.  It is conventional to give classes `MixedCase'
 763 names, to distinguish them from other kinds of identifiers.
 764
 765 The @<list>$[\mbox{@<identifier>}]$ names the direct superclasses for the new
 766 class.  It is an error if any of these @<identifier>s does not name a defined
 767 class.  The superclass list is required, and must not be empty; listing
 768 @|SodObject| as your class's superclass is a good choice if nothing else
 769 seems suitable.  A class with no direct superclasses is called a \emph{root
 770 class}.  It is not possible to define a root class in the Sod language: you
 771 must use Lisp to do this, and it's quite involved.
 772
 773 The @<properties> provide additional information.  The standard class
 774 properties are as follows.
 775 \begin{description}
 776 \item[@|lisp_class|] The name of the Lisp class to use within the translator
 777   to represent this class.  The property value must be an identifier; the
 778   default is @|sod_class|.  Extensions may define classes with additional
 779   behaviour, and may recognize additional class properties.
 780 \item[@|metaclass|] The name of the Sod metaclass for this class.  In the
 781   generated code, a class is itself an instance of another class -- its
 782   \emph{metaclass}.  The metaclass defines which slots the class will have,
 783   which messages it will respond to, and what its behaviour will be when it
 784   receives them.  The property value must be an identifier naming a defined
 785   subclass of @|SodClass|.  The default metaclass is @|SodClass|.
 786   See \xref{sec:concepts.metaclasses} for more details.
 787 \item[@|nick|] A nickname for the class, to be used to distinguish it from
 788   other classes in various limited contexts.  The property value must be an
 789   identifier; the default is constructed by forcing the class name to
 790   lower-case.
 791 \end{description}
 792
 793 The class body consists of a sequence of @<class-item>s enclosed in braces.
 794 These items are discussed in the following sections.
 795
 796 \subsubsection{Slot items}
 797 \begin{grammar}
 798 <slot-item> ::=
 799   @<declaration-specifier>^+ <list>$[\mbox{@<init-declarator>}]$ ";"
 800
 801 <init-declarator> ::= <simple-declarator> @["=" <initializer>@]
 802 \end{grammar}
 803
 804 A @<slot-item> defines one or more slots.  All instances of the class and any
 805 subclass will contain these slots, with the names and types given by the
 806 @<declaration-specifier>s and the @<declarator>s.  Slot declarators may not
 807 contain dotted names.
 808
 809 It is not possible to declare a slot with function type: such an item is
 810 interpreted as being a @<message-item> or @<method-item>.  Pointers to
 811 functions are fine.
 812
 813 Properties:
 814 \begin{description}
 815 \item[@|slot_class|] A symbol naming the Lisp class to use to represent the
 816   direct slot.
 817 \item[@|initarg|] An identifier naming an initialization argument which can
 818   be used to provide a value for the slot.  See
 819   \xref{sec:concepts.lifecycle.birth} for the details.
 820 \item[@|initarg_class|] A symbol naming the Lisp class to use to represent
 821   the initarg.  Only permitted if @|initarg| is also set.
 822 \end{description}
 823
 824 An @<initializer>, if present, is treated as if a separate
 825 @<initializer-item> containing the slot name and initializer were present.
 826 For example,
 827 \begin{prog}
 828 [nick = eg]                                                     \\
 829 class Example: Super \{                                         \\ \ind
 830   int foo = 17;                                               \-\\
 831 \};
 832 \end{prog}
 833 means the same as
 834 \begin{prog}
 835 [nick = eg]                                                     \\
 836 class Example: Super \{                                         \\ \ind
 837   int foo;                                                      \\
 838   eg.foo = 17;                                                \-\\
 839 \};
 840 \end{prog}
 841
 842 \subsubsection{Initializer items}
 843 \begin{grammar}
 844 <initializer-item> ::= @["class"@] <list>$[\mbox{@<slot-initializer>}]$ ";"
 845
 846 <slot-initializer> ::= <dotted-name> @["=" <initializer>@]
 847
 848 <initializer> ::= <c-fragment>
 849 \end{grammar}
 850
 851 An @<initializer-item> provides an initial value for one or more slots.  If
 852 prefixed by @|class|, then the initial values are for class slots (i.e.,
 853 slots of the class object itself); otherwise they are for instance slots.
 854
 855 The first component of the @<dotted-name> must be the nickname of one of the
 856 class's superclasses (including itself); the second must be the name of a
 857 slot defined in that superclass.
 858
 859 Properties:
 860 \begin{description}
 861 \item[@|initializer_class|] A symbol naming the Lisp class to use to
 862   represent the initializer.
 863 \item[@|initarg|] An identifier naming an initialization argument which can
 864   be used to provide a value for the slot.  See
 865   \xref{sec:concepts.lifecycle.birth} for the details.  An initializer item
 866   must have either an @|initarg| property, or an initializer expression, or
 867   both.
 868 \item[@|initarg_class|] A symbol naming the Lisp class to use to represent
 869   the initarg.  Only permitted if @|initarg| is also set.
 870 \end{description}
 871
 872 Each class may define at most one initializer item with an explicit
 873 initializer expression for a given slot.
 874
 875 \subsubsection{Initarg items}
 876 \begin{grammar}
 877 <initarg-item> ::=
 878   "initarg"
 879   @<declaration-specifier>^+
 880   <list>$[\mbox{@<init-declarator>}]$ ";"
 881 \end{grammar}
 882 Properties:
 883 \begin{description}
 884 \item[@|initarg_class|] A symbol naming the Lisp class to use to represent
 885   the initarg.
 886 \end{description}
 887
 888 \subsubsection{Fragment items}
 889 \begin{grammar}
 890 <fragment-item> ::= <fragment-kind> "{" <c-fragment> "}"
 891
 892 <fragment-kind> ::= "init" | "teardown"
 893 \end{grammar}
 894
 895 \subsubsection{Message items}
 896 \begin{grammar}
 897 <message-item> ::=
 898   @<declaration-specifier>^+
 899   <keyword-declarator>$[\mbox{@<identifier>}]$
 900   @[<method-body>@]
 901 \end{grammar}
 902 Properties:
 903 \begin{description}
 904 \item[@|message_class|] A symbol naming the Lisp class to use to represent
 905   the message.
 906 \item[@|combination|] A keyword naming the aggregating method combination to
 907   use.
 908 \item[@|most_specific|] A keyword, either @`first' or @`last', according to
 909   whether the most specific applicable method should be invoked first or
 910   last.
 911 \end{description}
 912
 913 Properties for the @|custom| aggregating method combination:
 914 \begin{description}
 915 \item[@|retvar|] An identifier for the return value from the effective
 916   method.  The default is @|sod__ret|.  Only permitted if the message return
 917   type is not @|void|.
 918 \item[@|valvar|] An identifier holding each return value from a direct method
 919   in the effective method.  The default is @|sod__val|.  Only permitted if
 920   the method return type (see @|methty| below) is not @|void|.
 921 \item[@|methty|] A C type, which is the return type for direct methods of
 922   this message.  The default is the return type of the message.
 923 \item[@|decls|] A code fragment containing declarations to be inserted at the
 924   head of the effective method body.  The default is to insert nothing.
 925 \item[@|before|] A code fragment containing initialization to be performed at
 926   the beginning of the effective method body.  The default is to insert
 927   nothing.
 928 \item[@|empty|] A code fragment executed if there are no primary methods;
 929   it should usually store a suitable (identity) value in @<retvar>.  The
 930   default is not to emit an effective method at all if there are no primary
 931   methods.
 932 \item[@|first|] A code fragment to set the return value after calling the
 933   first applicable direct method.  The default is to use the @|each|
 934   fragment.
 935 \item[@|each|] A code fragment to set the return value after calling a direct
 936   method.  If @|first| is also set, then it is used after the first direct
 937   method instead of this.  The default is to insert nothing, which is
 938   probably not what you want.
 939 \item[@|after|] A code fragment inserted at the end of the effective method
 940   body.  The default is to insert nothing.
 941 \item[@|count|] An identifier naming a variable to be declared in the
 942   effective method body, of type @|size_t|, holding the number of applicable
 943   methods.  The default is not to provide such a variable.
 944 \end{description}
 945
 946 \subsubsection{Method items}
 947 \begin{grammar}
 948 <method-item> ::=
 949   @<declaration-specifier>^+
 950   <keyword-declarator>$[\mbox{@<dotted-name>}]$
 951   <method-body>
 952
 953 <method-body> ::= "{" <c-fragment> "}" | "extern" ";"
 954 \end{grammar}
 955 Properties:
 956 \begin{description}
 957 \item[@|method_class|] A symbol naming the Lisp class to use to represent
 958   the direct method.
 959 \item[@|role|] A keyword naming the direct method's rôle.  For the built-in
 960   `simple' message classes, the acceptable rôle names are @|before|,
 961   @|after|, and @|around|.  By default, a primary method is constructed.
 962 \end{description}
 963
 964 %%%----- That's all, folks --------------------------------------------------
 965
 966 %%% Local variables:
 967 %%% mode: LaTeX
 968 %%% TeX-master: "sod.tex"
 969 %%% TeX-PDF-mode: t
 970 %%% End: