chiark - git - mdw - sod/blob - doc/cutting-room-floor.tex

   1 %%% -*-latex-*-
   2 %%%
   3 %%% Conceptual background
   4 %%%
   5 %%% (c) 2015 Straylight/Edgeware
   6 %%%
   7
   8 %%%----- Licensing notice ---------------------------------------------------
   9 %%%
  10 %%% This file is part of the Sensible Object Design, an object system for C.
  11 %%%
  12 %%% SOD is free software; you can redistribute it and/or modify
  13 %%% it under the terms of the GNU General Public License as published by
  14 %%% the Free Software Foundation; either version 2 of the License, or
  15 %%% (at your option) any later version.
  16 %%%
  17 %%% SOD is distributed in the hope that it will be useful,
  18 %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 %%% GNU General Public License for more details.
  21 %%%
  22 %%% You should have received a copy of the GNU General Public License
  23 %%% along with SOD; if not, write to the Free Software Foundation,
  24 %%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25
  26 \chapter{Cutting-room floor}
  27
  28 %%%--------------------------------------------------------------------------
  29 \section{Generated names}
  30
  31 The generated names for functions and objects related to a class are
  32 constructed systematically so as not to interfere with each other.  The rules
  33 on class, slot and message naming exist so as to ensure that the generated
  34 names don't collide with each other.
  35
  36 The following notation is used in this section.
  37 \begin{description}
  38 \item[@<class>] The full name of the `focus' class: the one for which we are
  39   generating name.
  40 \item[@<super-nick>] The nickname of a superclass.
  41 \item[@<head-nick>] The nickname of the chain-head class of the chain
  42   in question.
  43 \end{description}
  44
  45 \subsection{Instance layout}
  46
  47 %%%--------------------------------------------------------------------------
  48 \section{Class objects}
  49
  50 \begin{listing}
  51 typedef struct SodClass__ichain_obj SodClass;
  52
  53 struct sod_chain {
  54   size_t n_classes;                     /* Number of classes in chain */
  55   const SodClass *const *classes;       /* Vector of classes, head first */
  56   size_t off_ichain;                    /* Offset of ichain from instance base */
  57   const struct sod_vtable *vt;          /* Vtable pointer for chain */
  58   size_t ichainsz;                      /* Size of the ichain structure */
  59 };
  60
  61 struct sod_vtable {
  62   SodClass *_class;                     /* Pointer to instance's class */
  63   size_t _base;                         /* Offset to instance base */
  64 };
  65
  66 struct SodClass__islots {
  67
  68   /* Basic information */
  69   const char *name;                     /* The class's name as a string */
  70   const char *nick;                     /* The nickname as a string */
  71
  72   /* Instance allocation and initialization */
  73   size_t instsz;                        /* Instance layout size in bytes */
  74   void *(*imprint)(void *);             /* Stamp instance with vtable ptrs */
  75   void *(*init)(void *);                /* Initialize instance */
  76
  77   /* Superclass structure */
  78   size_t n_supers;                      /* Number of direct superclasses */
  79   const SodClass *const *supers;        /* Vector of direct superclasses */
  80   size_t n_cpl;                         /* Length of class precedence list */
  81   const SodClass *const *cpl;           /* Vector for class precedence list */
  82
  83   /* Chain structure */
  84   const SodClass *link;                 /* Link to next class in chain */
  85   const SodClass *head;                 /* Pointer to head of chain */
  86   size_t level;                         /* Index of class in its chain */
  87   size_t n_chains;                      /* Number of superclass chains */
  88   const sod_chain *chains;              /* Vector of chain structures */
  89
  90   /* Layout */
  91   size_t off_islots;                    /* Offset of islots from ichain base */
  92   size_t islotsz;                       /* Size of instance slots */
  93 };
  94
  95 struct SodClass__ichain_obj {
  96   const SodClass__vt_obj *_vt;
  97   struct SodClass__islots cls;
  98 };
  99
 100 struct sod_instance {
 101   struct sod_vtable *_vt;
 102 };
 103 \end{listing}
 104
 105 \begin{listing}
 106 void *sod_convert(const SodClass *cls, const void *obj)
 107 {
 108   const struct sod_instance *inst = obj;
 109   const SodClass *real = inst->_vt->_cls;
 110   const struct sod_chain *chain;
 111   size_t i, index;
 112
 113   for (i = 0; i < real->cls.n_chains; i++) {
 114     chain = &real->cls.chains[i];
 115     if (chain->classes[0] == cls->cls.head) {
 116       index = cls->cls.index;
 117       if (index < chain->n_classes && chain->classes[index] == cls)
 118         return ((char *)cls - inst->_vt._base + chain->off_ichain);
 119       else
 120         return (0);
 121     }
 122   }
 123   return (0);
 124 }
 125 \end{listing}
 126
 127 %%%--------------------------------------------------------------------------
 128 \section{Classes}
 129 \label{sec:class}
 130
 131 \subsection{Classes and superclasses} \label{sec:class.defs}
 132
 133 A @<full-class-definition> must list one or more existing classes to be the
 134 \emph{direct superclasses} for the new class being defined.  We make the
 135 following definitions.
 136 \begin{itemize}
 137 \item The \emph{superclasses} of a class consist of the class itself together
 138   with the superclasses of its direct superclasses.
 139 \item The \emph{proper superclasses} of a class are its superclasses other
 140   than itself.
 141 \item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
 142   \emph{subclass} of $C$.
 143 \end{itemize}
 144 The predefined class @|SodObject| has no direct superclasses; it is unique in
 145 this respect.  All classes are subclasses of @|SodObject|.
 146
 147 \subsection{The class precedence list} \label{sec:class.cpl}
 148
 149 Let $C$ be a class.  The superclasses of $C$ form a directed graph, with an
 150 edge from each class to each of its direct superclasses.  This is the
 151 \emph{superclass graph of $C$}.
 152
 153 In order to resolve inheritance of items, we define a \emph{class precedence
 154   list} (or CPL) for each class, which imposes a total order on that class's
 155 superclasses.  The default algorithm for computing the CPL is the \emph{C3}
 156 algorithm \cite{barrett-1996:monot-super-linear-dylan}, though extensions may
 157 implement other algorithms.
 158
 159 The default algorithm works as follows.  Let $C$ be the class whose CPL we
 160 are to compute.  Let $X$ and $Y$ be two of $C$'s superclasses.
 161 \begin{itemize}
 162 \item $C$ must appear first in the CPL.
 163 \item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
 164   superclasses, then $X$ appears before $Y$ in the $C$'s CPL.
 165 \item If the above rules don't suffice to order $X$ and $Y$, then whichever
 166   of $X$ and $Y$ has a subclass which appears further left in the list of
 167   $C$'s direct superclasses will appear earlier in the CPL.
 168 \end{itemize}
 169 This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
 170 superclasses of the same direct superclass of $C$ then that direct
 171 superclass's CPL will order $X$ and $Y$.
 172
 173 We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
 174 $X$ is earlier than $Y$ in $C$'s class precedence list.  If $C$ is clear from
 175 context then we omit it, saying simply that $X$ is more specific than $Y$.
 176
 177 \subsection{Instances and metaclasses} \label{sec:class.meta}
 178
 179 A class defines the structure and behaviour of its \emph{instances}: run-time
 180 objects created (possibly) dynamically.  An instance is an instance of only
 181 one class, though structurally it may be used in place of an instance of any
 182 of that class's superclasses.  It is possible, with care, to change the class
 183 of an instance at run-time.
 184
 185 Classes are themselves represented as instances -- called \emph{class
 186   objects} -- in the running program.  Being instances, they have a class,
 187 called the \emph{metaclass}.  The metaclass defines the structure and
 188 behaviour of the class object.
 189
 190 The predefined class @|SodClass| is the default metaclass for new classes.
 191 @|SodClass| has @|SodObject| as its only direct superclass.  @|SodClass| is
 192 its own metaclass.
 193
 194 To make matters more complicated, Sod has \emph{two} distinct metalevels: as
 195 well as the runtime metalevel, as discussed above, there's a compile-time
 196 metalevel hosted in the Sod translator.  Since Sod is written in Common Lisp,
 197 a Sod class's compile-time metaclass is a CLOS class.  The usual compile-time
 198 metaclass is @|sod-class|.  The compile-time metalevel is the subject of
 199 \xref{ch:api}.
 200
 201 \subsection{Items and inheritance} \label{sec:class.inherit}
 202
 203 A class definition also declares \emph{slots}, \emph{messages},
 204 \emph{initializers} and \emph{methods} -- collectively referred to as
 205 \emph{items}.  In addition to the items declared in the class definition --
 206 the class's \emph{direct items} -- a class also \emph{inherits} items from
 207 its superclasses.
 208
 209 The precise rules for item inheritance vary according to the kinds of items
 210 involved.
 211
 212 Some object systems have a notion of `repeated inheritance': if there are
 213 multiple paths in the superclass graph from a class to one of its
 214 superclasses then items defined in that superclass may appear duplicated in
 215 the subclass.  Sod does not have this notion.
 216
 217 \subsubsection{Slots} \label{sec:class.inherit.slots}
 218 A \emph{slot} is a unit of state.  In other object systems, slots may be
 219 called `fields', `member variables', or `instance variables'.
 220
 221 A slot has a \emph{name} and a \emph{type}.  The name serves only to
 222 distinguish the slot from other direct slots defined by the same class.  A
 223 class inherits all of its proper superclasses' slots.  Slots inherited from
 224 superclasses do not conflict with each other or with direct slots, even if
 225 they have the same names.
 226
 227 At run-time, each instance of the class holds a separate value for each slot,
 228 whether direct or inherited.  Changing the value of an instance's slot
 229 doesn't affect other instances.
 230
 231 \subsubsection{Initializers} \label{sec:class.inherit.init}
 232 Mumble.
 233
 234 \subsubsection{Messages} \label{sec:class.inherit.messages}
 235 A \emph{message} is the stimulus for behaviour.  In Sod, a class must define,
 236 statically, the name and format of the messages it is able to receive and the
 237 values it will return in reply.  In this respect, a message is similar to
 238 `abstract member functions' or `interface member functions' in other object
 239 systems.
 240
 241 Like slots, a message has a \emph{name} and a \emph{type}.  Again, the name
 242 serves only to distinguish the message from other direct messages defined by
 243 the same class.  Messages inherited from superclasses do not conflict with
 244 each other or with direct messages, even if they have the same name.
 245
 246 At run-time, one sends a message to an instance by invoking a function
 247 obtained from the instance's \emph{vtable}: \xref{sec:fixme-vtable}.
 248
 249 \subsubsection{Methods} \label{sec:class.inherit.methods}
 250 A \emph{method} is a unit of behaviour.  In other object systems, methods may
 251 be called `member functions'.
 252
 253 A method is associated with a message.  When a message is received by an
 254 instance, all of the methods associated with that message on the instance's
 255 class or any of its superclasses are \emph{applicable}.  The details of how
 256 the applicable methods are invoked are described fully in
 257 \xref{sec:fixme-method-combination}.
 258
 259 \subsection{Chains and instance layout} \label{sec:class.layout}
 260
 261 C is a rather low-level language, and in particular it exposes details of the
 262 way data is laid out in memory.  Since an instance of a class~$C$ should be
 263 (at least in principle) usable anywhere an instance of some superclass $B
 264 \succeq C$ is expected, this implies that an instance of the subclass $C$
 265 needs to contain within it a complete instance of each superclass $B$, laid
 266 out according to the rules of instances of $B$, so that if we have (the
 267 address of) an instance of $C$, we can easily construct a pointer to a thing
 268 which looks like an instance of $B$ contained within it.
 269
 270 Specifically, the information we need to retain for an instance of a
 271 class~$C$ is:
 272 \begin{itemize}
 273 \item the values of each of the slots defined by $C$, including those defined
 274   by superclasses;
 275 \item information which will let us convert a pointer to $C$ into a pointer
 276   to any superclass $B \succeq C$;
 277 \item information which will let us call the appropriate effective method for
 278   each message defined by $C$, including those defined by superclasses; and
 279 \item some additional meta-level information, such as how to find the class
 280   object for $C$ given (the address of) one of its instances.
 281 \end{itemize}
 282
 283 Observe that, while each distinct instance must clearly have its own storage
 284 for slots, all instances of $C$ can share a single copy of the remaining
 285 information.  The individual instance only needs to keep a pointer to this
 286 shared table, which, inspired by the similar structure in many \Cplusplus\
 287 ABIs, are called a \emph{vtable}.
 288
 289 The easiest approach would be to decide that instances of $C$ are exactly
 290 like instances of $B$, only with extra space at the end for the extra slots
 291 which $C$ defines over and above those already existing in $B$.  Conversion
 292 is then trivial: a pointer to an instance of $C$ can be converted to a
 293 pointer to an instance of some superclass $B$ simply by casting.  Even though
 294 the root class @|SodObject| doesn't have any slots at all, its instances will
 295 still need a vtable so that you can find its class object: the address of the
 296 vtable therefore needs to be at the very start of the instance structure.
 297 Again, a vtable for a superclass would have a vtable for each of its
 298 superclasses as a prefix, with new items added afterwards.
 299
 300 This appealing approach works well for an object system which only permits
 301 single inheritance of both state and behaviour.  Alas, it breaks down when
 302 multiple inheritance is allowed: $C$ can be a subclass of both $B$ and $B'$,
 303 even though $B$ is not a subclass of $B'$, nor \emph{vice versa}; so, in
 304 general, $B$'s instance structure will not be a prefix of $B'$'s, nor will
 305 $B'$'s be a prefix of $B$'s, and therefore $C$ cannot have both $B$ and $B'$
 306 as a prefix.
 307
 308 A (non-root) class may -- though need not -- have a distinguished \emph{link}
 309 superclass, which need not be a direct superclass.  Furthermore, each
 310 class~$C$ must satisfy the \emph{chain condition}: for any superclass $A$ of
 311 $C$, there can be at most one other superclass of $C$ whose link superclass
 312 is $A$.\footnote{%
 313   That is, it's permitted for two classes $B$ and $B'$ to have the same link
 314   superclass $A$, but $B$ and $B'$ can't then both be superclasses of the
 315   same class $C$.} %
 316 Therefore, the links partition the superclasses of~$C$ into nice linear
 317 \emph{chains}, such that each superclass is a member of exactly one chain.
 318 If a class~$B$ has a link superclass~$A$, then $B$'s \emph{level} is one more
 319 than that of $A$; otherwise $B$ is called a \emph{chain head} and its level
 320 is zero.  If the classes in a chain are written in a list, chain head first,
 321 then the level of each class gives its index in the list.
 322
 323 Chains therefore allow us to recover some of the linearity properties which
 324 made layout simple in the case of single inheritance.  The instance structure
 325 for a class $C$ contains a substructure for each of $C$'s superclass chains;
 326 a pointer to an object of class $C$ actually points to the substructure for
 327 the chain containing $C$.  The order of these substructures is unimportant
 328 for now.\footnote{%
 329   The chains appear in the order in which their most specific classes appear
 330   in $C$'s class precedence list.  This guarantees that the chain containing
 331   $C$ itself appears first, so that a pointer to $C$'s instance structure is
 332   actually a pointer to $C$'s chain substructure.  Apart from that, it's a
 333   simple, stable, but basically arbitrary choice which can't be changed
 334   without breaking the ABI.} %
 335 The substructure for each chain begins with a pointer to a vtable, followed
 336 by a structure for each superclass in the chain containing the slots defined
 337 by that superclass, with the chain head (least specific class) first.
 338
 339 Suppose we have a pointer to (static) type $C$, and want to convert it into a
 340 pointer to some superclass $B$ of $C$ -- an \emph{upcast}.\footnote{%
 341   In the more general case, we have a pointer to static type $C$, which
 342   actually points to an object of some subclass $D$ of $C$, and want to
 343   convert it into a pointer to type $B$.  Such a conversion is called a
 344   \emph{downcast} if $B$ is a subclass of $C$, or a \emph{cross-cast}
 345   otherwise.  Downcasts and cross-casts require complicated run-time
 346   checking, and can will fail unless $B$ is a superclass of $D$.} %
 347 If $B$ is in the same chain as $C$ -- an \emph{in-chain upcast} -- then the
 348 pointer value is already correct and it's only necessary to cast it
 349 appropriately.  Otherwise -- a \emph{cross-chain upcast} -- the pointer needs
 350 to be adjusted to point to a different chain substructure.  Since the lengths
 351 and relative positions of the chain substructures vary between classes, the
 352 adjustments are stored in the vtable.  Cross-chain upcasts are therefore a
 353 bit slower than in-chain upcasts.
 354
 355 Each chain has its own separate vtable, because much of the metadata stored
 356 in the vtable is specific to a particular chain.  For example:
 357 \begin{itemize}
 358 \item offsets to other chains' substructures will vary depending on which
 359   chain we start from; and
 360 \item entry points to methods
 361 \end{itemize}
 362 %%%--------------------------------------------------------------------------
 363 \section{Superclass linearization}
 364
 365 Before making any decisions about relationships between superclasses, Sod
 366 \emph{linearizes} them, i.e., imposes a total order consistent with the
 367 direct-subclass/superclass partial order.
 368
 369 In the vague hope that we don't be completely bogged down in formalism by the
 370 end of this, let's introduce some notation.  We'll fix some class $z$ and
 371 consider its set of superclasses $S(z) = \{ a, b, \dots \}$.  We can define a
 372 relation $c \prec_1 d$ if $c$ is a direct subclass of $d$, and extend it by
 373 taking the reflexive, transitive closure: $c \preceq d$ if and only if
 374 \begin{itemize}
 375 \item $c = d$, or
 376 \item there exists some class $x$ such that $c \prec_1 x$ and $x \preceq d$.
 377 \end{itemize}
 378 This is the `is-subclass-of' relation we've been using so far.\footnote{%
 379   In some object systems, notably Flavors, this relation is allowed to fail
 380   to be a partial order because of cycles in the class graph.  I haven't
 381   given a great deal of thought to how well Sod would cope with a cyclic
 382   class graph.} %
 383 We write $d \succeq c$ and say that $d$ is a superclass of $c$ if and only if
 384 $c \preceq d$.
 385
 386 The problem comes when we try to resolve inheritance questions.  A class
 387 should inherit behaviour from its superclasses; but, in a world of multiple
 388 inheritance, which one do we choose?  We get a simple version of this problem
 389 when we try to resolve inheritance of slot initializers: only one initializer
 390 can be inherited.
 391
 392 We start by collecting into a set~$I$ the classes which define an initializer
 393 for the slot.  If $I$ contains both a class $x$ and one of $x$'s superclasses
 394 then we should prefer $x$ and consider the superclass to be overridden.  So
 395 we should confine our attention to \emph{least} classes: a member $x$ of a
 396 set $I$ is least, with respect to a particular partial order, if $y \preceq
 397 x$ only when $x = y$.  If there is a single least class in our set the we
 398 have a winner.  Otherwise we want some way to choose among them.
 399
 400 This is not uncontroversial.  Languages such as \Cplusplus\ refuse to choose
 401 among least classes; instead, any program in which such a choice must be made
 402 is simply declared erroneous.
 403
 404 Simply throwing up our hands in horror at this situation is satisfactory when
 405 we only wanted to pick one `winner', as we do for slot initializers.
 406 However, method combination is a much more complicated business.  We don't
 407 want to pick just one winner: we want to order all of the applicable methods
 408 in some way.  Insisting that there is a clear winner at every step along the
 409 chain is too much of an imposition.  Instead, we \emph{linearize} the
 410 classes.
 411
 412 %%%--------------------------------------------------------------------------
 413 \section{Invariance, covariance, contravariance}
 414
 415 In Sod, at least with regard to the existing method combinations, method
 416 types are \emph{invariant}.  This is not an accident, and it's not due to
 417 ignorance.
 418
 419 The \emph{signature} of a function, method or message describes its argument
 420 and return-value types.  If a method's arguments are an integer and a string,
 421 and it returns a character, we might write its signature as
 422 \[ (@|int|, @|string|) \to @|char| \]
 423 In Sod, a method's arguments have to match its message's arguments precisely,
 424 and the return type must either be @|void| -- for a dæmon method -- or again
 425 match the message's return type.  This is argument and return-type
 426 \emph{invariance}.
 427
 428 Some object systems allow methods with subtly different signatures to be
 429 defined on a single message.  In particular, since the idea is that instances
 430 of a subclass ought to be broadly compatible~(see \xref{sec:phil.lsp}) with
 431 existing code which expects instances of a superclass, we might be able to
 432 get away with bending method signatures one way or another to permit this.
 433
 434 \Cplusplus\ permits \emph{return-type covariance}, where a method's return
 435 type can be a subclass of the return type specified by a less-specific
 436 method.  Eiffel allows \emph{argument covariance}, where a method's arguments
 437 can be subclasses of the arguments specified by a less-specific
 438 method.\footnote{%
 439   Attentive readers will note that I ought to be talking about pointers to
 440   instances throughout.  I'm trying to limit the weight of the notation.
 441   Besides, I prefer data models as found in Lisp and Python where all values
 442   are held by reference.} %
 443
 444 Eiffel's argument covariance is unsafe.\footnote{%
 445   Argument covariance is correct if you're doing runtime dispatch based on
 446   argument types.  Eiffel isn't: it's single dispatch, like Sod is.} %
 447 Suppose that we have two pairs of classes, $a \prec_1 b$ and $c \prec_1 d$.
 448 Class $b$ defines a message $m$ with signature $d \to @|int|$; class $a$
 449 defines a method with signature $c \to @|int|$.  This means that it's wrong
 450 to send $m$ to an instance $a$ carrying an argument of type $d$.  But of
 451 course, we can treat an instance of $a$ as if it's an instance of $b$,
 452 whereupon it appears that we are permitted to pass a~$c$ in our message.  The
 453 result is a well-known hole in the type system.  Oops.
 454
 455 \Cplusplus's return-type covariance is fine.  Also fine is argument
 456 \emph{contravariance}.  If $b$ defined its message to have signature $c \to
 457 @|int|$, and $a$ were to broaden its method to $d \to @|int|$, there'd be no
 458 problem.  All $c$s are $d$s, so viewing an $a$ as a $b$ does no harm.
 459
 460 All of this fiddling with types is fine as long as method inheritance or
 461 overriding is an all-or-nothing thing.  But Sod has method combinations,
 462 where applicable methods are taken from the instance's class and all its
 463 superclasses and combined.  And this makes everything very messy.
 464
 465 It's possible to sort all of the mess out in the generated effective method
 466 -- we'd just have to convert the arguments to the types that were expected by
 467 the direct methods.  This would require expensive run-time conversions of all
 468 of the non-invariant arguments and return values.  And we'd need some
 469 complicated rule so that we could choose sensible types for the method
 470 entries in our vtables.  Something like this:
 471 \begin{quote} \itshape
 472   For each named argument of a message, there must be a unique greatest type
 473   among the types given for that argument by the applicable methods; and
 474   there must be a unique least type among all of the return types of the
 475   applicable methods.
 476 \end{quote}
 477 I have visions of people wanting to write special no-effect methods whose
 478 only purpose is to guide the translator around the class graph properly.
 479 Let's not.
 480
 481 %% things to talk about:
 482 %% Liskov substitution principle and why it's mad
 483
 484 %%%----- That's all, folks --------------------------------------------------
 485
 486 %%% Local variables:
 487 %%% mode: LaTeX
 488 %%% TeX-master: "sod.tex"
 489 %%% TeX-PDF-mode: t
 490 %%% End: