chiark - git - mdw - sod/blob - doc/cutting-room-floor.tex

   1 %%% -*-latex-*-
   2 %%%
   3 %%% Conceptual background
   4 %%%
   5 %%% (c) 2015 Straylight/Edgeware
   6 %%%
   7
   8 %%%----- Licensing notice ---------------------------------------------------
   9 %%%
  10 %%% This file is part of the Sensible Object Design, an object system for C.
  11 %%%
  12 %%% SOD is free software; you can redistribute it and/or modify
  13 %%% it under the terms of the GNU General Public License as published by
  14 %%% the Free Software Foundation; either version 2 of the License, or
  15 %%% (at your option) any later version.
  16 %%%
  17 %%% SOD is distributed in the hope that it will be useful,
  18 %%% but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 %%% GNU General Public License for more details.
  21 %%%
  22 %%% You should have received a copy of the GNU General Public License
  23 %%% along with SOD; if not, write to the Free Software Foundation,
  24 %%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  25
  26 \chapter{Cutting-room floor}
  27
  28 %%%--------------------------------------------------------------------------
  29 \section{Generated names}
  30
  31 The generated names for functions and objects related to a class are
  32 constructed systematically so as not to interfere with each other.  The rules
  33 on class, slot and message naming exist so as to ensure that the generated
  34 names don't collide with each other.
  35
  36 The following notation is used in this section.
  37 \begin{description}
  38 \item[@<class>] The full name of the `focus' class: the one for which we are
  39   generating name.
  40 \item[@<super-nick>] The nickname of a superclass.
  41 \item[@<head-nick>] The nickname of the chain-head class of the chain
  42   in question.
  43 \end{description}
  44
  45 \subsection{Instance layout}
  46
  47 %%%--------------------------------------------------------------------------
  48 \section{Class objects}
  49
  50 \begin{listing}
  51 typedef struct SodClass__ichain_obj SodClass;
  52
  53 struct sod_chain {
  54   size_t n_classes;                     /* Number of classes in chain */
  55   const SodClass *const *classes;       /* Vector of classes, head first */
  56   size_t off_ichain;                    /* Offset of ichain from instance base */
  57   const struct sod_vtable *vt;          /* Vtable pointer for chain */
  58   size_t ichainsz;                      /* Size of the ichain structure */
  59 };
  60
  61 struct sod_vtable {
  62   SodClass *_class;                     /* Pointer to instance's class */
  63   size_t _base;                         /* Offset to instance base */
  64 };
  65
  66 struct SodClass__islots {
  67
  68   /* Basic information */
  69   const char *name;                     /* The class's name as a string */
  70   const char *nick;                     /* The nickname as a string */
  71
  72   /* Instance allocation and initialization */
  73   size_t instsz;                        /* Instance layout size in bytes */
  74   void *(*imprint)(void *);             /* Stamp instance with vtable ptrs */
  75   void *(*init)(void *);                /* Initialize instance */
  76
  77   /* Superclass structure */
  78   size_t n_supers;                      /* Number of direct superclasses */
  79   const SodClass *const *supers;        /* Vector of direct superclasses */
  80   size_t n_cpl;                         /* Length of class precedence list */
  81   const SodClass *const *cpl;           /* Vector for class precedence list */
  82
  83   /* Chain structure */
  84   const SodClass *link;                 /* Link to next class in chain */
  85   const SodClass *head;                 /* Pointer to head of chain */
  86   size_t level;                         /* Index of class in its chain */
  87   size_t n_chains;                      /* Number of superclass chains */
  88   const sod_chain *chains;              /* Vector of chain structures */
  89
  90   /* Layout */
  91   size_t off_islots;                    /* Offset of islots from ichain base */
  92   size_t islotsz;                       /* Size of instance slots */
  93 };
  94
  95 struct SodClass__ichain_obj {
  96   const SodClass__vt_obj *_vt;
  97   struct SodClass__islots cls;
  98 };
  99
 100 struct sod_instance {
 101   struct sod_vtable *_vt;
 102 };
 103 \end{listing}
 104
 105 \begin{listing}
 106 void *sod_convert(const SodClass *cls, const void *obj)
 107 {
 108   const struct sod_instance *inst = obj;
 109   const SodClass *real = inst->_vt->_cls;
 110   const struct sod_chain *chain;
 111   size_t i, index;
 112
 113   for (i = 0; i < real->cls.n_chains; i++) {
 114     chain = &real->cls.chains[i];
 115     if (chain->classes[0] == cls->cls.head) {
 116       index = cls->cls.index;
 117       if (index < chain->n_classes && chain->classes[index] == cls)
 118         return ((char *)cls - inst->_vt._base + chain->off_ichain);
 119       else
 120         return (0);
 121     }
 122   }
 123   return (0);
 124 }
 125 \end{listing}
 126
 127 %%%--------------------------------------------------------------------------
 128 \section{Classes}
 129 \label{sec:class}
 130
 131 \subsection{Classes and superclasses} \label{sec:class.defs}
 132
 133 A @<full-class-definition> must list one or more existing classes to be the
 134 \emph{direct superclasses} for the new class being defined.  We make the
 135 following definitions.
 136 \begin{itemize}
 137 \item The \emph{superclasses} of a class consist of the class itself together
 138   with the superclasses of its direct superclasses.
 139 \item The \emph{proper superclasses} of a class are its superclasses other
 140   than itself.
 141 \item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
 142   \emph{subclass} of $C$.
 143 \end{itemize}
 144 The predefined class @|SodObject| has no direct superclasses; it is unique in
 145 this respect.  All classes are subclasses of @|SodObject|.
 146
 147 \subsection{The class precedence list} \label{sec:class.cpl}
 148
 149 Let $C$ be a class.  The superclasses of $C$ form a directed graph, with an
 150 edge from each class to each of its direct superclasses.  This is the
 151 \emph{superclass graph of $C$}.
 152
 153 In order to resolve inheritance of items, we define a \emph{class precedence
 154   list} (or CPL) for each class, which imposes a total order on that class's
 155 superclasses.  The default algorithm for computing the CPL is the \emph{C3}
 156 algorithm \cite{fixme-c3}, though extensions may implement other algorithms.
 157
 158 The default algorithm works as follows.  Let $C$ be the class whose CPL we
 159 are to compute.  Let $X$ and $Y$ be two of $C$'s superclasses.
 160 \begin{itemize}
 161 \item $C$ must appear first in the CPL.
 162 \item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
 163   superclasses, then $X$ appears before $Y$ in the $C$'s CPL.
 164 \item If the above rules don't suffice to order $X$ and $Y$, then whichever
 165   of $X$ and $Y$ has a subclass which appears further left in the list of
 166   $C$'s direct superclasses will appear earlier in the CPL.
 167 \end{itemize}
 168 This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
 169 superclasses of the same direct superclass of $C$ then that direct
 170 superclass's CPL will order $X$ and $Y$.
 171
 172 We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
 173 $X$ is earlier than $Y$ in $C$'s class precedence list.  If $C$ is clear from
 174 context then we omit it, saying simply that $X$ is more specific than $Y$.
 175
 176 \subsection{Instances and metaclasses} \label{sec:class.meta}
 177
 178 A class defines the structure and behaviour of its \emph{instances}: run-time
 179 objects created (possibly) dynamically.  An instance is an instance of only
 180 one class, though structurally it may be used in place of an instance of any
 181 of that class's superclasses.  It is possible, with care, to change the class
 182 of an instance at run-time.
 183
 184 Classes are themselves represented as instances -- called \emph{class
 185   objects} -- in the running program.  Being instances, they have a class,
 186 called the \emph{metaclass}.  The metaclass defines the structure and
 187 behaviour of the class object.
 188
 189 The predefined class @|SodClass| is the default metaclass for new classes.
 190 @|SodClass| has @|SodObject| as its only direct superclass.  @|SodClass| is
 191 its own metaclass.
 192
 193 To make matters more complicated, Sod has \emph{two} distinct metalevels: as
 194 well as the runtime metalevel, as discussed above, there's a compile-time
 195 metalevel hosted in the Sod translator.  Since Sod is written in Common Lisp,
 196 a Sod class's compile-time metaclass is a CLOS class.  The usual compile-time
 197 metaclass is @|sod-class|.  The compile-time metalevel is the subject of
 198 \xref{ch:api}.
 199
 200 \subsection{Items and inheritance} \label{sec:class.inherit}
 201
 202 A class definition also declares \emph{slots}, \emph{messages},
 203 \emph{initializers} and \emph{methods} -- collectively referred to as
 204 \emph{items}.  In addition to the items declared in the class definition --
 205 the class's \emph{direct items} -- a class also \emph{inherits} items from
 206 its superclasses.
 207
 208 The precise rules for item inheritance vary according to the kinds of items
 209 involved.
 210
 211 Some object systems have a notion of `repeated inheritance': if there are
 212 multiple paths in the superclass graph from a class to one of its
 213 superclasses then items defined in that superclass may appear duplicated in
 214 the subclass.  Sod does not have this notion.
 215
 216 \subsubsection{Slots} \label{sec:class.inherit.slots}
 217 A \emph{slot} is a unit of state.  In other object systems, slots may be
 218 called `fields', `member variables', or `instance variables'.
 219
 220 A slot has a \emph{name} and a \emph{type}.  The name serves only to
 221 distinguish the slot from other direct slots defined by the same class.  A
 222 class inherits all of its proper superclasses' slots.  Slots inherited from
 223 superclasses do not conflict with each other or with direct slots, even if
 224 they have the same names.
 225
 226 At run-time, each instance of the class holds a separate value for each slot,
 227 whether direct or inherited.  Changing the value of an instance's slot
 228 doesn't affect other instances.
 229
 230 \subsubsection{Initializers} \label{sec:class.inherit.init}
 231 Mumble.
 232
 233 \subsubsection{Messages} \label{sec:class.inherit.messages}
 234 A \emph{message} is the stimulus for behaviour.  In Sod, a class must define,
 235 statically, the name and format of the messages it is able to receive and the
 236 values it will return in reply.  In this respect, a message is similar to
 237 `abstract member functions' or `interface member functions' in other object
 238 systems.
 239
 240 Like slots, a message has a \emph{name} and a \emph{type}.  Again, the name
 241 serves only to distinguish the message from other direct messages defined by
 242 the same class.  Messages inherited from superclasses do not conflict with
 243 each other or with direct messages, even if they have the same name.
 244
 245 At run-time, one sends a message to an instance by invoking a function
 246 obtained from the instance's \emph{vtable}: \xref{sec:fixme-vtable}.
 247
 248 \subsubsection{Methods} \label{sec:class.inherit.methods}
 249 A \emph{method} is a unit of behaviour.  In other object systems, methods may
 250 be called `member functions'.
 251
 252 A method is associated with a message.  When a message is received by an
 253 instance, all of the methods associated with that message on the instance's
 254 class or any of its superclasses are \emph{applicable}.  The details of how
 255 the applicable methods are invoked are described fully in
 256 \xref{sec:fixme-method-combination}.
 257
 258 \subsection{Chains and instance layout} \label{sec:class.layout}
 259
 260 C is a rather low-level language, and in particular it exposes details of the
 261 way data is laid out in memory.  Since an instance of a class~$C$ should be
 262 (at least in principle) usable anywhere an instance of some superclass $B
 263 \succeq C$ is expected, this implies that an instance of the subclass $C$
 264 needs to contain within it a complete instance of each superclass $B$, laid
 265 out according to the rules of instances of $B$, so that if we have (the
 266 address of) an instance of $C$, we can easily construct a pointer to a thing
 267 which looks like an instance of $B$ contained within it.
 268
 269 Specifically, the information we need to retain for an instance of a
 270 class~$C$ is:
 271 \begin{itemize}
 272 \item the values of each of the slots defined by $C$, including those defined
 273   by superclasses;
 274 \item information which will let us convert a pointer to $C$ into a pointer
 275   to any superclass $B \succeq C$;
 276 \item information which will let us call the appropriate effective method for
 277   each message defined by $C$, including those defined by superclasses; and
 278 \item some additional meta-level information, such as how to find the class
 279   object for $C$ given (the address of) one of its instances.
 280 \end{itemize}
 281
 282 Observe that, while each distinct instance must clearly have its own storage
 283 for slots, all instances of $C$ can share a single copy of the remaining
 284 information.  The individual instance only needs to keep a pointer to this
 285 shared table, which, inspired by the similar structure in many \Cplusplus\
 286 ABIs, are called a \emph{vtable}.
 287
 288 The easiest approach would be to decide that instances of $C$ are exactly
 289 like instances of $B$, only with extra space at the end for the extra slots
 290 which $C$ defines over and above those already existing in $B$.  Conversion
 291 is then trivial: a pointer to an instance of $C$ can be converted to a
 292 pointer to an instance of some superclass $B$ simply by casting.  Even though
 293 the root class @|SodObject| doesn't have any slots at all, its instances will
 294 still need a vtable so that you can find its class object: the address of the
 295 vtable therefore needs to be at the very start of the instance structure.
 296 Again, a vtable for a superclass would have a vtable for each of its
 297 superclasses as a prefix, with new items added afterwards.
 298
 299 This appealing approach works well for an object system which only permits
 300 single inheritance of both state and behaviour.  Alas, it breaks down when
 301 multiple inheritance is allowed: $C$ can be a subclass of both $B$ and $B'$,
 302 even though $B$ is not a subclass of $B'$, nor \emph{vice versa}; so, in
 303 general, $B$'s instance structure will not be a prefix of $B'$'s, nor will
 304 $B'$'s be a prefix of $B$'s, and therefore $C$ cannot have both $B$ and $B'$
 305 as a prefix.
 306
 307 A (non-root) class may -- though need not -- have a distinguished \emph{link}
 308 superclass, which need not be a direct superclass.  Furthermore, each
 309 class~$C$ must satisfy the \emph{chain condition}: for any superclass $A$ of
 310 $C$, there can be at most one other superclass of $C$ whose link superclass
 311 is $A$.\footnote{%
 312   That is, it's permitted for two classes $B$ and $B'$ to have the same link
 313   superclass $A$, but $B$ and $B'$ can't then both be superclasses of the
 314   same class $C$.} %
 315 Therefore, the links partition the superclasses of~$C$ into nice linear
 316 \emph{chains}, such that each superclass is a member of exactly one chain.
 317 If a class~$B$ has a link superclass~$A$, then $B$'s \emph{level} is one more
 318 than that of $A$; otherwise $B$ is called a \emph{chain head} and its level
 319 is zero.  If the classes in a chain are written in a list, chain head first,
 320 then the level of each class gives its index in the list.
 321
 322 Chains therefore allow us to recover some of the linearity properties which
 323 made layout simple in the case of single inheritance.  The instance structure
 324 for a class $C$ contains a substructure for each of $C$'s superclass chains;
 325 a pointer to an object of class $C$ actually points to the substructure for
 326 the chain containing $C$.  The order of these substructures is unimportant
 327 for now.\footnote{%
 328   The chains appear in the order in which their most specific classes appear
 329   in $C$'s class precedence list.  This guarantees that the chain containing
 330   $C$ itself appears first, so that a pointer to $C$'s instance structure is
 331   actually a pointer to $C$'s chain substructure.  Apart from that, it's a
 332   simple, stable, but basically arbitrary choice which can't be changed
 333   without breaking the ABI.} %
 334 The substructure for each chain begins with a pointer to a vtable, followed
 335 by a structure for each superclass in the chain containing the slots defined
 336 by that superclass, with the chain head (least specific class) first.
 337
 338 Suppose we have a pointer to (static) type $C$, and want to convert it into a
 339 pointer to some superclass $B$ of $C$ -- an \emph{upcast}.\footnote{%
 340   In the more general case, we have a pointer to static type $C$, which
 341   actually points to an object of some subclass $D$ of $C$, and want to
 342   convert it into a pointer to type $B$.  Such a conversion is called a
 343   \emph{downcast} if $B$ is a subclass of $C$, or a \emph{cross-cast}
 344   otherwise.  Downcasts and cross-casts require complicated run-time
 345   checking, and can will fail unless $B$ is a superclass of $D$.} %
 346 If $B$ is in the same chain as $C$ -- an \emph{in-chain upcast} -- then the
 347 pointer value is already correct and it's only necessary to cast it
 348 appropriately.  Otherwise -- a \emph{cross-chain upcast} -- the pointer needs
 349 to be adjusted to point to a different chain substructure.  Since the lengths
 350 and relative positions of the chain substructures vary between classes, the
 351 adjustments are stored in the vtable.  Cross-chain upcasts are therefore a
 352 bit slower than in-chain upcasts.
 353
 354 Each chain has its own separate vtable, because much of the metadata stored
 355 in the vtable is specific to a particular chain.  For example:
 356 \begin{itemize}
 357 \item offsets to other chains' substructures will vary depending on which
 358   chain we start from; and
 359 \item entry points to methods
 360 \end{itemize}
 361 %%%--------------------------------------------------------------------------
 362 \section{Superclass linearization}
 363
 364 Before making any decisions about relationships between superclasses, Sod
 365 \emph{linearizes} them, i.e., imposes a total order consistent with the
 366 direct-subclass/superclass partial order.
 367
 368 In the vague hope that we don't be completely bogged down in formalism by the
 369 end of this, let's introduce some notation.  We'll fix some class $z$ and
 370 consider its set of superclasses $S(z) = \{ a, b, \dots \}$.  We can define a
 371 relation $c \prec_1 d$ if $c$ is a direct subclass of $d$, and extend it by
 372 taking the reflexive, transitive closure: $c \preceq d$ if and only if
 373 \begin{itemize}
 374 \item $c = d$, or
 375 \item there exists some class $x$ such that $c \prec_1 x$ and $x \preceq d$.
 376 \end{itemize}
 377 This is the `is-subclass-of' relation we've been using so far.\footnote{%
 378   In some object systems, notably Flavors, this relation is allowed to fail
 379   to be a partial order because of cycles in the class graph.  I haven't
 380   given a great deal of thought to how well Sod would cope with a cyclic
 381   class graph.} %
 382 We write $d \succeq c$ and say that $d$ is a superclass of $c$ if and only if
 383 $c \preceq d$.
 384
 385 The problem comes when we try to resolve inheritance questions.  A class
 386 should inherit behaviour from its superclasses; but, in a world of multiple
 387 inheritance, which one do we choose?  We get a simple version of this problem
 388 when we try to resolve inheritance of slot initializers: only one initializer
 389 can be inherited.
 390
 391 We start by collecting into a set~$I$ the classes which define an initializer
 392 for the slot.  If $I$ contains both a class $x$ and one of $x$'s superclasses
 393 then we should prefer $x$ and consider the superclass to be overridden.  So
 394 we should confine our attention to \emph{least} classes: a member $x$ of a
 395 set $I$ is least, with respect to a particular partial order, if $y \preceq
 396 x$ only when $x = y$.  If there is a single least class in our set the we
 397 have a winner.  Otherwise we want some way to choose among them.
 398
 399 This is not uncontroversial.  Languages such as \Cplusplus\ refuse to choose
 400 among least classes; instead, any program in which such a choice must be made
 401 is simply declared erroneous.
 402
 403 Simply throwing up our hands in horror at this situation is satisfactory when
 404 we only wanted to pick one `winner', as we do for slot initializers.
 405 However, method combination is a much more complicated business.  We don't
 406 want to pick just one winner: we want to order all of the applicable methods
 407 in some way.  Insisting that there is a clear winner at every step along the
 408 chain is too much of an imposition.  Instead, we \emph{linearize} the
 409 classes.
 410
 411 %%%--------------------------------------------------------------------------
 412 \section{Invariance, covariance, contravariance}
 413
 414 In Sod, at least with regard to the existing method combinations, method
 415 types are \emph{invariant}.  This is not an accident, and it's not due to
 416 ignorance.
 417
 418 The \emph{signature} of a function, method or message describes its argument
 419 and return-value types.  If a method's arguments are an integer and a string,
 420 and it returns a character, we might write its signature as
 421 \[ (@|int|, @|string|) \to @|char| \]
 422 In Sod, a method's arguments have to match its message's arguments precisely,
 423 and the return type must either be @|void| -- for a dæmon method -- or again
 424 match the message's return type.  This is argument and return-type
 425 \emph{invariance}.
 426
 427 Some object systems allow methods with subtly different signatures to be
 428 defined on a single message.  In particular, since the idea is that instances
 429 of a subclass ought to be broadly compatible~(see \xref{sec:phil.lsp}) with
 430 existing code which expects instances of a superclass, we might be able to
 431 get away with bending method signatures one way or another to permit this.
 432
 433 \Cplusplus\ permits \emph{return-type covariance}, where a method's return
 434 type can be a subclass of the return type specified by a less-specific
 435 method.  Eiffel allows \emph{argument covariance}, where a method's arguments
 436 can be subclasses of the arguments specified by a less-specific
 437 method.\footnote{%
 438   Attentive readers will note that I ought to be talking about pointers to
 439   instances throughout.  I'm trying to limit the weight of the notation.
 440   Besides, I prefer data models as found in Lisp and Python where all values
 441   are held by reference.} %
 442
 443 Eiffel's argument covariance is unsafe.\footnote{%
 444   Argument covariance is correct if you're doing runtime dispatch based on
 445   argument types.  Eiffel isn't: it's single dispatch, like Sod is.} %
 446 Suppose that we have two pairs of classes, $a \prec_1 b$ and $c \prec_1 d$.
 447 Class $b$ defines a message $m$ with signature $d \to @|int|$; class $a$
 448 defines a method with signature $c \to @|int|$.  This means that it's wrong
 449 to send $m$ to an instance $a$ carrying an argument of type $d$.  But of
 450 course, we can treat an instance of $a$ as if it's an instance of $b$,
 451 whereupon it appears that we are permitted to pass a~$c$ in our message.  The
 452 result is a well-known hole in the type system.  Oops.
 453
 454 \Cplusplus's return-type covariance is fine.  Also fine is argument
 455 \emph{contravariance}.  If $b$ defined its message to have signature $c \to
 456 @|int|$, and $a$ were to broaden its method to $d \to @|int|$, there'd be no
 457 problem.  All $c$s are $d$s, so viewing an $a$ as a $b$ does no harm.
 458
 459 All of this fiddling with types is fine as long as method inheritance or
 460 overriding is an all-or-nothing thing.  But Sod has method combinations,
 461 where applicable methods are taken from the instance's class and all its
 462 superclasses and combined.  And this makes everything very messy.
 463
 464 It's possible to sort all of the mess out in the generated effective method
 465 -- we'd just have to convert the arguments to the types that were expected by
 466 the direct methods.  This would require expensive run-time conversions of all
 467 of the non-invariant arguments and return values.  And we'd need some
 468 complicated rule so that we could choose sensible types for the method
 469 entries in our vtables.  Something like this:
 470 \begin{quote} \itshape
 471   For each named argument of a message, there must be a unique greatest type
 472   among the types given for that argument by the applicable methods; and
 473   there must be a unique least type among all of the return types of the
 474   applicable methods.
 475 \end{quote}
 476 I have visions of people wanting to write special no-effect methods whose
 477 only purpose is to guide the translator around the class graph properly.
 478 Let's not.
 479
 480 %% things to talk about:
 481 %% Liskov substitution principle and why it's mad
 482
 483 %%%----- That's all, folks --------------------------------------------------
 484
 485 %%% Local variables:
 486 %%% mode: LaTeX
 487 %%% TeX-master: "sod.tex"
 488 %%% TeX-PDF-mode: t
 489 %%% End: