chiark - git - mdw - sod/blame_incremental - doc/cutting-room-floor.tex

... / ...

Commit	Line	Data
	1	%%% --latex--
	2	%%%
	3	%%% Conceptual background
	4	%%%
	5	%%% (c) 2015 Straylight/Edgeware
	6	%%%
	7
	8	%%%----- Licensing notice ---------------------------------------------------
	9	%%%
	10	%%% This file is part of the Sensible Object Design, an object system for C.
	11	%%%
	12	%%% SOD is free software; you can redistribute it and/or modify
	13	%%% it under the terms of the GNU General Public License as published by
	14	%%% the Free Software Foundation; either version 2 of the License, or
	15	%%% (at your option) any later version.
	16	%%%
	17	%%% SOD is distributed in the hope that it will be useful,
	18	%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	%%% GNU General Public License for more details.
	21	%%%
	22	%%% You should have received a copy of the GNU General Public License
	23	%%% along with SOD; if not, write to the Free Software Foundation,
	24	%%% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	25
	26	\chapter{Cutting-room floor}
	27
	28	%%%--------------------------------------------------------------------------
	29	\section{Generated names}
	30
	31	The generated names for functions and objects related to a class are
	32	constructed systematically so as not to interfere with each other. The rules
	33	on class, slot and message naming exist so as to ensure that the generated
	34	names don't collide with each other.
	35
	36	The following notation is used in this section.
	37	\begin{description}
	38	\item[@<class>] The full name of the `focus' class: the one for which we are
	39	generating names.
	40	\item[@<super-nick>] The nickname of a superclass.
	41	\item[@<head-nick>] The nickname of the chain-head class of the chain
	42	in question.
	43	\end{description}
	44
	45	\subsection{Instance layout}
	46
	47	%%%--------------------------------------------------------------------------
	48	\section{Class objects}
	49
	50	\begin{listing}
	51	typedef struct SodClass__ichain_obj SodClass;
	52
	53	struct sod_chain {
	54	size_t n_classes; /* Number of classes in chain */
	55	const SodClass *const *classes; /* Vector of classes, head first */
	56	size_t off_ichain; /* Offset of ichain from instance base */
	57	const struct sod_vtable *vt; /* Vtable pointer for chain */
	58	size_t ichainsz; /* Size of the ichain structure */
	59	};
	60
	61	struct sod_vtable {
	62	SodClass *_class; /* Pointer to instance's class */
	63	size_t _base; /* Offset to instance base */
	64	};
	65
	66	struct SodClass__islots {
	67
	68	/* Basic information */
	69	const char *name; /* The class's name as a string */
	70	const char *nick; /* The nickname as a string */
	71
	72	/* Instance allocation and initialization */
	73	size_t instsz; /* Instance layout size in bytes */
	74	void *(*imprint)(void *); /* Stamp instance with vtable ptrs */
	75	void *(*init)(void *); /* Initialize instance */
	76
	77	/* Superclass structure */
	78	size_t n_supers; /* Number of direct superclasses */
	79	const SodClass *const *supers; /* Vector of direct superclasses */
	80	size_t n_cpl; /* Length of class precedence list */
	81	const SodClass *const *cpl; /* Vector for class precedence list */
	82
	83	/* Chain structure */
	84	const SodClass *link; /* Link to next class in chain */
	85	const SodClass *head; /* Pointer to head of chain */
	86	size_t level; /* Index of class in its chain */
	87	size_t n_chains; /* Number of superclass chains */
	88	const sod_chain *chains; /* Vector of chain structures */
	89
	90	/* Layout */
	91	size_t off_islots; /* Offset of islots from ichain base */
	92	size_t islotsz; /* Size of instance slots */
	93	};
	94
	95	struct SodClass__ichain_obj {
	96	const SodClass__vt_obj *_vt;
	97	struct SodClass__islots cls;
	98	};
	99
	100	struct sod_instance {
	101	struct sod_vtable *_vt;
	102	};
	103	\end{listing}
	104
	105	\begin{listing}
	106	void *sod_convert(const SodClass *cls, const void *obj)
	107	{
	108	const struct sod_instance *inst = obj;
	109	const SodClass *real = inst->_vt->_cls;
	110	const struct sod_chain *chain;
	111	size_t i, index;
	112
	113	for (i = 0; i < real->cls.n_chains; i++) {
	114	chain = &real->cls.chains[i];
	115	if (chain->classes[0] == cls->cls.head) {
	116	index = cls->cls.index;
	117	if (index < chain->n_classes && chain->classes[index] == cls)
	118	return ((char *)cls - inst->_vt._base + chain->off_ichain);
	119	else
	120	return (0);
	121	}
	122	}
	123	return (0);
	124	}
	125	\end{listing}
	126
	127	%%%--------------------------------------------------------------------------
	128	\section{Classes}
	129	\label{sec:class}
	130
	131	\subsection{Classes and superclasses} \label{sec:class.defs}
	132
	133	A @<full-class-definition> must list one or more existing classes to be the
	134	\emph{direct superclasses} for the new class being defined. We make the
	135	following definitions.
	136	\begin{itemize}
	137	\item The \emph{superclasses} of a class consist of the class itself together
	138	with the superclasses of its direct superclasses.
	139	\item The \emph{proper superclasses} of a class are its superclasses other
	140	than itself.
	141	\item If $C$ is a (proper) superclass of $D$ then $D$ is a (\emph{proper})
	142	\emph{subclass} of $C$.
	143	\end{itemize}
	144	The predefined class @\|SodObject\| has no direct superclasses; it is unique in
	145	this respect. All classes are subclasses of @\|SodObject\|.
	146
	147	\subsection{The class precedence list} \label{sec:class.cpl}
	148
	149	Let $C$ be a class. The superclasses of $C$ form a directed graph, with an
	150	edge from each class to each of its direct superclasses. This is the
	151	\emph{superclass graph of $C$}.
	152
	153	In order to resolve inheritance of items, we define a \emph{class precedence
	154	list} (or CPL) for each class, which imposes a total order on that class's
	155	superclasses. The default algorithm for computing the CPL is the \emph{C3}
	156	algorithm \cite{fixme-c3}, though extensions may implement other algorithms.
	157
	158	The default algorithm works as follows. Let $C$ be the class whose CPL we
	159	are to compute. Let $X$ and $Y$ be two of $C$'s superclasses.
	160	\begin{itemize}
	161	\item $C$ must appear first in the CPL.
	162	\item If $X$ appears before $Y$ in the CPL of one of $C$'s direct
	163	superclasses, then $X$ appears before $Y$ in $C$'s CPL.
	164	\item If the above rules don't suffice to order $X$ and $Y$, then whichever
	165	of $X$ and $Y$ has a subclass which appears further left in the list of
	166	$C$'s direct superclasses will appear earlier in the CPL.
	167	\end{itemize}
	168	This last rule is sufficient to disambiguate because if both $X$ and $Y$ are
	169	superclasses of the same direct superclass of $C$ then that direct
	170	superclass's CPL will order $X$ and $Y$.
	171
	172	We say that \emph{$X$ is more specific than $Y$ as a superclass of $C$} if
	173	$X$ is earlier than $Y$ in $C$'s class precedence list. If $C$ is clear from
	174	context then we omit it, saying simply that $X$ is more specific than $Y$.
	175
	176	\subsection{Instances and metaclasses} \label{sec:class.meta}
	177
	178	A class defines the structure and behaviour of its \emph{instances}: run-time
	179	objects created (possibly) dynamically. An instance is an instance of only
	180	one class, though structurally it may be used in place of an instance of any
	181	of that class's superclasses. It is possible, with care, to change the class
	182	of an instance at run-time.
	183
	184	Classes are themselves represented as instances -- called \emph{class
	185	objects} -- in the running program. Being instances, they have a class,
	186	called the \emph{metaclass}. The metaclass defines the structure and
	187	behaviour of the class object.
	188
	189	The predefined class @\|SodClass\| is the default metaclass for new classes.
	190	@\|SodClass\| has @\|SodObject\| as its only direct superclass. @\|SodClass\| is
	191	its own metaclass.
	192
	193	To make matters more complicated, Sod has \emph{two} distinct metalevels: as
	194	well as the runtime metalevel, as discussed above, there's a compile-time
	195	metalevel hosted in the Sod translator. Since Sod is written in Common Lisp,
	196	a Sod class's compile-time metaclass is a CLOS class. The usual compile-time
	197	metaclass is @\|sod-class\|. The compile-time metalevel is the subject of
	198	\xref{ch:api}.
	199
	200	\subsection{Items and inheritance} \label{sec:class.inherit}
	201
	202	A class definition also declares \emph{slots}, \emph{messages},
	203	\emph{initializers} and \emph{methods} -- collectively referred to as
	204	\emph{items}. In addition to the items declared in the class definition --
	205	the class's \emph{direct items} -- a class also \emph{inherits} items from
	206	its superclasses.
	207
	208	The precise rules for item inheritance vary according to the kinds of items
	209	involved.
	210
	211	Some object systems have a notion of `repeated inheritance': if there are
	212	multiple paths in the superclass graph from a class to one of its
	213	superclasses then items defined in that superclass may appear duplicated in
	214	the subclass. Sod does not have this notion.
	215
	216	\subsubsection{Slots} \label{sec:class.inherit.slots}
	217	A \emph{slot} is a unit of state. In other object systems, slots may be
	218	called `fields', `member variables', or `instance variables'.
	219
	220	A slot has a \emph{name} and a \emph{type}. The name serves only to
	221	distinguish the slot from other direct slots defined by the same class. A
	222	class inherits all of its proper superclasses' slots. Slots inherited from
	223	superclasses do not conflict with each other or with direct slots, even if
	224	they have the same names.
	225
	226	At run-time, each instance of the class holds a separate value for each slot,
	227	whether direct or inherited. Changing the value of an instance's slot
	228	doesn't affect other instances.
	229
	230	\subsubsection{Initializers} \label{sec:class.inherit.init}
	231	Mumble.
	232
	233	\subsubsection{Messages} \label{sec:class.inherit.messages}
	234	A \emph{message} is the stimulus for behaviour. In Sod, a class must define,
	235	statically, the name and format of the messages it is able to receive and the
	236	values it will return in reply. In this respect, a message is similar to
	237	`abstract member functions' or `interface member functions' in other object
	238	systems.
	239
	240	Like slots, a message has a \emph{name} and a \emph{type}. Again, the name
	241	serves only to distinguish the message from other direct messages defined by
	242	the same class. Messages inherited from superclasses do not conflict with
	243	each other or with direct messages, even if they have the same name.
	244
	245	At run-time, one sends a message to an instance by invoking a function
	246	obtained from the instance's \emph{vtable}: \xref{sec:fixme-vtable}.
	247
	248	\subsubsection{Methods} \label{sec:class.inherit.methods}
	249	A \emph{method} is a unit of behaviour. In other object systems, methods may
	250	be called `member functions'.
	251
	252	A method is associated with a message. When a message is received by an
	253	instance, all of the methods associated with that message on the instance's
	254	class or any of its superclasses are \emph{applicable}. The details of how
	255	the applicable methods are invoked are described fully in
	256	\xref{sec:fixme-method-combination}.
	257
	258	\subsection{Chains and instance layout} \label{sec:class.layout}
	259
	260	C is a rather low-level language, and in particular it exposes details of the
	261	way data is laid out in memory. Since an instance of a class~$C$ should be
	262	(at least in principle) usable anywhere an instance of some superclass $B
	263	\succeq C$ is expected, this implies that an instance of the subclass $C$
	264	needs to contain within it a complete instance of each superclass $B$, laid
	265	out according to the rules of instances of $B$, so that if we have (the
	266	address of) an instance of $C$, we can easily construct a pointer to a thing
	267	which looks like an instance of $B$ contained within it.
	268
	269	Specifically, the information we need to retain for an instance of a
	270	class~$C$ is:
	271	\begin{itemize}
	272	\item the values of each of the slots defined by $C$, including those defined
	273	by superclasses;
	274	\item information which will let us convert a pointer to $C$ into a pointer
	275	to any superclass $B \succeq C$;
	276	\item information which will let us call the appropriate effective method for
	277	each message defined by $C$, including those defined by superclasses; and
	278	\item some additional meta-level information, such as how to find the class
	279	object for $C$ given (the address of) one of its instances.
	280	\end{itemize}
	281
	282	Observe that, while each distinct instance must clearly have its own storage
	283	for slots, all instances of $C$ can share a single copy of the remaining
	284	information. The individual instance only needs to keep a pointer to this
	285	shared table, which, inspired by the similar structure in many \Cplusplus\
	286	ABIs, is called a \emph{vtable}.
	287
	288	The easiest approach would be to decide that instances of $C$ are exactly
	289	like instances of $B$, only with extra space at the end for the extra slots
	290	which $C$ defines over and above those already existing in $B$. Conversion
	291	is then trivial: a pointer to an instance of $C$ can be converted to a
	292	pointer to an instance of some superclass $B$ simply by casting. Even though
	293	the root class @\|SodObject\| doesn't have any slots at all, its instances will
	294	still need a vtable so that you can find its class object: the address of the
	295	vtable therefore needs to be at the very start of the instance structure.
	296	Again, a vtable for a class would have a vtable for each of its
	297	superclasses as a prefix, with new items added afterwards.
	298
	299	This appealing approach works well for an object system which only permits
	300	single inheritance of both state and behaviour. Alas, it breaks down when
	301	multiple inheritance is allowed: $C$ can be a subclass of both $B$ and $B'$,
	302	even though $B$ is not a subclass of $B'$, nor \emph{vice versa}; so, in
	303	general, $B$'s instance structure will not be a prefix of $B'$'s, nor will
	304	$B'$'s be a prefix of $B$'s, and therefore $C$ cannot have both $B$ and $B'$
	305	as a prefix.
	306
	307	A (non-root) class may -- though need not -- have a distinguished \emph{link}
	308	superclass, which need not be a direct superclass. Furthermore, each
	309	class~$C$ must satisfy the \emph{chain condition}: for any superclass $A$ of
	310	$C$, there can be at most one other superclass of $C$ whose link superclass
	311	is $A$.\footnote{%
	312	That is, it's permitted for two classes $B$ and $B'$ to have the same link
	313	superclass $A$, but $B$ and $B'$ can't then both be superclasses of the
	314	same class $C$.} %
	315	Therefore, the links partition the superclasses of~$C$ into nice linear
	316	\emph{chains}, such that each superclass is a member of exactly one chain.
	317	If a class~$B$ has a link superclass~$A$, then $B$'s \emph{level} is one more
	318	than that of $A$; otherwise $B$ is called a \emph{chain head} and its level
	319	is zero. If the classes in a chain are written in a list, chain head first,
	320	then the level of each class gives its index in the list.
	321
	322	Chains therefore allow us to recover some of the linearity properties which
	323	made layout simple in the case of single inheritance. The instance structure
	324	for a class $C$ contains a substructure for each of $C$'s superclass chains;
	325	a pointer to an object of class $C$ actually points to the substructure for
	326	the chain containing $C$. The order of these substructures is unimportant
	327	for now.\footnote{%
	328	The chains appear in the order in which their most specific classes appear
	329	in $C$'s class precedence list. This guarantees that the chain containing
	330	$C$ itself appears first, so that a pointer to $C$'s instance structure is
	331	actually a pointer to $C$'s chain substructure. Apart from that, it's a
	332	simple, stable, but basically arbitrary choice which can't be changed
	333	without breaking the ABI.} %
	334	The substructure for each chain begins with a pointer to a vtable, followed
	335	by a structure for each superclass in the chain containing the slots defined
	336	by that superclass, with the chain head (least specific class) first.
	337
	338	Suppose we have a pointer to (static) type $C$, and want to convert it into a
	339	pointer to some superclass $B$ of $C$ -- an \emph{upcast}.\footnote{%
	340	In the more general case, we have a pointer to static type $C$, which
	341	actually points to an object of some subclass $D$ of $C$, and want to
	342	convert it into a pointer to type $B$. Such a conversion is called a
	343	\emph{downcast} if $B$ is a subclass of $C$, or a \emph{cross-cast}
	344	otherwise. Downcasts and cross-casts require complicated run-time
	345	checking, and will fail unless $B$ is a superclass of $D$.} %
	346	If $B$ is in the same chain as $C$ -- an \emph{in-chain upcast} -- then the
	347	pointer value is already correct and it's only necessary to cast it
	348	appropriately. Otherwise -- a \emph{cross-chain upcast} -- the pointer needs
	349	to be adjusted to point to a different chain substructure. Since the lengths
	350	and relative positions of the chain substructures vary between classes, the
	351	adjustments are stored in the vtable. Cross-chain upcasts are therefore a
	352	bit slower than in-chain upcasts.
	353
	354	Each chain has its own separate vtable, because much of the metadata stored
	355	in the vtable is specific to a particular chain. For example:
	356	\begin{itemize}
	357	\item offsets to other chains' substructures will vary depending on which
	358	chain we start from; and
	359	\item entry points to methods
	360	\end{itemize}
	361	%%%--------------------------------------------------------------------------
	362	\section{Superclass linearization}
	363
	364	Before making any decisions about relationships between superclasses, Sod
	365	\emph{linearizes} them, i.e., imposes a total order consistent with the
	366	direct-subclass/superclass partial order.
	367
	368	In the vague hope that we won't be completely bogged down in formalism by the
	369	end of this, let's introduce some notation. We'll fix some class $z$ and
	370	consider its set of superclasses $S(z) = \{ a, b, \dots \}$. We can define a
	371	relation $c \prec_1 d$ if $c$ is a direct subclass of $d$, and extend it by
	372	taking the reflexive, transitive closure: $c \preceq d$ if and only if
	373	\begin{itemize}
	374	\item $c = d$, or
	375	\item there exists some class $x$ such that $c \prec_1 x$ and $x \preceq d$.
	376	\end{itemize}
	377	This is the `is-subclass-of' relation we've been using so far.\footnote{%
	378	In some object systems, notably Flavors, this relation is allowed to fail
	379	to be a partial order because of cycles in the class graph. I haven't
	380	given a great deal of thought to how well Sod would cope with a cyclic
	381	class graph.} %
	382	We write $d \succeq c$ and say that $d$ is a superclass of $c$ if and only if
	383	$c \preceq d$.
	384
	385	The problem comes when we try to resolve inheritance questions. A class
	386	should inherit behaviour from its superclasses; but, in a world of multiple
	387	inheritance, which one do we choose? We get a simple version of this problem
	388	when we try to resolve inheritance of slot initializers: only one initializer
	389	can be inherited.
	390
	391	We start by collecting into a set~$I$ the classes which define an initializer
	392	for the slot. If $I$ contains both a class $x$ and one of $x$'s superclasses
	393	then we should prefer $x$ and consider the superclass to be overridden. So
	394	we should confine our attention to \emph{least} classes: a member $x$ of a
	395	set $I$ is least, with respect to a particular partial order, if $y \preceq
	396	x$ only when $x = y$. If there is a single least class in our set then we
	397	have a winner. Otherwise we want some way to choose among them.
	398
	399	This is not uncontroversial. Languages such as \Cplusplus\ refuse to choose
	400	among least classes; instead, any program in which such a choice must be made
	401	is simply declared erroneous.
	402
	403	Simply throwing up our hands in horror at this situation is satisfactory when
	404	we only want to pick one `winner', as we do for slot initializers.
	405	However, method combination is a much more complicated business. We don't
	406	want to pick just one winner: we want to order all of the applicable methods
	407	in some way. Insisting that there is a clear winner at every step along the
	408	chain is too much of an imposition. Instead, we \emph{linearize} the
	409	classes.
	410
	411	%%%--------------------------------------------------------------------------
	412	\section{Invariance, covariance, contravariance}
	413
	414	In Sod, at least with regard to the existing method combinations, method
	415	types are \emph{invariant}. This is not an accident, and it's not due to
	416	ignorance.
	417
	418	The \emph{signature} of a function, method or message describes its argument
	419	and return-value types. If a method's arguments are an integer and a string,
	420	and it returns a character, we might write its signature as
	421	\[ (@\|int\|, @\|string\|) \to @\|char\| \]
	422	In Sod, a method's arguments have to match its message's arguments precisely,
	423	and the return type must either be @\|void\| -- for a dæmon method -- or again
	424	match the message's return type. This is argument and return-type
	425	\emph{invariance}.
	426
	427	Some object systems allow methods with subtly different signatures to be
	428	defined on a single message. In particular, since the idea is that instances
	429	of a subclass ought to be broadly compatible~(see \xref{sec:phil.lsp}) with
	430	existing code which expects instances of a superclass, we might be able to
	431	get away with bending method signatures one way or another to permit this.
	432
	433	\Cplusplus\ permits \emph{return-type covariance}, where a method's return
	434	type can be a subclass of the return type specified by a less-specific
	435	method. Eiffel allows \emph{argument covariance}, where a method's arguments
	436	can be subclasses of the arguments specified by a less-specific
	437	method.\footnote{%
	438	Attentive readers will note that I ought to be talking about pointers to
	439	instances throughout. I'm trying to limit the weight of the notation.
	440	Besides, I prefer data models as found in Lisp and Python where all values
	441	are held by reference.} %
	442
	443	Eiffel's argument covariance is unsafe.\footnote{%
	444	Argument covariance is correct if you're doing runtime dispatch based on
	445	argument types. Eiffel isn't: it's single dispatch, like Sod is.} %
	446	Suppose that we have two pairs of classes, $a \prec_1 b$ and $c \prec_1 d$.
	447	Class $b$ defines a message $m$ with signature $d \to @\|int\|$; class $a$
	448	defines a method with signature $c \to @\|int\|$. This means that it's wrong
	449	to send $m$ to an instance of $a$ carrying an argument of type $d$. But of
	450	course, we can treat an instance of $a$ as if it's an instance of $b$,
	451	whereupon it appears that we are permitted to pass a~$d$ in our message. The
	452	result is a well-known hole in the type system. Oops.
	453
	454	\Cplusplus's return-type covariance is fine. Also fine is argument
	455	\emph{contravariance}. If $b$ defined its message to have signature $c \to
	456	@\|int\|$, and $a$ were to broaden its method to $d \to @\|int\|$, there'd be no
	457	problem. All $c$s are $d$s, so viewing an $a$ as a $b$ does no harm.
	458
	459	All of this fiddling with types is fine as long as method inheritance or
	460	overriding is an all-or-nothing thing. But Sod has method combinations,
	461	where applicable methods are taken from the instance's class and all its
	462	superclasses and combined. And this makes everything very messy.
	463
	464	It's possible to sort all of the mess out in the generated effective method
	465	-- we'd just have to convert the arguments to the types that were expected by
	466	the direct methods. This would require expensive run-time conversions of all
	467	of the non-invariant arguments and return values. And we'd need some
	468	complicated rule so that we could choose sensible types for the method
	469	entries in our vtables. Something like this:
	470	\begin{quote} \itshape
	471	For each named argument of a message, there must be a unique greatest type
	472	among the types given for that argument by the applicable methods; and
	473	there must be a unique least type among all of the return types of the
	474	applicable methods.
	475	\end{quote}
	476	I have visions of people wanting to write special no-effect methods whose
	477	only purpose is to guide the translator around the class graph properly.
	478	Let's not.
	479
	480	%% things to talk about:
	481	%% Liskov substitution principle and why it's mad
	482
	483	%%%----- That's all, folks --------------------------------------------------
	484
	485	%%% Local variables:
	486	%%% mode: LaTeX
	487	%%% TeX-master: "sod.tex"
	488	%%% TeX-PDF-mode: t
	489	%%% End: