.\" -*-nroff-*-
.TH bits 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
.ie t \{\
.  ds ss \s8\u
.  ds se \d\s0
.\}
.el \{\
.  ds ss ^
.  ds se
.\}
.SH NAME
bits \- portable bit manipulation macros
.\" octet
.\" uint16
.\" uint24
.\" uint32
.\" uint64
.\" kludge64
.\"
.\" MASK_8
.\" MASK_16
.\" MASK_16_L
.\" MASK_16_B
.\" MASK_24
.\" MASK_24_L
.\" MASK_24_B
.\" MASK_32
.\" MASK_32_L
.\" MASK_32_B
.\" MASK_64
.\" MASK_64_L
.\" MASK_64_B
.\"
.\" SZ_8
.\" SZ_16
.\" SZ_16_L
.\" SZ_16_B
.\" SZ_24
.\" SZ_24_L
.\" SZ_24_B
.\" SZ_32
.\" SZ_32_L
.\" SZ_32_B
.\" SZ_64
.\" SZ_64_L
.\" SZ_64_B
.\"
.\" TY_8
.\" TY_16
.\" TY_16_L
.\" TY_16_B
.\" TY_24
.\" TY_24_L
.\" TY_24_B
.\" TY_32
.\" TY_32_L
.\" TY_32_B
.\" TY_64
.\" TY_64_L
.\" TY_64_B
.\"
.\" DOUINTSZ
.\" DOUINTCONV
.\"
.\" @U8
.\" @U16
.\" @U24
.\" @U32
.\" @U64
.\" @U64_
.\"
.\" @LSL8
.\" @LSR8
.\" @LSL16
.\" @LSR16
.\" @LSL24
.\" @LSR24
.\" @LSL32
.\" @LSR32
.\" @LSL64
.\" @LSR64
.\" @LSL64_
.\" @LSR64_
.\"
.\" @ROL8
.\" @ROR8
.\" @ROL16
.\" @ROR16
.\" @ROL24
.\" @ROR24
.\" @ROL32
.\" @ROR32
.\" @ROL64
.\" @ROR64
.\" @ROL64_
.\" @ROR64_
.\"
.\" ENDSWAP16
.\" ENDSWAP32
.\" ENDSWAP64
.\"
.\" BTOH16
.\" LTOH16
.\" HTOB16
.\" HTOL16
.\" BTOH32
.\" LTOH32
.\" HTOB32
.\" HTOL32
.\" BTOH64
.\" LTOH64
.\" HTOB64
.\" HTOL64
.\"
.\" RAW8
.\" RAW16
.\" RAW32
.\" RAW64
.\"
.\" @GETBYTE
.\" @PUTBYTE
.\"
.\" @LOAD8
.\" @STORE8
.\"
.\" @LOAD16_L
.\" @LOAD16_B
.\" @LOAD16
.\" @STORE16_L
.\" @STORE16_B
.\" @STORE16
.\"
.\" @LOAD24_L
.\" @LOAD24_B
.\" @LOAD24
.\" @STORE24_L
.\" @STORE24_B
.\" @STORE24
.\"
.\" @LOAD32_L
.\" @LOAD32_B
.\" @LOAD32
.\" @STORE32_L
.\" @STORE32_B
.\" @STORE32
.\"
.\" @LOAD64_L
.\" @LOAD64_B
.\" @LOAD64
.\" @STORE64_L
.\" @STORE64_B
.\" @STORE64
.\"
.\" @LOAD64_L_
.\" @LOAD64_B_
.\" @LOAD64_
.\" @STORE64_L_
.\" @STORE64_B_
.\" @STORE64_
.\"
.\" @SET64
.\" @X64
.\" @ASSIGN64
.\" @HI64
.\" @LO64
.\" @GET64
.\" @AND64
.\" @OR64
.\" @XOR64
.\" @CPL64
.\" @ADD64
.\" @SUB64
.\" @CMP64
.\" @ZERO64
.SH SYNOPSIS
.nf
.B "#include <mLib/bits.h>"

.BR "typedef " ... " octet;"
.BR "typedef " ... " uint16;"
.BR "typedef " ... " uint24;"
.BR "typedef " ... " uint32;"
.BR "typedef " ... " uint64;"
.BR "typedef " ... " kludge64;"

.BI "#define TY_" we " " type
.BI "#define SZ_" we " \fR..."
.BI "#define MASK_" we " \fR..."

.BI "#define DOUINTSZ(" f ") \fR..."
.BI "#define DOUINTCONV(" f ") \fR..."

.IB type " U" w ( v );

.IB type " LSL" w ( type " " v ", int " s );
.IB type " LSR" w ( type " " v ", int " s );
.IB type " ROL" w ( type " " v ", int " s );
.IB type " ROR" w ( type " " v ", int " s );

.BI "octet GETBYTE(void *" p ", size_t " o );
.BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );

.IB type " LOAD" we "(void *" p );
.BI "void STORE" we "(void *" p ", " type " " v );

.BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
.BI "kludge64 X64(" hexh ", " hexl );
.BI "void ASSIGN64(kludge64 &" d ", " x );
.BI "uint32 HI64(kludge64 " x );
.BI "uint32 LO64(kludge64 " x );
.IB t " GET64(" t ", kludge64 " x );
.BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void CPL64(kludge64 &" d ", kludge64 " x );
.BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
.BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
.BI "int ZERO64(kludge64 " x );
.fi
.SH DESCRIPTION
The header file
.B <mLib/bits.h>
contains a number of useful definitions for portably dealing with bit-
and byte-level manipulation of larger quantities.  The various macros
and types are named fairly systematically.
.PP
The header provides utilities for working with 64-bit quantities, but a
64-bit integer type is not guaranteed to exist under C89 rules.  This
header takes two approaches.  Firstly, if a 64-bit type is found, the
header defines the macro
.B HAVE_UINT64
and defines the various
.RB ... 64
macros as described below.  Secondly, it unconditionally defines a type
.B kludge64
and a family of macros for working with values of that type.  See below
for details.
.
.SS "Type definitions"
A number of types are defined.
.TP
.B octet
Equivalent to
.BR "unsigned char" .
This is intended to be used when a character array is used to represent
the octets of some external data format.  Note that on some
architectures the
.B "unsigned char"
type may occupy more than 8 bits.
.TP
.B uint16
Equivalent to
.BR "unsigned short" .
Intended to be used when a 16-bit value is required.  This type is
always capable of representing any 16-bit unsigned value, but the actual
type may be wider than 16 bits and will require masking.
.TP
.B uint24
Equivalent to some (architecture-dependent) standard type.  Capable of
representing any unsigned 24-bit value, although the actual type may
be wider than 24 bits.
.TP
.B uint32
Equivalent to some (architecture-dependent) standard type.  Capable of
representing any unsigned 32-bit value, although the actual type may
be wider than 32 bits.
.TP
.B uint64
Equivalent to some (architecture-dependent) standard type, if it exists.
Capable of representing any unsigned 64-bit value, although the
actual type may be wider than 64 bits.
.
.SS "Size/endianness suffixes"
Let
.I w
be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
Furthermore, let
.I we
be one of the size-and-endian suffixes
.IR w ,
or, where
.IR w \~>\~8,
.IB w _L
or
.IB w _B \fR,
where
.RB ` _L '
denotes little-endian (Intel, VAX) representation, and
.RB ` _B '
denotes big-endian (IBM, network) representation; omitting an explicit
suffix gives big-endian order by default, since this is most common in
portable data formats.
.PP
The macro invocation
.BI DOUINTSZ( f )
invokes a given macro
.I f
repeatedly, as
.IB f ( w )
for each size suffix
.I w
listed above.
.PP
The macro invocation
.BI DOUINTCONV( f )
invokes a given macro
.I f
repeatedly, as
.IR f ( w ", " we ", " suff )
where
.I we
ranges over size-and-endian suffixes as described above,
.I w
is just the corresponding bit width, as an integer, and
.I suff
is a suffix
.IR w ,
.IB w l\fR,
or
.IB w b\fR,
suitable for a C function name.
.PP
These macros are intended to be used to define families of related
functions.
.
.SS "Utility macros"
For each size-and-endian suffix
.IR we ,
the following macros are defined.
.TP
.BI TY_ we
A synonym for the appropriate one of the types
.BR octet ,
.BR uint32 ,
etc.\& listed above.
.TP
.BI SZ_ we
The number of octets needed to represent a value of the corresponding
type; i.e., this is
.IR w /8.
.TP
.BI MASK_ we
The largest integer representable in the corresponding type; i.e., this
is
.RI 2\*(ss w \*(se\~\-\~1.
.PP
(Note that the endianness suffix is irrelevant in the above
definitions.)
.PP
For each size suffix
.IR w ,
the macro invocation
.BI U w ( x )
coerces an integer
.I x
to the appropriate type; specifically, it returns the smallest
nonnegative integer congruent to
.I x
(modulo
.RI 2\*(ss w \*(se).
.
.SS "Shift and rotate"
For each size suffix
.IR w ,
the macro invocations
.BI LSL w ( x ", " n )
and
.BI LSR w ( x ", " n )
shift a
.IR w -bit
quantity
.I x
left or right, respectively, by
.I n
places; if
.IR n \~\(>=\~ w
then
.I n
is reduced modulo
.IR w .
(This behaviour is unfortunate, but (a) it's what a number of CPUs
provide natively, and (b) it's a cheap way to prevent undefined
behaviour.)  Similarly,
.BI ROL w ( x ", " n )
and
.BI ROR w ( x ", " n )
rotate a
.IR w -bit
quantity
.I x
left or right, respectively, by
.I n
places.
.
.SS "Byte order conversions"
For each size suffix
.IR w ,
the macro invocation
.BI ENDSWAP w ( x )
returns the
.IR w -bit
value
.IR x
with its bytes reversed.  The
.B ENDSWAP8
macro does nothing (except truncate its operand to 8 bits), but is
provided for the sake of completeness.
.PP
A
.I big-endian
representation stores the most significant octet of an integer at the
lowest address, with the following octets in decreasing order of
significance.  A
.I little-endian
representation instead stores the
.I least
significant octet at the lowest address, with the following octets in
increasing order of significance.  An environment has a preferred order
for arranging the constituent octets of an integer of some given size in
memory; this might be either the big- or little-endian representation
just described, or something else strange.
.PP
It might be possible to rearrange the bits in an integer so that, when
that integer is stored to memory in the environment's preferred manner,
you end up with the big- or little-endian representation of the original
integer; and, similarly, it might be possible to load a big- or
little-endian representation of an integer into a variable using the
environment's preferred ordering and then rearrange the bits so as to
recover the integer value originally represented.  If the environment is
sufficiently strange, these things might not be possible, but this is
actually quite rare.
.PP
Say that an integer has been converted to
.I big-
or
.I "little-endian form"
if, when it is stored in memory in the environment's preferred manner,
one ends up with a big- or little-endian representation of the original
integer.  Equivalently, if one starts with a big- or little-endian
representation of some integer, and loads it into a variable using the
environment's preferred manner, one ends up with the big- or
little-endian form of the original integer.
.PP
If these things are possible, then the following macros are defined.
.TP
.BI HTOL w ( x )
Convert a
.IR w -bit
integer
.I x
to little-endian form.
.TP
.BI HTOB w ( x )
Convert a
.IR w -bit
integer
.I x
to big-endian form.
.TP
.BI LTOH w ( x )
Convert a
.IR w -bit
integer
.I x
from little-endian form.
.TP
.BI BTOH w ( x )
Convert a
.IR w -bit
integer
.I x
from big-endian form.
.
.SS "Load and store"
The macro invocation
.BI GETBYTE( p ", " o )
returns the
.IR o th
octet following the address
.IR p .
Conversely,
.BI PUTBYTE( p ", " o ", " v )
stores
.I v
in the
.IR o th
byte following the address
.IR p .
These macros always operate on byte offsets regardless of the type of
the pointer
.IR p .
.PP
For each size suffix
.IR w ,
there may be a macro such that the invocation
.BI RAW w ( p )
is an lvalue designating the
.IR w /8
octets starting at address
.IR p ,
interpreted according to the environment's preferred representation,
except that
.I p
need not be aligned in any particular fashion.  There are many reasons
why this might not be possible; programmers are not normally expected to
use these macros directly, and they are documented in case they are
useful for special effects.
.PP
For each size-and-endian suffix
.IR we ,
the macro invocation
.BI LOAD we ( p )
loads and returns a value in the corresponding format at address
.IR p ;
similarly,
.BI STORE we ( p ", " x )
stores the value
.I x
at address
.I p
in the corresponding format.
.
.SS "64-bit support"
For portability to environments without native 64-bit integers, the
structure
.B kludge64
is defined.  If the target platform is known to have an unsigned 64-bit
integer type, then this structure merely encapsulates a native integer,
and a decent optimizing compiler can be expected to handle this exactly
as if it were the native type.  Otherwise, it contains two 32-bit halves
which are processed the hard way.
.PP
For each of the above macros with a suffix
.BR 64 ,
.BR 64_L ,
or
.BR 64_B ,
an additional `kludge' macro is defined, whose name has an additional
final underscore; e.g., the kludge macro corresponding to
.B ROR64
is
.BR ROR64_ ;
and that corresponding to
.B LOAD64_L
is
.BR LOAD64_L_ .
If the original macro would have
.I returned
a value of type
.BR uint64 ,
then the kludge macro has an additional first argument, denoted
.IR d ,
which should be an lvalue of type
.BR kludge64 ,
and the kludge macro will store its result in
.IR d .
The kludge macro's remaining arguments are the same as the original
macro, except that where the original macro accepts an argument of type
.BR uint64 ,
the kludge macro accepts an argument of type
.B kludge64
instead.
.PP
Finally, a number of additional macros are provided, to make working
with
.B kludge64
somewhat less awful.
.TP
.BI SET64( d ", " h ", " l )
Set the high 32 bits of
.I d
to be
.IR h ,
and the low 32 bits to be
.IR l .
Both
.I h
and
.I l
may be arbitrary integers.
.TP
.BI X64( hexh ", " hexl )
Expands to an initializer for an object of type
.B kludge64
where
.I hexh
and
.I hexl
encode the high and low 32-bit halves in hexadecimal, without any
.B 0x
prefix.
.TP
.BI ASSIGN64( d ", " x )
Make
.I d
be a copy of the
.B kludge64
.IR x .
.TP
.BI HI64( x )
Return the high 32 bits of
.IR x .
.TP
.BI LO64( x )
Return the low 32 bits of
.IR x .
.TP
.BI GET64( t ", " x )
Return the value of
.I x
as a value of type
.IR t .
If
.I t
is an unsigned integer type, then the value will be truncated to fit as
necessary; if
.I t
is a signed integer type, then the behaviour is undefined if the value
of
.I x
is too large.
.TP
.BI AND64( d ", " x ", " y )
Set
.I d
to be the bitwise-and of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI OR64( d ", " x ", " y )
Set
.I d
to be the bitwise-or of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI XOR64( d ", " x ", " y )
Set
.I d
to be the bitwise-exclusive-or of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI CPL64( d ", " x )
Set
.I d
to be the bitwise complement of the
.B kludge64
argument
.IR x .
.TP
.BI ADD64( d ", " x ", " y )
Set
.I d
to be the sum of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI SUB64( d ", " x ", " y )
Set
.I d
to be the difference of the two
.B kludge64
arguments
.I x
and
.IR y .
.TP
.BI CMP64( x ", " op ", " y )
Here,
.I x
and
.I y
should be arguments of type
.B kludge64
and
.I op
should be one of the relational operators
.BR == ,
.BR < ,
.BR <= ,
.BR > ,
or
.B >=
\(en
.I not
.BR != .
Evaluates nonzero if
.IR x \~ op \~ y .
.TP
.BI ZERO64( x )
Evaluates nonzero if the
.B kludge64
argument
.I x
is exactly zero.
.SH "SEE ALSO"
.BR mLib (3).
.SH AUTHOR
Mark Wooding, <mdw@distorted.org.uk>