.\" -*-nroff-*- .TH bits 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library" .ie t \{\ . ds ss \s8\u . ds se \d\s0 .\} .el \{\ . ds ss ^ . ds se .\} .SH NAME bits \- portable bit manipulation macros .\" octet .\" uint16 .\" uint24 .\" uint32 .\" uint64 .\" kludge64 .\" .\" MASK_8 .\" MASK_16 .\" MASK_16_L .\" MASK_16_B .\" MASK_24 .\" MASK_24_L .\" MASK_24_B .\" MASK_32 .\" MASK_32_L .\" MASK_32_B .\" MASK_64 .\" MASK_64_L .\" MASK_64_B .\" .\" SZ_8 .\" SZ_16 .\" SZ_16_L .\" SZ_16_B .\" SZ_24 .\" SZ_24_L .\" SZ_24_B .\" SZ_32 .\" SZ_32_L .\" SZ_32_B .\" SZ_64 .\" SZ_64_L .\" SZ_64_B .\" .\" TY_8 .\" TY_16 .\" TY_16_L .\" TY_16_B .\" TY_24 .\" TY_24_L .\" TY_24_B .\" TY_32 .\" TY_32_L .\" TY_32_B .\" TY_64 .\" TY_64_L .\" TY_64_B .\" .\" DOUINTSZ .\" DOUINTCONV .\" .\" @U8 .\" @U16 .\" @U24 .\" @U32 .\" @U64 .\" @U64_ .\" .\" @LSL8 .\" @LSR8 .\" @LSL16 .\" @LSR16 .\" @LSL24 .\" @LSR24 .\" @LSL32 .\" @LSR32 .\" @LSL64 .\" @LSR64 .\" @LSL64_ .\" @LSR64_ .\" .\" @ROL8 .\" @ROR8 .\" @ROL16 .\" @ROR16 .\" @ROL24 .\" @ROR24 .\" @ROL32 .\" @ROR32 .\" @ROL64 .\" @ROR64 .\" @ROL64_ .\" @ROR64_ .\" .\" ENDSWAP16 .\" ENDSWAP32 .\" ENDSWAP64 .\" .\" BTOH16 .\" LTOH16 .\" HTOB16 .\" HTOL16 .\" BTOH32 .\" LTOH32 .\" HTOB32 .\" HTOL32 .\" BTOH64 .\" LTOH64 .\" HTOB64 .\" HTOL64 .\" .\" RAW8 .\" RAW16 .\" RAW32 .\" RAW64 .\" .\" @GETBYTE .\" @PUTBYTE .\" .\" @LOAD8 .\" @STORE8 .\" .\" @LOAD16_L .\" @LOAD16_B .\" @LOAD16 .\" @STORE16_L .\" @STORE16_B .\" @STORE16 .\" .\" @LOAD24_L .\" @LOAD24_B .\" @LOAD24 .\" @STORE24_L .\" @STORE24_B .\" @STORE24 .\" .\" @LOAD32_L .\" @LOAD32_B .\" @LOAD32 .\" @STORE32_L .\" @STORE32_B .\" @STORE32 .\" .\" @LOAD64_L .\" @LOAD64_B .\" @LOAD64 .\" @STORE64_L .\" @STORE64_B .\" @STORE64 .\" .\" @LOAD64_L_ .\" @LOAD64_B_ .\" @LOAD64_ .\" @STORE64_L_ .\" @STORE64_B_ .\" @STORE64_ .\" .\" @SET64 .\" @X64 .\" @ASSIGN64 .\" @HI64 .\" @LO64 .\" @GET64 .\" @AND64 .\" @OR64 .\" @XOR64 .\" @CPL64 .\" @ADD64 .\" @SUB64 .\" @CMP64 .\" @ZERO64 .SH 
SYNOPSIS .nf .B "#include <mLib/bits.h>" .BR "typedef " ... " octet;" .BR "typedef " ... " uint16;" .BR "typedef " ... " uint24;" .BR "typedef " ... " uint32;" .BR "typedef " ... " uint64;" .BR "typedef " ... " kludge64;" .BI "#define TY_" we " " type .BI "#define SZ_" we " \fR..." .BI "#define MASK_" we " \fR..." .BI "#define DOUINTSZ(" f ") \fR..." .BI "#define DOUINTCONV(" f ") \fR..." .IB type " U" w ( v ); .IB type " LSL" w ( type " " v ", int " s ); .IB type " LSR" w ( type " " v ", int " s ); .IB type " ROL" w ( type " " v ", int " s ); .IB type " ROR" w ( type " " v ", int " s ); .BI "octet GETBYTE(void *" p ", size_t " o ); .BI "void PUTBYTE(void *" p ", size_t " o ", octet " v ); .IB type " LOAD" we "(void *" p ); .BI "void STORE" we "(void *" p ", " type " " v ); .BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l ); .BI "kludge64 X64(" hexh ", " hexl ); .BI "void ASSIGN64(kludge64 &" d ", " x ); .BI "uint32 HI64(kludge64 " x ); .BI "uint32 LO64(kludge64 " x ); .IB ty " GET64(" ty ", kludge64 " x ); .BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y ); .BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y ); .BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y ); .BI "void CPL64(kludge64 &" d ", kludge64 " x ); .BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y ); .BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y ); .BI "int CMP64(kludge64 " x ", " op ", kludge64 " y ); .BI "int ZERO64(kludge64 " x ); .fi .SH DESCRIPTION The header file .B <mLib/bits.h> contains a number of useful definitions for portably dealing with bit- and byte-level manipulation of larger quantities. The various macros and types are named fairly systematically. .PP The header provides utilities for working with 64-bit quantities, but a 64-bit integer type is not guaranteed to exist under C89 rules. This header takes two approaches. Firstly, if a 64-bit type is found, the header defines the macro .B HAVE_UINT64 and defines the various .RB ... 
64 macros as described below. Secondly, it unconditionally defines a type .B kludge64 and a family of macros for working with them. See below for details. . .SS "Type definitions" A number of types are defined. .TP .B octet Equivalent to .BR "unsigned char" . This is intended to be used when a character array is used to represent the octets of some external data format. Note that on some architectures the .B "unsigned char" type may occupy more than 8 bits. .TP .B uint16 Equivalent to .BR "unsigned short" . Intended to be used when a 16-bit value is required. This type is always capable of representing any 16-bit unsigned value, but the actual type may be wider than 16 bits and will require masking. .TP .B uint24 Equivalent to some (architecture-dependent) standard type. Capable of representing any unsigned 24-bit value, although the actual type may be wider than 24 bits. .TP .B uint32 Equivalent to some (architecture-dependent) standard type. Capable of representing any unsigned 32-bit value, although the actual type may be wider than 32 bits. .TP .B uint64 Equivalent to some (architecture-dependent) standard type, if it exists. Capable of representing any unsigned 64-bit value, although the actual type may be wider than 64 bits. . .SS "Size/endianness suffixes" Let .I w be one of the size suffixes: 8, 16, 24, 32, and (if available) 64. Furthermore, let .I we be one of the size-and-endian suffixes .IR w , or, where .IR w \~>\~8, .IB w _L or .IB w _B \fR, where .RB ` _L ' denotes little-endian (Intel, VAX) representation, and .RB ` _B ' denotes big-endian (IBM, network) representation; omitting an explicit suffix gives big-endian order by default, since this is most common in portable data formats. .PP The macro invocation .BI DOUINTSZ( f ) invokes a given macro .I f repeatedly, as .IB f ( w ) for each size suffix .I w listed above. 
.PP The macro invocation .BI DOUINTCONV( f ) invokes a given macro .I f repeatedly, as .IR f ( w ", " we ", " suff ) where .I we ranges over size-and-endian suffixes as described above, .I w is just the corresponding bit width, as an integer, and .I suff is a suffix .IR w , .IB w l\fR, or .IB w b\fR, suitable for a C function name. .PP These macros are intended to be used to define families of related functions. . .SS "Utility macros" For each size-and-endian suffix .IR we , the following macros are defined. .TP .BI TY_ we A synonym for the appropriate one of the types .BR octet , .BR uint32 , etc.\& listed above. .TP .BI SZ_ we The number of octets needed to represent a value of the corresponding type; i.e., this is .IR w /8. .TP .BI MASK_ we The largest integer representable in the corresponding type; i.e., this is .RI 2\*(ss w \*(se\~\-\~1. .PP (Note that the endianness suffix is irrelevant in the above definitions.) .PP For each size suffix .IR w , the macro invocation .BI U w ( x ) coerces an integer .I x to the appropriate type; specifically, it returns the smallest nonnegative integer congruent to .I x (modulo .RI 2\*(ss w \*(se). . .SS "Shift and rotate" For each size suffix .IR w , the macro invocations .BI LSL w ( x ", " n ) and .BI LSR w ( x ", " n ) shift a .IR w -bit quantity .I x left or right, respectively, by .I n places; if .IR n \~\(>=\~ w then .I n is reduced modulo .IR w . (This behaviour is unfortunate, but (a) it's what a number of CPUs provide natively, and (b) it's a cheap way to prevent undefined behaviour.) Similarly, .BI ROL w ( x ", " n ) and .BI ROR w ( x ", " n ) rotate a .IR w -bit quantity .I x left or right, respectively, by .I n places. . .SS "Byte order conversions" For each size suffix .IR w , the macro invocation .BI ENDSWAP w ( x ) returns the .IR w -bit value .IR x with its bytes reversed. The .B ENDSWAP8 macro does nothing (except truncate its operand to 8 bits), but is provided for the sake of completeness. 
.PP A .I big-endian representation stores the most significant octet of an integer at the lowest address, with the following octets in decreasing order of significance. A .I little-endian representation instead stores the .I least significant octet at the lowest address, with the following octets in increasing order of significance. An environment has a preferred order for arranging the constituent octets of an integer of some given size in memory; this might be either the big- or little-endian representation just described, or something else strange. .PP It might be possible to rearrange the bits in an integer so that, when that integer is stored to memory in the environment's preferred manner, you end up with the big- or little-endian representation of the original integer; and, similarly, it might be possible to load a big- or little-endian representation of an integer into a variable using the environment's preferred ordering and then rearrange the bits so as to recover the integer value originally represented. If the environment is sufficiently strange, these things might not be possible, but this is actually quite rare. .PP Say that an integer has been converted to .I big- or .I "little-endian form" if, when it is stored in memory in the environment's preferred manner, one ends up with a big- or little-endian representation of the original integer. Equivalently, if one starts with a big- or little-endian representation of some integer, and loads it into a variable using the environment's preferred manner, one ends up with the big- or little-endian form of the original integer. .PP If these things are possible, then the following macros are defined. .TP .BI HTOL w ( x ) Convert a .IR w -bit integer .I x to little-endian form. .TP .BI HTOB w ( x ) Convert a .IR w -bit integer .I x to big-endian form. .TP .BI LTOH w ( x ) Convert a .IR w -bit integer .I x from little-endian form. .TP .BI BTOH w ( x ) Convert a .IR w -bit integer .I x from big-endian form. . 
.SS "Load and store" The macro invocation .BI GETBYTE( p ", " o ) returns the .IR o th octet following the address .IR p . Conversely, .BI PUTBYTE( p ", " o ", " v ) stores .I v in the .IR o th byte following the address .IR p . These macros always operate on byte offsets regardless of the type of the pointer .IR p . .PP For each size suffix .IR w , there may be a macro such that the invocation .BI RAW w ( p ) is an lvalue designating the .IR w /8 octets starting at address .IR p , interpreted according to the environment's preferred representation, except that .I p need not be aligned in any particular fashion. There are many reasons why this might not be possible; programmers are not normally expected to use these macros directly, and they are documented in case they are useful for special effects. .PP For each size-and-endian suffix .IR we , the macro invocation .BI LOAD we ( p ) loads and returns a value in the corresponding format at address .IR p ; similarly, .BI STORE we ( p ", " x ) stores the value .I x at address .I p in the corresponding format. . .SS "64-bit support" For portability to environments without native 64-bit integers, the structure .B kludge64 is defined. If the target platform is known to have an unsigned 64-bit integer type, then this structure merely encapsulates a native integer, and a decent optimizing compiler can be expected to handle this exactly as if it were the native type. Otherwise, it contains two 32-bit halves which are processed the hard way. .PP For each of the above macros with a suffix .BR 64 , .BR 64_L , or .BR 64_B , an additional `kludge' macro is defined, whose name has an additional final underscore; e.g., the kludge macro corresponding to .B ROR64 is .BR ROR64_ ; and that corresponding to .B LOAD64_L is .BR LOAD64_L_ . 
If the original macro would have .I returned a value of type .BR uint64 , then the kludge macro has an additional first argument, denoted .IR d , which should be an lvalue of type .BR kludge64 , and the kludge macro will store its result in .IR d . The kludge macro's remaining arguments are the same as the original macro, except that where the original macro accepts an argument of type .BR uint64 , the kludge macro accepts an argument of type .B kludge64 instead. .PP Finally, a number of additional macros are provided, to make working with .B kludge64 somewhat less awful. .TP .BI SET64( d ", " h ", " l ) Set the high 32 bits of .I d to be .IR h , and the low 32 bits to be .IR l . Both .I h and .I l may be arbitrary integers. .TP .BI X64( hexh ", " hexl ) Expands to an initializer for an object of type .B kludge64 where .I hexh and .I hexl encode the high and low 32-bit halves in hexadecimal, without any .B 0x prefix. .TP .BI ASSIGN64( d ", " x ) Make .I d be a copy of the .B kludge64 .IR x . .TP .BI HI64( x ) Return the high 32 bits of .IR x . .TP .BI LO64( x ) Return the low 32 bits of .IR x . .TP .BI GET64( t ", " x ) Return the value of .I x as a value of type .IR t . If .I t is an unsigned integer type, then the value will be truncated to fit as necessary; if .I t is a signed integer type, then the behaviour is undefined if the value of .I x is too large. .TP .BI AND64( d ", " x ", " y ) Set .I d to be the bitwise-and of the two .B kludge64 arguments .I x and .IR y . .TP .BI OR64( d ", " x ", " y ) Set .I d to be the bitwise-or of the two .B kludge64 arguments .I x and .IR y . .TP .BI XOR64( d ", " x ", " y ) Set .I d to be the bitwise-exclusive-or of the two .B kludge64 arguments .I x and .IR y . .TP .BI CPL64( d ", " x ) Set .I d to be the bitwise complement of the .B kludge64 argument .IR x . .TP .BI ADD64( d ", " x ", " y ) Set .I d to be the sum of the two .B kludge64 arguments .I x and .IR y . 
.TP .BI SUB64( d ", " x ", " y ) Set .I d to be the difference of the two .B kludge64 arguments .I x and .IR y . .TP .BI CMP64( x ", " op ", " y ) Here, .I x and .I y should be arguments of type .B kludge64 and .I op should be one of the relational operators .BR == , .BR < , .BR <= , .BR > , or .B >= \(en .I not .BR != . Evaluates nonzero if .IR x \~ op \~ y . .TP .BI ZERO64( x ) Evaluates nonzero if the .B kludge64 argument .I x is exactly zero. .SH "SEE ALSO" .BR mLib (3). .SH AUTHOR Mark Wooding,