3 .\" Manual for bit manipulation
5 .\" (c) 1999, 2001, 2005, 2009, 2018, 2024 Straylight/Edgeware
8 .\"----- Licensing notice ---------------------------------------------------
10 .\" This file is part of the mLib utilities library.
12 .\" mLib is free software: you can redistribute it and/or modify it under
13 .\" the terms of the GNU Library General Public License as published by
14 .\" the Free Software Foundation; either version 2 of the License, or (at
15 .\" your option) any later version.
17 .\" mLib is distributed in the hope that it will be useful, but WITHOUT
18 .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 .\" License for more details.
22 .\" You should have received a copy of the GNU Library General Public
23 .\" License along with mLib. If not, write to the Free Software
24 .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
27 .\"--------------------------------------------------------------------------
28 .so ../defs.man \" @@@PRE@@@
30 .\"--------------------------------------------------------------------------
31 .TH bits 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
195 .\"--------------------------------------------------------------------------
197 bits \- portable bit manipulation macros
199 .\"--------------------------------------------------------------------------
203 .B "#include <mLib/bits.h>"
205 .BR "typedef " ... " octet;"
206 .BR "typedef " ... " uint16;"
207 .BR "typedef " ... " uint24;"
208 .BR "typedef " ... " uint32;"
209 .BR "typedef " ... " uint64;"
210 .BR "typedef " ... " kludge64;"
221 and, on platforms with a 64-bit type,
237 and, on platforms with a 64-bit type,
244 .BI "#define TY_" we " " type
245 .BI "#define SZ_" we " \fR..."
246 .BI "#define MASK_" we " \fR..."
248 .BI "#define DOUINTSZ(" f ") \fR..."
249 .BI "#define DOUINTCONV(" f ") \fR..."
251 .IB type " U" w ( v );
253 .IB type " LSL" w ( type " " v ", int " s );
254 .IB type " LSR" w ( type " " v ", int " s );
255 .IB type " ROL" w ( type " " v ", int " s );
256 .IB type " ROR" w ( type " " v ", int " s );
258 .BI "octet GETBYTE(void *" p ", size_t " o );
259 .BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
261 .IB type " LOAD" we "(void *" p );
262 .BI "void STORE" we "(void *" p ", " type " " v );
264 .BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
265 .BI "kludge64 X64(" hexh ", " hexl );
266 .BI "void ASSIGN64(kludge64 &" d ", " x );
267 .BI "uint32 HI64(kludge64 " x );
268 .BI "uint32 LO64(kludge64 " x );
269 .IB ty " GET64(" ty ", kludge64 " x );
270 .BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
271 .BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
272 .BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
273 .BI "void CPL64(kludge64 &" d ", kludge64 " x );
274 .BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
275 .BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
276 .BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
277 .BI "int ZERO64(kludge64 " x );
280 .\"--------------------------------------------------------------------------
285 contains a number of useful definitions for portably dealing with bit-
286 and byte-level manipulation of larger quantities. The various macros
287 and types are named fairly systematically.
289 The header provides utilities for working with 64-bit quantities, but a
290 64-bit integer type is not guaranteed to exist under C89 rules. This
291 header takes two approaches. Firstly, if a 64-bit type is found, the
292 header defines the macro
294 and defines the various
296 macros as described below. Secondly, it unconditionally defines a type
298 and a family of macros for working with them. See below for details.
300 .SS "Type definitions"
301 A number of types are defined.
305 .BR "unsigned char" .
306 This is intended to be used when a character array is used to represent
307 the octets of some external data format. Note that on some
310 type may occupy more than 8 bits.
314 .BR "unsigned short" .
315 Intended to be used when a 16-bit value is required. This type is
316 always capable of representing any 16-bit unsigned value, but the actual
317 type may be wider than 16 bits and will require masking.
320 Equivalent to some (architecture-dependent) standard type. Capable of
321 representing any unsigned 24-bit value, although the actual type may
322 be wider than 24 bits.
325 Equivalent to some (architecture-dependent) standard type. Capable of
326 representing any unsigned 32-bit value, although the actual type may
327 be wider than 32 bits.
330 Equivalent to some (architecture-dependent) standard type, if it exists.
331 Capable of representing any unsigned 64-bit value, although the
332 actual type may be wider than 64 bits.
334 .SS "Size/endianness suffixes"
337 be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
340 be one of the size-and-endian suffixes
349 denotes little-endian (Intel, VAX) representation, and
351 denotes big-endian (IBM, network) representation; omitting an explicit
352 suffix gives big-endian order by default, since this is most common in
353 portable data formats.
357 invokes a given macro
367 invokes a given macro
370 .IR f ( w ", " we ", " suff )
373 ranges over size-and-endian suffixes as described above,
375 is just the corresponding bit width, as an integer, and
382 suitable for a C function name.
384 These macros are intended to be used to define families of related
388 For each size-and-endian suffix
390 the following macros are defined.
393 A synonym for the appropriate one of the types
399 The number of octets needed to represent a value of the corresponding
404 The largest integer representable in the corresponding type; i.e., this
406 .RI 2\*(ss w \*(se\~\-\~1.
408 (Note that the endianness suffix is irrelevant in the above
417 to the appropriate type; specifically, it returns the smallest
418 nonnegative integer congruent to
423 .SS "Shift and rotate"
426 the macro invocations
427 .BI LSL w ( x ", " n )
429 .BI LSR w ( x ", " n )
434 left or right, respectively, by
442 (This behaviour is unfortunate, but (a) it's what a number of CPUs
443 provide natively, and (b) it's a cheap way to prevent undefined
444 behaviour.) Similarly,
445 .BI ROL w ( x ", " n )
447 .BI ROR w ( x ", " n )
452 left or right, respectively, by
456 .SS "Byte order conversions"
465 with its bytes reversed. The
467 macro does nothing (except truncate its operand to 8 bits), but is
468 provided for the sake of completeness.
472 representation stores the most significant octet of an integer at the
473 lowest address, with the following octets in decreasing order of
476 representation instead stores the
478 significant octet at the lowest address, with the following octets in
479 increasing order of significance. An environment has a preferred order
480 for arranging the constituent octets of an integer of some given size in
481 memory; this might be either the big- or little-endian representation
482 just described, or something else strange.
484 It might be possible to rearrange the bits in an integer so that, when
485 that integer is stored to memory in the environment's preferred manner,
486 you end up with the big- or little-endian representation of the original
487 integer; and, similarly, it might be possible to load a big- or
488 little-endian representation of an integer into a variable using the
489 environment's preferred ordering and then rearrange the bits so as to
490 recover the integer value originally represented. If the environment is
491 sufficiently strange, these things might not be possible, but this is
494 Say that an integer has been converted to
497 .I "little-endian form"
498 if, when it is stored in memory in the environment's preferred manner,
499 one ends up with a big- or little-endian representation of the original
500 integer. Equivalently, if one starts with a big- or little-endian
501 representation of some integer, and loads it into a variable using the
502 environment's preferred manner, one ends up with the big- or
503 little-endian form of the original integer.
505 If these things are possible, then the following macros are defined.
512 to little-endian form.
526 from little-endian form.
533 from big-endian form.
537 .BI GETBYTE( p ", " o )
540 octet following the address
543 .BI PUTBYTE( p ", " o ", " v )
548 byte following the address
550 These macros always operate on byte offsets regardless of the type of
556 there may be a macro such that the invocation
558 is an lvalue designating the
560 octets starting at address
562 interpreted according to the environment's preferred representation,
565 need not be aligned in any particular fashion. There are many reasons
566 why this might not be possible; programmers are not normally expected to
567 use these macros directly, and they are documented in case they are
568 useful for special effects.
570 For each size-and-endian suffix
574 loads and returns a value in the corresponding format at address
577 .BI STORE we ( p ", " x )
582 in the corresponding format.
585 For portability to environments without native 64-bit integers, the
588 is defined. If the target platform is known to have an unsigned 64-bit
589 integer type, then this structure merely encapsulates a native integer,
590 and a decent optimizing compiler can be expected to handle this exactly
591 as if it were the native type. Otherwise, it contains two 32-bit halves
592 which are processed the hard way.
594 For each of the above macros with a suffix
599 an additional `kludge' macro is defined, whose name has an additional
600 final underscore; e.g., the kludge macro corresponding to
604 and that corresponding to
608 If the original macro would have
612 then the kludge macro has an additional first argument, denoted
614 which should be an lvalue of type
616 and the kludge macro will store its result in
618 The kludge macro's remaining arguments are the same as the original
619 macro, except that where the original macro accepts an argument of type
621 the kludge macro accepts an argument of type
625 Finally, a number of additional macros are provided, to make working
630 .BI SET64( d ", " h ", " l )
631 Set the high 32 bits of
635 and the low 32 bits to be
641 may be arbitrary integers.
643 .BI X64( hexh ", " hexl )
644 Expands to an initializer for an object of type
650 encode the high and low 32-bit halves in hexadecimal, without any
654 .BI ASSIGN64( d ", " x )
662 Return the high 32 bits of
666 Return the low 32 bits of
669 .BI GET64( t ", " x )
676 is an unsigned integer type, then the value will be truncated to fit as
679 is a signed integer type, then the behaviour is undefined if the value
684 .BI AND64( d ", " x ", " y )
687 to be the bitwise-and of the two
694 .BI OR64( d ", " x ", " y )
697 to be the bitwise-or of the two
704 .BI XOR64( d ", " x ", " y )
707 to be the bitwise-exclusive-or of the two
714 .BI CPL64( d ", " x )
717 to be the bitwise complement of the
722 .BI ADD64( d ", " x ", " y )
725 to be the sum of the two
732 .BI SUB64( d ", " x ", " y )
735 to be the difference of the two
742 .BI CMP64( x ", " op ", " y )
747 should be arguments of type
751 should be one of the relational operators
765 Evaluates nonzero if the
771 .\"--------------------------------------------------------------------------
776 .\"--------------------------------------------------------------------------
779 Mark Wooding, <mdw@distorted.org.uk>
781 .\"----- That's all, folks --------------------------------------------------