chiark - git - mdw - mLib/blob - utils/bits.3.in

   1 .\" -*-nroff-*-
   2 .\"
   3 .\" Manual for bit manipulation
   4 .\"
   5 .\" (c) 1999, 2001, 2005, 2009, 2018, 2024 Straylight/Edgeware
   6 .\"
   7 .
   8 .\"----- Licensing notice ---------------------------------------------------
   9 .\"
  10 .\" This file is part of the mLib utilities library.
  11 .\"
  12 .\" mLib is free software: you can redistribute it and/or modify it under
  13 .\" the terms of the GNU Library General Public License as published by
  14 .\" the Free Software Foundation; either version 2 of the License, or (at
  15 .\" your option) any later version.
  16 .\"
  17 .\" mLib is distributed in the hope that it will be useful, but WITHOUT
  18 .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  19 .\" FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
  20 .\" License for more details.
  21 .\"
  22 .\" You should have received a copy of the GNU Library General Public
  23 .\" License along with mLib.  If not, write to the Free Software
  24 .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  25 .\" USA.
  26 .
  27 .\"--------------------------------------------------------------------------
  28 .so ../defs.man \" @@@PRE@@@
  29 .
  30 .\"--------------------------------------------------------------------------
  31 .TH bits 3mLib "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
  32 .\" octet
  33 .\" uint16
  34 .\" uint24
  35 .\" uint32
  36 .\" uint64
  37 .\" kludge64
  38 .\"
  39 .\" MASK_8
  40 .\" MASK_16
  41 .\" MASK_16_L
  42 .\" MASK_16_B
  43 .\" MASK_24
  44 .\" MASK_24_L
  45 .\" MASK_24_B
  46 .\" MASK_32
  47 .\" MASK_32_L
  48 .\" MASK_32_B
  49 .\" MASK_64
  50 .\" MASK_64_L
  51 .\" MASK_64_B
  52 .\"
  53 .\" SZ_8
  54 .\" SZ_16
  55 .\" SZ_16_L
  56 .\" SZ_16_B
  57 .\" SZ_24
  58 .\" SZ_24_L
  59 .\" SZ_24_B
  60 .\" SZ_32
  61 .\" SZ_32_L
  62 .\" SZ_32_B
  63 .\" SZ_64
  64 .\" SZ_64_L
  65 .\" SZ_64_B
  66 .\"
  67 .\" TY_8
  68 .\" TY_16
  69 .\" TY_16_L
  70 .\" TY_16_B
  71 .\" TY_24
  72 .\" TY_24_L
  73 .\" TY_24_B
  74 .\" TY_32
  75 .\" TY_32_L
  76 .\" TY_32_B
  77 .\" TY_64
  78 .\" TY_64_L
  79 .\" TY_64_B
  80 .\"
  81 .\" DOUINTSZ
  82 .\" DOUINTCONV
  83 .\"
  84 .\" @U8
  85 .\" @U16
  86 .\" @U24
  87 .\" @U32
  88 .\" @U64
  89 .\" @U64_
  90 .\"
  91 .\" @LSL8
  92 .\" @LSR8
  93 .\" @LSL16
  94 .\" @LSR16
  95 .\" @LSL24
  96 .\" @LSR24
  97 .\" @LSL32
  98 .\" @LSR32
  99 .\" @LSL64
 100 .\" @LSR64
 101 .\" @LSL64_
 102 .\" @LSR64_
 103 .\"
 104 .\" @ROL8
 105 .\" @ROR8
 106 .\" @ROL16
 107 .\" @ROR16
 108 .\" @ROL24
 109 .\" @ROR24
 110 .\" @ROL32
 111 .\" @ROR32
 112 .\" @ROL64
 113 .\" @ROR64
 114 .\" @ROL64_
 115 .\" @ROR64_
 116 .\"
 117 .\" ENDSWAP16
 118 .\" ENDSWAP32
 119 .\" ENDSWAP64
 120 .\"
 121 .\" BTOH16
 122 .\" LTOH16
 123 .\" HTOB16
 124 .\" HTOL16
 125 .\" BTOH32
 126 .\" LTOH32
 127 .\" HTOB32
 128 .\" HTOL32
 129 .\" BTOH64
 130 .\" LTOH64
 131 .\" HTOB64
 132 .\" HTOL64
 133 .\"
 134 .\" RAW8
 135 .\" RAW16
 136 .\" RAW32
 137 .\" RAW64
 138 .\"
 139 .\" @GETBYTE
 140 .\" @PUTBYTE
 141 .\"
 142 .\" @LOAD8
 143 .\" @STORE8
 144 .\"
 145 .\" @LOAD16_L
 146 .\" @LOAD16_B
 147 .\" @LOAD16
 148 .\" @STORE16_L
 149 .\" @STORE16_B
 150 .\" @STORE16
 151 .\"
 152 .\" @LOAD24_L
 153 .\" @LOAD24_B
 154 .\" @LOAD24
 155 .\" @STORE24_L
 156 .\" @STORE24_B
 157 .\" @STORE24
 158 .\"
 159 .\" @LOAD32_L
 160 .\" @LOAD32_B
 161 .\" @LOAD32
 162 .\" @STORE32_L
 163 .\" @STORE32_B
 164 .\" @STORE32
 165 .\"
 166 .\" @LOAD64_L
 167 .\" @LOAD64_B
 168 .\" @LOAD64
 169 .\" @STORE64_L
 170 .\" @STORE64_B
 171 .\" @STORE64
 172 .\"
 173 .\" @LOAD64_L_
 174 .\" @LOAD64_B_
 175 .\" @LOAD64_
 176 .\" @STORE64_L_
 177 .\" @STORE64_B_
 178 .\" @STORE64_
 179 .\"
 180 .\" @SET64
 181 .\" @X64
 182 .\" @ASSIGN64
 183 .\" @HI64
 184 .\" @LO64
 185 .\" @GET64
 186 .\" @AND64
 187 .\" @OR64
 188 .\" @XOR64
 189 .\" @CPL64
 190 .\" @ADD64
 191 .\" @SUB64
 192 .\" @CMP64
 193 .\" @ZERO64
 194 .
 195 .\"--------------------------------------------------------------------------
 196 .SH NAME
 197 bits \- portable bit manipulation macros
 198 .
 199 .\"--------------------------------------------------------------------------
 200 .SH SYNOPSIS
 201 .
 202 .nf
 203 .B "#include <mLib/bits.h>"
 204 .PP
 205 .BR "typedef " ... " octet;"
 206 .BR "typedef " ... " uint16;"
 207 .BR "typedef " ... " uint24;"
 208 .BR "typedef " ... " uint32;"
 209 .BR "typedef " ... " uint64;"
 210 .BR "typedef " ... " kludge64;"
 211 .PP
 212 .fi
 213 In the following,
 214 .I w
 215 is one of
 216 .BR 8 ,
 217 .BR 16 ,
 218 .BR 24 ,
 219 and
 220 .BR 32 ,
 221 and, on platforms with a 64-bit type,
 222 .BR 64 ;
 223 and
 224 .I we
 225 is one of
 226 .BR 8 ,
 227 .BR 16 ,
 228 .BR 16_L ,
 229 .BR 16_B ,
 230 .BR 24 ,
 231 .BR 24_L ,
 232 .BR 24_B ,
 233 .BR 32 ,
 234 .BR 32_L ,
 235 and
 236 .BR 32_B ,
 237 and, on platforms with a 64-bit type,
 238 .BR 64 ,
 239 .BR 64_L ,
 240 and
 241 .BR 64_B .
 242 .nf
 243 .PP
 244 .BI "#define TY_" we " " type
 245 .BI "#define SZ_" we " \fR..."
 246 .BI "#define MASK_" we " \fR..."
 247 .PP
 248 .BI "#define DOUINTSZ(" f ") \fR..."
 249 .BI "#define DOUINTCONV(" f ") \fR..."
 250 .PP
 251 .IB type " U" w ( v );
 252 .PP
 253 .IB type " LSL" w ( type " " v ", int " s );
 254 .IB type " LSR" w ( type " " v ", int " s );
 255 .IB type " ROL" w ( type " " v ", int " s );
 256 .IB type " ROR" w ( type " " v ", int " s );
 257 .PP
 258 .BI "octet GETBYTE(void *" p ", size_t " o );
 259 .BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
 260 .PP
 261 .IB type " LOAD" we "(void *" p );
 262 .BI "void STORE" we "(void *" p ", " type " " v );
 263 .PP
 264 .BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
 265 .BI "kludge64 X64(" hexh ", " hexl );
 266 .BI "void ASSIGN64(kludge64 &" d ", " x );
 267 .BI "uint32 HI64(kludge64 " x );
 268 .BI "uint32 LO64(kludge64 " x );
 269 .IB ty " GET64(" ty ", kludge64 " x );
 270 .BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 271 .BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 272 .BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 273 .BI "void CPL64(kludge64 &" d ", kludge64 " x );
 274 .BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 275 .BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 276 .BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
 277 .BI "int ZERO64(kludge64 " x );
 278 .fi
 279 .
 280 .\"--------------------------------------------------------------------------
 281 .SH DESCRIPTION
 282 .
 283 The header file
 284 .B <mLib/bits.h>
 285 contains a number of useful definitions for portably dealing with bit-
 286 and byte-level manipulation of larger quantities.  The various macros
 287 and types are named fairly systematically.
 288 .PP
 289 The header provides utilities for working with 64-bit quantities, but a
 290 64-bit integer type is not guaranteed to exist under C89 rules.  This
 291 header takes two approaches.  Firstly, if a 64-bit type is found, the
 292 header defines the macro
 293 .B HAVE_UINT64
 294 and defines the various
 295 .RB ... 64
 296 macros as described below.  Secondly, it unconditionally defines a type
 297 .B kludge64
 298 and a family of macros for working with it.  See below for details.
 299 .
 300 .SS "Type definitions"
 301 A number of types are defined.
 302 .TP
 303 .B octet
 304 Equivalent to
 305 .BR "unsigned char" .
 306 This is intended to be used when a character array is used to represent
 307 the octets of some external data format.  Note that on some
 308 architectures the
 309 .B "unsigned char"
 310 type may occupy more than 8 bits.
 311 .TP
 312 .B uint16
 313 Equivalent to
 314 .BR "unsigned short" .
 315 Intended to be used when a 16-bit value is required.  This type is
 316 always capable of representing any 16-bit unsigned value, but the actual
 317 type may be wider than 16 bits and will require masking.
 318 .TP
 319 .B uint24
 320 Equivalent to some (architecture-dependent) standard type.  Capable of
 321 representing any unsigned 24-bit value, although the actual type may
 322 be wider than 24 bits.
 323 .TP
 324 .B uint32
 325 Equivalent to some (architecture-dependent) standard type.  Capable of
 326 representing any unsigned 32-bit value, although the actual type may
 327 be wider than 32 bits.
 328 .TP
 329 .B uint64
 330 Equivalent to some (architecture-dependent) standard type, if it exists.
 331 Capable of representing any unsigned 64-bit value, although the
 332 actual type may be wider than 64 bits.
 333 .
 334 .SS "Size/endianness suffixes"
 335 Let
 336 .I w
 337 be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
 338 Furthermore, let
 339 .I we
 340 be one of the size-and-endian suffixes
 341 .IR w ,
 342 or, where
 343 .IR w \~>\~8,
 344 .IB w _L
 345 or
 346 .IB w _B \fR,
 347 where
 348 .RB ` _L '
 349 denotes little-endian (Intel, VAX) representation, and
 350 .RB ` _B '
 351 denotes big-endian (IBM, network) representation; omitting an explicit
 352 suffix gives big-endian order by default, since this is most common in
 353 portable data formats.
 354 .PP
 355 The macro invocation
 356 .BI DOUINTSZ( f )
 357 invokes a given macro
 358 .I f
 359 repeatedly, as
 360 .IB f ( w )
 361 for each size suffix
 362 .I w
 363 listed above.
 364 .PP
 365 The macro invocation
 366 .BI DOUINTCONV( f )
 367 invokes a given macro
 368 .I f
 369 repeatedly, as
 370 .IR f ( w ", " we ", " suff )
 371 where
 372 .I we
 373 ranges over size-and-endian suffixes as described above,
 374 .I w
 375 is just the corresponding bit width, as an integer, and
 376 .I suff
 377 is a suffix
 378 .IR w ,
 379 .IB w l\fR,
 380 or
 381 .IB w b\fR,
 382 suitable for a C function name.
 383 .PP
 384 These macros are intended to be used to define families of related
 385 functions.
 386 .
 387 .SS "Utility macros"
 388 For each size-and-endian suffix
 389 .IR we ,
 390 the following macros are defined.
 391 .TP
 392 .BI TY_ we
 393 A synonym for the appropriate one of the types
 394 .BR octet ,
 395 .BR uint32 ,
 396 etc.\& listed above.
 397 .TP
 398 .BI SZ_ we
 399 The number of octets needed to represent a value of the corresponding
 400 type; i.e., this is
 401 .IR w /8.
 402 .TP
 403 .BI MASK_ we
 404 The largest integer representable in the corresponding type; i.e., this
 405 is
 406 .RI 2\*(ss w \*(se\~\-\~1.
 407 .PP
 408 (Note that the endianness suffix is irrelevant in the above
 409 definitions.)
 410 .PP
 411 For each size suffix
 412 .IR w ,
 413 the macro invocation
 414 .BI U w ( x )
 415 coerces an integer
 416 .I x
 417 to the appropriate type; specifically, it returns the smallest
 418 nonnegative integer congruent to
 419 .I x
 420 (modulo
 421 .RI 2\*(ss w \*(se).
 422 .
 423 .SS "Shift and rotate"
 424 For each size suffix
 425 .IR w ,
 426 the macro invocations
 427 .BI LSL w ( x ", " n )
 428 and
 429 .BI LSR w ( x ", " n )
 430 shift a
 431 .IR w -bit
 432 quantity
 433 .I x
 434 left or right, respectively, by
 435 .I n
 436 places; if
 437 .IR n \~\(>=\~ w
 438 then
 439 .I n
 440 is reduced modulo
 441 .IR w .
 442 (This behaviour is unfortunate, but (a) it's what a number of CPUs
 443 provide natively, and (b) it's a cheap way to prevent undefined
 444 behaviour.)  Similarly,
 445 .BI ROL w ( x ", " n )
 446 and
 447 .BI ROR w ( x ", " n )
 448 rotate a
 449 .IR w -bit
 450 quantity
 451 .I x
 452 left or right, respectively, by
 453 .I n
 454 places.
 455 .
 456 .SS "Byte order conversions"
 457 For each size suffix
 458 .IR w ,
 459 the macro invocation
 460 .BI ENDSWAP w ( x )
 461 returns the
 462 .IR w -bit
 463 value
 464 .IR x
 465 with its bytes reversed.  The
 466 .B ENDSWAP8
 467 macro does nothing (except truncate its operand to 8 bits), but is
 468 provided for the sake of completeness.
 469 .PP
 470 A
 471 .I big-endian
 472 representation stores the most significant octet of an integer at the
 473 lowest address, with the following octets in decreasing order of
 474 significance.  A
 475 .I little-endian
 476 representation instead stores the
 477 .I least
 478 significant octet at the lowest address, with the following octets in
 479 increasing order of significance.  An environment has a preferred order
 480 for arranging the constituent octets of an integer of some given size in
 481 memory; this might be either the big- or little-endian representation
 482 just described, or something else strange.
 483 .PP
 484 It might be possible to rearrange the bits in an integer so that, when
 485 that integer is stored to memory in the environment's preferred manner,
 486 you end up with the big- or little-endian representation of the original
 487 integer; and, similarly, it might be possible to load a big- or
 488 little-endian representation of an integer into a variable using the
 489 environment's preferred ordering and then rearrange the bits so as to
 490 recover the integer value originally represented.  If the environment is
 491 sufficiently strange, these things might not be possible, but this is
 492 actually quite rare.
 493 .PP
 494 Say that an integer has been converted to
 495 .I big-
 496 or
 497 .I "little-endian form"
 498 if, when it is stored in memory in the environment's preferred manner,
 499 one ends up with a big- or little-endian representation of the original
 500 integer.  Equivalently, if one starts with a big- or little-endian
 501 representation of some integer, and loads it into a variable using the
 502 environment's preferred manner, one ends up with the big- or
 503 little-endian form of the original integer.
 504 .PP
 505 If these things are possible, then the following macros are defined.
 506 .TP
 507 .BI HTOL w ( x )
 508 Convert a
 509 .IR w -bit
 510 integer
 511 .I x
 512 to little-endian form.
 513 .TP
 514 .BI HTOB w ( x )
 515 Convert a
 516 .IR w -bit
 517 integer
 518 .I x
 519 to big-endian form.
 520 .TP
 521 .BI LTOH w ( x )
 522 Convert a
 523 .IR w -bit
 524 integer
 525 .I x
 526 from little-endian form.
 527 .TP
 528 .BI BTOH w ( x )
 529 Convert a
 530 .IR w -bit
 531 integer
 532 .I x
 533 from big-endian form.
 534 .
 535 .SS "Load and store"
 536 The macro invocation
 537 .BI GETBYTE( p ", " o )
 538 returns the
 539 .IR o th
 540 octet following the address
 541 .IR p .
 542 Conversely,
 543 .BI PUTBYTE( p ", " o ", " v )
 544 stores
 545 .I v
 546 in the
 547 .IR o th
 548 byte following the address
 549 .IR p .
 550 These macros always operate on byte offsets regardless of the type of
 551 the pointer
 552 .IR p .
 553 .PP
 554 For each size suffix
 555 .IR w ,
 556 there may be a macro such that the invocation
 557 .BI RAW w ( p )
 558 is an lvalue designating the
 559 .IR w /8
 560 octets starting at address
 561 .IR p ,
 562 interpreted according to the environment's preferred representation,
 563 except that
 564 .I p
 565 need not be aligned in any particular fashion.  There are many reasons
 566 why this might not be possible; programmers are not normally expected to
 567 use these macros directly, and they are documented in case they are
 568 useful for special effects.
 569 .PP
 570 For each size-and-endian suffix
 571 .IR we ,
 572 the macro invocation
 573 .BI LOAD we ( p )
 574 loads and returns a value in the corresponding format at address
 575 .IR p ;
 576 similarly,
 577 .BI STORE we ( p ", " x )
 578 stores the value
 579 .I x
 580 at address
 581 .I p
 582 in the corresponding format.
 583 .
 584 .SS "64-bit support"
 585 For portability to environments without native 64-bit integers, the
 586 structure
 587 .B kludge64
 588 is defined.  If the target platform is known to have an unsigned 64-bit
 589 integer type, then this structure merely encapsulates a native integer,
 590 and a decent optimizing compiler can be expected to handle this exactly
 591 as if it were the native type.  Otherwise, it contains two 32-bit halves
 592 which are processed the hard way.
 593 .PP
 594 For each of the above macros with a suffix
 595 .BR 64 ,
 596 .BR 64_L ,
 597 or
 598 .BR 64_B ,
 599 an additional `kludge' macro is defined, whose name has an additional
 600 final underscore; e.g., the kludge macro corresponding to
 601 .B ROR64
 602 is
 603 .BR ROR64_ ;
 604 and that corresponding to
 605 .B LOAD64_L
 606 is
 607 .BR LOAD64_L_ .
 608 If the original macro would have
 609 .I returned
 610 a value of type
 611 .BR uint64 ,
 612 then the kludge macro has an additional first argument, denoted
 613 .IR d ,
 614 which should be an lvalue of type
 615 .BR kludge64 ,
 616 and the kludge macro will store its result in
 617 .IR d .
 618 The kludge macro's remaining arguments are the same as the original
 619 macro, except that where the original macro accepts an argument of type
 620 .BR uint64 ,
 621 the kludge macro accepts an argument of type
 622 .B kludge64
 623 instead.
 624 .PP
 625 Finally, a number of additional macros are provided, to make working
 626 with
 627 .B kludge64
 628 somewhat less awful.
 629 .TP
 630 .BI SET64( d ", " h ", " l )
 631 Set the high 32 bits of
 632 .I d
 633 to be
 634 .IR h ,
 635 and the low 32 bits to be
 636 .IR l .
 637 Both
 638 .I h
 639 and
 640 .I l
 641 may be arbitrary integers.
 642 .TP
 643 .BI X64( hexh ", " hexl )
 644 Expands to an initializer for an object of type
 645 .B kludge64
 646 where
 647 .I hexh
 648 and
 649 .I hexl
 650 encode the high and low 32-bit halves in hexadecimal, without any
 651 .B 0x
 652 prefix.
 653 .TP
 654 .BI ASSIGN64( d ", " x )
 655 Make
 656 .I d
 657 be a copy of the
 658 .B kludge64
 659 .IR x .
 660 .TP
 661 .BI HI64( x )
 662 Return the high 32 bits of
 663 .IR x .
 664 .TP
 665 .BI LO64( x )
 666 Return the low 32 bits of
 667 .IR x .
 668 .TP
 669 .BI GET64( t ", " x )
 670 Return the value of
 671 .I x
 672 as a value of type
 673 .IR t .
 674 If
 675 .I t
 676 is an unsigned integer type, then the value will be truncated to fit as
 677 necessary; if
 678 .I t
 679 is a signed integer type, then the behaviour is undefined if the value
 680 of
 681 .I x
 682 is too large.
 683 .TP
 684 .BI AND64( d ", " x ", " y )
 685 Set
 686 .I d
 687 to be the bitwise-and of the two
 688 .B kludge64
 689 arguments
 690 .I x
 691 and
 692 .IR y .
 693 .TP
 694 .BI OR64( d ", " x ", " y )
 695 Set
 696 .I d
 697 to be the bitwise-or of the two
 698 .B kludge64
 699 arguments
 700 .I x
 701 and
 702 .IR y .
 703 .TP
 704 .BI XOR64( d ", " x ", " y )
 705 Set
 706 .I d
 707 to be the bitwise-exclusive-or of the two
 708 .B kludge64
 709 arguments
 710 .I x
 711 and
 712 .IR y .
 713 .TP
 714 .BI CPL64( d ", " x )
 715 Set
 716 .I d
 717 to be the bitwise complement of the
 718 .B kludge64
 719 argument
 720 .IR x .
 721 .TP
 722 .BI ADD64( d ", " x ", " y )
 723 Set
 724 .I d
 725 to be the sum of the two
 726 .B kludge64
 727 arguments
 728 .I x
 729 and
 730 .IR y .
 731 .TP
 732 .BI SUB64( d ", " x ", " y )
 733 Set
 734 .I d
 735 to be the difference of the two
 736 .B kludge64
 737 arguments
 738 .I x
 739 and
 740 .IR y .
 741 .TP
 742 .BI CMP64( x ", " op ", " y )
 743 Here,
 744 .I x
 745 and
 746 .I y
 747 should be arguments of type
 748 .B kludge64
 749 and
 750 .I op
 751 should be one of the relational operators
 752 .BR == ,
 753 .BR < ,
 754 .BR <= ,
 755 .BR > ,
 756 or
 757 .B >=
 758 \(en
 759 .I not
 760 .BR != .
 761 Evaluates nonzero if
 762 .IR x \~ op \~ y .
 763 .TP
 764 .BI ZERO64( x )
 765 Evaluates nonzero if the
 766 .B kludge64
 767 argument
 768 .I x
 769 is exactly zero.
 770 .
 771 .\"--------------------------------------------------------------------------
 772 .SH "SEE ALSO"
 773 .
 774 .BR mLib (3).
 775 .
 776 .\"--------------------------------------------------------------------------
 777 .SH AUTHOR
 778 .
 779 Mark Wooding, <mdw@distorted.org.uk>
 780 .
 781 .\"----- That's all, folks --------------------------------------------------