chiark - git - mdw - mLib/blob - utils/bits.3

   1 .\" -*-nroff-*-
   2 .TH bits 3 "20 June 1999" "Straylight/Edgeware" "mLib utilities library"
   3 .ie t \{\
   4 .  ds ss \s8\u
   5 .  ds se \d\s0
   6 .\}
   7 .el \{\
   8 .  ds ss ^
   9 .  ds se
  10 .\}
  11 .SH NAME
  12 bits \- portable bit manipulation macros
  13 .\" octet
  14 .\" uint16
  15 .\" uint24
  16 .\" uint32
  17 .\" uint64
  18 .\" kludge64
  19 .\"
  20 .\" MASK_8
  21 .\" MASK_16
  22 .\" MASK_16_L
  23 .\" MASK_16_B
  24 .\" MASK_24
  25 .\" MASK_24_L
  26 .\" MASK_24_B
  27 .\" MASK_32
  28 .\" MASK_32_L
  29 .\" MASK_32_B
  30 .\" MASK_64
  31 .\" MASK_64_L
  32 .\" MASK_64_B
  33 .\"
  34 .\" SZ_8
  35 .\" SZ_16
  36 .\" SZ_16_L
  37 .\" SZ_16_B
  38 .\" SZ_24
  39 .\" SZ_24_L
  40 .\" SZ_24_B
  41 .\" SZ_32
  42 .\" SZ_32_L
  43 .\" SZ_32_B
  44 .\" SZ_64
  45 .\" SZ_64_L
  46 .\" SZ_64_B
  47 .\"
  48 .\" TY_8
  49 .\" TY_16
  50 .\" TY_16_L
  51 .\" TY_16_B
  52 .\" TY_24
  53 .\" TY_24_L
  54 .\" TY_24_B
  55 .\" TY_32
  56 .\" TY_32_L
  57 .\" TY_32_B
  58 .\" TY_64
  59 .\" TY_64_L
  60 .\" TY_64_B
  61 .\"
  62 .\" DOUINTSZ
  63 .\" DOUINTCONV
  64 .\"
  65 .\" @U8
  66 .\" @U16
  67 .\" @U24
  68 .\" @U32
  69 .\" @U64
  70 .\" @U64_
  71 .\"
  72 .\" @LSL8
  73 .\" @LSR8
  74 .\" @LSL16
  75 .\" @LSR16
  76 .\" @LSL24
  77 .\" @LSR24
  78 .\" @LSL32
  79 .\" @LSR32
  80 .\" @LSL64
  81 .\" @LSR64
  82 .\" @LSL64_
  83 .\" @LSR64_
  84 .\"
  85 .\" @ROL8
  86 .\" @ROR8
  87 .\" @ROL16
  88 .\" @ROR16
  89 .\" @ROL24
  90 .\" @ROR24
  91 .\" @ROL32
  92 .\" @ROR32
  93 .\" @ROL64
  94 .\" @ROR64
  95 .\" @ROL64_
  96 .\" @ROR64_
  97 .\"
  98 .\" ENDSWAP16
  99 .\" ENDSWAP32
 100 .\" ENDSWAP64
 101 .\"
 102 .\" BTOH16
 103 .\" LTOH16
 104 .\" HTOB16
 105 .\" HTOL16
 106 .\" BTOH32
 107 .\" LTOH32
 108 .\" HTOB32
 109 .\" HTOL32
 110 .\" BTOH64
 111 .\" LTOH64
 112 .\" HTOB64
 113 .\" HTOL64
 114 .\"
 115 .\" RAW8
 116 .\" RAW16
 117 .\" RAW32
 118 .\" RAW64
 119 .\"
 120 .\" @GETBYTE
 121 .\" @PUTBYTE
 122 .\"
 123 .\" @LOAD8
 124 .\" @STORE8
 125 .\"
 126 .\" @LOAD16_L
 127 .\" @LOAD16_B
 128 .\" @LOAD16
 129 .\" @STORE16_L
 130 .\" @STORE16_B
 131 .\" @STORE16
 132 .\"
 133 .\" @LOAD24_L
 134 .\" @LOAD24_B
 135 .\" @LOAD24
 136 .\" @STORE24_L
 137 .\" @STORE24_B
 138 .\" @STORE24
 139 .\"
 140 .\" @LOAD32_L
 141 .\" @LOAD32_B
 142 .\" @LOAD32
 143 .\" @STORE32_L
 144 .\" @STORE32_B
 145 .\" @STORE32
 146 .\"
 147 .\" @LOAD64_L
 148 .\" @LOAD64_B
 149 .\" @LOAD64
 150 .\" @STORE64_L
 151 .\" @STORE64_B
 152 .\" @STORE64
 153 .\"
 154 .\" @LOAD64_L_
 155 .\" @LOAD64_B_
 156 .\" @LOAD64_
 157 .\" @STORE64_L_
 158 .\" @STORE64_B_
 159 .\" @STORE64_
 160 .\"
 161 .\" @SET64
 162 .\" @X64
 163 .\" @ASSIGN64
 164 .\" @HI64
 165 .\" @LO64
 166 .\" @GET64
 167 .\" @AND64
 168 .\" @OR64
 169 .\" @XOR64
 170 .\" @CPL64
 171 .\" @ADD64
 172 .\" @SUB64
 173 .\" @CMP64
 174 .\" @ZERO64
 175 .SH SYNOPSIS
 176 .nf
 177 .B "#include <mLib/bits.h>"
 178
 179 .BR "typedef " ... " octet;"
 180 .BR "typedef " ... " uint16;"
 181 .BR "typedef " ... " uint24;"
 182 .BR "typedef " ... " uint32;"
 183 .BR "typedef " ... " uint64;"
 184 .BR "typedef " ... " kludge64;"
 185
 186 .BI "#define TY_" we " " type
 187 .BI "#define SZ_" we " \fR..."
 188 .BI "#define MASK_" we " \fR..."
 189
 190 .BI "#define DOUINTSZ(" f ") \fR..."
 191 .BI "#define DOUINTCONV(" f ") \fR..."
 192
 193 .IB type " U" w ( v );
 194
 195 .IB type " LSL" w ( type " " v ", int " s );
 196 .IB type " LSR" w ( type " " v ", int " s );
 197 .IB type " ROL" w ( type " " v ", int " s );
 198 .IB type " ROR" w ( type " " v ", int " s );
 199
 200 .BI "octet GETBYTE(void *" p ", size_t " o );
 201 .BI "void PUTBYTE(void *" p ", size_t " o ", octet " v );
 202
 203 .IB type " LOAD" we "(void *" p );
 204 .BI "void STORE" we "(void *" p ", " type " " v );
 205
 206 .BI "void SET64(kludge64 &" d ", uint32 " h ", uint32 " l );
 207 .BI "kludge64 X64(" hexh ", " hexl );
 208 .BI "void ASSIGN64(kludge64 &" d ", " x );
 209 .BI "uint32 HI64(kludge64 " x );
 210 .BI "uint32 LO64(kludge64 " x );
 211 .IB t " GET64(" t ", kludge64 " x );
 212 .BI "void AND64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 213 .BI "void OR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 214 .BI "void XOR64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 215 .BI "void CPL64(kludge64 &" d ", kludge64 " x );
 216 .BI "void ADD64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 217 .BI "void SUB64(kludge64 &" d ", kludge64 " x ", kludge64 " y );
 218 .BI "int CMP64(kludge64 " x ", " op ", kludge64 " y );
 219 .BI "int ZERO64(kludge64 " x );
 220 .fi
 221 .SH DESCRIPTION
 222 The header file
 223 .B <mLib/bits.h>
 224 contains a number of useful definitions for portably dealing with bit-
 225 and byte-level manipulation of larger quantities.  The various macros
 226 and types are named fairly systematically.
 227 .PP
 228 The header provides utilities for working with 64-bit quantities, but a
 229 64-bit integer type is not guaranteed to exist under C89 rules.  This
 230 header takes two approaches.  Firstly, if a 64-bit type is found, the
 231 header defines the macro
 232 .B HAVE_UINT64
 233 and defines the various
 234 .RB ... 64
 235 macros as described below.  Secondly, it unconditionally defines a type
 236 .B kludge64
 237 and a family of macros for working with it.  See below for details.
 238 .
 239 .SS "Type definitions"
 240 A number of types are defined.
 241 .TP
 242 .B octet
 243 Equivalent to
 244 .BR "unsigned char" .
 245 This is intended to be used when a character array is used to represent
 246 the octets of some external data format.  Note that on some
 247 architectures the
 248 .B "unsigned char"
 249 type may occupy more than 8 bits.
 250 .TP
 251 .B uint16
 252 Equivalent to
 253 .BR "unsigned short" .
 254 Intended to be used when a 16-bit value is required.  This type is
 255 always capable of representing any 16-bit unsigned value, but the actual
 256 type may be wider than 16 bits and will require masking.
 257 .TP
 258 .B uint24
 259 Equivalent to some (architecture-dependent) standard type.  Capable of
 260 representing any unsigned 24-bit value, although the actual type may
 261 be wider than 24 bits.
 262 .TP
 263 .B uint32
 264 Equivalent to some (architecture-dependent) standard type.  Capable of
 265 representing any unsigned 32-bit value, although the actual type may
 266 be wider than 32 bits.
 267 .TP
 268 .B uint64
 269 Equivalent to some (architecture-dependent) standard type, if it exists.
 270 Capable of representing any unsigned 64-bit value, although the
 271 actual type may be wider than 64 bits.
 272 .
 273 .SS "Size/endianness suffixes"
 274 Let
 275 .I w
 276 be one of the size suffixes: 8, 16, 24, 32, and (if available) 64.
 277 Furthermore, let
 278 .I we
 279 be one of the size-and-endian suffixes
 280 .IR w ,
 281 or, where
 282 .IR w \~>\~8,
 283 .IB w _L
 284 or
 285 .IB w _B \fR,
 286 where
 287 .RB ` _L '
 288 denotes little-endian (Intel, VAX) representation, and
 289 .RB ` _B '
 290 denotes big-endian (IBM, network) representation; omitting an explicit
 291 suffix gives big-endian order by default, since this is most common in
 292 portable data formats.
 293 .PP
 294 The macro invocation
 295 .BI DOUINTSZ( f )
 296 invokes a given macro
 297 .I f
 298 repeatedly, as
 299 .IB f ( w )
 300 for each size suffix
 301 .I w
 302 listed above.
 303 .PP
 304 The macro invocation
 305 .BI DOUINTCONV( f )
 306 invokes a given macro
 307 .I f
 308 repeatedly, as
 309 .IR f ( w ", " we ", " suff )
 310 where
 311 .I we
 312 ranges over size-and-endian suffixes as described above,
 313 .I w
 314 is just the corresponding bit width, as an integer, and
 315 .I suff
 316 is a suffix
 317 .IR w ,
 318 .IB w l\fR,
 319 or
 320 .IB w b\fR,
 321 suitable for a C function name.
 322 .PP
 323 These macros are intended to be used to define families of related
 324 functions.
 325 .
 326 .SS "Utility macros"
 327 For each size-and-endian suffix
 328 .IR we ,
 329 the following macros are defined.
 330 .TP
 331 .BI TY_ we
 332 A synonym for the appropriate one of the types
 333 .BR octet ,
 334 .BR uint32 ,
 335 etc.\& listed above.
 336 .TP
 337 .BI SZ_ we
 338 The number of octets needed to represent a value of the corresponding
 339 type; i.e., this is
 340 .IR w /8.
 341 .TP
 342 .BI MASK_ we
 343 The largest integer representable in the corresponding type; i.e., this
 344 is
 345 .RI 2\*(ss w \*(se\~\-\~1.
 346 .PP
 347 (Note that the endianness suffix is irrelevant in the above
 348 definitions.)
 349 .PP
 350 For each size suffix
 351 .IR w ,
 352 the macro invocation
 353 .BI U w ( x )
 354 coerces an integer
 355 .I x
 356 to the appropriate type; specifically, it returns the smallest
 357 nonnegative integer congruent to
 358 .I x
 359 (modulo
 360 .RI 2\*(ss w \*(se).
 361 .
 362 .SS "Shift and rotate"
 363 For each size suffix
 364 .IR w ,
 365 the macro invocations
 366 .BI LSL w ( x ", " n )
 367 and
 368 .BI LSR w ( x ", " n )
 369 shift a
 370 .IR w -bit
 371 quantity
 372 .I x
 373 left or right, respectively, by
 374 .I n
 375 places; if
 376 .IR n \~\(>=\~ w
 377 then
 378 .I n
 379 is reduced modulo
 380 .IR w .
 381 (This behaviour is unfortunate, but (a) it's what a number of CPUs
 382 provide natively, and (b) it's a cheap way to prevent undefined
 383 behaviour.)  Similarly,
 384 .BI ROL w ( x ", " n )
 385 and
 386 .BI ROR w ( x ", " n )
 387 rotate a
 388 .IR w -bit
 389 quantity
 390 .I x
 391 left or right, respectively, by
 392 .I n
 393 places.
 394 .
 395 .SS "Byte order conversions"
 396 For each size suffix
 397 .IR w ,
 398 the macro invocation
 399 .BI ENDSWAP w ( x )
 400 returns the
 401 .IR w -bit
 402 value
 403 .IR x
 404 with its bytes reversed.  The
 405 .B ENDSWAP8
 406 macro does nothing (except truncate its operand to 8 bits), but is
 407 provided for the sake of completeness.
 408 .PP
 409 A
 410 .I big-endian
 411 representation stores the most significant octet of an integer at the
 412 lowest address, with the following octets in decreasing order of
 413 significance.  A
 414 .I little-endian
 415 representation instead stores the
 416 .I least
 417 significant octet at the lowest address, with the following octets in
 418 increasing order of significance.  An environment has a preferred order
 419 for arranging the constituent octets of an integer of some given size in
 420 memory; this might be either the big- or little-endian representation
 421 just described, or something else strange.
 422 .PP
 423 It might be possible to rearrange the bits in an integer so that, when
 424 that integer is stored to memory in the environment's preferred manner,
 425 you end up with the big- or little-endian representation of the original
 426 integer; and, similarly, it might be possible to load a big- or
 427 little-endian representation of an integer into a variable using the
 428 environment's preferred ordering and then rearrange the bits so as to
 429 recover the integer value originally represented.  If the environment is
 430 sufficiently strange, these things might not be possible, but this is
 431 actually quite rare.
 432 .PP
 433 Say that an integer has been converted to
 434 .I big-
 435 or
 436 .I "little-endian form"
 437 if, when it is stored in memory in the environment's preferred manner,
 438 one ends up with a big- or little-endian representation of the original
 439 integer.  Equivalently, if one starts with a big- or little-endian
 440 representation of some integer, and loads it into a variable using the
 441 environment's preferred manner, one ends up with the big- or
 442 little-endian form of the original integer.
 443 .PP
 444 If these things are possible, then the following macros are defined.
 445 .TP
 446 .BI HTOL w ( x )
 447 Convert a
 448 .IR w -bit
 449 integer
 450 .I x
 451 to little-endian form.
 452 .TP
 453 .BI HTOB w ( x )
 454 Convert a
 455 .IR w -bit
 456 integer
 457 .I x
 458 to big-endian form.
 459 .TP
 460 .BI LTOH w ( x )
 461 Convert a
 462 .IR w -bit
 463 integer
 464 .I x
 465 from little-endian form.
 466 .TP
 467 .BI BTOH w ( x )
 468 Convert a
 469 .IR w -bit
 470 integer
 471 .I x
 472 from big-endian form.
 473 .
 474 .SS "Load and store"
 475 The macro invocation
 476 .BI GETBYTE( p ", " o )
 477 returns the
 478 .IR o th
 479 octet following the address
 480 .IR p .
 481 Conversely,
 482 .BI PUTBYTE( p ", " o ", " v )
 483 stores
 484 .I v
 485 in the
 486 .IR o th
 487 byte following the address
 488 .IR p .
 489 These macros always operate on byte offsets regardless of the type of
 490 the pointer
 491 .IR p .
 492 .PP
 493 For each size suffix
 494 .IR w ,
 495 there may be a macro such that the invocation
 496 .BI RAW w ( p )
 497 is an lvalue designating the
 498 .IR w /8
 499 octets starting at address
 500 .IR p ,
 501 interpreted according to the environment's preferred representation,
 502 except that
 503 .I p
 504 need not be aligned in any particular fashion.  There are many reasons
 505 why this might not be possible; programmers are not normally expected to
 506 use these macros directly, and they are documented in case they are
 507 useful for special effects.
 508 .PP
 509 For each size-and-endian suffix
 510 .IR we ,
 511 the macro invocation
 512 .BI LOAD we ( p )
 513 loads and returns a value in the corresponding format at address
 514 .IR p ;
 515 similarly,
 516 .BI STORE we ( p ", " x )
 517 stores the value
 518 .I x
 519 at address
 520 .I p
 521 in the corresponding format.
 522 .
 523 .SS "64-bit support"
 524 For portability to environments without native 64-bit integers, the
 525 structure
 526 .B kludge64
 527 is defined.  If the target platform is known to have an unsigned 64-bit
 528 integer type, then this structure merely encapsulates a native integer,
 529 and a decent optimizing compiler can be expected to handle this exactly
 530 as if it were the native type.  Otherwise, it contains two 32-bit halves
 531 which are processed the hard way.
 532 .PP
 533 For each of the above macros with a suffix
 534 .BR 64 ,
 535 .BR 64_L ,
 536 or
 537 .BR 64_B ,
 538 an additional `kludge' macro is defined, whose name has an additional
 539 final underscore; e.g., the kludge macro corresponding to
 540 .B ROR64
 541 is
 542 .BR ROR64_ ;
 543 and that corresponding to
 544 .B LOAD64_L
 545 is
 546 .BR LOAD64_L_ .
 547 If the original macro would have
 548 .I returned
 549 a value of type
 550 .BR uint64 ,
 551 then the kludge macro has an additional first argument, denoted
 552 .IR d ,
 553 which should be an lvalue of type
 554 .BR kludge64 ,
 555 and the kludge macro will store its result in
 556 .IR d .
 557 The kludge macro's remaining arguments are the same as the original
 558 macro, except that where the original macro accepts an argument of type
 559 .BR uint64 ,
 560 the kludge macro accepts an argument of type
 561 .B kludge64
 562 instead.
 563 .PP
 564 Finally, a number of additional macros are provided, to make working
 565 with
 566 .B kludge64
 567 somewhat less awful.
 568 .TP
 569 .BI SET64( d ", " h ", " l )
 570 Set the high 32 bits of
 571 .I d
 572 to be
 573 .IR h ,
 574 and the low 32 bits to be
 575 .IR l .
 576 Both
 577 .I h
 578 and
 579 .I l
 580 may be arbitrary integers.
 581 .TP
 582 .BI X64( hexh ", " hexl )
 583 Expands to an initializer for an object of type
 584 .B kludge64
 585 where
 586 .I hexh
 587 and
 588 .I hexl
 589 encode the high and low 32-bit halves in hexadecimal, without any
 590 .B 0x
 591 prefix.
 592 .TP
 593 .BI ASSIGN64( d ", " x )
 594 Make
 595 .I d
 596 be a copy of the
 597 .B kludge64
 598 .IR x .
 599 .TP
 600 .BI HI64( x )
 601 Return the high 32 bits of
 602 .IR x .
 603 .TP
 604 .BI LO64( x )
 605 Return the low 32 bits of
 606 .IR x .
 607 .TP
 608 .BI GET64( t ", " x )
 609 Return the value of
 610 .I x
 611 as a value of type
 612 .IR t .
 613 If
 614 .I t
 615 is an unsigned integer type, then the value will be truncated to fit as
 616 necessary; if
 617 .I t
 618 is a signed integer type, then the behaviour is undefined if the value
 619 of
 620 .I x
 621 is too large.
 622 .TP
 623 .BI AND64( d ", " x ", " y )
 624 Set
 625 .I d
 626 to be the bitwise-and of the two
 627 .B kludge64
 628 arguments
 629 .I x
 630 and
 631 .IR y .
 632 .TP
 633 .BI OR64( d ", " x ", " y )
 634 Set
 635 .I d
 636 to be the bitwise-or of the two
 637 .B kludge64
 638 arguments
 639 .I x
 640 and
 641 .IR y .
 642 .TP
 643 .BI XOR64( d ", " x ", " y )
 644 Set
 645 .I d
 646 to be the bitwise-exclusive-or of the two
 647 .B kludge64
 648 arguments
 649 .I x
 650 and
 651 .IR y .
 652 .TP
 653 .BI CPL64( d ", " x )
 654 Set
 655 .I d
 656 to be the bitwise complement of the
 657 .B kludge64
 658 argument
 659 .IR x .
 660 .TP
 661 .BI ADD64( d ", " x ", " y )
 662 Set
 663 .I d
 664 to be the sum of the two
 665 .B kludge64
 666 arguments
 667 .I x
 668 and
 669 .IR y .
 670 .TP
 671 .BI SUB64( d ", " x ", " y )
 672 Set
 673 .I d
 674 to be the difference of the two
 675 .B kludge64
 676 arguments
 677 .I x
 678 and
 679 .IR y .
 680 .TP
 681 .BI CMP64( x ", " op ", " y )
 682 Here,
 683 .I x
 684 and
 685 .I y
 686 should be arguments of type
 687 .B kludge64
 688 and
 689 .I op
 690 should be one of the relational operators
 691 .BR == ,
 692 .BR < ,
 693 .BR <= ,
 694 .BR > ,
 695 or
 696 .B >=
 697 \(en
 698 .I not
 699 .BR != .
 700 Evaluates nonzero if
 701 .IR x \~ op \~ y .
 702 .TP
 703 .BI ZERO64( x )
 704 Evaluates nonzero if the
 705 .B kludge64
 706 argument
 707 .I x
 708 is exactly zero.
 709 .SH "SEE ALSO"
 710 .BR mLib (3).
 711 .SH AUTHOR
 712 Mark Wooding, <mdw@distorted.org.uk>
 713