From: RWilson@acorn.co.uk
Subject: Dithering in general
Date: 30 Jan 92 12:26:11 GMT

As well as the deathless 'How to find the closest colour out of the RISC OS
256' (yes, *anyone* can use this code (especially Find256) - we'd like an
acknowledgement, please!), here are some notes on dithering:

(a) There are some text books that get the Floyd Steinberg error weights WRONG
- the books say 2/8 3/8 and 3/8, but this (although cheaper to calculate)
is not F-S, where the weights are 7/16, 5/16, 3/16 and 1/16.

(b) ALL dithering algorithms look 'better' (less prone to regular artifact
patterning) if the display is scanned serpentine (l-r one raster line,
then r-l).

(c) There are more expensive (and slightly better than F-S) schemes: see
"Digital Halftoning" by Robert Ulichney published by the MIT Press, ISBN
0-262-21009-6.

(d) ChangeFSI gets right. :-)

(e) For god's sake, allow people access to the undithered data.

--Roger


From: RWilson@acorn.co.uk
Subject: How to find the closest colour out of the RISC OS 256
Date: 30 Jan 92 12:20:23 GMT

; MACRO ColErr
;
; Calculates an error value in $error corresponding to the difference
; between the two colours $col1 and $col2.  Requires two temporary
; registers.  All registers must be different.  This routine requires
; 16 instructions, three of which are multiples plus for instructions
; for the error loading calculations.  To speed up the multiplies
; the error value for each component is made positive.
;
; The error calculation is controlled by the following ``loading'' values.
; The basic component error calculation, given components c1 and c2 from
; $col1 and $col2, is:-
;
;       e := ((c1-c2) ^ 2) * load
;
; Thus a higher ``load'' makes errors in the component more significant when
; the component value itself is small, however has little effect when the
; component value is large.  Each loading must not exceed 65536/3 or the
; error calculation may overflow.  If a loading exceeds 32768/3 the result may
; have the top bit set.  The loadings are determined algorithmically by the
; following macros - this avoids having to do a real multiplication.  The
; macros have the form:-
;
;       XWeight $err, $sqr
;
; and calculate:-
;
;       $err = $sqr * XLoad     (for BLoad)
;       $err += $sqr * XLoad    (for GLoad, RLoad)
;
; $sqr may be overwritten.  They are called in the order BLoad, GLoad
; RLoad, and are only called if the corresponding loading value is not
; 1.
;
BLoad   EQU     1
        MACRO
$label  BWeight $error, $sqr
        ASSERT  0
        MEND

GLoad   EQU     10
        MACRO
$label  GWeight $error, $sqr
$label  ADD     $sqr, $sqr, $sqr, LSL #2
        ADD     $error, $error, $sqr, LSL #1
        MEND

RLoad   EQU     3
        MACRO
$label  RWeight $error, $sqr
$label  ADD     $sqr, $sqr, $sqr, LSL #1
        ADD     $error, $error, $sqr
        MEND

        MACRO
$label  ColErr  $error, $col1, $col2, $temp1, $temp2

$label  MOV     $temp1, $col1, LSR #24
        SUBS    $temp2, $temp1, $col2, LSR #24
        RSBLT   $temp2, $temp2, #0
   [    BLoad /= 1
        MUL     $temp1, $temp2, $temp2
        BWeight $error, $temp1
   |
        MUL     $error, $temp2, $temp2
   ]
        MOV     $temp2, #255
        AND     $temp1, $temp2, $col1, LSR #16
        AND     $temp2, $temp2, $col2, LSR #16
        SUBS    $temp2, $temp1, $temp2
        RSBLT   $temp2, $temp2, #0
   [    GLoad /= 1
        MUL     $temp1, $temp2, $temp2
        GWeight $error, $temp1
   |
        MLA     $error, $temp2, $temp2, $error
   ]
        MOV     $temp2, #255
        AND     $temp1, $temp2, $col1, LSR #8
        AND     $temp2, $temp2, $col2, LSR #8
        SUBS    $temp2, $temp1, $temp2
        RSBLT   $temp2, $temp2, #0
   [    RLoad /= 1
        MUL     $temp1, $temp2, $temp2
        RWeight $error, $temp1
   |
        MLA     $error, $temp2, $temp2, $error
   ]
        MEND
;
; MACRO CompErr
;
; This macro also calculates an error value, however the second colour
; is specified as three separate r, g, b values.  The registers containing
; these values can be the same, if desired.  The registers should not be
; the same as any of the other registers.  The calculation needs 16
; instructions, including three multiplies.
;
        MACRO
$label  CompErr $error, $col, $red, $green, $blue, $temp1, $temp2

$label  SUBS    $temp2, $blue, $col, LSR #24
        RSBLT   $temp2, $temp2, #0
   [    BLoad /= 1
        MUL     $temp1, $temp2, $temp2
        BWeight $error, $temp1
   |
        MUL     $error, $temp2, $temp2
   ]
        AND     $temp1, $col, #&FF0000        ; green component, still shifted
        SUBS    $temp2, $green, $temp1, LSR #16
        RSBLT   $temp2, $temp2, #0             ; |green error|
   [    GLoad /= 1
        MUL     $temp1, $temp2, $temp2
        GWeight $error, $temp1
   |
        MLA     $error, $temp2, $temp2, $error
   ]
        AND     $temp1, $col, #&FF00          ; red component, still shifted
        SUBS    $temp2, $red, $temp1, LSR #8
        RSBLT   $temp2, $temp2, #0             ; |red error|
   [    RLoad /= 1
        MUL     $temp1, $temp2, $temp2
        RWeight $error, $temp1
   |
        MLA     $error, $temp2, $temp2, $error
   ]
        MEND
;
; MACRO FindCol
;
; This macro finds the closest colour to the given r, g, b triple from
; an array of (word sized) RGB values (encoded BBGGRR00).  The macro
; preserves the $red, $green and $blue values, exits with $error set
; to that of the found colour and $index set to the index of the entry.  Again,
; all arguments must be different registers.
;
        MACRO
$label  FindCol $list, $listend, $red, $green, $blue, $error, $index, $ptr, $col, $temp1, $temp2, $temp3

$label  MOV     $error, #&FFFFFFFF           ; maximum error
        SUB     $ptr, $list, #4              ; points to entry to last entry tried
0:      LDR     $col, [$ptr, #4]!
        CompErr $temp3, $col, $red, $green, $blue, $temp1, $temp2
        CMP     $temp3, $error
        MOVLS   $error, $temp3
        SUBLS   $index, $list, $ptr          ; index * 4
        CMP     $ptr, $listend
        BLO     0b
        MOV     $index, $index, LSR #2
        MEND
;
; MACRO Find256
;
; This macro finds the best matching colour for the *standard* ARM 256 entry palette - the
; one with the R/G/B/T (tint) bits.  The algorithm returns a palette value encoded in the
; standard way (BGGRBRTT) in $col and the error in $error.
; All arguments must be different registers.  The loop is about 44 instructions, including
; the normal three multiplies.  The code goes round it four times, there is a further 12
; instruction overhead.
;
; The registers must all be different except for $red, $green and $blue, which can be
; the same if desired.
;
; The ARM palette entries are assumed to expand a 4 bit component to an 8 bit component
; using c<<4|c - this has been determined experimentally to give good results.
;
        MACRO
$label  Find256 $red, $green, $blue, $error, $col, $tint, $temp1, $temp2, $temp3, $pixel

$label  MOV     $error, #&FFFFFFFF
        MOV     $tint, #&30:SHL:23                ; tint bits unexpanded

0:      RSB     $temp1, $tint, #&20:SHL:23        ; overflow is not possible here
        SUB     $temp2, $blue, $blue, LSR #4      ; effectively multiplication by 16/17
        ADDS    $temp1, $temp1, $temp2, LSL #23
        ;
        ; At this point the top bits of $temp1 hold the best blue bit values given
        ; the current $tint tint bits, however the desired value may be >11tt or <00tt,
        ; in either case the top bit (bit 31) of $temp1 will be set, hence the N flag
        ; will be set in the PSR.  We must distinguish overflow (>11tt) from a simple
        ; negative result (<00tt) and truncate both to the appropriate end of the
        ; scale.  We have calculated (blue-tint+&22)<<23.  The overflow (V) flag will
        ; ONLY be set for >11tt; the other possible results (in the range &FF<<23 to
        ; -&17<<23 are representable without overflow), so:-
        ;
        MOVVSS  $temp1, #&7F000000                 ; clears the N flag!
        MOVMI   $temp1, #0
        ;
        ; Now extract the blue bits and reconstruct the real (expanded) blue value.
        ;
        AND     $temp1, $temp1, #&60000000         ; two blue bits
        ADD     $temp1, $temp1, $tint              ; plus tint
        ADD     $pixel, $temp1, $temp1, LSR #4     ; expand component bits - 8 bit blue value
        ;
        ; Calculate the error as above.
        ;
        SUBS    $temp2, $blue, $pixel, LSR #23
        RSBLT   $temp2, $temp2, #0                 ; speeds up multiplication
   [    BLoad /= 1
        MUL     $temp1, $temp2, $temp2
        BWeight $temp3, $temp1
   |
        MUL     $temp3, $temp2, $temp2
   ]
        ;
        ; Repeat this for the green component, accumulating the error
        ;
        RSB     $temp1, $tint, #&20:SHL:23
        SUB     $temp2, $green, $green, LSR #4
        ADDS    $temp1, $temp1, $temp2, LSL #23
        MOVVSS  $temp1, #&7F000000
        MOVMI   $temp1, #0
        ;
        AND     $temp1, $temp1, #&60000000         ; two green bits
        ADD     $temp1, $tint, $temp1              ; 4 bit green value
        ADD     $temp1, $temp1, $temp1, LSR #4     ; expand component bits
        ORR     $pixel, $pixel, $temp1, LSR #8     ; Accumulate bits in $pixel
        ;
        SUBS    $temp2, $green, $temp1, LSR #23
        RSBLT   $temp2, $temp2, #0
   [    GLoad /= 1
        MUL     $temp1, $temp2, $temp2
        GWeight $temp3, $temp1
   |
        MLA     $temp3, $temp2, $temp2, $temp3
   ]
        ;
        ; And the red component:-
        ;
        RSB     $temp1, $tint, #&20:SHL:23
        SUB     $temp2, $red, $red, LSR #4
        ADDS    $temp1, $temp1, $temp2, LSL #23
        MOVVSS  $temp1, #&7F000000
        MOVMI   $temp1, #0
        ;
        AND     $temp1, $temp1, #&60000000         ; two red bits
        ADD     $temp1, $tint, $temp1              ; 4 bit red value
        ADD     $temp1, $temp1, $temp1, LSR #4     ; expand component bits
        ORR     $pixel, $pixel, $temp1, LSR #16    ; Accumulate bits in $pixel
        ;
        SUBS    $temp2, $red, $temp1, LSR #23
        RSBLT   $temp2, $temp2, #0
   [    RLoad /= 1
        MUL     $temp1, $temp2, $temp2
        RWeight $temp3, $temp1
   |
        MLA     $temp3, $temp2, $temp2, $temp3
   ]
        ;
        ; $temp3 contains the error for the ARM value in $pixel (actually this value
        ; is shifted right by 1 bit because of the LSL 23 above).  Check the error
        ; and see if this is a better pixel.
        ;
        CMP     $temp3, $error
        MOVLS   $error, $temp3                ; LS, so selects lower tint in preference
        MOVLS   $col, $pixel                  ; $col holds best match
        ;
        ; Try the next tint
        ;
        SUBS    $tint, $tint, #&10:SHL:23
        BGE     0b
        ;
        ; $error is the error, and is directly comparable with the $error
        ; value from the other macros.  $col to the ARM palette entry using
        ; the appropriate bit manipulations.  The value is currently:-
        ;
        ;       0BBTT011 1GGTT011 1RRTT011 10000000
        ;
        ; We need to convert this to the form:-
        ;
        ;    76543210
        ;    BGGRBRTT
        ;
        AND     $temp1, $col, #&40000000        ; B   (needs >> 23)
        MOV     $temp2, $temp1, LSR #23
        AND     $temp1, $col, #&600000          ; GG  (needs >> 16)
        ORR     $temp2, $temp2, $temp1, LSR #16
        AND     $temp1, $col, #&4000            ; R   (needs >> 10)
        ORR     $temp2, $temp2, $temp1, LSR #10
        AND     $temp1, $col, #&20000000        ; B   (needs >> 26)
        ORR     $temp2, $temp2, $temp1, LSR #26
        AND     $temp1, $col, #&3800            ; RTT (needs >> 11)
        ORR     $col, $temp2, $temp1, LSR #11
        MEND