chiark / gitweb /
eglibc (2.11.3-4+deb6u3) squeeze-lts; urgency=medium
[eglibc.git] / ports / sysdeps / alpha / alphaev5 / rshift.s
1  # Alpha EV5 __mpn_rshift -- Shift a multi-limb number right by cnt bits.
2
3  # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
4
5  # This file is part of the GNU MP Library.
6
7  # The GNU MP Library is free software; you can redistribute it and/or modify
8  # it under the terms of the GNU Lesser General Public License as published by
9  # the Free Software Foundation; either version 2.1 of the License, or (at your
10  # option) any later version.
11
12  # The GNU MP Library is distributed in the hope that it will be useful, but
13  # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15  # License for more details.
16
17  # You should have received a copy of the GNU Lesser General Public License
18  # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
19  # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20  # MA 02111-1307, USA.
21
22
23  # INPUT PARAMETERS
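24  # res_ptr      r16   (destination limb array, written low limb first)
25  # s1_ptr       r17   (source limb array, read low limb first)
26  # size         r18   (number of 64-bit limbs)
27  # cnt          r19   (shift count in bits)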
28
29  # This code runs at 3.25 cycles/limb on the EV5.
30
31         .set    noreorder       # assembler must not reorder the instructions below
32         .set    noat            # $28 (the assembler temporary) is used below as a counter
33 .text
34         .align  3
35         .globl  __mpn_rshift
36         .ent    __mpn_rshift
37 __mpn_rshift:
38         .frame  $30,0,$26,0     # no stack frame; return address is in $26
39  # Each stored limb is (src[i] >> cnt) | (src[i+1] << (64-cnt)); $0 returns the bits shifted out.
40         ldq     $4,0($17)       # load first limb
41         subq    $31,$19,$20     # $20 = -cnt; shifts use only the low 6 bits, so this acts as 64-cnt
42         subq    $18,1,$18       # $18 = size-1 = limbs that need a following source limb
43         and     $18,4-1,$28     # number of limbs in first loop
44         sll     $4,$20,$0       # compute function result
45  # NOTE(review): no guard for size == 0 or cnt == 0 -- presumably callers guarantee size >= 1 and 0 < cnt < 64; confirm.
46         beq     $28,.L0         # (size-1) already a multiple of 4: skip the one-limb loop
47         subq    $18,$28,$18     # $18 = limbs left for the 4-way unrolled code (multiple of 4)
48
49         .align  3
50 .Loop0: ldq     $3,8($17)       # $3 = next source limb; $4 holds the current one
51         addq    $16,8,$16       # advance res_ptr (store below uses -8 to compensate)
52         srl     $4,$19,$5       # low part: current limb >> cnt
53         addq    $17,8,$17       # advance s1_ptr
54         subq    $28,1,$28       # one fewer limb to peel
55         sll     $3,$20,$6       # high part: next limb << (64-cnt)
56         or      $3,$3,$4        # register move: next limb becomes the current limb
57         or      $5,$6,$8        # combine the two parts into the result limb
58         stq     $8,-8($16)      # store at the pre-increment res_ptr position
59         bne     $28,.Loop0
60
61 .L0:    srl     $4,$19,$24      # $24 = current limb >> cnt (low part of the next result limb)
62         beq     $18,.Lend       # no limbs left: $24 is the top result limb
63  # warm up phase 1
64         ldq     $1,8($17)       # load the next group of four source limbs into $1..$4
65         subq    $18,4,$18       # account for this group
66         ldq     $2,16($17)
67         ldq     $3,24($17)
68         ldq     $4,32($17)
69         beq     $18,.Lend1      # that was the only group: drain without further loads
70  # warm up phase 2
71         sll     $1,$20,$7       # start result 0 of the group (high part)
72         srl     $1,$19,$21      # low part of result 1
73         sll     $2,$20,$8       # start result 1 (high part)
74         ldq     $1,40($17)      # begin loading the following group into $1..$4
75         srl     $2,$19,$22      # low part of result 2
76         ldq     $2,48($17)
77         sll     $3,$20,$5       # start result 2 (high part)
78         or      $7,$24,$7       # result 0 complete: previous limb >> cnt joined in
79         srl     $3,$19,$23      # low part of result 3
80         or      $8,$21,$8       # result 1 complete
81         sll     $4,$20,$6       # start result 3 (high part)
82         ldq     $3,56($17)
83         srl     $4,$19,$24      # carry into the next group: last limb >> cnt
84         ldq     $4,64($17)
85         subq    $18,4,$18       # account for the group just loaded
86         beq     $18,.Lend2      # nothing beyond it: drain both groups in flight
87         .align  4
88  # main loop
89 .Loop:  stq     $7,0($16)       # store result 0 of the group computed last time round
90         or      $5,$22,$5       # result 2 complete
91         stq     $8,8($16)       # store result 1
92         or      $6,$23,$6       # result 3 complete
93
94         sll     $1,$20,$7       # start result 0 of the next group
95         subq    $18,4,$18       # four more limbs accounted for
96         srl     $1,$19,$21      # low part of next result 1
97         unop    # ldq   $31,-96($17)   (no-op; disabled load kept from the original, presumably slot padding)
98
99         sll     $2,$20,$8       # start next result 1
100         ldq     $1,72($17)      # load limbs for the group after next
101         srl     $2,$19,$22      # low part of next result 2
102         ldq     $2,80($17)
103
104         stq     $5,16($16)      # store result 2
105         or      $7,$24,$7       # next result 0 complete (uses carry from previous group)
106         stq     $6,24($16)      # store result 3
107         or      $8,$21,$8       # next result 1 complete
108
109         sll     $3,$20,$5       # start next result 2
110         unop    # ldq   $31,-96($17)   (no-op; disabled load kept from the original, presumably slot padding)
111         srl     $3,$19,$23      # low part of next result 3
112         addq    $16,32,$16      # advance res_ptr by four limbs
113
114         sll     $4,$20,$6       # start next result 3
115         ldq     $3,88($17)
116         srl     $4,$19,$24      # new carry: last limb of this group >> cnt
117         ldq     $4,96($17)
118
119         addq    $17,32,$17      # advance s1_ptr by four limbs
120         bne     $18,.Loop       # repeat while limbs remain
121  # cool down phase 2/1
122 .Lend2: stq     $7,0($16)       # drain: store the group already computed ...
123         or      $5,$22,$5
124         stq     $8,8($16)
125         or      $6,$23,$6
126         sll     $1,$20,$7       # ... while computing the final group from $1..$4 (no more loads)
127         srl     $1,$19,$21
128         sll     $2,$20,$8
129         srl     $2,$19,$22
130         stq     $5,16($16)
131         or      $7,$24,$7
132         stq     $6,24($16)
133         or      $8,$21,$8
134         sll     $3,$20,$5
135         srl     $3,$19,$23
136         sll     $4,$20,$6
137         srl     $4,$19,$24      # $24 = last source limb >> cnt
138  # cool down phase 2/2
139         stq     $7,32($16)      # store the final group of four results
140         or      $5,$22,$5
141         stq     $8,40($16)
142         or      $6,$23,$6
143         stq     $5,48($16)
144         stq     $6,56($16)
145  # cool down phase 2/3
146         stq     $24,64($16)     # top result limb: last source limb >> cnt, upper bits zero
147         ret     $31,($26),1
148
149  # cool down phase 1/1
150 .Lend1: sll     $1,$20,$7       # only one group of four was loaded: compute it in full
151         srl     $1,$19,$21
152         sll     $2,$20,$8
153         srl     $2,$19,$22
154         sll     $3,$20,$5
155         or      $7,$24,$7       # result 0 = carry from .L0 | limb << (64-cnt)
156         srl     $3,$19,$23
157         or      $8,$21,$8
158         sll     $4,$20,$6
159         srl     $4,$19,$24      # $24 = last source limb >> cnt
160  # cool down phase 1/2
161         stq     $7,0($16)       # store the four results
162         or      $5,$22,$5
163         stq     $8,8($16)
164         or      $6,$23,$6
165         stq     $5,16($16)
166         stq     $6,24($16)
167         stq     $24,32($16)     # top result limb
168         ret     $31,($26),1
169
170 .Lend:  stq     $24,0($16)      # no unrolled work: store the top result limb computed at .L0
171         ret     $31,($26),1
172         .end    __mpn_rshift