chiark / gitweb /
math/mpx-mul4-x86-sse2.S: `mmla4' only needs 48 bytes of stack.
author Mark Wooding <mdw@distorted.org.uk>
Sat, 5 Nov 2016 21:28:22 +0000 (21:28 +0000)
committer Mark Wooding <mdw@distorted.org.uk>
Mon, 3 Apr 2017 09:11:42 +0000 (10:11 +0100)
math/mpx-mul4-x86-sse2.S

index a6613ed0da1fccd2b6bc2f0a0c95f64c26e9c0bf..e466cfa9bd7c24aa6adbe50d37dc6a0f4eafe4fa 100644 (file)
@@ -522,7 +522,7 @@ INTFUNC(mmul4)
        // of the sum U V + N Y to [EDI], leaving the remaining carry in
        // XMM4, XMM5, and XMM6.  The registers XMM0, XMM1, XMM2, XMM3, and
        // XMM7 are clobbered; the general-purpose registers are preserved.
-       stalloc 64                      // space for the carries
+       stalloc 48                      // space for the carries
   endprologue
 
        // Calculate W = U V, and leave it in the destination.  Stash the
@@ -547,7 +547,7 @@ INTFUNC(mmla4)
        // carry in XMM4, XMM5, and XMM6.  The registers XMM0, XMM1, XMM2,
        // XMM3, and XMM7 are clobbered; the general-purpose registers are
        // preserved.
-       stalloc 64                      // space for the carries
+       stalloc 48                      // space for the carries
   endprologue
 
        movd    xmm4, [edi +  0]
@@ -616,7 +616,7 @@ INTFUNC(mmla4)
        paddq   xmm6, [esp + 32]
 
        // And, with that, we're done.
-       stfree  64
+       stfree  48
        ret
 
 ENDFUNC