1 Unfortunately, this patch does not work.
2 In the meantime, use local-nosavesse.diff instead.
4 --- a/linuxthreads/descr.h
5 +++ b/linuxthreads/descr.h
7 /* This overlaps tcbhead_t (see tls.h), as used for TLS without threads. */
10 + tcbhead_t tcbheader;
13 void *tcb; /* Pointer to the TCB. This is not always
14 --- a/linuxthreads/sysdeps/x86_64/tcb-offsets.sym
15 +++ b/linuxthreads/sysdeps/x86_64/tcb-offsets.sym
18 MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads)
19 POINTER_GUARD offsetof (tcbhead_t, pointer_guard)
20 +RTLD_SAVESPACE_SSE offsetof (tcbhead_t, rtld_savespace_sse)
21 --- a/linuxthreads/sysdeps/x86_64/tls.h
22 +++ b/linuxthreads/sysdeps/x86_64/tls.h
27 +# include <xmmintrin.h>
29 /* Type for the dtv. */
33 void *self; /* Pointer to the thread descriptor. */
37 uintptr_t stack_guard;
38 uintptr_t pointer_guard;
45 + void *__padding1[7];
46 + int rtld_must_xmm_save;
48 + /* Have space for the post-AVX register size. */
49 + __m128 rtld_savespace_sse[8][4];
52 #else /* __ASSEMBLER__ */
54 # define TLS_INIT_TCB_SIZE sizeof (tcbhead_t)
56 /* Alignment requirements for the initial TCB. */
57 -# define TLS_INIT_TCB_ALIGN __alignof__ (tcbhead_t)
58 +//# define TLS_INIT_TCB_ALIGN __alignof__ (tcbhead_t)
59 +// Normally the above would be correct.  But we have to store post-AVX
60 +// vector registers in the TCB and we want the storage to be aligned.
61 +// Unfortunately there isn't yet a type for these values and hence no
62 +// 32-byte alignment requirement. Make this explicit, for now.
63 +# define TLS_INIT_TCB_ALIGN 32
65 /* This is the size of the TCB. */
66 # define TLS_TCB_SIZE sizeof (struct _pthread_descr_struct)
68 /* Alignment requirements for the TCB. */
69 # define TLS_TCB_ALIGN __alignof__ (struct _pthread_descr_struct)
70 +// It is already 32B aligned
72 /* The TCB can have any size and the memory following the address the
73 thread pointer points to is unspecified. Allocate the TCB there. */
75 #define THREAD_GSCOPE_WAIT() \
76 do { /* GL(dl_wait_lookup_done) () */ } while (0)
80 +/* Defined in dl-trampoline.S. */
81 +extern void _dl_x86_64_save_sse (void);
82 +extern void _dl_x86_64_restore_sse (void);
84 +# define RTLD_CHECK_FOREIGN_CALL \
85 + (THREAD_GETMEM (THREAD_SELF, p_header.tcbheader.rtld_must_xmm_save) != 0)
87 +/* NB: Don't use the xchg operation because that would imply a lock
88 + prefix which is expensive and unnecessary. The cache line is also
89 + not contested at all. */
90 +# define RTLD_ENABLE_FOREIGN_CALL \
91 + int old_rtld_must_xmm_save = THREAD_GETMEM (THREAD_SELF, \
92 + p_header.tcbheader.rtld_must_xmm_save); \
93 + THREAD_SETMEM (THREAD_SELF, p_header.tcbheader.rtld_must_xmm_save, 1)
95 +# define RTLD_PREPARE_FOREIGN_CALL \
96 + do if (THREAD_GETMEM (THREAD_SELF, p_header.tcbheader.rtld_must_xmm_save)) \
98 + _dl_x86_64_save_sse (); \
99 + THREAD_SETMEM (THREAD_SELF, p_header.tcbheader.rtld_must_xmm_save, 0); \
103 +# define RTLD_FINALIZE_FOREIGN_CALL \
105 + if (THREAD_GETMEM (THREAD_SELF, p_header.tcbheader.rtld_must_xmm_save) == 0) \
106 + _dl_x86_64_restore_sse (); \
107 + THREAD_SETMEM (THREAD_SELF, p_header.tcbheader.rtld_must_xmm_save, \
108 + old_rtld_must_xmm_save); \
112 # endif /* HAVE_TLS_SUPPORT */
114 #endif /* __ASSEMBLER__ */