xref: /AOO41X/main/sal/osl/unx/asm/interlck_sparc.s (revision b5da552ccefc4034e06a43bfae43fb8a8b64a7ad)
1*b5da552cSAndrew Rist/**************************************************************
2cdf0e10cSrcweir *
3*b5da552cSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*b5da552cSAndrew Rist * or more contributor license agreements.  See the NOTICE file
5*b5da552cSAndrew Rist * distributed with this work for additional information
6*b5da552cSAndrew Rist * regarding copyright ownership.  The ASF licenses this file
7*b5da552cSAndrew Rist * to you under the Apache License, Version 2.0 (the
8*b5da552cSAndrew Rist * "License"); you may not use this file except in compliance
9*b5da552cSAndrew Rist * with the License.  You may obtain a copy of the License at
10cdf0e10cSrcweir *
11*b5da552cSAndrew Rist *   http://www.apache.org/licenses/LICENSE-2.0
12cdf0e10cSrcweir *
13*b5da552cSAndrew Rist * Unless required by applicable law or agreed to in writing,
14*b5da552cSAndrew Rist * software distributed under the License is distributed on an
15*b5da552cSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*b5da552cSAndrew Rist * KIND, either express or implied.  See the License for the
17*b5da552cSAndrew Rist * specific language governing permissions and limitations
18*b5da552cSAndrew Rist * under the License.
19cdf0e10cSrcweir *
20*b5da552cSAndrew Rist *************************************************************/
21*b5da552cSAndrew Rist
22*b5da552cSAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir
25cdf0e10cSrcweir/*
26cdf0e10cSrcweir * Implements osl_[increment|decrement]InterlockedCount in two ways:
27cdf0e10cSrcweir * sparcv8 architecture:                use the "swap" instruction
28cdf0e10cSrcweir * sparcv9/sparcv8plus architecture:    use the "cas"  instruction
29cdf0e10cSrcweir *
30cdf0e10cSrcweir * 32 bit mode with v8 and v8plus support:
31cdf0e10cSrcweir * Initialize once with osl_InterlockedCountSetV9(int bv9) if you want to
32cdf0e10cSrcweir * use the "cas" instruction, which is faster (no spinlock needed)
33cdf0e10cSrcweir * Default is to use the "swap" instruction, which works on all supported
 * SPARC CPUs
35cdf0e10cSrcweir *
36cdf0e10cSrcweir * osl_InterlockedCountSetV9(int bv9)
37cdf0e10cSrcweir *    bv9 = 0   use sparcv8 "swap" (spinlock)
38cdf0e10cSrcweir *    bv9 = 1   use sparcv9/sparcv8plus "cas" (no spinlock)
39cdf0e10cSrcweir *
40cdf0e10cSrcweir * 32 bit mode without v8 support (implies v8plus) or 64 bit mode:
 * No need (nor the possibility) to call osl_InterlockedCountSetV9(),
42cdf0e10cSrcweir * sparcv9 mode is implied. Assemble with -xarch=v8plus (32 bit) or
43cdf0e10cSrcweir * -xarch=v9 (64 bit).
44cdf0e10cSrcweir *
45cdf0e10cSrcweir */
46cdf0e10cSrcweir
47cdf0e10cSrcweir#if !defined(__sparcv8plus) && !defined(__sparcv9) && !defined(__sparc_v9__)
48cdf0e10cSrcweir
/*
 * Dispatch slot read by osl_incrementInterlockedCount.  It holds the address
 * of the implementation to jump to: initially the sparcv8 "swap" variant,
 * later whatever osl_InterlockedCountSetV9 stores here.
 */
        .section ".data"
        .type   osl_incrementInterLockCountFuncPtr,#object
        .size   osl_incrementInterLockCountFuncPtr,4
        .align  4
osl_incrementInterLockCountFuncPtr:
        .word   osl_incrementInterlockedCountV8
55cdf0e10cSrcweir
/*
 * Dispatch slot read by osl_decrementInterlockedCount.  It holds the address
 * of the implementation to jump to: initially the sparcv8 "swap" variant,
 * later whatever osl_InterlockedCountSetV9 stores here.
 */
        .type   osl_decrementInterLockCountFuncPtr,#object
        .size   osl_decrementInterLockCountFuncPtr,4
        .align  4
osl_decrementInterLockCountFuncPtr:
        .word   osl_decrementInterlockedCountV8
61cdf0e10cSrcweir
.section   ".text"

#if defined(LINUX) || defined(NETBSD)
/*
 * PIC helper shared by the routines below.
 *
 * In:  %o5 = the _GLOBAL_OFFSET_TABLE_ value prepared by the caller
 *      %o7 = address of the "call" instruction that brought us here
 * Out: %o5 = %o5 + %o7, which the callers then use as the base register in
 *      their [sym + %o5] loads
 */
.Laddoseven:
        retl
        add     %o5, %o7, %o5           ! delay slot: fold in the address of the call
#endif
72cdf0e10cSrcweir
/*
 * osl_incrementInterlockedCount -- public entry point of the sparcv8 build.
 *
 * Tail-jumps to the implementation whose address is currently stored in
 * osl_incrementInterLockCountFuncPtr.
 *
 * In:       %o0 = argument for the implementation; not touched here
 * Out:      whatever the selected implementation leaves in %o0
 * Clobbers: %o1; additionally %g1 and %o5 on NETBSD/LINUX
 */
        .global osl_incrementInterlockedCount
        .align  4

osl_incrementInterlockedCount:

#if defined(NETBSD) || defined(LINUX)
        mov     %o7, %g1                                ! the call below overwrites %o7
        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %o5
        call    .Laddoseven
        add     %o5, %lo(_GLOBAL_OFFSET_TABLE_+4), %o5  ! delay slot
        mov     %g1, %o7                                ! restore our return address
#endif
        set     osl_incrementInterLockCountFuncPtr, %o1
#if defined(NETBSD) || defined(LINUX)
        /*
         * Fetch the slot address through the GOT base in %o5.  This must be
         * done on every platform that runs the GOT setup above, exactly as
         * osl_decrementInterlockedCount and osl_InterlockedCountSetV9 do;
         * previously LINUX was missing from this condition.
         */
        ld      [%o1 + %o5], %o1
#endif
        ld      [%o1], %o1                              ! %o1 = current implementation
        jmp     %o1                                     ! tail jump, %o7 still names our caller
        nop                                             ! delay slot
        .type   osl_incrementInterlockedCount,#function
        .size   osl_incrementInterlockedCount,.-osl_incrementInterlockedCount
94cdf0e10cSrcweir
.section   ".text"

/*
 * osl_decrementInterlockedCount -- public entry point of the sparcv8 build.
 *
 * Tail-jumps to the implementation whose address is currently stored in
 * osl_decrementInterLockCountFuncPtr.
 *
 * In:       %o0 = argument for the implementation; not touched here
 * Out:      whatever the selected implementation leaves in %o0
 * Clobbers: %o1; additionally %g1 and %o5 on NETBSD/LINUX
 */
        .global osl_decrementInterlockedCount
        .type   osl_decrementInterlockedCount,#function
        .align  4

osl_decrementInterlockedCount:
        set     osl_decrementInterLockCountFuncPtr, %o1
#if defined(NETBSD) || defined(LINUX)
        mov     %o7, %g1                                ! the call below overwrites %o7
        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %o5       ! must sit directly before the call
        call    .Laddoseven
        add     %o5, %lo(_GLOBAL_OFFSET_TABLE_+4), %o5  ! delay slot, directly after the call
        mov     %g1, %o7                                ! restore our return address
        ld      [%o1 + %o5], %o1                        ! slot address via the GOT base
#endif
        ld      [%o1], %o1                              ! %o1 = current implementation
        jmp     %o1                                     ! tail jump, %o7 still names our caller
        nop                                             ! delay slot
        .size   osl_decrementInterlockedCount,.-osl_decrementInterlockedCount
117cdf0e10cSrcweir
.section   ".text"

/*
 * osl_InterlockedCountSetV9 -- choose the implementation pair used by
 * osl_incrementInterlockedCount and osl_decrementInterlockedCount.
 *
 * In:       %o0 == 0   install the sparcv8 "swap" routines
 *           %o0 != 0   install the sparcv9/sparcv8plus "cas" routines
 * Out:      both dispatch slots rewritten; %o0 is left holding the address
 *           of the chosen increment routine
 * Clobbers: %o0-%o3, condition codes; additionally %g1 and %o5 on
 *           NETBSD/LINUX
 */
        .global osl_InterlockedCountSetV9
        .type   osl_InterlockedCountSetV9,#function
        .align  4

osl_InterlockedCountSetV9:

#if defined(NETBSD) || defined(LINUX)
        mov     %o7, %g1                                ! the call below overwrites %o7
        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %o5
        call    .Laddoseven
        add     %o5, %lo(_GLOBAL_OFFSET_TABLE_+4), %o5  ! delay slot
        mov     %g1, %o7                                ! restore our return address
#endif
        set     osl_incrementInterLockCountFuncPtr, %o1 ! %o1 -> increment slot
        set     osl_decrementInterLockCountFuncPtr, %o2 ! %o2 -> decrement slot
        cmp     %o0, %g0
        bnz     1f                                      ! non-zero flag selects "cas"
        nop                                             ! delay slot

        set     osl_incrementInterlockedCountV8, %o0    ! flag was zero: "swap" pair
        set     osl_decrementInterlockedCountV8, %o3
        ba      2f
        nop                                             ! delay slot

1:      set     osl_incrementInterlockedCountV9, %o0    ! "cas" pair
        set     osl_decrementInterlockedCountV9, %o3

        ! Common tail: %o0/%o3 name the routines, %o1/%o2 name the slots.
2:
#if defined(NETBSD) || defined(LINUX)
        ld      [%o0 + %o5], %o0                        ! resolve all four symbols
        ld      [%o1 + %o5], %o1                        ! through the GOT base in %o5
        ld      [%o2 + %o5], %o2
        ld      [%o3 + %o5], %o3
#endif
        st      %o3, [%o2]                              ! decrement slot
        retl
        st      %o0, [%o1]                              ! delay slot: increment slot

        .size   osl_InterlockedCountSetV9,.-osl_InterlockedCountSetV9
161cdf0e10cSrcweir
162cdf0e10cSrcweir
.section   ".text"

/*
 * osl_incrementInterlockedCountV8 -- increment built on the sparcv8 "swap"
 * instruction.
 *
 * The counter word doubles as a spinlock: while an update is in progress it
 * holds the lock value -4096, which was chosen so that small negative counts
 * stay usable.  A counter whose real value is -4096 cannot be represented.
 *
 * In:       %o0 = address of the counter word
 * Out:      %o0 = counter value after the increment
 * Clobbers: %o1, condition codes
 */
        .local  osl_incrementInterlockedCountV8
        .type   osl_incrementInterlockedCountV8,#function
        .align  4

osl_incrementInterlockedCountV8:

1:      ld      [%o0], %o1              ! plain read first, keeps polling cheap
        cmp     %o1, -4096
        be      1b                      ! lock value seen: somebody else is updating
        mov     -4096, %o1              ! delay slot: lock value to swap in
        swap    [%o0], %o1              ! %o1 = old word, memory = lock value
        cmp     %o1, -4096
        be      1b                      ! another thread got in between: start over
        add     %o1, 1, %o1             ! delay slot: new count (reloaded on retry)
        st      %o1, [%o0]              ! publish new count, which also drops the lock
        retl
        mov     %o1, %o0                ! delay slot: return the new count

        .size   osl_incrementInterlockedCountV8,.-osl_incrementInterlockedCountV8
186cdf0e10cSrcweir
187cdf0e10cSrcweir
.section   ".text"

/*
 * osl_decrementInterlockedCountV8 -- decrement built on the sparcv8 "swap"
 * instruction.
 *
 * The counter word doubles as a spinlock: while an update is in progress it
 * holds the lock value -4096.  A counter whose real value is -4096 cannot be
 * represented.
 *
 * In:       %o0 = address of the counter word
 * Out:      %o0 = counter value after the decrement
 * Clobbers: %o1, condition codes
 */
        .local  osl_decrementInterlockedCountV8
        .type   osl_decrementInterlockedCountV8,#function
        .align  4

osl_decrementInterlockedCountV8:

1:      ld      [%o0], %o1              ! plain read first, keeps polling cheap
        cmp     %o1, -4096
        be      1b                      ! lock value seen: somebody else is updating
        mov     -4096, %o1              ! delay slot: lock value to swap in
        swap    [%o0], %o1              ! %o1 = old word, memory = lock value
        cmp     %o1, -4096
        be      1b                      ! another thread got in between: start over
        sub     %o1, 1, %o1             ! delay slot: new count (reloaded on retry)
        st      %o1, [%o0]              ! publish new count, which also drops the lock
        retl
        mov     %o1, %o0                ! delay slot: return the new count

        .size   osl_decrementInterlockedCountV8,.-osl_decrementInterlockedCountV8
208cdf0e10cSrcweir
#endif /* !__sparcv8plus && !__sparcv9 && !__sparc_v9__ */
210cdf0e10cSrcweir
.section   ".text"
#if defined(__sparcv8plus) || defined(__sparcv9) || defined(__sparc_v9__)
#define   osl_incrementInterlockedCountV9 osl_incrementInterlockedCount
 .global  osl_incrementInterlockedCountV9
#else
 .local   osl_incrementInterlockedCountV9
#endif
 .align   8

/*
 * Increment built on the sparcv9/sparcv8plus "cas" instruction (no spinlock).
 * In a v8plus/v9-only build the #define above makes this routine the public
 * osl_incrementInterlockedCount; otherwise it is a local routine installed
 * through osl_InterlockedCountSetV9.
 *
 * In:       %o0 = address of the 32-bit counter word
 * Out:      %o0 = counter value after the increment, sign-extended from 32 bits
 * Clobbers: %o1, %o2, condition codes
 */
osl_incrementInterlockedCountV9:

1:      ld      [%o0], %o1          ! %o1 = value we expect to find
        add     %o1, 1, %o2         ! %o2 = value we want to store
!       "cas" is emitted as a raw word so that an assembler targeting v8
!       (linux) still accepts this file
        .word 0xD5E21009
!       cas     [%o0], %o1, %o2     ! %o2 = previous memory word in either case
        cmp     %o1, %o2
        bne     1b                  ! word changed under us: retry
        nop                         ! delay slot (%o0 must survive a retry)
        add     %o2, 1, %o0         ! new count = previous word + 1
        ! ld and cas both leave zero-extended 32-bit values in the registers,
        ! so without this a negative or wrapped result would reach a 64-bit
        ! caller with wrong upper bits.  The low 32 bits are unchanged.
        retl
        sra     %o0, 0, %o0         ! delay slot: sign-extend the 32-bit result

 .type  osl_incrementInterlockedCountV9,#function
 .size  osl_incrementInterlockedCountV9,.-osl_incrementInterlockedCountV9
238cdf0e10cSrcweir
239cdf0e10cSrcweir
.section   ".text"
#if defined(__sparcv8plus) || defined(__sparcv9) || defined(__sparc_v9__)
#define   osl_decrementInterlockedCountV9 osl_decrementInterlockedCount
 .global  osl_decrementInterlockedCountV9
#else
 .local   osl_decrementInterlockedCountV9
#endif
 .align   8

/*
 * Decrement built on the sparcv9/sparcv8plus "cas" instruction (no spinlock).
 * In a v8plus/v9-only build the #define above makes this routine the public
 * osl_decrementInterlockedCount; otherwise it is a local routine installed
 * through osl_InterlockedCountSetV9.
 *
 * In:       %o0 = address of the 32-bit counter word
 * Out:      %o0 = counter value after the decrement, sign-extended from 32 bits
 * Clobbers: %o1, %o2, condition codes
 */
osl_decrementInterlockedCountV9:

1:      ld      [%o0], %o1          ! %o1 = value we expect to find
        sub     %o1, 1, %o2         ! %o2 = value we want to store
!       "cas" is emitted as a raw word so that an assembler targeting v8
!       (linux) still accepts this file
        .word 0xD5E21009
!       cas     [%o0], %o1, %o2     ! %o2 = previous memory word in either case
        cmp     %o1, %o2
        bne     1b                  ! word changed under us: retry
        nop                         ! delay slot (%o0 must survive a retry)
        sub     %o2, 1, %o0         ! new count = previous word - 1
        ! ld and cas both leave zero-extended 32-bit values in the registers,
        ! so without this a result such as 0 - 1 would reach a 64-bit caller
        ! as 0x00000000FFFFFFFF instead of -1.  The low 32 bits are unchanged.
        retl
        sra     %o0, 0, %o0         ! delay slot: sign-extend the 32-bit result

 .type  osl_decrementInterlockedCountV9,#function
 .size  osl_decrementInterlockedCountV9,.-osl_decrementInterlockedCountV9
264