xref: /trunk/main/bridges/source/cpp_uno/s5abi_macosx_aarch64/call.s (revision 5e139d9fe42a654147771da4118aea6285c03168)
1/**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements.  See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership.  The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License.  You may obtain a copy of the License at
10 *
11 *   http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied.  See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22// AArch64 (Apple Silicon, AAPCS64) outgoing-call trampoline for the C++-UNO
23// bridge.  Loads the argument registers from caller-prepared arrays, copies
24// any overflow arguments to the outgoing stack, performs the indirect call,
25// and stores the integer and FP/SIMD return registers back.
26//
27// See AAPCS64_BRIDGE_SPEC.md.  Mach-O assembler syntax; symbols are prefixed
28// with an underscore per the Darwin C ABI.
29//
30// extern "C" void callVirtualFunction(
31//     sal_uInt64  pFunction,     // x0: target C++ virtual method
32//     sal_uInt64  pIndirectRet,  // x1: value for x8 (indirect result ptr), 0 if none
33//     sal_uInt64 *pGPR,          // x2: 8 words  -> x0..x7
34//     double     *pFPR,          // x3: 8 doubles -> d0..d7
35//     sal_uInt64 *pStack,        // x4: overflow-arg words
36//     sal_uInt32  nStackWords,   // x5: number of 8-byte overflow words
37//     sal_uInt64 *pGPRReturn,    // x6: [out] x0,x1
38//     double     *pFPRReturn);   // x7: [out] d0..d3 (HFA up to 4 elements)
39
40    .text
41    .globl _callVirtualFunction
42    .p2align 2
_callVirtualFunction:
    // Outgoing-call trampoline (AAPCS64 / Darwin arm64).
    //
    // In:   x0 = pFunction       x1 = pIndirectRet (value to place in x8)
    //       x2 = pGPR  (8 words)     x3 = pFPR (8 doubles)
    //       x4 = pStack              w5 = nStackWords (32-bit, unsigned)
    //       x6 = pGPRReturn (2 words, out)
    //       x7 = pFPRReturn (4 doubles, out)
    // Out:  pGPRReturn[0..1] = x0,x1 and pFPRReturn[0..3] = d0..d3 as left
    //       by the callee.
    //
    // Values that must survive the call live in callee-saved registers:
    //   x19 = pFunction      x20 = pGPR          x21 = pFPR
    //   x22 = pGPRReturn     x23 = pFPRReturn    x24 = value for x8
    // Scratch used before the call: x9 (byte count), x10 (word index),
    //   x11 (word being copied), x12 (zero-extended word count).
    //
    // Fixed frame (64 bytes); x29 addresses the frame record at its base:
    //   [x29, #0]   saved x29, x30   (frame record)
    //   [x29, #16]  saved x19, x20
    //   [x29, #32]  saved x21, x22
    //   [x29, #48]  saved x23, x24
    // The variable-size outgoing argument area is allocated below x29.

    // prologue: build the frame record first so that x29 points at the saved
    // x29/x30 pair, then save the callee-saved registers we use above it.
    stp     x29, x30, [sp, #-64]!
    mov     x29, sp
    stp     x19, x20, [sp, #16]
    stp     x21, x22, [sp, #32]
    stp     x23, x24, [sp, #48]

    // stash inputs that must survive the call into callee-saved registers
    mov     x19, x0                     // pFunction
    mov     x20, x2                     // pGPR
    mov     x21, x3                     // pFPR
    mov     x22, x6                     // pGPRReturn
    mov     x23, x7                     // pFPRReturn
    mov     x24, x1                     // x8 indirect-result value

    // nStackWords is a 32-bit argument: only w5 is defined, the upper half
    // of x5 is unspecified.  Writing w12 zero-extends the count into x12.
    mov     w12, w5                     // x12 = (sal_uInt64) nStackWords

    // allocate and copy the outgoing overflow stack arguments.
    // bytes = ((nStackWords + 1) & ~1) * 8, to keep sp 16-byte aligned.
    add     x9, x12, #1
    bic     x9, x9, #1
    lsl     x9, x9, #3
    sub     sp, sp, x9
    mov     x10, #0                     // x10 = index of next word to copy
Lcvf_copy:
    cmp     x10, x12
    b.hs    Lcvf_copied                 // unsigned: done when index >= count
    ldr     x11, [x4, x10, lsl #3]      // x11 = pStack[index]
    str     x11, [sp, x10, lsl #3]      // outgoing[index] = x11
    add     x10, x10, #1
    b       Lcvf_copy
Lcvf_copied:

    // load the FP/SIMD argument registers d0..d7
    ldp     d0, d1, [x21, #0]
    ldp     d2, d3, [x21, #16]
    ldp     d4, d5, [x21, #32]
    ldp     d6, d7, [x21, #48]

    // load the GP argument registers x0..x7 and the x8 indirect-result reg.
    // All inputs still needed were copied to x19..x24 above, so the argument
    // registers can be overwritten freely here.
    mov     x8, x24
    ldp     x6, x7, [x20, #48]
    ldp     x4, x5, [x20, #32]
    ldp     x2, x3, [x20, #16]
    ldp     x0, x1, [x20, #0]

    // perform the virtual call
    blr     x19

    // store the return registers
    str     x0, [x22, #0]
    str     x1, [x22, #8]
    str     d0, [x23, #0]
    str     d1, [x23, #8]
    str     d2, [x23, #16]
    str     d3, [x23, #24]

    // epilogue: x29 still addresses the frame base, so resetting sp to it
    // releases the outgoing argument area regardless of its size.
    mov     sp, x29
    ldp     x19, x20, [sp, #16]
    ldp     x21, x22, [sp, #32]
    ldp     x23, x24, [sp, #48]
    ldp     x29, x30, [sp], #64
    ret
106
// ---------------------------------------------------------------------------
// privateSnippetExecutor: register-spill executor for incoming (cpp2uno)
// calls.
//
// Entered via BR from a per-vtable-slot code snippet (see codeSnippet() in
// cpp2uno.cxx).  State on entry:
//   x16 = (nVtableOffset << 32) | nFunctionIndex; bit 31 (0x80000000) of the
//         low word flags a hidden/indirect return
//   x0..x7, d0..d7, x8 = the original incoming arguments, untouched
//   sp  = start of the caller's stack-argument (overflow) area
//   x30 = return address into the original C++ caller
//
// The argument registers are written to a save area in the local frame and
// cpp_vtable_call is invoked with pointers into that area:
//
//   typelib_TypeClass cpp_vtable_call(
//       sal_Int32 nFunctionIndex, sal_Int32 nVtableOffset,
//       void** gpreg, void** fpreg, void** ovrflw,
//       void* pIndirectReturn, sal_uInt64* pRegisterReturn);
//
// On return the result is reloaded from the return buffer: d0 only for
// FLOAT/DOUBLE, otherwise x0/x1 together with d0/d1.
// NOTE(review): only 16 bytes / d0..d1 are reloaded here; confirm against
// cpp_vtable_call whether HFA returns of 3-4 elements can occur.
//
// Frame layout (176 bytes, x29 == sp for the whole body):
//   [x29, #0]          saved x29, x30
//   [x29, #16 .. #79]  x0..x7   (gpreg)
//   [x29, #80 .. #143] d0..d7   (fpreg)
//   [x29, #144.. #159] return buffer (pRegisterReturn)
//   [x29, #176]        first incoming stack argument (ovrflw)
    .globl _privateSnippetExecutor
    .p2align 2
_privateSnippetExecutor:
    stp     x29, x30, [sp, #-176]!
    mov     x29, sp

    // spill FP/SIMD argument registers d0..d7
    stp     d6, d7, [x29, #128]
    stp     d4, d5, [x29, #112]
    stp     d2, d3, [x29, #96]
    stp     d0, d1, [x29, #80]

    // spill GP argument registers x0..x7
    stp     x6, x7, [x29, #64]
    stp     x4, x5, [x29, #48]
    stp     x2, x3, [x29, #32]
    stp     x0, x1, [x29, #16]

    // marshal the cpp_vtable_call arguments (x0..x7 are free now)
    add     x6, x29, #144               // pRegisterReturn -> return buffer
    mov     x5, x8                      // pIndirectReturn = incoming x8
    add     x4, x29, #176               // ovrflw = sp as it was on entry
    add     x3, x29, #80                // fpreg  -> saved d0..d7
    add     x2, x29, #16                // gpreg  -> saved x0..x7
    lsr     x1, x16, #32                // nVtableOffset  = high word of x16
    mov     w0, w16                     // nFunctionIndex = low word of x16
    bl      _cpp_vtable_call

    // w0 = typelib_TypeClass of the result.  d0 comes from the buffer on
    // every path; FLOAT and DOUBLE need nothing more.
    ldr     d0, [x29, #144]
    cmp     w0, #10                     // typelib_TypeClass_FLOAT
    b.eq    Lpse_leave
    cmp     w0, #11                     // typelib_TypeClass_DOUBLE
    b.eq    Lpse_leave

    // integer / pointer / <=16B aggregate: fill both register banks; the
    // caller reads whichever one its return type uses.
    ldr     d1, [x29, #152]
    ldp     x0, x1, [x29, #144]

Lpse_leave:
    mov     sp, x29
    ldp     x29, x30, [sp], #176
    ret
169