source: rtems/cpukit/score/cpu/sparc/cpu.c @ 2afb22b

Last change on this file since 2afb22b was 146adb1, checked in by Sebastian Huber <sebastian.huber@…>, on 07/17/17 at 05:30:46

sparc: Add lazy floating point switch

The SPARC ABI is a bit special with respect to the floating point context.
The complete floating point context is volatile. Thus, from an ABI point
of view nothing needs to be saved and restored during a context switch.
Instead the floating point context must be saved and restored during
interrupt processing. Historically, the deferred floating point switch was
used for SPARC and the complete floating point context is saved and
restored during a context switch to the new floating point unit owner.
This is a bit dangerous since post-switch actions (e.g. signal handlers)
and context switch extensions may silently corrupt the floating point
context.

The floating point unit is disabled for interrupt handlers. Thus, in case
an interrupt handler uses the floating point unit then this will result in a
trap (INTERNAL_ERROR_ILLEGAL_USE_OF_FLOATING_POINT_UNIT).

In uniprocessor configurations, a lazy floating point context switch is
used. In case an active floating point thread is interrupted (PSR[EF] == 1)
and a thread dispatch is carried out, then this thread is registered as the
floating point owner. When a floating point owner is present during a
context switch, the floating point unit is disabled for the heir thread
(PSR[EF] == 0). The floating point disabled trap checks that the use of the
floating point unit is allowed and saves/restores the floating point context
on demand.

Update #3077.

  • Property mode set to 100644
File size: 11.6 KB
Line 
1/**
2 *  @file
3 *
4 *  @brief SPARC CPU Dependent Source
5 */
6
7/*
8 *  COPYRIGHT (c) 1989-2007.
9 *  On-Line Applications Research Corporation (OAR).
10 *
11 *  Copyright (c) 2017 embedded brains GmbH
12 *
13 *  The license and distribution terms for this file may be
14 *  found in the file LICENSE in this distribution or at
15 *  http://www.rtems.org/license/LICENSE.
16 */
17
18#ifdef HAVE_CONFIG_H
19#include "config.h"
20#endif
21
22#include <rtems/system.h>
23#include <rtems/score/isr.h>
24#include <rtems/score/percpu.h>
25#include <rtems/score/tls.h>
26#include <rtems/score/thread.h>
27#include <rtems/rtems/cache.h>
28
#if SPARC_HAS_FPU == 1
  /*
   * Compile-time check that the C structure layout of the per-CPU FSR save
   * area matches the SPARC_PER_CPU_FSR_OFFSET constant (the offset constant
   * presumably mirrors an assembly-side assumption — the assert guarantees
   * the two stay in sync).
   */
  RTEMS_STATIC_ASSERT(
    offsetof( Per_CPU_Control, cpu_per_cpu.fsr)
      == SPARC_PER_CPU_FSR_OFFSET,
    SPARC_PER_CPU_FSR_OFFSET
  );

  #if defined(SPARC_USE_LAZY_FP_SWITCH)
    /* Same layout check for the lazy floating point owner slot */
    RTEMS_STATIC_ASSERT(
      offsetof( Per_CPU_Control, cpu_per_cpu.fp_owner)
        == SPARC_PER_CPU_FP_OWNER_OFFSET,
      SPARC_PER_CPU_FP_OWNER_OFFSET
    );
  #endif
#endif
44
/*
 * Compile-time check that the offset of @a field inside Context_Control
 * equals the corresponding <off>_OFFSET constant.  The static assert name
 * encodes the field so a failure message identifies the culprit.
 */
#define SPARC_ASSERT_OFFSET(field, off) \
  RTEMS_STATIC_ASSERT( \
    offsetof(Context_Control, field) == off ## _OFFSET, \
    Context_Control_offset_ ## field \
  )

SPARC_ASSERT_OFFSET(g5, G5);
SPARC_ASSERT_OFFSET(g7, G7);

/*
 * l0 and l1 are stored in one combined member; L1 must sit exactly four
 * bytes (one register) after L0, so both halves are checked explicitly.
 */
RTEMS_STATIC_ASSERT(
  offsetof(Context_Control, l0_and_l1) == L0_OFFSET,
  Context_Control_offset_L0
);

RTEMS_STATIC_ASSERT(
  offsetof(Context_Control, l0_and_l1) + 4 == L1_OFFSET,
  Context_Control_offset_L1
);

SPARC_ASSERT_OFFSET(l2, L2);
SPARC_ASSERT_OFFSET(l3, L3);
SPARC_ASSERT_OFFSET(l4, L4);
SPARC_ASSERT_OFFSET(l5, L5);
SPARC_ASSERT_OFFSET(l6, L6);
SPARC_ASSERT_OFFSET(l7, L7);
SPARC_ASSERT_OFFSET(i0, I0);
SPARC_ASSERT_OFFSET(i1, I1);
SPARC_ASSERT_OFFSET(i2, I2);
SPARC_ASSERT_OFFSET(i3, I3);
SPARC_ASSERT_OFFSET(i4, I4);
SPARC_ASSERT_OFFSET(i5, I5);
SPARC_ASSERT_OFFSET(i6_fp, I6_FP);
SPARC_ASSERT_OFFSET(i7, I7);
SPARC_ASSERT_OFFSET(o6_sp, O6_SP);
SPARC_ASSERT_OFFSET(o7, O7);
SPARC_ASSERT_OFFSET(psr, PSR);
SPARC_ASSERT_OFFSET(isr_dispatch_disable, ISR_DISPATCH_DISABLE_STACK);

#if defined(RTEMS_SMP)
/* The is_executing flag only exists in SMP configurations */
SPARC_ASSERT_OFFSET(is_executing, SPARC_CONTEXT_CONTROL_IS_EXECUTING);
#endif
86
/*
 * Compile-time check that the offset of @a field inside CPU_Interrupt_frame
 * equals the corresponding ISF_<off>_OFFSET constant.
 */
#define SPARC_ASSERT_ISF_OFFSET(field, off) \
  RTEMS_STATIC_ASSERT( \
    offsetof(CPU_Interrupt_frame, field) == ISF_ ## off ## _OFFSET, \
    CPU_Interrupt_frame_offset_ ## field \
  )

SPARC_ASSERT_ISF_OFFSET(psr, PSR);
SPARC_ASSERT_ISF_OFFSET(pc, PC);
SPARC_ASSERT_ISF_OFFSET(npc, NPC);
SPARC_ASSERT_ISF_OFFSET(g1, G1);
SPARC_ASSERT_ISF_OFFSET(g2, G2);
SPARC_ASSERT_ISF_OFFSET(g3, G3);
SPARC_ASSERT_ISF_OFFSET(g4, G4);
SPARC_ASSERT_ISF_OFFSET(g5, G5);
SPARC_ASSERT_ISF_OFFSET(g7, G7);
SPARC_ASSERT_ISF_OFFSET(i0, I0);
SPARC_ASSERT_ISF_OFFSET(i1, I1);
SPARC_ASSERT_ISF_OFFSET(i2, I2);
SPARC_ASSERT_ISF_OFFSET(i3, I3);
SPARC_ASSERT_ISF_OFFSET(i4, I4);
SPARC_ASSERT_ISF_OFFSET(i5, I5);
SPARC_ASSERT_ISF_OFFSET(i6_fp, I6_FP);
SPARC_ASSERT_ISF_OFFSET(i7, I7);
SPARC_ASSERT_ISF_OFFSET(y, Y);
SPARC_ASSERT_ISF_OFFSET(tpc, TPC);
112
/*
 * Compile-time check that the offset of @a field inside Context_Control_fp
 * equals the corresponding SPARC_FP_CONTEXT_OFFSET_<off> constant.
 */
#define SPARC_ASSERT_FP_OFFSET(field, off) \
  RTEMS_STATIC_ASSERT( \
    offsetof(Context_Control_fp, field) == SPARC_FP_CONTEXT_OFFSET_ ## off, \
    Context_Control_fp_offset_ ## field \
  )

/* The 32 single-precision registers are stored as 16 double-word pairs */
SPARC_ASSERT_FP_OFFSET(f0_f1, F0_F1);
SPARC_ASSERT_FP_OFFSET(f2_f3, F2_F3);
SPARC_ASSERT_FP_OFFSET(f4_f5, F4_F5);
SPARC_ASSERT_FP_OFFSET(f6_f7, F6_F7);
SPARC_ASSERT_FP_OFFSET(f8_f9, F8_F9);
SPARC_ASSERT_FP_OFFSET(f10_f11, F10_F11);
SPARC_ASSERT_FP_OFFSET(f12_f13, F12_F13);
SPARC_ASSERT_FP_OFFSET(f14_f15, F14_F15);
SPARC_ASSERT_FP_OFFSET(f16_f17, F16_F17);
SPARC_ASSERT_FP_OFFSET(f18_f19, F18_F19);
SPARC_ASSERT_FP_OFFSET(f20_f21, F20_F21);
SPARC_ASSERT_FP_OFFSET(f22_f23, F22_F23);
SPARC_ASSERT_FP_OFFSET(f24_f25, F24_F25);
SPARC_ASSERT_FP_OFFSET(f26_f27, F26_F27);
SPARC_ASSERT_FP_OFFSET(f28_f29, F28_F29);
SPARC_ASSERT_FP_OFFSET(f30_f31, F30_F31);
SPARC_ASSERT_FP_OFFSET(fsr, FSR);

RTEMS_STATIC_ASSERT(
  sizeof(SPARC_Minimum_stack_frame) == SPARC_MINIMUM_STACK_FRAME_SIZE,
  SPARC_MINIMUM_STACK_FRAME_SIZE
);

/* https://devel.rtems.org/ticket/2352 */
RTEMS_STATIC_ASSERT(
  sizeof(CPU_Interrupt_frame) % CPU_ALIGNMENT == 0,
  CPU_Interrupt_frame_alignment
);
147
/*
 *  _CPU_Initialize
 *
 *  This routine performs processor dependent initialization.
 *
 *  INPUT PARAMETERS: NONE
 *
 *  Output Parameters: NONE
 *
 *  NOTE: There is no need to save the pointer to the thread dispatch routine.
 *        The SPARC's assembly code can reference it directly with no problems.
 */

void _CPU_Initialize(void)
{
#if defined(SPARC_USE_LAZY_FP_SWITCH)
  /*
   * Export the Thread_Control floating point context offsets as global
   * assembler symbols.  The inline asm defines the symbols at compile time
   * via ".set"; no instruction is emitted and nothing happens at run time.
   * The "i" constraints force the offsetof() values to immediate constants.
   */
  __asm__ volatile (
    ".global SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET\n"
    ".set SPARC_THREAD_CONTROL_REGISTERS_FP_CONTEXT_OFFSET, %0\n"
    ".global SPARC_THREAD_CONTROL_FP_CONTEXT_OFFSET\n"
    ".set SPARC_THREAD_CONTROL_FP_CONTEXT_OFFSET, %1\n"
    :
    : "i" (offsetof(Thread_Control, Registers.fp_context)),
      "i" (offsetof(Thread_Control, fp_context))
  );
#endif
}
175
176uint32_t   _CPU_ISR_Get_level( void )
177{
178  uint32_t   level;
179
180  sparc_get_interrupt_level( level );
181
182  return level;
183}
184
/*
 *  _CPU_ISR_install_raw_handler
 *
 *  This routine installs the specified handler as a "raw" non-executive
 *  supported trap handler (a.k.a. interrupt service routine).
 *
 *  Input Parameters:
 *    vector      - trap table entry number plus synchronous
 *                    vs. asynchronous information
 *    new_handler - address of the handler to be installed
 *    old_handler - pointer to an address of the handler previously installed
 *
 *  Output Parameters: NONE
 *    *new_handler - address of the handler previously installed
 *
 *  NOTE:
 *
 *  On the SPARC, there are really only 256 vectors.  However, the executive
 *  has no easy, fast, reliable way to determine which traps are synchronous
 *  and which are asynchronous.  By default, synchronous traps return to the
 *  instruction which caused the interrupt.  So if you install a software
 *  trap handler as an executive interrupt handler (which is desirable since
 *  RTEMS takes care of window and register issues), then the executive needs
 *  to know that the return address is to the trap rather than the instruction
 *  following the trap.
 *
 *  So vectors 0 through 255 are treated as regular asynchronous traps which
 *  provide the "correct" return address.  Vectors 256 through 512 are assumed
 *  by the executive to be synchronous and to require that the return address
 *  be fudged.
 *
 *  If you use this mechanism to install a trap handler which must reexecute
 *  the instruction which caused the trap, then it should be installed as
 *  an asynchronous trap.  This will avoid the executive changing the return
 *  address.
 */

void _CPU_ISR_install_raw_handler(
  uint32_t    vector,
  proc_ptr    new_handler,
  proc_ptr   *old_handler
)
{
  uint32_t               real_vector;
  CPU_Trap_table_entry  *tbr;
  CPU_Trap_table_entry  *slot;
  uint32_t               u32_tbr;
  uint32_t               u32_handler;

  /*
   *  Get the "real" trap number for this vector ignoring the synchronous
   *  versus asynchronous indicator included with our vector numbers.
   */

  real_vector = SPARC_REAL_TRAP_NUMBER( vector );

  /*
   *  Get the current base address of the trap table and calculate a pointer
   *  to the slot we are interested in.
   */

  sparc_get_tbr( u32_tbr );

  /* The trap table base is 4 KiB aligned; mask off the TBR status bits */
  u32_tbr &= 0xfffff000;

  tbr = (CPU_Trap_table_entry *) u32_tbr;

  slot = &tbr[ real_vector ];

  /*
   *  Get the address of the old_handler from the trap table.
   *
   *  NOTE: The old_handler returned will be bogus if it does not follow
   *        the RTEMS model.
   */

/* Bit layout of a 32-bit handler address split across sethi/jmp immediates */
#define HIGH_BITS_MASK   0xFFFFFC00
#define HIGH_BITS_SHIFT  10
#define LOW_BITS_MASK    0x000003FF

  /*
   * Only decode the old handler if the slot starts with the template's
   * "mov %psr, %l0" instruction, i.e. it was installed by this routine.
   */
  if ( slot->mov_psr_l0 == _CPU_Trap_slot_template.mov_psr_l0 ) {
    u32_handler =
      (slot->sethi_of_handler_to_l4 << HIGH_BITS_SHIFT) |
      (slot->jmp_to_low_of_handler_plus_l4 & LOW_BITS_MASK);
    *old_handler = (proc_ptr) u32_handler;
  } else
    *old_handler = 0;

  /*
   *  Copy the template to the slot and then fix it.
   */

  *slot = _CPU_Trap_slot_template;

  u32_handler = (uint32_t) new_handler;

  /*
   * Patch the vector number and the handler address into the immediate
   * fields of the template's mov, sethi, and jmp instructions.
   */
  slot->mov_vector_l3 |= vector;
  slot->sethi_of_handler_to_l4 |=
    (u32_handler & HIGH_BITS_MASK) >> HIGH_BITS_SHIFT;
  slot->jmp_to_low_of_handler_plus_l4 |= (u32_handler & LOW_BITS_MASK);

  /*
   * There is no instruction cache snooping, so we need to invalidate
   * the instruction cache to make sure that the processor sees the
   * changes to the trap table. This step is required on both single-
   * and multiprocessor systems.
   *
   * In a SMP configuration a change to the trap table might be
   * missed by other cores. If the system state is up, the other
   * cores can be notified using SMP messages that they need to
   * flush their icache. If the up state has not been reached
   * there is no need to notify other cores. They will do an
   * automatic flush of the icache just after entering the up
   * state, but before enabling interrupts.
   */
  rtems_cache_invalidate_entire_instruction();
}
302
303void _CPU_ISR_install_vector(
304  uint32_t    vector,
305  proc_ptr    new_handler,
306  proc_ptr   *old_handler
307)
308{
309   uint32_t   real_vector;
310   proc_ptr   ignored;
311
312  /*
313   *  Get the "real" trap number for this vector ignoring the synchronous
314   *  versus asynchronous indicator included with our vector numbers.
315   */
316
317   real_vector = SPARC_REAL_TRAP_NUMBER( vector );
318
319   /*
320    *  Return the previous ISR handler.
321    */
322
323   *old_handler = _ISR_Vector_table[ real_vector ];
324
325   /*
326    *  Install the wrapper so this ISR can be invoked properly.
327    */
328
329   _CPU_ISR_install_raw_handler( vector, _ISR_Handler, &ignored );
330
331   /*
332    *  We put the actual user ISR address in '_ISR_vector_table'.  This will
333    *  be used by the _ISR_Handler so the user gets control.
334    */
335
336    _ISR_Vector_table[ real_vector ] = new_handler;
337}
338
339void _CPU_Context_Initialize(
340  Context_Control  *the_context,
341  uint32_t         *stack_base,
342  uint32_t          size,
343  uint32_t          new_level,
344  void             *entry_point,
345  bool              is_fp,
346  void             *tls_area
347)
348{
349    uint32_t     stack_high;  /* highest "stack aligned" address */
350    uint32_t     tmp_psr;
351
352    /*
353     *  On CPUs with stacks which grow down (i.e. SPARC), we build the stack
354     *  based on the stack_high address.
355     */
356
357    stack_high = ((uint32_t)(stack_base) + size);
358    stack_high &= ~(CPU_STACK_ALIGNMENT - 1);
359
360    /*
361     *  See the README in this directory for a diagram of the stack.
362     */
363
364    the_context->o7    = ((uint32_t) entry_point) - 8;
365    the_context->o6_sp = stack_high - SPARC_MINIMUM_STACK_FRAME_SIZE;
366    the_context->i6_fp = 0;
367
368    /*
369     *  Build the PSR for the task.  Most everything can be 0 and the
370     *  CWP is corrected during the context switch.
371     *
372     *  The EF bit determines if the floating point unit is available.
373     *  The FPU is ONLY enabled if the context is associated with an FP task
374     *  and this SPARC model has an FPU.
375     */
376
377    sparc_get_psr( tmp_psr );
378    tmp_psr &= ~SPARC_PSR_PIL_MASK;
379    tmp_psr |= (new_level << 8) & SPARC_PSR_PIL_MASK;
380    tmp_psr &= ~SPARC_PSR_EF_MASK;      /* disabled by default */
381
382    /* _CPU_Context_restore_heir() relies on this */
383    _Assert( ( tmp_psr & SPARC_PSR_ET_MASK ) != 0 );
384
385#if (SPARC_HAS_FPU == 1)
386    /*
387     *  If this bit is not set, then a task gets a fault when it accesses
388     *  a floating point register.  This is a nice way to detect floating
389     *  point tasks which are not currently declared as such.
390     */
391
392    if ( is_fp )
393      tmp_psr |= SPARC_PSR_EF_MASK;
394#endif
395    the_context->psr = tmp_psr;
396
397  /*
398   *  Since THIS thread is being created, there is no way that THIS
399   *  thread can have an _ISR_Dispatch stack frame on its stack.
400   */
401    the_context->isr_dispatch_disable = 0;
402
403  if ( tls_area != NULL ) {
404    void *tcb = _TLS_TCB_after_TLS_block_initialize( tls_area );
405
406    the_context->g7 = (uintptr_t) tcb;
407  }
408}
Note: See TracBrowser for help on using the repository browser.