/*
 *  head.S -- Bootloader Entry point
 *
 *  Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
 *
 *  Modified to compile in RTEMS development environment
 *  by Eric Valette
 *
 *  Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
 *
 *  The license and distribution terms for this file may be
 *  found in the file LICENSE in this distribution or at
 *  http://www.OARcorp.com/rtems/license.html.
 *
 * $Id$
 */

/*
 * Syntax: PowerPC, GNU as, preprocessed by cpp (hence the #if/#define).
 * Numeric local labels (1:, 2:, ...) are reused by several routines in
 * this file; an "Nf"/"Nb" reference binds to the nearest such label, so
 * a numeric label must never live inside a conditional block while the
 * branch that targets it lives outside.
 */

/*
 * NOTE(review): the two operand-less #include directives below carry no
 * header name in the source as received. The MSR_xxx and HID0_xxx
 * constants used in this file are not defined here, so they presumably
 * come from these headers -- restore the names before building.
 */
#include "bootldr.h"
#include
#include
#include "asm.h"

#undef TEST_PPCBUG_CALLS
#define FRAME_SIZE 32
#define LOCK_CACHES (HID0_DLOCK|HID0_ILOCK)
#define INVL_CACHES (HID0_DCI|HID0_ICFI)
#define ENBL_CACHES (HID0_DCE|HID0_ICE)

/* Net effect of the next two lines as written: USE_PPCBUG is undefined. */
#define USE_PPCBUG
#undef  USE_PPCBUG

/* Set MSR_IP, then issue a system call with r10 = 0x63. Clobbers r10. */
#define MONITOR_ENTER			\
	mfmsr	r10		;	\
	ori	r10,r10,MSR_IP	;	\
	mtmsr	r10		;	\
	li	r10,0x63	;	\
	sc

	START_GOT
	GOT_ENTRY(_GOT2_TABLE_)
	GOT_ENTRY(_FIXUP_TABLE_)
	GOT_ENTRY(.bss)
	GOT_ENTRY(codemove)
	GOT_ENTRY(0)
	GOT_ENTRY(__bd)
	GOT_ENTRY(moved)
	GOT_ENTRY(_binary_rtems_gz_start)
	GOT_ENTRY(_binary_initrd_gz_start)
	GOT_ENTRY(_binary_initrd_gz_end)
#ifdef TEST_PPCBUG_CALLS
	GOT_ENTRY(banner_start)
	GOT_ENTRY(banner_end)
#endif
	END_GOT

	.globl	start
	.type	start,@function
/* Point the stack into the PreP partition header in the x86 reserved
 * code area, so that simple C routines can be called.
 */
start:
#ifdef USE_PPCBUG
	MONITOR_ENTER
#endif
	bl	1f
1:	mflr	r1			/* r1 = run-time address of label 1 */
	li	r0,0
	stwu	r0,start-1b-0x400+0x1b0-FRAME_SIZE(r1)
	stmw	r26,FRAME_SIZE-24(r1)
	GET_GOT
	mfmsr	r28			/* Turn off interrupts */
	ori	r0,r28,MSR_EE
	xori	r0,r0,MSR_EE		/* set-then-toggle == clear MSR_EE */
	mtmsr	r0

/* Enable the caches, from now on cr2.eq set means processor is 601 */
	mfpvr	r0
	mfspr	r29,HID0		/* r29 = entry HID0, saved below */
	srwi	r0,r0,16
	cmplwi	cr2,r0,1
	beq	cr2,2f
#ifndef USE_PPCBUG
	ori	r0,r29,ENBL_CACHES|INVL_CACHES|LOCK_CACHES
	xori	r0,r0,INVL_CACHES|LOCK_CACHES
	sync
	isync
	mtspr	HID0,r0
#endif
2:	bl	reloc

/* save all the parameters and the original msr/hid0/r31 */
	lwz	bd,GOT(__bd)
	stw	r3,0(bd)
	stw	r4,4(bd)
	stw	r5,8(bd)
	stw	r6,12(bd)
	lis	r3,__size@sectoff@ha
	stw	r7,16(bd)
	stw	r8,20(bd)
	addi	r3,r3,__size@sectoff@l
	stw	r9,24(bd)
	stw	r10,28(bd)
	stw	r28,o_msr(bd)
	stw	r29,o_hid0(bd)
	stw	r31,o_r31(bd)

/* Call the routine to fill boot_data structure from residual data.
 * And to find where the code has to be moved.
 */
	bl	early_setup

/* Now we need to relocate ourselves, where we are told to. First put a
 * copy of the codemove routine to some place in memory.
 * (which may be where the 0x41 partition was loaded, so size is critical).
 */
	lwz	r4,GOT(codemove)
	li	r5,_size_codemove
	lwz	r3,mover(bd)
	lwz	r6,cache_lsize(bd)
	bl	codemove
	mtctr	r3			/* Where the temporary codemove is. */
	lwz	r3,image(bd)
	lis	r5,_edata@sectoff@ha
	lwz	r4,GOT(0)		/* Our own address */
	addi	r5,r5,_edata@sectoff@l
	lwz	r6,cache_lsize(bd)
	lwz	r8,GOT(moved)
	sub	r7,r3,r4		/* Difference to adjust pointers. */
	add	r8,r8,r7
	add	r30,r30,r7
	add	bd,bd,r7

/* Call the copy routine but return to the new area. */
	mtlr	r8			/* for the return address */
	bctr				/* returns to the moved instruction */

/* Establish the new top stack frame. */
moved:	lwz	r1,stack(bd)
	li	r0,0
	stwu	r0,-16(r1)

/* relocate again */
	bl	reloc

/* Clear all of BSS */
	lwz	r10,GOT(.bss)
	li	r0,__bss_words@sectoff@l
	subi	r10,r10,4
	cmpwi	r0,0
	mtctr	r0
	li	r0,0
	beq	4f			/* nothing to clear */
3:	stwu	r0,4(r10)
	bdnz	3b

/* Final memory initialization. First switch to unmapped mode
 * in case the FW had set the MMU on, and flush the TLB to avoid
 * stale entries from interfering. No I/O access is allowed
 * during this time!
 *
 * Label 4 is kept outside the conditional so that the "beq 4f" above
 * always lands here, whether or not USE_PPCBUG is defined, instead of
 * binding to the next "4:" further down the file.
 */
4:
#ifndef USE_PPCBUG
	bl	MMUoff
#endif
	bl	flush_tlb

/* Some firmware versions leave stale values in the BATs, it's time
 * to invalidate them to avoid interferences with our own mappings.
 * But the 601 valid bit is in the BATL (IBAT only) and others are in
 * the [ID]BATU. Bloat, bloat.. fortunately thrown away later.
 */
	li	r3,0
	beq	cr2,5f			/* 601: skip the DBAT writes */
	mtdbatu	0,r3
	mtdbatu	1,r3
	mtdbatu	2,r3
	mtdbatu	3,r3
5:	mtibatu	0,r3
	mtibatl	0,r3
	mtibatu	1,r3
	mtibatl	1,r3
	mtibatu	2,r3
	mtibatl	2,r3
	mtibatu	3,r3
	mtibatl	3,r3
	lis	r3,__size@sectoff@ha
	addi	r3,r3,__size@sectoff@l
	sync				/* We are going to touch SDR1 ! */
	bl	mm_init
	bl	MMUon

/* Now we are mapped and can perform I/O if we want */
#ifdef TEST_PPCBUG_CALLS
/* Experience seems to show that PPCBug can only be called with the
 * data cache disabled and with MMU disabled. Bummer.
 */
	li	r10,0x22		/* .OUTLN */
	lwz	r3,GOT(banner_start)
	lwz	r4,GOT(banner_end)
	sc
#endif
	bl	setup_hw
	lwz	r4,GOT(_binary_rtems_gz_start)
	lis	r5,_rtems_gz_size@sectoff@ha
	lwz	r6,GOT(_binary_initrd_gz_start)
	lis	r3,_rtems_size@sectoff@ha
	lwz	r7,GOT(_binary_initrd_gz_end)
	addi	r5,r5,_rtems_gz_size@sectoff@l
	addi	r3,r3,_rtems_size@sectoff@l
	sub	r7,r7,r6		/* r7 = initrd end - initrd start */
	bl	decompress_kernel

/* Back here we are unmapped and we start the kernel, passing up to eight
 * parameters just in case, only r3 to r7 used for now. Flush the tlb so
 * that the loaded image starts in a clean state.
 */
	bl	flush_tlb
	lwz	r3,0(bd)
	lwz	r4,4(bd)
	lwz	r5,8(bd)
	lwz	r6,12(bd)
	lwz	r7,16(bd)
	lwz	r8,20(bd)
	lwz	r9,24(bd)
	lwz	r10,28(bd)

	lwz	r30,0(0)		/* jump target is the word at address 0 */
	mtctr	r30
/*
 * Linux code again
	lis	r30,0xdeadc0de@ha
	addi	r30,r30,0xdeadc0de@l
	stw	r30,0(0)
	li	r30,0
 */
	dcbst	0,r30			/* Make sure it's in memory ! */

/* We just flash invalidate and disable the dcache, unless it's a 601,
 * critical areas have been flushed and we don't care about the stack
 * and other scratch areas.
 */
	beq	cr2,1f
	mfspr	r0,HID0
	ori	r0,r0,HID0_DCI|HID0_DCE
	sync
	mtspr	HID0,r0
	xori	r0,r0,HID0_DCI|HID0_DCE
	mtspr	HID0,r0

/* Provisional return to FW, works for PPCBug.
 * Label 1 is kept outside the conditional so the 601 branch above has
 * its target here whichever arm is selected.
 */
1:
#if 0
	MONITOR_ENTER
#else
	bctr
#endif

/* relocation function, r30 must point to got2+0x8000
 * Clobbers r0, r10, r11, r12, ctr and cr0.
 */
reloc:
/* Adjust got2 pointers, no need to check for 0, this code already puts
 * a few entries in the table.
 */
	li	r0,__got2_entries@sectoff@l
	la	r12,GOT(_GOT2_TABLE_)	/* where the table is now */
	lwz	r11,GOT(_GOT2_TABLE_)	/* address stored in the table */
	mtctr	r0
	sub	r11,r12,r11		/* r11 = offset added to every pointer */
	addi	r12,r12,-4
1:	lwzu	r0,4(r12)
	add	r0,r0,r11
	stw	r0,0(r12)
	bdnz	1b

/* Now adjust the fixups and the pointers to the fixups in case we need
 * to move ourselves again.
 */
2:	li	r0,__fixup_entries@sectoff@l
	lwz	r12,GOT(_FIXUP_TABLE_)
	cmpwi	r0,0
	mtctr	r0
	addi	r12,r12,-4
	beqlr				/* no fixup entries: done */
3:	lwzu	r10,4(r12)
	lwzux	r0,r10,r11		/* r10 += r11, r0 = word at new r10 */
	add	r0,r0,r11
	stw	r10,0(r12)
	stw	r0,0(r10)
	bdnz	3b
	blr

/* Set the MMU on and off: code is always mapped 1:1 and does not need MMU,
 * but it does not cost so much to map it also and it catches calls through
 * NULL function pointers.
 *
 * Both routines return to the caller through rfi (srr0 = lr) and
 * clobber r0 and r11.
 */
	.globl	MMUon
	.type	MMUon,@function
MMUon:	mfmsr	r0
	ori	r0,r0,MSR_IR|MSR_DR|MSR_IP
	mflr	r11
	xori	r0,r0,MSR_IP		/* IR and DR set, IP cleared */
	mtsrr0	r11
	mtsrr1	r0
	rfi

	.globl	MMUoff
	.type	MMUoff,@function
MMUoff:	mfmsr	r0
	ori	r0,r0,MSR_IR|MSR_DR|MSR_IP
	mflr	r11
	xori	r0,r0,MSR_IR|MSR_DR	/* IR and DR cleared, IP set */
	mtsrr0	r11
	mtsrr1	r0
	rfi

/* Due to the PPC architecture (and according to the specifications), a
 * series of tlbie which goes through a whole 256 MB segment always flushes
 * the whole TLB. This is obviously overkill and slow, but who cares ?
 * It takes about 1 ms on a 200 MHz 603e and works even if residual data
 * get the number of TLB entries wrong.
 *
 * Clobbers r11 and cr0.
 */
flush_tlb:
	lis	r11,0x1000
1:	addic.	r11,r11,-0x1000
	tlbie	r11
	bnl	1b			/* loop until r11 goes negative */
/* tlbsync is not implemented on 601, so use sync which seems to be a superset
 * of tlbsync in all cases and do not bother with CPU dependent code
 */
	sync
	blr

	.globl	codemove
codemove:
	.type	codemove,@function
/* r3 dest, r4 src, r5 length in bytes, r6 cachelinesize
 *
 * Copies (r5+3)/4 words, forwards when dest is below src and backwards
 * otherwise, then writes back the data cache and invalidates the
 * instruction cache over the destination range. When r6 is non-zero,
 * r3 is rounded down to a cache line boundary on return.
 * Clobbers r0, r4, r5, r7, r8, ctr, cr0 and cr1.
 */
	cmplw	cr1,r3,r4
	addi	r0,r5,3
	srwi.	r0,r0,2			/* r0 = word count, cr0 = (count == 0) */
	beq	cr1,4f			/* In place copy is not necessary */
	beq	7f			/* Protect against 0 count */
	mtctr	r0
	bge	cr1,2f			/* dest above src: copy backwards */

	la	r8,-4(r4)
	la	r7,-4(r3)
1:	lwzu	r0,4(r8)
	stwu	r0,4(r7)
	bdnz	1b
	b	4f

2:	slwi	r0,r0,2
	add	r8,r4,r0
	add	r7,r3,r0
3:	lwzu	r0,-4(r8)
	stwu	r0,-4(r7)
	bdnz	3b

/* Now flush the cache: note that we must start from a cache aligned
 * address. Otherwise we might miss one cache line.
 */
4:	cmpwi	r6,0
	add	r5,r3,r5		/* r5 = end of destination */
	beq	7f			/* Always flush prefetch queue in any case */
	subi	r0,r6,1
	andc	r3,r3,r0		/* r3 = dest rounded down to a cache line */
	mr	r4,r3
5:	cmplw	r4,r5
	dcbst	0,r4
	add	r4,r4,r6
	blt	5b
	sync				/* Wait for all dcbst to complete on bus */
	mr	r4,r3
6:	cmplw	r4,r5
	icbi	0,r4
	add	r4,r4,r6
	blt	6b
7:	sync				/* Wait for all icbi to complete on bus */
	isync
	blr
	.size	codemove,.-codemove
_size_codemove=.-codemove

	.section	".data"		/* .rodata */
	.align 2
#ifdef TEST_PPCBUG_CALLS
banner_start:
	.ascii "This message was printed by PPCBug with MMU enabled"
banner_end:
#endif