[60c01a5] | 1 | /* |
---|
| 2 | * Copyright (c) 2011 embedded brains GmbH. All rights reserved. |
---|
| 3 | * |
---|
| 4 | * embedded brains GmbH |
---|
| 5 | * Obere Lagerstr. 30 |
---|
| 6 | * 82178 Puchheim |
---|
| 7 | * Germany |
---|
| 8 | * <info@embedded-brains.de> |
---|
| 9 | * |
---|
| 10 | * The license and distribution terms for this file may be |
---|
| 11 | * found in the file LICENSE in this distribution or at |
---|
[c499856] | 12 | * http://www.rtems.org/license/LICENSE. |
---|
[60c01a5] | 13 | */ |
---|
| 14 | |
---|
| 15 | #include <bspopts.h> |
---|
| 16 | #include <rtems/powerpc/powerpc.h> |
---|
| 17 | |
---|
[c934a10d] | 18 | #if BSP_DATA_CACHE_ENABLED \ |
---|
| 19 | && PPC_CACHE_ALIGNMENT == 32 \ |
---|
| 20 | && !defined(BSP_DATA_CACHE_USE_WRITE_THROUGH) |
---|
[60c01a5] | 21 | |
---|
| 22 | #include <string.h> |
---|
| 23 | #include <stdint.h> |
---|
| 24 | #include <stdbool.h> |
---|
| 25 | |
---|
| 26 | #include <libcpu/powerpc-utility.h> |
---|
| 27 | |
---|
| 28 | #define CACHE_LINE_SIZE 32 |
---|
| 29 | |
---|
| 30 | #define WORD_SIZE 4 |
---|
| 31 | |
---|
| 32 | #define WORD_MASK (WORD_SIZE - 1) |
---|
| 33 | |
---|
| 34 | static bool aligned(const void *a, const void *b) |
---|
| 35 | { |
---|
| 36 | return ((((uintptr_t) a) | ((uintptr_t) b)) & WORD_MASK) == 0; |
---|
| 37 | } |
---|
| 38 | |
---|
| 39 | void *memcpy(void *dst_ptr, const void *src_ptr, size_t n) |
---|
| 40 | { |
---|
| 41 | uint8_t *dst = dst_ptr; |
---|
| 42 | const uint8_t *src = src_ptr; |
---|
| 43 | |
---|
| 44 | ppc_data_cache_block_touch(src); |
---|
| 45 | |
---|
| 46 | if (__builtin_expect(n >= WORD_SIZE && aligned(src, dst), 1)) { |
---|
| 47 | uint32_t *word_dst = (uint32_t *) dst - 1; |
---|
| 48 | const uint32_t *word_src = (const uint32_t *) src - 1; |
---|
| 49 | |
---|
| 50 | if (n >= 2 * CACHE_LINE_SIZE - WORD_SIZE) { |
---|
| 51 | while ((uintptr_t) (word_dst + 1) % CACHE_LINE_SIZE != 0) { |
---|
| 52 | uint32_t tmp; |
---|
| 53 | __asm__ volatile ( |
---|
| 54 | "lwzu %[tmp], 0x4(%[src])\n" |
---|
| 55 | "stwu %[tmp], 0x4(%[dst])\n" |
---|
| 56 | : [src] "+b" (word_src), |
---|
| 57 | [dst] "+b" (word_dst), |
---|
| 58 | [tmp] "=&r" (tmp) |
---|
| 59 | ); |
---|
| 60 | n -= WORD_SIZE; |
---|
| 61 | } |
---|
| 62 | |
---|
| 63 | while (n >= CACHE_LINE_SIZE) { |
---|
| 64 | uint32_t dst_offset = 4; |
---|
| 65 | uint32_t src_offset = 32 + 4; |
---|
| 66 | uint32_t tmp0; |
---|
| 67 | uint32_t tmp1; |
---|
| 68 | uint32_t tmp2; |
---|
| 69 | uint32_t tmp3; |
---|
| 70 | __asm__ volatile ( |
---|
| 71 | "dcbz %[dst], %[dst_offset]\n" |
---|
| 72 | "lwz %[tmp0], 0x04(%[src])\n" |
---|
| 73 | "dcbt %[src], %[src_offset]\n" |
---|
| 74 | "lwz %[tmp1], 0x08(%[src])\n" |
---|
| 75 | "lwz %[tmp2], 0x0c(%[src])\n" |
---|
| 76 | "lwz %[tmp3], 0x10(%[src])\n" |
---|
| 77 | "stw %[tmp0], 0x04(%[dst])\n" |
---|
| 78 | "stw %[tmp1], 0x08(%[dst])\n" |
---|
| 79 | "stw %[tmp2], 0x0c(%[dst])\n" |
---|
| 80 | "stw %[tmp3], 0x10(%[dst])\n" |
---|
| 81 | "lwz %[tmp0], 0x14(%[src])\n" |
---|
| 82 | "lwz %[tmp1], 0x18(%[src])\n" |
---|
| 83 | "lwz %[tmp2], 0x1c(%[src])\n" |
---|
| 84 | "lwzu %[tmp3], 0x20(%[src])\n" |
---|
| 85 | "stw %[tmp0], 0x14(%[dst])\n" |
---|
| 86 | "stw %[tmp1], 0x18(%[dst])\n" |
---|
| 87 | "stw %[tmp2], 0x1c(%[dst])\n" |
---|
| 88 | "stwu %[tmp3], 0x20(%[dst])\n" |
---|
| 89 | : [src] "+b" (word_src), |
---|
| 90 | [dst] "+b" (word_dst), |
---|
| 91 | [tmp0] "=&r" (tmp0), |
---|
| 92 | [tmp1] "=&r" (tmp1), |
---|
| 93 | [tmp2] "=&r" (tmp2), |
---|
| 94 | [tmp3] "=&r" (tmp3) |
---|
| 95 | : [src_offset] "r" (src_offset), |
---|
| 96 | [dst_offset] "r" (dst_offset) |
---|
| 97 | ); |
---|
| 98 | n -= CACHE_LINE_SIZE; |
---|
| 99 | } |
---|
| 100 | } |
---|
| 101 | |
---|
| 102 | while (n >= WORD_SIZE) { |
---|
| 103 | uint32_t tmp; |
---|
| 104 | __asm__ volatile ( |
---|
| 105 | "lwzu %[tmp], 0x4(%[src])\n" |
---|
| 106 | "stwu %[tmp], 0x4(%[dst])\n" |
---|
| 107 | : [src] "+b" (word_src), |
---|
| 108 | [dst] "+b" (word_dst), |
---|
| 109 | [tmp] "=&r" (tmp) |
---|
| 110 | ); |
---|
| 111 | n -= WORD_SIZE; |
---|
| 112 | } |
---|
| 113 | |
---|
| 114 | dst = (uint8_t *) word_dst + 4; |
---|
| 115 | src = (const uint8_t *) word_src + 4; |
---|
| 116 | } |
---|
| 117 | |
---|
| 118 | while (n > 0) { |
---|
| 119 | *dst = *src; |
---|
| 120 | ++src; |
---|
| 121 | ++dst; |
---|
| 122 | --n; |
---|
| 123 | } |
---|
| 124 | |
---|
| 125 | return dst_ptr; |
---|
| 126 | } |
---|
| 127 | |
---|
| 128 | #endif /* BSP_DATA_CACHE_ENABLED && PPC_CACHE_ALIGNMENT == 32 && !defined(BSP_DATA_CACHE_USE_WRITE_THROUGH) */ |
---|