1 | /* |
2 | * Checksum routine for Internet Protocol family headers. |
3 | * |
4 | * This routine is very heavily used in the network |
5 | * code and should be modified for each CPU to be as fast as possible. |
6 | * |
7 | * This implementation is the PowerPC version. |
8 | * |
9 | * $Id$ |
10 | */ |
11 | |
12 | #include <stdio.h> /* for puts */ |
13 | |
/*
 * One's-complement folding helpers.
 *
 * ADDCARRY(x) subtracts 0xffff from x when x exceeds 0xffff, i.e. it
 * applies the end-around carry to a value that has already been folded
 * once.  REDUCE folds the 32-bit accumulator `sum' down to 16 bits by
 * adding its two halves and then applying ADDCARRY.
 *
 * Both are wrapped in do { } while (0) so that they behave as a single
 * statement and can be used safely in unbraced if/else bodies.
 */
#undef ADDCARRY
#define ADDCARRY(x) \
	do { \
		if ((x) > 0xffff) \
			(x) -= 0xffff; \
	} while (0)

#define REDUCE \
	do { \
		sum = (sum & 0xffff) + (sum >> 16); \
		ADDCARRY(sum); \
	} while (0)

/*
 * Thanks to gcc we don't have to guess
 * which registers contain sum & w.
 *
 * The macros below operate on the caller's locals `w' (current data
 * pointer), `sum' (32-bit accumulator) and `tmp' (scratch word):
 *
 *   LDTMP(n)  load the 32-bit word located n bytes past w into tmp.
 *   ADD(n)    LDTMP(n), then "addc": sum += tmp, starting a carry chain.
 *   ADDC(n)   LDTMP(n), then "adde": sum += tmp + carry from the
 *             previous add.
 *   MOP       "adde" with tmp = 0: folds the last carry back into sum.
 *
 * NOTE: the carry travels from one asm statement to the next in the
 * processor's carry bit, which is not described in the asm operand
 * lists.  A chain must therefore be written as ADD, ADDC..., MOP with
 * nothing in between that could disturb the carry.
 */
#define LDTMP(n) (tmp = *((u_int *)((u_char *)w + (n))))

#define ADD(n) \
	do { \
		LDTMP(n); \
		__asm__ volatile("addc %0,%0,%2" : "=r" (sum) : "0" (sum), "r" (tmp)); \
	} while (0)

#define ADDC(n) \
	do { \
		LDTMP(n); \
		__asm__ volatile("adde %0,%0,%2" : "=r" (sum) : "0" (sum), "r" (tmp)); \
	} while (0)

#define MOP \
	do { \
		tmp = 0; \
		__asm__ volatile("adde %0,%0,%2" : "=r" (sum) : "0" (sum), "r" (tmp)); \
	} while (0)

/*
 * LOAD(n) reads the single byte located n bytes past w through a
 * volatile pointer and stores it in the caller's `junk'.  The value is
 * not used; the read itself is the point (see the use in in_cksum).
 */
#define LOAD(n) (junk = (u_char) *((volatile u_char *) w + (n)))
38 | |
39 | |
40 | int |
41 | in_cksum(m, len) |
42 | register struct mbuf *m; |
43 | register int len; |
44 | { |
45 | register u_short *w; |
46 | register unsigned sum = 0; |
47 | register unsigned tmp; |
48 | register int mlen = 0; |
49 | int byte_swapped = 0; |
50 | union { char c[2]; u_short s; } su; |
51 | |
52 | for (;m && len; m = m->m_next) { |
53 | if (m->m_len == 0) |
54 | continue; |
55 | w = mtod(m, u_short *); |
56 | if (mlen == -1) { |
57 | /* |
58 | * The first byte of this mbuf is the continuation |
59 | * of a word spanning between this mbuf and the |
60 | * last mbuf. |
61 | */ |
62 | |
63 | /* su.c[0] is already saved when scanning previous |
64 | * mbuf. sum was REDUCEd when we found mlen == -1 |
65 | */ |
66 | su.c[1] = *(u_char *)w; |
67 | sum += su.s; |
68 | w = (u_short *)((char *)w + 1); |
69 | mlen = m->m_len - 1; |
70 | len--; |
71 | } else |
72 | mlen = m->m_len; |
73 | if (len < mlen) |
74 | mlen = len; |
75 | len -= mlen; |
76 | /* |
77 | * Force to long boundary so we do longword aligned |
78 | * memory operations |
79 | */ |
80 | if (3 & (int) w) { |
81 | REDUCE; |
82 | if ((1 & (int) w) && (mlen > 0)) { |
83 | sum <<= 8; |
84 | su.c[0] = *(char *)w; |
85 | w = (u_short *)((char *)w + 1); |
86 | mlen--; |
87 | byte_swapped = 1; |
88 | } |
89 | if ((2 & (int) w) && (mlen >= 2)) { |
90 | sum += *w++; |
91 | mlen -= 2; |
92 | } |
93 | } |
94 | /* |
95 | * Advance to a cache line boundary. |
96 | */ |
97 | if (4 & (int) w && mlen >= 4) { |
98 | ADD(0); |
99 | MOP; |
100 | w += 2; |
101 | mlen -= 4; |
102 | } |
103 | if (8 & (int) w && mlen >= 8) { |
104 | ADD(0); |
105 | ADDC(4); |
106 | MOP; |
107 | w += 4; |
108 | mlen -= 8; |
109 | } |
110 | /* |
111 | * Do as much of the checksum as possible 32 bits at at time. |
112 | * In fact, this loop is unrolled to make overhead from |
113 | * branches &c small. |
114 | */ |
115 | mlen -= 1; |
116 | while ((mlen -= 32) >= 0) { |
117 | u_char junk; |
118 | /* |
119 | * Add with carry 16 words and fold in the last |
120 | * carry by adding a 0 with carry. |
121 | * |
122 | * The early ADD(16) and the LOAD(32) are intended |
123 | * to help get the data into the cache. |
124 | */ |
125 | ADD(16); |
126 | ADDC(0); |
127 | ADDC(4); |
128 | ADDC(8); |
129 | ADDC(12); |
130 | LOAD(32); |
131 | ADDC(20); |
132 | ADDC(24); |
133 | ADDC(28); |
134 | MOP; |
135 | w += 16; |
136 | } |
137 | mlen += 32 + 1; |
138 | if (mlen >= 32) { |
139 | ADD(16); |
140 | ADDC(0); |
141 | ADDC(4); |
142 | ADDC(8); |
143 | ADDC(12); |
144 | ADDC(20); |
145 | ADDC(24); |
146 | ADDC(28); |
147 | MOP; |
148 | w += 16; |
149 | mlen -= 32; |
150 | } |
151 | if (mlen >= 16) { |
152 | ADD(0); |
153 | ADDC(4); |
154 | ADDC(8); |
155 | ADDC(12); |
156 | MOP; |
157 | w += 8; |
158 | mlen -= 16; |
159 | } |
160 | if (mlen >= 8) { |
161 | ADD(0); |
162 | ADDC(4); |
163 | MOP; |
164 | w += 4; |
165 | mlen -= 8; |
166 | } |
167 | if (mlen == 0 && byte_swapped == 0) |
168 | continue; /* worth 1% maybe ?? */ |
169 | REDUCE; |
170 | while ((mlen -= 2) >= 0) { |
171 | sum += *w++; |
172 | } |
173 | if (byte_swapped) { |
174 | sum <<= 8; |
175 | byte_swapped = 0; |
176 | if (mlen == -1) { |
177 | su.c[1] = *(char *)w; |
178 | sum += su.s; |
179 | mlen = 0; |
180 | } else |
181 | mlen = -1; |
182 | } else if (mlen == -1) |
183 | /* |
184 | * This mbuf has odd number of bytes. |
185 | * There could be a word split betwen |
186 | * this mbuf and the next mbuf. |
187 | * Save the last byte (to prepend to next mbuf). |
188 | */ |
189 | su.c[0] = *(char *)w; |
190 | } |
191 | |
192 | if (len) |
193 | puts("cksum: out of data"); |
194 | if (mlen == -1) { |
195 | /* The last mbuf has odd # of bytes. Follow the |
196 | standard (the odd byte is shifted left by 8 bits) */ |
197 | su.c[1] = 0; |
198 | sum += su.s; |
199 | } |
200 | REDUCE; |
201 | return (~sum & 0xffff); |
202 | } |
