1 | #include <freebsd/machine/rtems-bsd-config.h> |
---|
2 | |
---|
3 | /*- |
---|
4 | * Copyright (c) 2002 Andre Oppermann, Internet Business Solutions AG |
---|
5 | * All rights reserved. |
---|
6 | * |
---|
7 | * Redistribution and use in source and binary forms, with or without |
---|
8 | * modification, are permitted provided that the following conditions |
---|
9 | * are met: |
---|
10 | * 1. Redistributions of source code must retain the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer. |
---|
12 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
13 | * notice, this list of conditions and the following disclaimer in the |
---|
14 | * documentation and/or other materials provided with the distribution. |
---|
15 | * 3. The name of the author may not be used to endorse or promote |
---|
16 | * products derived from this software without specific prior written |
---|
17 | * permission. |
---|
18 | * |
---|
19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
---|
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
---|
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
29 | * SUCH DAMAGE. |
---|
30 | */ |
---|
31 | |
---|
32 | /* |
---|
33 | * The tcp_hostcache moves the tcp-specific cached metrics from the routing |
---|
34 | * table to a dedicated structure indexed by the remote IP address. It keeps |
---|
35 | * information on the measured TCP parameters of past TCP sessions to allow |
---|
36 | * better initial start values to be used with later connections to/from the |
---|
37 | * same source. Depending on the network parameters (delay, bandwidth, max |
---|
38 | * MTU, congestion window) between local and remote sites, this can lead to |
---|
39 | * significant speed-ups for new TCP connections after the first one. |
---|
40 | * |
---|
41 | * Due to the tcp_hostcache, all TCP-specific metrics information in the |
---|
42 | * routing table have been removed. The inpcb no longer keeps a pointer to |
---|
43 | * the routing entry, and protocol-initiated route cloning has been removed |
---|
44 | * as well. With these changes, the routing table has gone back to being |
---|
45 | * more lightweight and only carries information related to packet forwarding. |
---|
46 | * |
---|
47 | * tcp_hostcache is designed for multiple concurrent access in SMP |
---|
48 | * environments and high contention. All bucket rows have their own lock and |
---|
49 | * thus multiple lookups and modifies can be done at the same time as long as |
---|
50 | * they are in different bucket rows. If a request for insertion of a new |
---|
51 | * record can't be satisfied, it simply returns an empty structure. Nobody |
---|
52 | * and nothing outside of tcp_hostcache.c will ever point directly to any |
---|
53 | * entry in the tcp_hostcache. All communication is done in an |
---|
54 | * object-oriented way and only functions of tcp_hostcache will manipulate |
---|
55 | * hostcache entries. Otherwise, we are unable to achieve good behaviour in |
---|
56 | * concurrent access situations. Since tcp_hostcache is only caching |
---|
57 | * information, there are no fatal consequences if we either can't satisfy |
---|
58 | * any particular request or have to drop/overwrite an existing entry because |
---|
59 | * of bucket limit memory constraints. |
---|
60 | */ |
---|
61 | |
---|
62 | /* |
---|
63 | * Many thanks to jlemon for basic structure of tcp_syncache which is being |
---|
64 | * followed here. |
---|
65 | */ |
---|
66 | |
---|
67 | #include <freebsd/sys/cdefs.h> |
---|
68 | __FBSDID("$FreeBSD$"); |
---|
69 | |
---|
70 | #include <freebsd/local/opt_inet6.h> |
---|
71 | |
---|
72 | #include <freebsd/sys/param.h> |
---|
73 | #include <freebsd/sys/systm.h> |
---|
74 | #include <freebsd/sys/kernel.h> |
---|
75 | #include <freebsd/sys/lock.h> |
---|
76 | #include <freebsd/sys/mutex.h> |
---|
77 | #include <freebsd/sys/malloc.h> |
---|
78 | #include <freebsd/sys/socket.h> |
---|
79 | #include <freebsd/sys/socketvar.h> |
---|
80 | #include <freebsd/sys/sysctl.h> |
---|
81 | |
---|
82 | #include <freebsd/net/if.h> |
---|
83 | #include <freebsd/net/route.h> |
---|
84 | #include <freebsd/net/vnet.h> |
---|
85 | |
---|
86 | #include <freebsd/netinet/in.h> |
---|
87 | #include <freebsd/netinet/in_systm.h> |
---|
88 | #include <freebsd/netinet/ip.h> |
---|
89 | #include <freebsd/netinet/in_var.h> |
---|
90 | #include <freebsd/netinet/in_pcb.h> |
---|
91 | #include <freebsd/netinet/ip_var.h> |
---|
92 | #ifdef INET6 |
---|
93 | #include <freebsd/netinet/ip6.h> |
---|
94 | #include <freebsd/netinet6/ip6_var.h> |
---|
95 | #endif |
---|
96 | #include <freebsd/netinet/tcp.h> |
---|
97 | #include <freebsd/netinet/tcp_var.h> |
---|
98 | #include <freebsd/netinet/tcp_hostcache.h> |
---|
99 | #ifdef INET6 |
---|
100 | #include <freebsd/netinet6/tcp6_var.h> |
---|
101 | #endif |
---|
102 | |
---|
103 | #include <freebsd/vm/uma.h> |
---|
104 | |
---|
105 | /* Arbitrary values */ |
---|
106 | #define TCP_HOSTCACHE_HASHSIZE 512 |
---|
107 | #define TCP_HOSTCACHE_BUCKETLIMIT 30 |
---|
108 | #define TCP_HOSTCACHE_EXPIRE 60*60 /* one hour */ |
---|
109 | #define TCP_HOSTCACHE_PRUNE 5*60 /* every 5 minutes */ |
---|
110 | |
---|
111 | static VNET_DEFINE(struct tcp_hostcache, tcp_hostcache); |
---|
112 | #define V_tcp_hostcache VNET(tcp_hostcache) |
---|
113 | |
---|
114 | static VNET_DEFINE(struct callout, tcp_hc_callout); |
---|
115 | #define V_tcp_hc_callout VNET(tcp_hc_callout) |
---|
116 | |
---|
117 | static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *); |
---|
118 | static struct hc_metrics *tcp_hc_insert(struct in_conninfo *); |
---|
119 | static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS); |
---|
120 | static void tcp_hc_purge_internal(int); |
---|
121 | static void tcp_hc_purge(void *); |
---|
122 | |
---|
123 | SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, |
---|
124 | "TCP Host cache"); |
---|
125 | |
---|
126 | SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN, |
---|
127 | &VNET_NAME(tcp_hostcache.cache_limit), 0, |
---|
128 | "Overall entry limit for hostcache"); |
---|
129 | |
---|
130 | SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN, |
---|
131 | &VNET_NAME(tcp_hostcache.hashsize), 0, |
---|
132 | "Size of TCP hostcache hashtable"); |
---|
133 | |
---|
134 | SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit, |
---|
135 | CTLFLAG_RDTUN, &VNET_NAME(tcp_hostcache.bucket_limit), 0, |
---|
136 | "Per-bucket hash limit for hostcache"); |
---|
137 | |
---|
138 | SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD, |
---|
139 | &VNET_NAME(tcp_hostcache.cache_count), 0, |
---|
140 | "Current number of entries in hostcache"); |
---|
141 | |
---|
142 | SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_RW, |
---|
143 | &VNET_NAME(tcp_hostcache.expire), 0, |
---|
144 | "Expire time of TCP hostcache entries"); |
---|
145 | |
---|
146 | SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, prune, CTLFLAG_RW, |
---|
147 | &VNET_NAME(tcp_hostcache.prune), 0, |
---|
148 | "Time between purge runs"); |
---|
149 | |
---|
150 | SYSCTL_VNET_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_RW, |
---|
151 | &VNET_NAME(tcp_hostcache.purgeall), 0, |
---|
152 | "Expire all entires on next purge run"); |
---|
153 | |
---|
154 | SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list, |
---|
155 | CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0, |
---|
156 | sysctl_tcp_hc_list, "A", "List of all hostcache entries"); |
---|
157 | |
---|
158 | |
---|
159 | static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache"); |
---|
160 | |
---|
161 | #define HOSTCACHE_HASH(ip) \ |
---|
162 | (((ip)->s_addr ^ ((ip)->s_addr >> 7) ^ ((ip)->s_addr >> 17)) & \ |
---|
163 | V_tcp_hostcache.hashmask) |
---|
164 | |
---|
165 | /* XXX: What is the recommended hash to get good entropy for IPv6 addresses? */ |
---|
166 | #define HOSTCACHE_HASH6(ip6) \ |
---|
167 | (((ip6)->s6_addr32[0] ^ \ |
---|
168 | (ip6)->s6_addr32[1] ^ \ |
---|
169 | (ip6)->s6_addr32[2] ^ \ |
---|
170 | (ip6)->s6_addr32[3]) & \ |
---|
171 | V_tcp_hostcache.hashmask) |
---|
172 | |
---|
173 | #define THC_LOCK(lp) mtx_lock(lp) |
---|
174 | #define THC_UNLOCK(lp) mtx_unlock(lp) |
---|
175 | |
---|
void
tcp_hc_init(void)
{
	int i;

	/*
	 * Initialize hostcache structures with their compile-time defaults.
	 */
	V_tcp_hostcache.cache_count = 0;
	V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
	V_tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT;
	V_tcp_hostcache.cache_limit =
	    V_tcp_hostcache.hashsize * V_tcp_hostcache.bucket_limit;
	V_tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
	V_tcp_hostcache.prune = TCP_HOSTCACHE_PRUNE;

	/*
	 * Let loader tunables override the defaults.
	 * NOTE(review): cache_limit is computed from the default hashsize and
	 * bucket_limit above; overriding hashsize/bucketlimit via tunables
	 * does not recompute it unless the cachelimit tunable is also set --
	 * confirm this is intended.
	 */
	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize",
	    &V_tcp_hostcache.hashsize);
	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit",
	    &V_tcp_hostcache.cache_limit);
	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit",
	    &V_tcp_hostcache.bucket_limit);
	/* The hashmask computed below requires a power-of-2 table size. */
	if (!powerof2(V_tcp_hostcache.hashsize)) {
		printf("WARNING: hostcache hash size is not a power of 2.\n");
		V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; /* default */
	}
	V_tcp_hostcache.hashmask = V_tcp_hostcache.hashsize - 1;

	/*
	 * Allocate the hash table, zeroed so every row starts empty.
	 */
	V_tcp_hostcache.hashbase = (struct hc_head *)
	    malloc(V_tcp_hostcache.hashsize * sizeof(struct hc_head),
	    M_HOSTCACHE, M_WAITOK | M_ZERO);

	/*
	 * Initialize the hash buckets: empty tailq plus a per-row mutex,
	 * which is what allows concurrent access to distinct rows.
	 */
	for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
		TAILQ_INIT(&V_tcp_hostcache.hashbase[i].hch_bucket);
		V_tcp_hostcache.hashbase[i].hch_length = 0;
		mtx_init(&V_tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry",
		    NULL, MTX_DEF);
	}

	/*
	 * Allocate the hostcache entry zone, capped at the overall limit.
	 */
	V_tcp_hostcache.zone =
	    uma_zcreate("hostcache", sizeof(struct hc_metrics),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	uma_zone_set_max(V_tcp_hostcache.zone, V_tcp_hostcache.cache_limit);

	/*
	 * Set up periodic cache cleanup; tcp_hc_purge() re-arms the callout
	 * itself on every run.
	 */
	callout_init(&V_tcp_hc_callout, CALLOUT_MPSAFE);
	callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz,
	    tcp_hc_purge, curvnet);
}
---|
236 | |
---|
#ifdef VIMAGE
/*
 * Tear the hostcache of this vnet down again: stop the purge callout,
 * drop every entry, then release the zone, the row locks and the table.
 */
void
tcp_hc_destroy(void)
{
	int row;

	/* Make sure the periodic purge is no longer running. */
	callout_drain(&V_tcp_hc_callout);

	/* Throw away every cached entry. */
	tcp_hc_purge_internal(1);

	/* Release the entry zone and the hash table with its row locks. */
	uma_zdestroy(V_tcp_hostcache.zone);

	for (row = 0; row < V_tcp_hostcache.hashsize; row++)
		mtx_destroy(&V_tcp_hostcache.hashbase[row].hch_mtx);
	free(V_tcp_hostcache.hashbase, M_HOSTCACHE);
}
#endif
---|
256 | |
---|
257 | /* |
---|
258 | * Internal function: look up an entry in the hostcache or return NULL. |
---|
259 | * |
---|
260 | * If an entry has been returned, the caller becomes responsible for |
---|
261 | * unlocking the bucket row after he is done reading/modifying the entry. |
---|
262 | */ |
---|
263 | static struct hc_metrics * |
---|
264 | tcp_hc_lookup(struct in_conninfo *inc) |
---|
265 | { |
---|
266 | int hash; |
---|
267 | struct hc_head *hc_head; |
---|
268 | struct hc_metrics *hc_entry; |
---|
269 | |
---|
270 | KASSERT(inc != NULL, ("tcp_hc_lookup with NULL in_conninfo pointer")); |
---|
271 | |
---|
272 | /* |
---|
273 | * Hash the foreign ip address. |
---|
274 | */ |
---|
275 | if (inc->inc_flags & INC_ISIPV6) |
---|
276 | hash = HOSTCACHE_HASH6(&inc->inc6_faddr); |
---|
277 | else |
---|
278 | hash = HOSTCACHE_HASH(&inc->inc_faddr); |
---|
279 | |
---|
280 | hc_head = &V_tcp_hostcache.hashbase[hash]; |
---|
281 | |
---|
282 | /* |
---|
283 | * Acquire lock for this bucket row; we release the lock if we don't |
---|
284 | * find an entry, otherwise the caller has to unlock after he is |
---|
285 | * done. |
---|
286 | */ |
---|
287 | THC_LOCK(&hc_head->hch_mtx); |
---|
288 | |
---|
289 | /* |
---|
290 | * Iterate through entries in bucket row looking for a match. |
---|
291 | */ |
---|
292 | TAILQ_FOREACH(hc_entry, &hc_head->hch_bucket, rmx_q) { |
---|
293 | if (inc->inc_flags & INC_ISIPV6) { |
---|
294 | if (memcmp(&inc->inc6_faddr, &hc_entry->ip6, |
---|
295 | sizeof(inc->inc6_faddr)) == 0) |
---|
296 | return hc_entry; |
---|
297 | } else { |
---|
298 | if (memcmp(&inc->inc_faddr, &hc_entry->ip4, |
---|
299 | sizeof(inc->inc_faddr)) == 0) |
---|
300 | return hc_entry; |
---|
301 | } |
---|
302 | } |
---|
303 | |
---|
304 | /* |
---|
305 | * We were unsuccessful and didn't find anything. |
---|
306 | */ |
---|
307 | THC_UNLOCK(&hc_head->hch_mtx); |
---|
308 | return NULL; |
---|
309 | } |
---|
310 | |
---|
/*
 * Internal function: insert an entry into the hostcache or return NULL if
 * unable to allocate a new one.
 *
 * If an entry has been returned, the caller becomes responsible for
 * unlocking the bucket row after he is done reading/modifying the entry.
 */
static struct hc_metrics *
tcp_hc_insert(struct in_conninfo *inc)
{
	int hash;
	struct hc_head *hc_head;
	struct hc_metrics *hc_entry;

	KASSERT(inc != NULL, ("tcp_hc_insert with NULL in_conninfo pointer"));

	/*
	 * Hash the foreign ip address to select the bucket row.
	 */
	if (inc->inc_flags & INC_ISIPV6)
		hash = HOSTCACHE_HASH6(&inc->inc6_faddr);
	else
		hash = HOSTCACHE_HASH(&inc->inc_faddr);

	hc_head = &V_tcp_hostcache.hashbase[hash];

	/*
	 * Acquire lock for this bucket row; we release the lock if we don't
	 * find an entry, otherwise the caller has to unlock after he is
	 * done.
	 */
	THC_LOCK(&hc_head->hch_mtx);

	/*
	 * If the bucket limit is reached, reuse the least-used element.
	 */
	if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit ||
	    V_tcp_hostcache.cache_count >= V_tcp_hostcache.cache_limit) {
		/* TAILQ_LAST is the least recently promoted entry. */
		hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
		/*
		 * At first we were dropping the last element, just to
		 * reacquire it in the next two lines again, which isn't very
		 * efficient. Instead just reuse the least used element.
		 * We may drop something that is still "in-use" but we can be
		 * "lossy".
		 * Just give up if this bucket row is empty and we don't have
		 * anything to replace.
		 */
		if (hc_entry == NULL) {
			THC_UNLOCK(&hc_head->hch_mtx);
			return NULL;
		}
		/* Unlink the victim; it is zeroed and re-inserted below. */
		TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
		V_tcp_hostcache.hashbase[hash].hch_length--;
		V_tcp_hostcache.cache_count--;
		TCPSTAT_INC(tcps_hc_bucketoverflow);
#if 0
		uma_zfree(V_tcp_hostcache.zone, hc_entry);
#endif
	} else {
		/*
		 * Allocate a new entry, or balk if not possible.
		 * M_NOWAIT: we hold the row mutex and must not sleep.
		 */
		hc_entry = uma_zalloc(V_tcp_hostcache.zone, M_NOWAIT);
		if (hc_entry == NULL) {
			THC_UNLOCK(&hc_head->hch_mtx);
			return NULL;
		}
	}

	/*
	 * Initialize basic information of hostcache entry: clear all
	 * metrics, record the foreign address, the owning row (used by
	 * callers to unlock), and the initial lifetime.
	 */
	bzero(hc_entry, sizeof(*hc_entry));
	if (inc->inc_flags & INC_ISIPV6)
		bcopy(&inc->inc6_faddr, &hc_entry->ip6, sizeof(hc_entry->ip6));
	else
		hc_entry->ip4 = inc->inc_faddr;
	hc_entry->rmx_head = hc_head;
	hc_entry->rmx_expire = V_tcp_hostcache.expire;

	/*
	 * Put it upfront so it is found quickly by subsequent lookups.
	 */
	TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
	V_tcp_hostcache.hashbase[hash].hch_length++;
	V_tcp_hostcache.cache_count++;
	TCPSTAT_INC(tcps_hc_added);

	return hc_entry;
}
---|
402 | |
---|
403 | /* |
---|
404 | * External function: look up an entry in the hostcache and fill out the |
---|
405 | * supplied TCP metrics structure. Fills in NULL when no entry was found or |
---|
406 | * a value is not set. |
---|
407 | */ |
---|
408 | void |
---|
409 | tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite) |
---|
410 | { |
---|
411 | struct hc_metrics *hc_entry; |
---|
412 | |
---|
413 | /* |
---|
414 | * Find the right bucket. |
---|
415 | */ |
---|
416 | hc_entry = tcp_hc_lookup(inc); |
---|
417 | |
---|
418 | /* |
---|
419 | * If we don't have an existing object. |
---|
420 | */ |
---|
421 | if (hc_entry == NULL) { |
---|
422 | bzero(hc_metrics_lite, sizeof(*hc_metrics_lite)); |
---|
423 | return; |
---|
424 | } |
---|
425 | hc_entry->rmx_hits++; |
---|
426 | hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ |
---|
427 | |
---|
428 | hc_metrics_lite->rmx_mtu = hc_entry->rmx_mtu; |
---|
429 | hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh; |
---|
430 | hc_metrics_lite->rmx_rtt = hc_entry->rmx_rtt; |
---|
431 | hc_metrics_lite->rmx_rttvar = hc_entry->rmx_rttvar; |
---|
432 | hc_metrics_lite->rmx_bandwidth = hc_entry->rmx_bandwidth; |
---|
433 | hc_metrics_lite->rmx_cwnd = hc_entry->rmx_cwnd; |
---|
434 | hc_metrics_lite->rmx_sendpipe = hc_entry->rmx_sendpipe; |
---|
435 | hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe; |
---|
436 | |
---|
437 | /* |
---|
438 | * Unlock bucket row. |
---|
439 | */ |
---|
440 | THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); |
---|
441 | } |
---|
442 | |
---|
443 | /* |
---|
444 | * External function: look up an entry in the hostcache and return the |
---|
445 | * discovered path MTU. Returns NULL if no entry is found or value is not |
---|
446 | * set. |
---|
447 | */ |
---|
448 | u_long |
---|
449 | tcp_hc_getmtu(struct in_conninfo *inc) |
---|
450 | { |
---|
451 | struct hc_metrics *hc_entry; |
---|
452 | u_long mtu; |
---|
453 | |
---|
454 | hc_entry = tcp_hc_lookup(inc); |
---|
455 | if (hc_entry == NULL) { |
---|
456 | return 0; |
---|
457 | } |
---|
458 | hc_entry->rmx_hits++; |
---|
459 | hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ |
---|
460 | |
---|
461 | mtu = hc_entry->rmx_mtu; |
---|
462 | THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); |
---|
463 | return mtu; |
---|
464 | } |
---|
465 | |
---|
466 | /* |
---|
467 | * External function: update the MTU value of an entry in the hostcache. |
---|
468 | * Creates a new entry if none was found. |
---|
469 | */ |
---|
470 | void |
---|
471 | tcp_hc_updatemtu(struct in_conninfo *inc, u_long mtu) |
---|
472 | { |
---|
473 | struct hc_metrics *hc_entry; |
---|
474 | |
---|
475 | /* |
---|
476 | * Find the right bucket. |
---|
477 | */ |
---|
478 | hc_entry = tcp_hc_lookup(inc); |
---|
479 | |
---|
480 | /* |
---|
481 | * If we don't have an existing object, try to insert a new one. |
---|
482 | */ |
---|
483 | if (hc_entry == NULL) { |
---|
484 | hc_entry = tcp_hc_insert(inc); |
---|
485 | if (hc_entry == NULL) |
---|
486 | return; |
---|
487 | } |
---|
488 | hc_entry->rmx_updates++; |
---|
489 | hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ |
---|
490 | |
---|
491 | hc_entry->rmx_mtu = mtu; |
---|
492 | |
---|
493 | /* |
---|
494 | * Put it upfront so we find it faster next time. |
---|
495 | */ |
---|
496 | TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q); |
---|
497 | TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q); |
---|
498 | |
---|
499 | /* |
---|
500 | * Unlock bucket row. |
---|
501 | */ |
---|
502 | THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); |
---|
503 | } |
---|
504 | |
---|
505 | /* |
---|
506 | * External function: update the TCP metrics of an entry in the hostcache. |
---|
507 | * Creates a new entry if none was found. |
---|
508 | */ |
---|
509 | void |
---|
510 | tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml) |
---|
511 | { |
---|
512 | struct hc_metrics *hc_entry; |
---|
513 | |
---|
514 | hc_entry = tcp_hc_lookup(inc); |
---|
515 | if (hc_entry == NULL) { |
---|
516 | hc_entry = tcp_hc_insert(inc); |
---|
517 | if (hc_entry == NULL) |
---|
518 | return; |
---|
519 | } |
---|
520 | hc_entry->rmx_updates++; |
---|
521 | hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ |
---|
522 | |
---|
523 | if (hcml->rmx_rtt != 0) { |
---|
524 | if (hc_entry->rmx_rtt == 0) |
---|
525 | hc_entry->rmx_rtt = hcml->rmx_rtt; |
---|
526 | else |
---|
527 | hc_entry->rmx_rtt = |
---|
528 | (hc_entry->rmx_rtt + hcml->rmx_rtt) / 2; |
---|
529 | TCPSTAT_INC(tcps_cachedrtt); |
---|
530 | } |
---|
531 | if (hcml->rmx_rttvar != 0) { |
---|
532 | if (hc_entry->rmx_rttvar == 0) |
---|
533 | hc_entry->rmx_rttvar = hcml->rmx_rttvar; |
---|
534 | else |
---|
535 | hc_entry->rmx_rttvar = |
---|
536 | (hc_entry->rmx_rttvar + hcml->rmx_rttvar) / 2; |
---|
537 | TCPSTAT_INC(tcps_cachedrttvar); |
---|
538 | } |
---|
539 | if (hcml->rmx_ssthresh != 0) { |
---|
540 | if (hc_entry->rmx_ssthresh == 0) |
---|
541 | hc_entry->rmx_ssthresh = hcml->rmx_ssthresh; |
---|
542 | else |
---|
543 | hc_entry->rmx_ssthresh = |
---|
544 | (hc_entry->rmx_ssthresh + hcml->rmx_ssthresh) / 2; |
---|
545 | TCPSTAT_INC(tcps_cachedssthresh); |
---|
546 | } |
---|
547 | if (hcml->rmx_bandwidth != 0) { |
---|
548 | if (hc_entry->rmx_bandwidth == 0) |
---|
549 | hc_entry->rmx_bandwidth = hcml->rmx_bandwidth; |
---|
550 | else |
---|
551 | hc_entry->rmx_bandwidth = |
---|
552 | (hc_entry->rmx_bandwidth + hcml->rmx_bandwidth) / 2; |
---|
553 | /* TCPSTAT_INC(tcps_cachedbandwidth); */ |
---|
554 | } |
---|
555 | if (hcml->rmx_cwnd != 0) { |
---|
556 | if (hc_entry->rmx_cwnd == 0) |
---|
557 | hc_entry->rmx_cwnd = hcml->rmx_cwnd; |
---|
558 | else |
---|
559 | hc_entry->rmx_cwnd = |
---|
560 | (hc_entry->rmx_cwnd + hcml->rmx_cwnd) / 2; |
---|
561 | /* TCPSTAT_INC(tcps_cachedcwnd); */ |
---|
562 | } |
---|
563 | if (hcml->rmx_sendpipe != 0) { |
---|
564 | if (hc_entry->rmx_sendpipe == 0) |
---|
565 | hc_entry->rmx_sendpipe = hcml->rmx_sendpipe; |
---|
566 | else |
---|
567 | hc_entry->rmx_sendpipe = |
---|
568 | (hc_entry->rmx_sendpipe + hcml->rmx_sendpipe) /2; |
---|
569 | /* TCPSTAT_INC(tcps_cachedsendpipe); */ |
---|
570 | } |
---|
571 | if (hcml->rmx_recvpipe != 0) { |
---|
572 | if (hc_entry->rmx_recvpipe == 0) |
---|
573 | hc_entry->rmx_recvpipe = hcml->rmx_recvpipe; |
---|
574 | else |
---|
575 | hc_entry->rmx_recvpipe = |
---|
576 | (hc_entry->rmx_recvpipe + hcml->rmx_recvpipe) /2; |
---|
577 | /* TCPSTAT_INC(tcps_cachedrecvpipe); */ |
---|
578 | } |
---|
579 | |
---|
580 | TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q); |
---|
581 | TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q); |
---|
582 | THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); |
---|
583 | } |
---|
584 | |
---|
585 | /* |
---|
586 | * Sysctl function: prints the list and values of all hostcache entries in |
---|
587 | * unsorted order. |
---|
588 | */ |
---|
589 | static int |
---|
590 | sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) |
---|
591 | { |
---|
592 | int bufsize; |
---|
593 | int linesize = 128; |
---|
594 | char *p, *buf; |
---|
595 | int len, i, error; |
---|
596 | struct hc_metrics *hc_entry; |
---|
597 | #ifdef INET6 |
---|
598 | char ip6buf[INET6_ADDRSTRLEN]; |
---|
599 | #endif |
---|
600 | |
---|
601 | bufsize = linesize * (V_tcp_hostcache.cache_count + 1); |
---|
602 | |
---|
603 | p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO); |
---|
604 | |
---|
605 | len = snprintf(p, linesize, |
---|
606 | "\nIP address MTU SSTRESH RTT RTTVAR BANDWIDTH " |
---|
607 | " CWND SENDPIPE RECVPIPE HITS UPD EXP\n"); |
---|
608 | p += len; |
---|
609 | |
---|
610 | #define msec(u) (((u) + 500) / 1000) |
---|
611 | for (i = 0; i < V_tcp_hostcache.hashsize; i++) { |
---|
612 | THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); |
---|
613 | TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket, |
---|
614 | rmx_q) { |
---|
615 | len = snprintf(p, linesize, |
---|
616 | "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu " |
---|
617 | "%4lu %4lu %4i\n", |
---|
618 | hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) : |
---|
619 | #ifdef INET6 |
---|
620 | ip6_sprintf(ip6buf, &hc_entry->ip6), |
---|
621 | #else |
---|
622 | "IPv6?", |
---|
623 | #endif |
---|
624 | hc_entry->rmx_mtu, |
---|
625 | hc_entry->rmx_ssthresh, |
---|
626 | msec(hc_entry->rmx_rtt * |
---|
627 | (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), |
---|
628 | msec(hc_entry->rmx_rttvar * |
---|
629 | (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), |
---|
630 | hc_entry->rmx_bandwidth * 8, |
---|
631 | hc_entry->rmx_cwnd, |
---|
632 | hc_entry->rmx_sendpipe, |
---|
633 | hc_entry->rmx_recvpipe, |
---|
634 | hc_entry->rmx_hits, |
---|
635 | hc_entry->rmx_updates, |
---|
636 | hc_entry->rmx_expire); |
---|
637 | p += len; |
---|
638 | } |
---|
639 | THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); |
---|
640 | } |
---|
641 | #undef msec |
---|
642 | error = SYSCTL_OUT(req, buf, p - buf); |
---|
643 | free(buf, M_TEMP); |
---|
644 | return(error); |
---|
645 | } |
---|
646 | |
---|
647 | /* |
---|
648 | * Caller has to make sure the curvnet is set properly. |
---|
649 | */ |
---|
650 | static void |
---|
651 | tcp_hc_purge_internal(int all) |
---|
652 | { |
---|
653 | struct hc_metrics *hc_entry, *hc_next; |
---|
654 | int i; |
---|
655 | |
---|
656 | for (i = 0; i < V_tcp_hostcache.hashsize; i++) { |
---|
657 | THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); |
---|
658 | TAILQ_FOREACH_SAFE(hc_entry, |
---|
659 | &V_tcp_hostcache.hashbase[i].hch_bucket, rmx_q, hc_next) { |
---|
660 | if (all || hc_entry->rmx_expire <= 0) { |
---|
661 | TAILQ_REMOVE(&V_tcp_hostcache.hashbase[i].hch_bucket, |
---|
662 | hc_entry, rmx_q); |
---|
663 | uma_zfree(V_tcp_hostcache.zone, hc_entry); |
---|
664 | V_tcp_hostcache.hashbase[i].hch_length--; |
---|
665 | V_tcp_hostcache.cache_count--; |
---|
666 | } else |
---|
667 | hc_entry->rmx_expire -= V_tcp_hostcache.prune; |
---|
668 | } |
---|
669 | THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); |
---|
670 | } |
---|
671 | } |
---|
672 | |
---|
673 | /* |
---|
674 | * Expire and purge (old|all) entries in the tcp_hostcache. Runs |
---|
675 | * periodically from the callout. |
---|
676 | */ |
---|
677 | static void |
---|
678 | tcp_hc_purge(void *arg) |
---|
679 | { |
---|
680 | CURVNET_SET((struct vnet *) arg); |
---|
681 | int all = 0; |
---|
682 | |
---|
683 | if (V_tcp_hostcache.purgeall) { |
---|
684 | all = 1; |
---|
685 | V_tcp_hostcache.purgeall = 0; |
---|
686 | } |
---|
687 | |
---|
688 | tcp_hc_purge_internal(all); |
---|
689 | |
---|
690 | callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz, |
---|
691 | tcp_hc_purge, arg); |
---|
692 | CURVNET_RESTORE(); |
---|
693 | } |
---|