1 | /* |
---|
2 | * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 |
---|
3 | * The Regents of the University of California. All rights reserved. |
---|
4 | * |
---|
5 | * Redistribution and use in source and binary forms, with or without |
---|
6 | * modification, are permitted provided that the following conditions |
---|
7 | * are met: |
---|
8 | * 1. Redistributions of source code must retain the above copyright |
---|
9 | * notice, this list of conditions and the following disclaimer. |
---|
10 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer in the |
---|
12 | * documentation and/or other materials provided with the distribution. |
---|
13 | * 4. Neither the name of the University nor the names of its contributors |
---|
14 | * may be used to endorse or promote products derived from this software |
---|
15 | * without specific prior written permission. |
---|
16 | * |
---|
17 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
---|
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
---|
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
27 | * SUCH DAMAGE. |
---|
28 | * |
---|
29 | * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 |
---|
30 | * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.226 2005/05/07 00:41:36 cperciva Exp $ |
---|
31 | */ |
---|
32 | |
---|
33 | /* |
---|
34 | * $Id$ |
---|
35 | */ |
---|
36 | |
---|
37 | #include "opt_tcpdebug.h" |
---|
38 | |
---|
39 | #include <sys/param.h> |
---|
40 | #include <sys/queue.h> |
---|
41 | #include <sys/proc.h> |
---|
42 | #include <sys/systm.h> |
---|
43 | #include <sys/kernel.h> |
---|
44 | #include <sys/sysctl.h> |
---|
45 | #include <sys/malloc.h> |
---|
46 | #include <sys/mbuf.h> |
---|
47 | #include <sys/socket.h> |
---|
48 | #include <sys/socketvar.h> |
---|
49 | #include <sys/protosw.h> |
---|
50 | #include <sys/errno.h> |
---|
51 | |
---|
52 | #include <net/route.h> |
---|
53 | #include <net/if.h> |
---|
54 | |
---|
55 | #define _IP_VHL |
---|
56 | #include <netinet/in.h> |
---|
57 | #include <netinet/in_systm.h> |
---|
58 | #include <netinet/ip.h> |
---|
59 | #include <netinet/in_pcb.h> |
---|
60 | #include <netinet/in_var.h> |
---|
61 | #include <netinet/ip_var.h> |
---|
62 | #include <netinet/ip_icmp.h> |
---|
63 | #include <netinet/tcp.h> |
---|
64 | #include <netinet/tcp_fsm.h> |
---|
65 | #include <netinet/tcp_seq.h> |
---|
66 | #include <netinet/tcp_timer.h> |
---|
67 | #include <netinet/tcp_var.h> |
---|
68 | #include <netinet/tcpip.h> |
---|
69 | #ifdef TCPDEBUG |
---|
70 | #include <netinet/tcp_debug.h> |
---|
71 | #endif |
---|
72 | |
---|
/*
 * Default maximum segment size used when nothing better can be learned
 * from the route or the peer; exported read-write via sysctl.
 */
int	tcp_mssdflt = TCP_MSS;
SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW,
	&tcp_mssdflt , 0, "Default TCP Maximum Segment Size");

/* Non-zero requests the RFC 1323 window-scale and timestamp options
 * on new connections (consulted in tcp_newtcpcb() below). */
static int	tcp_do_rfc1323 = 1;
#if !defined(__rtems__)
static int	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt,
	CTLFLAG_RW, &tcp_rttdflt , 0, "");

SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323,
	CTLFLAG_RW, &tcp_do_rfc1323 , 0, "");
#endif

/* Default notify callback used by tcp_ctlinput() for async errors. */
static void	tcp_notify __P((struct inpcb *, int));

/*
 * Target size of TCP PCB hash table. Will be rounded down to a prime
 * number.
 */
#ifndef TCBHASHSIZE
#define TCBHASHSIZE	128
#endif
---|
96 | |
---|
97 | /* |
---|
98 | * Tcp initialization |
---|
99 | */ |
---|
100 | void |
---|
101 | tcp_init() |
---|
102 | { |
---|
103 | |
---|
104 | tcp_iss = random(); /* wrong, but better than a constant */ |
---|
105 | tcp_ccgen = 1; |
---|
106 | LIST_INIT(&tcb); |
---|
107 | tcbinfo.listhead = &tcb; |
---|
108 | tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask); |
---|
109 | if (max_protohdr < sizeof(struct tcpiphdr)) |
---|
110 | max_protohdr = sizeof(struct tcpiphdr); |
---|
111 | if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) |
---|
112 | panic("tcp_init"); |
---|
113 | } |
---|
114 | |
---|
115 | /* |
---|
116 | * Create template to be used to send tcp packets on a connection. |
---|
117 | * Call after host entry created, allocates an mbuf and fills |
---|
118 | * in a skeletal tcp/ip header, minimizing the amount of work |
---|
119 | * necessary when the connection is used. |
---|
120 | */ |
---|
121 | struct tcpiphdr * |
---|
122 | tcp_template(tp) |
---|
123 | struct tcpcb *tp; |
---|
124 | { |
---|
125 | register struct inpcb *inp = tp->t_inpcb; |
---|
126 | register struct mbuf *m; |
---|
127 | register struct tcpiphdr *n; |
---|
128 | |
---|
129 | if ((n = tp->t_template) == 0) { |
---|
130 | m = m_get(M_DONTWAIT, MT_HEADER); |
---|
131 | if (m == NULL) |
---|
132 | return (0); |
---|
133 | m->m_len = sizeof (struct tcpiphdr); |
---|
134 | n = mtod(m, struct tcpiphdr *); |
---|
135 | } |
---|
136 | n->ti_next = n->ti_prev = 0; |
---|
137 | n->ti_x1 = 0; |
---|
138 | n->ti_pr = IPPROTO_TCP; |
---|
139 | n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip)); |
---|
140 | n->ti_src = inp->inp_laddr; |
---|
141 | n->ti_dst = inp->inp_faddr; |
---|
142 | n->ti_sport = inp->inp_lport; |
---|
143 | n->ti_dport = inp->inp_fport; |
---|
144 | n->ti_seq = 0; |
---|
145 | n->ti_ack = 0; |
---|
146 | n->ti_x2 = 0; |
---|
147 | n->ti_off = 5; |
---|
148 | n->ti_flags = 0; |
---|
149 | n->ti_win = 0; |
---|
150 | n->ti_sum = 0; |
---|
151 | n->ti_urp = 0; |
---|
152 | return (n); |
---|
153 | } |
---|
154 | |
---|
/*
 * Send a single message to the TCP at address specified by
 * the given TCP/IP header. If m == 0, then we make a copy
 * of the tcpiphdr at ti and send directly to the addressed host.
 * This is used to force keep alive messages out using the TCP
 * template for a connection tp->t_template. If flags are given
 * then we send a message back to the TCP which originated the
 * segment ti, and discard the mbuf containing it and any other
 * attached mbufs.
 *
 * In any case the ack and sequence number of the transmitted
 * segment are as specified by the parameters.
 *
 * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
 */
void
tcp_respond(tp, ti, m, ack, seq, flags)
	struct tcpcb *tp;
	register struct tcpiphdr *ti;
	register struct mbuf *m;
	tcp_seq ack, seq;
	int flags;
{
	register int tlen;
	int win = 0;
	struct route *ro = 0;
	struct route sro;	/* scratch route used when there is no tcpcb */

	if (tp) {
		/* Advertise the current receive-buffer space as the window. */
		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
		ro = &tp->t_inpcb->inp_route;
	} else {
		ro = &sro;
		bzero(ro, sizeof *ro);
	}
	if (m == NULL) {
		/* Keepalive case: build a fresh packet from the template. */
		m = m_gethdr(M_DONTWAIT, MT_HEADER);
		if (m == NULL)
			return;
#ifdef TCP_COMPAT_42
		tlen = 1;	/* 4.2BSD keepalives carry one garbage byte */
#else
		tlen = 0;
#endif
		m->m_data += max_linkhdr;
		*mtod(m, struct tcpiphdr *) = *ti;
		ti = mtod(m, struct tcpiphdr *);
		flags = TH_ACK;
	} else {
		/* Reply case: reuse the received mbuf, dropping any chain. */
		m_freem(m->m_next);
		m->m_next = NULL;
		m->m_data = (caddr_t)ti;
		m->m_len = sizeof (struct tcpiphdr);
		tlen = 0;
		/* Swap src/dst so the reply goes back to the originator. */
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
		xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
		xchg(ti->ti_dport, ti->ti_sport, u_short);
#undef xchg
	}
	/* ti_len is the TCP length (header + data) for the pseudo-header. */
	ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
	tlen += sizeof (struct tcpiphdr);
	m->m_len = tlen;
	m->m_pkthdr.len = tlen;
	m->m_pkthdr.rcvif = (struct ifnet *) 0;
	ti->ti_next = ti->ti_prev = 0;
	ti->ti_x1 = 0;
	ti->ti_seq = htonl(seq);
	ti->ti_ack = htonl(ack);
	ti->ti_x2 = 0;
	ti->ti_off = sizeof (struct tcphdr) >> 2;
	ti->ti_flags = flags;
	if (tp)
		/* Scale the advertised window as negotiated for this connection. */
		ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
	else
		ti->ti_win = htons((u_short)win);
	ti->ti_urp = 0;
	/* Checksum must be computed with the ti_sum field zeroed. */
	ti->ti_sum = 0;
	ti->ti_sum = in_cksum(m, tlen);
	/* Rewrite the IP fields clobbered by the pseudo-header overlay. */
	((struct ip *)ti)->ip_len = tlen;
	((struct ip *)ti)->ip_ttl = ip_defttl;
#ifdef TCPDEBUG
	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_OUTPUT, 0, tp, ti, 0);
#endif
	(void) ip_output(m, NULL, ro, 0, NULL);
	/* Release any route the scratch sro acquired during ip_output(). */
	if (ro == &sro && ro->ro_rt) {
		RTFREE(ro->ro_rt);
	}
}
---|
244 | |
---|
245 | /* |
---|
246 | * Create a new TCP control block, making an |
---|
247 | * empty reassembly queue and hooking it to the argument |
---|
248 | * protocol control block. |
---|
249 | */ |
---|
250 | struct tcpcb * |
---|
251 | tcp_newtcpcb(inp) |
---|
252 | struct inpcb *inp; |
---|
253 | { |
---|
254 | struct tcpcb *tp; |
---|
255 | |
---|
256 | tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT); |
---|
257 | if (tp == NULL) |
---|
258 | return ((struct tcpcb *)0); |
---|
259 | bzero((char *) tp, sizeof(struct tcpcb)); |
---|
260 | tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; |
---|
261 | tp->t_maxseg = tp->t_maxopd = tcp_mssdflt; |
---|
262 | |
---|
263 | if (tcp_do_rfc1323) |
---|
264 | tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); |
---|
265 | tp->t_inpcb = inp; |
---|
266 | /* |
---|
267 | * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no |
---|
268 | * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives |
---|
269 | * reasonable initial retransmit time. |
---|
270 | */ |
---|
271 | tp->t_srtt = TCPTV_SRTTBASE; |
---|
272 | tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; |
---|
273 | tp->t_rttmin = TCPTV_MIN; |
---|
274 | tp->t_rxtcur = TCPTV_RTOBASE; |
---|
275 | tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; |
---|
276 | tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; |
---|
277 | inp->inp_ip_ttl = ip_defttl; |
---|
278 | inp->inp_ppcb = (caddr_t)tp; |
---|
279 | return (tp); |
---|
280 | } |
---|
281 | |
---|
282 | /* |
---|
283 | * Drop a TCP connection, reporting |
---|
284 | * the specified error. If connection is synchronized, |
---|
285 | * then send a RST to peer. |
---|
286 | */ |
---|
287 | struct tcpcb * |
---|
288 | tcp_drop(tp, errnum) |
---|
289 | register struct tcpcb *tp; |
---|
290 | int errnum; |
---|
291 | { |
---|
292 | struct socket *so = tp->t_inpcb->inp_socket; |
---|
293 | |
---|
294 | if (TCPS_HAVERCVDSYN(tp->t_state)) { |
---|
295 | tp->t_state = TCPS_CLOSED; |
---|
296 | (void) tcp_output(tp); |
---|
297 | tcpstat.tcps_drops++; |
---|
298 | } else |
---|
299 | tcpstat.tcps_conndrops++; |
---|
300 | if (errnum == ETIMEDOUT && tp->t_softerror) |
---|
301 | errnum = tp->t_softerror; |
---|
302 | so->so_error = errnum; |
---|
303 | return (tcp_close(tp)); |
---|
304 | } |
---|
305 | |
---|
/*
 * Close a TCP control block:
 *	discard all space held by the tcp
 *	discard internet protocol block
 *	wake up any sleepers
 */
struct tcpcb *
tcp_close(tp)
	struct tcpcb *tp;
{
	register struct tcpiphdr *t;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	register struct mbuf *m;
	register struct rtentry *rt;

	/*
	 * If we got enough samples through the srtt filter,
	 * save the rtt and rttvar in the routing entry.
	 * 'Enough' is arbitrarily defined as the 16 samples.
	 * 16 samples is enough for the srtt filter to converge
	 * to within 5% of the correct value; fewer samples and
	 * we could save a very bogus rtt.
	 *
	 * Don't update the default route's characteristics and don't
	 * update anything that the user "locked".
	 */
	if (tp->t_rttupdated >= 16 &&
	    (rt = inp->inp_route.ro_rt) &&
	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
		register u_long i = 0;

		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
			/* Convert srtt from TCP's scaled ticks to route units. */
			i = tp->t_srtt *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
			if (rt->rt_rmx.rmx_rtt && i)
				/*
				 * filter this update to half the old & half
				 * the new values, converting scale.
				 * See route.h and tcp_var.h for a
				 * description of the scaling constants.
				 */
				rt->rt_rmx.rmx_rtt =
				    (rt->rt_rmx.rmx_rtt + i) / 2;
			else
				rt->rt_rmx.rmx_rtt = i;
			tcpstat.tcps_cachedrtt++;
		}
		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
			/* Same filtering scheme for the rtt variance. */
			i = tp->t_rttvar *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
			if (rt->rt_rmx.rmx_rttvar && i)
				rt->rt_rmx.rmx_rttvar =
				    (rt->rt_rmx.rmx_rttvar + i) / 2;
			else
				rt->rt_rmx.rmx_rttvar = i;
			tcpstat.tcps_cachedrttvar++;
		}
		/*
		 * update the pipelimit (ssthresh) if it has been updated
		 * already or if a pipesize was specified & the threshhold
		 * got below half the pipesize. I.e., wait for bad news
		 * before we start updating, then update on both good
		 * and bad news.
		 */
		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
		    ((i = tp->snd_ssthresh) != 0) && rt->rt_rmx.rmx_ssthresh) ||
		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
			/*
			 * convert the limit from user data bytes to
			 * packets then to packet data bytes.
			 */
			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
			if (i < 2)
				i = 2;
			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
			if (rt->rt_rmx.rmx_ssthresh)
				rt->rt_rmx.rmx_ssthresh =
				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
			else
				rt->rt_rmx.rmx_ssthresh = i;
			tcpstat.tcps_cachedssthresh++;
		}
	}
	/* free the reassembly queue, if any */
	/* Walk the circular queue rooted at the tcpcb; advance first, then
	 * free the segment we just stepped past (t->ti_prev). */
	t = tp->seg_next;
	while (t != (struct tcpiphdr *)tp) {
		t = (struct tcpiphdr *)t->ti_next;
#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
		/* Unaligned-safe load of the stashed mbuf pointer. */
		LD32_UNALGN((struct tcpiphdr *)t->ti_prev,m);
#else
		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
#endif
		remque(t->ti_prev);
		m_freem(m);
	}
	if (tp->t_template)
		(void) m_free(dtom(tp->t_template));
	free(tp, M_PCB);
	inp->inp_ppcb = 0;
	soisdisconnected(so);
	in_pcbdetach(inp);
	tcpstat.tcps_closed++;
	return ((struct tcpcb *)0);
}
---|
411 | |
---|
/*
 * Protocol drain hook -- intentionally a no-op: TCP releases nothing
 * extra under memory pressure here. NOTE(review): presumably installed
 * in the protosw pr_drain slot; confirm against the protocol table.
 */
void
tcp_drain()
{

}
---|
417 | |
---|
418 | /* |
---|
419 | * Notify a tcp user of an asynchronous error; |
---|
420 | * store error as soft error, but wake up user |
---|
421 | * (for now, won't do anything until can select for soft error). |
---|
422 | */ |
---|
423 | static void |
---|
424 | tcp_notify(inp, error) |
---|
425 | struct inpcb *inp; |
---|
426 | int error; |
---|
427 | { |
---|
428 | struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; |
---|
429 | struct socket *so = inp->inp_socket; |
---|
430 | |
---|
431 | /* |
---|
432 | * Ignore some errors if we are hooked up. |
---|
433 | * If connection hasn't completed, has retransmitted several times, |
---|
434 | * and receives a second error, give up now. This is better |
---|
435 | * than waiting a long time to establish a connection that |
---|
436 | * can never complete. |
---|
437 | */ |
---|
438 | if (tp->t_state == TCPS_ESTABLISHED && |
---|
439 | (error == EHOSTUNREACH || error == ENETUNREACH || |
---|
440 | error == EHOSTDOWN)) { |
---|
441 | return; |
---|
442 | } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && |
---|
443 | tp->t_softerror) |
---|
444 | so->so_error = error; |
---|
445 | else |
---|
446 | tp->t_softerror = error; |
---|
447 | soconnwakeup (so); |
---|
448 | sorwakeup(so); |
---|
449 | sowwakeup(so); |
---|
450 | } |
---|
451 | |
---|
#ifdef __rtems__
/* Single-threaded RTEMS build: the inpcb locking macros compile away. */
#define INP_INFO_RLOCK(a)
#define INP_INFO_RUNLOCK(a)
#define INP_LOCK(a)
#define INP_UNLOCK(a)
#endif

/*
 * Sysctl handler that exports a snapshot of all TCP connections as a
 * sequence of xtcpcb records bracketed by two xinpgen headers; a
 * changed generation count in the trailer tells the reader the list
 * mutated mid-copy and the request should be retried.
 */
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n, s;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		/* Size-probe only: report an estimate with ~12% headroom. */
		n = tcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
			+ (n + n/8) * sizeof(struct xtcpcb);
		return (0);
	}

	/* This sysctl is read-only. */
	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	s = splnet();
	INP_INFO_RLOCK(&tcbinfo);
	gencnt = tcbinfo.ipi_gencnt;
	n = tcbinfo.ipi_count;
	INP_INFO_RUNLOCK(&tcbinfo);
	splx(s);

	sysctl_wire_old_buffer(req, 2 * (sizeof xig)
		+ n * sizeof(struct xtcpcb));

	/* Leading header: count and generation at the time of snapshot. */
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	/* xig.xig_sogen = so_gencnt; remove by ccj */
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error)
		return error;

	/* ccj add exit if the count is 0 */
	if (!n)
		return error;

	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0)
		return ENOMEM;

	/* Pass 1: collect pointers to PCBs not newer than our snapshot. */
	s = splnet();
	INP_INFO_RLOCK(&tcbinfo);
	for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n;
	     inp = LIST_NEXT(inp, inp_list)) {
		INP_LOCK(inp);
		if (inp->inp_gencnt <= gencnt)
#if 0
		    &&
		    cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
#endif
			inp_list[i++] = inp;
		INP_UNLOCK(inp);
	}
	INP_INFO_RUNLOCK(&tcbinfo);
	splx(s);
	n = i;

	/* Pass 2: copy each collected PCB (and its tcpcb) out to the user. */
	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		INP_LOCK(inp);
		if (inp->inp_gencnt <= gencnt) {
			struct xtcpcb xt;
			caddr_t inp_ppcb;
			xt.xt_len = sizeof xt;
			/* XXX should avoid extra copy */
			bcopy(inp, &xt.xt_inp, sizeof *inp);
			inp_ppcb = inp->inp_ppcb;
			if (inp_ppcb != NULL)
				bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
			else
				/* No tcpcb attached; export zeroed tcpcb data. */
				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
#if 0
			if (inp->inp_socket)
				sotoxsocket(inp->inp_socket, &xt.xt_socket);
#endif
			error = SYSCTL_OUT(req, &xt, sizeof xt);
		}
		INP_UNLOCK(inp);
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		s = splnet();
		INP_INFO_RLOCK(&tcbinfo);
		xig.xig_gen = tcbinfo.ipi_gencnt;
#if 0
		xig.xig_sogen = so_gencnt;
#endif
		xig.xig_count = tcbinfo.ipi_count;
		INP_INFO_RUNLOCK(&tcbinfo);
		splx(s);
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	free(inp_list, M_TEMP);
	return error;
}

SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
	tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
---|
575 | |
---|
/*
 * Control-input handler: dispatch an ICMP-derived protocol event (cmd)
 * to every matching TCP PCB via in_pcbnotify(), choosing the per-PCB
 * callback according to the event type.
 */
void
tcp_ctlinput(cmd, sa, vip)
	int cmd;
	struct sockaddr *sa;
	void *vip;
{
	struct ip *ip = vip;
	struct tcphdr *th;
	/* Default action: record the error against the PCB. */
	void (*notify) __P((struct inpcb *, int)) = tcp_notify;

	if (cmd == PRC_QUENCH)
		notify = tcp_quench;		/* source quench: shrink cwnd */
#if 1
	else if (cmd == PRC_MSGSIZE)
		notify = tcp_mtudisc;		/* needs-frag: redo MSS from route MTU */
#endif
	else if (!PRC_IS_REDIRECT(cmd) &&
		 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
		return;				/* event we don't map; ignore */
	if (ip) {
		/* Locate the TCP header embedded after the ICMP'd IP header. */
		th = (struct tcphdr *)((caddr_t)ip
				       + (IP_VHL_HL(ip->ip_vhl) << 2));
		in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
			cmd, notify);
	} else
		/* No embedded packet: notify by destination address only. */
		in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
}
---|
603 | |
---|
604 | /* |
---|
605 | * When a source quench is received, close congestion window |
---|
606 | * to one segment. We will gradually open it again as we proceed. |
---|
607 | */ |
---|
608 | void |
---|
609 | tcp_quench(inp, errnum) |
---|
610 | struct inpcb *inp; |
---|
611 | int errnum; |
---|
612 | { |
---|
613 | struct tcpcb *tp = intotcpcb(inp); |
---|
614 | |
---|
615 | if (tp) |
---|
616 | tp->snd_cwnd = tp->t_maxseg; |
---|
617 | } |
---|
618 | |
---|
/*
 * When `need fragmentation' ICMP is received, update our idea of the MSS
 * based on the new value in the route. Also nudge TCP to send something,
 * since we know the packet we just sent was dropped.
 * This duplicates some code in the tcp_mss() function in tcp_input.c.
 */
void
tcp_mtudisc(inp, errnum)
	struct inpcb *inp;
	int errnum;	/* unused; required by the notify-callback signature */
{
	struct tcpcb *tp = intotcpcb(inp);
	struct rtentry *rt;
	struct rmxp_tao *taop;
	struct socket *so = inp->inp_socket;
	int offered;
	int mss;

	if (tp) {
		rt = tcp_rtlookup(inp);
		if (!rt || !rt->rt_rmx.rmx_mtu) {
			/* No route or no MTU info: fall back to the default. */
			tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
			return;
		}
		taop = rmx_taop(rt->rt_rmx);
		offered = taop->tao_mssopt;	/* MSS the peer offered, if cached */
		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
		if (offered)
			mss = min(mss, offered);
		/*
		 * XXX - The above conditional probably violates the TCP
		 * spec. The problem is that, since we don't know the
		 * other end's MSS, we are supposed to use a conservative
		 * default. But, if we do that, then MTU discovery will
		 * never actually take place, because the conservative
		 * default is much less than the MTUs typically seen
		 * on the Internet today. For the moment, we'll sweep
		 * this under the carpet.
		 *
		 * The conservative default might not actually be a problem
		 * if the only case this occurs is when sending an initial
		 * SYN with options and data to a host we've never talked
		 * to before. Then, they will reply with an MSS value which
		 * will get recorded and the new parameters should get
		 * recomputed. For Further Study.
		 */
		if (tp->t_maxopd <= mss)
			return;		/* already at or below the new limit */
		tp->t_maxopd = mss;	/* max data + options per segment */

		/* Deduct option overhead to get the pure-data segment size. */
		if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
		    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
			mss -= TCPOLEN_TSTAMP_APPA;
		if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
		    (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
			mss -= TCPOLEN_CC_APPA;
		/* Round mss down to a multiple of the cluster size. */
#if	(MCLBYTES & (MCLBYTES - 1)) == 0
		if (mss > MCLBYTES)
			mss &= ~(MCLBYTES-1);
#else
		if (mss > MCLBYTES)
			mss = mss / MCLBYTES * MCLBYTES;
#endif
		if (so->so_snd.sb_hiwat < mss)
			mss = so->so_snd.sb_hiwat;

		tp->t_maxseg = mss;

		tcpstat.tcps_mturesent++;
		/* Force a retransmit from snd_una with the new segment size. */
		tp->t_rtt = 0;
		tp->snd_nxt = tp->snd_una;
		tcp_output(tp);
	}
}
---|
693 | |
---|
694 | /* |
---|
695 | * Look-up the routing entry to the peer of this inpcb. If no route |
---|
696 | * is found and it cannot be allocated, then return NULL. This routine |
---|
697 | * is called by TCP routines that access the rmx structure and by tcp_mss |
---|
698 | * to get the interface MTU. |
---|
699 | */ |
---|
700 | struct rtentry * |
---|
701 | tcp_rtlookup(inp) |
---|
702 | struct inpcb *inp; |
---|
703 | { |
---|
704 | struct route *ro; |
---|
705 | struct rtentry *rt; |
---|
706 | |
---|
707 | ro = &inp->inp_route; |
---|
708 | rt = ro->ro_rt; |
---|
709 | if (rt == NULL || !(rt->rt_flags & RTF_UP)) { |
---|
710 | /* No route yet, so try to acquire one */ |
---|
711 | if (inp->inp_faddr.s_addr != INADDR_ANY) { |
---|
712 | ro->ro_dst.sa_family = AF_INET; |
---|
713 | ro->ro_dst.sa_len = sizeof(ro->ro_dst); |
---|
714 | ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = |
---|
715 | inp->inp_faddr; |
---|
716 | rtalloc(ro); |
---|
717 | rt = ro->ro_rt; |
---|
718 | } |
---|
719 | } |
---|
720 | return rt; |
---|
721 | } |
---|
722 | |
---|
723 | /* |
---|
724 | * Return a pointer to the cached information about the remote host. |
---|
725 | * The cached information is stored in the protocol specific part of |
---|
726 | * the route metrics. |
---|
727 | */ |
---|
728 | struct rmxp_tao * |
---|
729 | tcp_gettaocache(inp) |
---|
730 | struct inpcb *inp; |
---|
731 | { |
---|
732 | struct rtentry *rt = tcp_rtlookup(inp); |
---|
733 | |
---|
734 | /* Make sure this is a host route and is up. */ |
---|
735 | if (rt == NULL || |
---|
736 | (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) |
---|
737 | return NULL; |
---|
738 | |
---|
739 | return rmx_taop(rt->rt_rmx); |
---|
740 | } |
---|