1 | #include <machine/rtems-bsd-kernel-space.h> |
---|
2 | |
---|
3 | /* |
---|
4 | * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 |
---|
5 | * The Regents of the University of California. All rights reserved. |
---|
6 | * |
---|
7 | * Redistribution and use in source and binary forms, with or without |
---|
8 | * modification, are permitted provided that the following conditions |
---|
9 | * are met: |
---|
10 | * 1. Redistributions of source code must retain the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer. |
---|
12 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
13 | * notice, this list of conditions and the following disclaimer in the |
---|
14 | * documentation and/or other materials provided with the distribution. |
---|
15 | * 4. Neither the name of the University nor the names of its contributors |
---|
16 | * may be used to endorse or promote products derived from this software |
---|
17 | * without specific prior written permission. |
---|
18 | * |
---|
19 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
---|
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
---|
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
29 | * SUCH DAMAGE. |
---|
30 | * |
---|
31 | * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 |
---|
32 | * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.226 2005/05/07 00:41:36 cperciva Exp $ |
---|
33 | */ |
---|
34 | |
---|
35 | |
---|
36 | #ifdef HAVE_CONFIG_H |
---|
37 | #include "config.h" |
---|
38 | #endif |
---|
39 | |
---|
40 | #include "opt_tcpdebug.h" |
---|
41 | |
---|
42 | #include <sys/param.h> |
---|
43 | #include <sys/queue.h> |
---|
44 | #include <sys/proc.h> |
---|
45 | #include <sys/systm.h> |
---|
46 | #include <sys/kernel.h> |
---|
47 | #include <sys/sysctl.h> |
---|
48 | #include <sys/malloc.h> |
---|
49 | #include <sys/mbuf.h> |
---|
50 | #include <sys/socket.h> |
---|
51 | #include <sys/socketvar.h> |
---|
52 | #include <sys/protosw.h> |
---|
53 | #include <errno.h> |
---|
54 | |
---|
55 | #include <net/route.h> |
---|
56 | #include <net/if.h> |
---|
57 | |
---|
58 | #define _IP_VHL |
---|
59 | #include <netinet/in.h> |
---|
60 | #include <rtems/rtems_netinet_in.h> |
---|
61 | #include <netinet/in_systm.h> |
---|
62 | #include <netinet/ip.h> |
---|
63 | #include <netinet/in_pcb.h> |
---|
64 | #include <netinet/in_var.h> |
---|
65 | #include <netinet/ip_var.h> |
---|
66 | #include <netinet/ip_icmp.h> |
---|
67 | #include <netinet/tcp.h> |
---|
68 | #include <netinet/tcp_fsm.h> |
---|
69 | #include <netinet/tcp_seq.h> |
---|
70 | #include <netinet/tcp_timer.h> |
---|
71 | #include <netinet/tcp_var.h> |
---|
72 | #include <netinet/tcpip.h> |
---|
73 | #ifdef TCPDEBUG |
---|
74 | #include <netinet/tcp_debug.h> |
---|
75 | #endif |
---|
76 | |
---|
77 | int tcp_mssdflt = TCP_MSS; |
---|
78 | SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, |
---|
79 | &tcp_mssdflt , 0, "Default TCP Maximum Segment Size"); |
---|
80 | |
---|
81 | static int tcp_do_rfc1323 = 1; |
---|
82 | #if !defined(__rtems__) |
---|
83 | static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; |
---|
84 | SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, |
---|
85 | CTLFLAG_RW, &tcp_rttdflt , 0, ""); |
---|
86 | |
---|
87 | SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, |
---|
88 | CTLFLAG_RW, &tcp_do_rfc1323 , 0, ""); |
---|
89 | #endif |
---|
90 | |
---|
91 | static void tcp_notify(struct inpcb *, int); |
---|
92 | |
---|
93 | /* |
---|
94 | * Target size of TCP PCB hash table. Will be rounded down to a prime |
---|
95 | * number. |
---|
96 | */ |
---|
97 | #ifndef TCBHASHSIZE |
---|
98 | #define TCBHASHSIZE 128 |
---|
99 | #endif |
---|
100 | |
---|
101 | /* |
---|
102 | * Tcp initialization |
---|
103 | */ |
---|
104 | void |
---|
105 | tcp_init(void) |
---|
106 | { |
---|
107 | |
---|
108 | tcp_iss = random(); /* wrong, but better than a constant */ |
---|
109 | tcp_ccgen = 1; |
---|
110 | LIST_INIT(&tcb); |
---|
111 | tcbinfo.listhead = &tcb; |
---|
112 | tcbinfo.hashbase = hashinit(TCBHASHSIZE, M_PCB, &tcbinfo.hashmask); |
---|
113 | if (max_protohdr < sizeof(struct tcpiphdr)) |
---|
114 | max_protohdr = sizeof(struct tcpiphdr); |
---|
115 | if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) |
---|
116 | panic("tcp_init"); |
---|
117 | } |
---|
118 | |
---|
119 | /* |
---|
120 | * Create template to be used to send tcp packets on a connection. |
---|
121 | * Call after host entry created, allocates an mbuf and fills |
---|
122 | * in a skeletal tcp/ip header, minimizing the amount of work |
---|
123 | * necessary when the connection is used. |
---|
124 | */ |
---|
125 | struct tcpiphdr * |
---|
126 | tcp_template(struct tcpcb *tp) |
---|
127 | { |
---|
128 | register struct inpcb *inp = tp->t_inpcb; |
---|
129 | register struct mbuf *m; |
---|
130 | register struct tcpiphdr *n; |
---|
131 | |
---|
132 | if ((n = tp->t_template) == 0) { |
---|
133 | m = m_get(M_DONTWAIT, MT_HEADER); |
---|
134 | if (m == NULL) |
---|
135 | return (0); |
---|
136 | m->m_len = sizeof (struct tcpiphdr); |
---|
137 | n = mtod(m, struct tcpiphdr *); |
---|
138 | } |
---|
139 | n->ti_next = n->ti_prev = 0; |
---|
140 | n->ti_x1 = 0; |
---|
141 | n->ti_pr = IPPROTO_TCP; |
---|
142 | n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip)); |
---|
143 | n->ti_src = inp->inp_laddr; |
---|
144 | n->ti_dst = inp->inp_faddr; |
---|
145 | n->ti_sport = inp->inp_lport; |
---|
146 | n->ti_dport = inp->inp_fport; |
---|
147 | n->ti_seq = 0; |
---|
148 | n->ti_ack = 0; |
---|
149 | n->ti_x2 = 0; |
---|
150 | n->ti_off = 5; |
---|
151 | n->ti_flags = 0; |
---|
152 | n->ti_win = 0; |
---|
153 | n->ti_sum = 0; |
---|
154 | n->ti_urp = 0; |
---|
155 | return (n); |
---|
156 | } |
---|
157 | |
---|
/*
 * Send a single message to the TCP at address specified by
 * the given TCP/IP header.  If m == 0, then we make a copy
 * of the tcpiphdr at ti and send directly to the addressed host.
 * This is used to force keep alive messages out using the TCP
 * template for a connection tp->t_template.  If flags are given
 * then we send a message back to the TCP which originated the
 * segment ti, and discard the mbuf containing it and any other
 * attached mbufs.
 *
 * In any case the ack and sequence number of the transmitted
 * segment are as specified by the parameters.
 *
 * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
 */
void
tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m,
    tcp_seq ack, tcp_seq seq, int flags)
{
	register int tlen;
	int win = 0;
	struct route *ro = 0;
	struct route sro;

	if (tp) {
		/* Advertise the receive window of the attached socket. */
		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
		ro = &tp->t_inpcb->inp_route;
	} else {
		/* No connection: use a zeroed, stack-local route. */
		ro = &sro;
		bzero(ro, sizeof *ro);
	}
	if (m == NULL) {
		/* Keepalive case: build a fresh segment from template *ti. */
		m = m_gethdr(M_DONTWAIT, MT_HEADER);
		if (m == NULL)
			return;
#ifdef TCP_COMPAT_42
		/* 4.2BSD-compatible keepalives carry one garbage byte. */
		tlen = 1;
#else
		tlen = 0;
#endif
		m->m_data += max_linkhdr;
		*mtod(m, struct tcpiphdr *) = *ti;
		/* ti now aliases the copy inside the new mbuf. */
		ti = mtod(m, struct tcpiphdr *);
		flags = TH_ACK;
	} else {
		/*
		 * Responding to a received segment: reuse its first mbuf,
		 * drop any chained data, and swap addresses/ports so the
		 * reply goes back to the originator.
		 */
		m_freem(m->m_next);
		m->m_next = NULL;
		m->m_data = (caddr_t)ti;
		m->m_len = sizeof (struct tcpiphdr);
		tlen = 0;
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
		xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
		xchg(ti->ti_dport, ti->ti_sport, u_short);
#undef xchg
	}
	/* Fill in the header fields common to both cases. */
	ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
	tlen += sizeof (struct tcpiphdr);
	m->m_len = tlen;
	m->m_pkthdr.len = tlen;
	m->m_pkthdr.rcvif = (struct ifnet *) 0;
	ti->ti_next = ti->ti_prev = 0;
	ti->ti_x1 = 0;
	ti->ti_seq = htonl(seq);
	ti->ti_ack = htonl(ack);
	ti->ti_x2 = 0;
	ti->ti_off = sizeof (struct tcphdr) >> 2;
	ti->ti_flags = flags;
	if (tp)
		/* Scale the advertised window per RFC 1323. */
		ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
	else
		ti->ti_win = htons((u_short)win);
	ti->ti_urp = 0;
	/* Checksum must be computed over a zeroed ti_sum field. */
	ti->ti_sum = 0;
	ti->ti_sum = in_cksum(m, tlen);
	((struct ip *)ti)->ip_len = tlen;
	((struct ip *)ti)->ip_ttl = ip_defttl;
#ifdef TCPDEBUG
	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_OUTPUT, 0, tp, ti, 0);
#endif
	(void) ip_output(m, NULL, ro, 0, NULL);
	if (ro == &sro && ro->ro_rt) {
		/* Release the route acquired via the stack-local struct. */
		RTFREE(ro->ro_rt);
	}
}
---|
243 | |
---|
244 | /* |
---|
245 | * Create a new TCP control block, making an |
---|
246 | * empty reassembly queue and hooking it to the argument |
---|
247 | * protocol control block. |
---|
248 | */ |
---|
249 | struct tcpcb * |
---|
250 | tcp_newtcpcb(struct inpcb *inp) |
---|
251 | { |
---|
252 | struct tcpcb *tp; |
---|
253 | |
---|
254 | tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT); |
---|
255 | if (tp == NULL) |
---|
256 | return ((struct tcpcb *)0); |
---|
257 | bzero((char *) tp, sizeof(struct tcpcb)); |
---|
258 | tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; |
---|
259 | tp->t_maxseg = tp->t_maxopd = tcp_mssdflt; |
---|
260 | |
---|
261 | if (tcp_do_rfc1323) |
---|
262 | tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); |
---|
263 | tp->t_inpcb = inp; |
---|
264 | /* |
---|
265 | * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no |
---|
266 | * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives |
---|
267 | * reasonable initial retransmit time. |
---|
268 | */ |
---|
269 | tp->t_srtt = TCPTV_SRTTBASE; |
---|
270 | tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; |
---|
271 | tp->t_rttmin = TCPTV_MIN; |
---|
272 | tp->t_rxtcur = TCPTV_RTOBASE; |
---|
273 | tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; |
---|
274 | tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; |
---|
275 | inp->inp_ip_ttl = ip_defttl; |
---|
276 | inp->inp_ppcb = (caddr_t)tp; |
---|
277 | return (tp); |
---|
278 | } |
---|
279 | |
---|
280 | /* |
---|
281 | * Drop a TCP connection, reporting |
---|
282 | * the specified error. If connection is synchronized, |
---|
283 | * then send a RST to peer. |
---|
284 | */ |
---|
285 | struct tcpcb * |
---|
286 | tcp_drop(struct tcpcb *tp, int errnum) |
---|
287 | { |
---|
288 | struct socket *so = tp->t_inpcb->inp_socket; |
---|
289 | |
---|
290 | if (TCPS_HAVERCVDSYN(tp->t_state)) { |
---|
291 | tp->t_state = TCPS_CLOSED; |
---|
292 | (void) tcp_output(tp); |
---|
293 | tcpstat.tcps_drops++; |
---|
294 | } else |
---|
295 | tcpstat.tcps_conndrops++; |
---|
296 | if (errnum == ETIMEDOUT && tp->t_softerror) |
---|
297 | errnum = tp->t_softerror; |
---|
298 | so->so_error = errnum; |
---|
299 | return (tcp_close(tp)); |
---|
300 | } |
---|
301 | |
---|
/*
 * Close a TCP control block:
 *	discard all space held by the tcp
 *	discard internet protocol block
 *	wake up any sleepers
 */
struct tcpcb *
tcp_close(struct tcpcb *tp)
{
	register struct tcpiphdr *t;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	register struct mbuf *m;
	register struct rtentry *rt;

	/*
	 * If we got enough samples through the srtt filter,
	 * save the rtt and rttvar in the routing entry.
	 * 'Enough' is arbitrarily defined as the 16 samples.
	 * 16 samples is enough for the srtt filter to converge
	 * to within 5% of the correct value; fewer samples and
	 * we could save a very bogus rtt.
	 *
	 * Don't update the default route's characteristics and don't
	 * update anything that the user "locked".
	 */
	if (tp->t_rttupdated >= 16 &&
	    (rt = inp->inp_route.ro_rt) &&
	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
		register u_long i = 0;

		/* Cache smoothed RTT unless the user locked the metric. */
		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
			i = tp->t_srtt *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
			if (rt->rt_rmx.rmx_rtt && i)
				/*
				 * filter this update to half the old & half
				 * the new values, converting scale.
				 * See route.h and tcp_var.h for a
				 * description of the scaling constants.
				 */
				rt->rt_rmx.rmx_rtt =
				    (rt->rt_rmx.rmx_rtt + i) / 2;
			else
				rt->rt_rmx.rmx_rtt = i;
			tcpstat.tcps_cachedrtt++;
		}
		/* Likewise for the RTT variance. */
		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
			i = tp->t_rttvar *
			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
			if (rt->rt_rmx.rmx_rttvar && i)
				rt->rt_rmx.rmx_rttvar =
				    (rt->rt_rmx.rmx_rttvar + i) / 2;
			else
				rt->rt_rmx.rmx_rttvar = i;
			tcpstat.tcps_cachedrttvar++;
		}
		/*
		 * update the pipelimit (ssthresh) if it has been updated
		 * already or if a pipesize was specified & the threshhold
		 * got below half the pipesize.  I.e., wait for bad news
		 * before we start updating, then update on both good
		 * and bad news.
		 */
		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
		    ((i = tp->snd_ssthresh) != 0) && rt->rt_rmx.rmx_ssthresh) ||
		    i < (rt->rt_rmx.rmx_sendpipe / 2)) {
			/*
			 * convert the limit from user data bytes to
			 * packets then to packet data bytes.
			 */
			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
			if (i < 2)
				i = 2;
			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
			if (rt->rt_rmx.rmx_ssthresh)
				rt->rt_rmx.rmx_ssthresh =
				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
			else
				rt->rt_rmx.rmx_ssthresh = i;
			tcpstat.tcps_cachedssthresh++;
		}
	}
	/* free the reassembly queue, if any */
	t = tp->seg_next;
	while (t != (struct tcpiphdr *)tp) {
		t = (struct tcpiphdr *)t->ti_next;
		/*
		 * Recover the mbuf stashed in the previous queue entry;
		 * some targets must read it with an unaligned-safe load.
		 */
#if (defined(__GNUC__) && (defined(__arm__) || defined(__mips__)))
		LD32_UNALGN((struct tcpiphdr *)t->ti_prev,m);
#else
		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
#endif
		remque(t->ti_prev);
		m_freem(m);
	}
	/* Release the cached header template, the TCB, and the PCB. */
	if (tp->t_template)
		(void) m_free(dtom(tp->t_template));
	free(tp, M_PCB);
	inp->inp_ppcb = 0;
	soisdisconnected(so);
	in_pcbdetach(inp);
	tcpstat.tcps_closed++;
	return ((struct tcpcb *)0);
}
---|
406 | |
---|
/*
 * Protocol drain routine called when mbufs run low.  This
 * implementation intentionally reclaims nothing.
 */
void
tcp_drain(void)
{

}
---|
412 | |
---|
413 | /* |
---|
414 | * Notify a tcp user of an asynchronous error; |
---|
415 | * store error as soft error, but wake up user |
---|
416 | * (for now, won't do anything until can select for soft error). |
---|
417 | */ |
---|
418 | static void |
---|
419 | tcp_notify(struct inpcb *inp, int error) |
---|
420 | { |
---|
421 | struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; |
---|
422 | struct socket *so = inp->inp_socket; |
---|
423 | |
---|
424 | /* |
---|
425 | * Ignore some errors if we are hooked up. |
---|
426 | * If connection hasn't completed, has retransmitted several times, |
---|
427 | * and receives a second error, give up now. This is better |
---|
428 | * than waiting a long time to establish a connection that |
---|
429 | * can never complete. |
---|
430 | */ |
---|
431 | if (tp->t_state == TCPS_ESTABLISHED && |
---|
432 | (error == EHOSTUNREACH || error == ENETUNREACH || |
---|
433 | error == EHOSTDOWN)) { |
---|
434 | return; |
---|
435 | } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && |
---|
436 | tp->t_softerror) |
---|
437 | so->so_error = error; |
---|
438 | else |
---|
439 | tp->t_softerror = error; |
---|
440 | soconnwakeup (so); |
---|
441 | sorwakeup(so); |
---|
442 | sowwakeup(so); |
---|
443 | } |
---|
444 | |
---|
445 | #ifdef __rtems__ |
---|
446 | #define INP_INFO_RLOCK(a) |
---|
447 | #define INP_INFO_RUNLOCK(a) |
---|
448 | #define INP_LOCK(a) |
---|
449 | #define INP_UNLOCK(a) |
---|
450 | #endif |
---|
451 | |
---|
/*
 * Sysctl handler that exports the list of active TCP PCBs as a
 * sequence of xtcpcb records bracketed by two xinpgen headers.
 */
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n, s;
	struct inpcb *inp, **inp_list;
	inp_gen_t gencnt;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		/* Size probe only: report a padded upper-bound estimate. */
		n = tcbinfo.ipi_count;
		req->oldidx = 2 * (sizeof xig)
			+ (n + n/8) * sizeof(struct xtcpcb);
		return (0);
	}

	/* This node is read-only. */
	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	s = splnet();
	INP_INFO_RLOCK(&tcbinfo);
	/* Snapshot generation count and PCB count under the lock. */
	gencnt = tcbinfo.ipi_gencnt;
	n = tcbinfo.ipi_count;
	INP_INFO_RUNLOCK(&tcbinfo);
	splx(s);

	sysctl_wire_old_buffer(req, 2 * (sizeof xig)
		+ n * sizeof(struct xtcpcb));

	/* Leading header tells userland which snapshot it is getting. */
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = gencnt;
	/* xig.xig_sogen = so_gencnt; remove by ccj */
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error)
		return error;

	/* ccj add exit if the count is 0 */
	if (!n)
		return error;

	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
	if (inp_list == 0)
		return ENOMEM;

	/* First pass: collect up to n PCBs that predate our snapshot. */
	s = splnet();
	INP_INFO_RLOCK(&tcbinfo);
	for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n;
	    inp = LIST_NEXT(inp, inp_list)) {
		INP_LOCK(inp);
		if (inp->inp_gencnt <= gencnt)
#if 0
		    &&
		    cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
#endif
			inp_list[i++] = inp;
		INP_UNLOCK(inp);
	}
	INP_INFO_RUNLOCK(&tcbinfo);
	splx(s);
	n = i;

	/* Second pass: copy each collected PCB out to the requester. */
	error = 0;
	for (i = 0; i < n; i++) {
		inp = inp_list[i];
		INP_LOCK(inp);
		if (inp->inp_gencnt <= gencnt) {
			struct xtcpcb xt;
			caddr_t inp_ppcb;
			xt.xt_len = sizeof xt;
			/* XXX should avoid extra copy */
			bcopy(inp, &xt.xt_inp, sizeof *inp);
			inp_ppcb = inp->inp_ppcb;
			if (inp_ppcb != NULL)
				bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
			else
				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
#if 0
			if (inp->inp_socket)
				sotoxsocket(inp->inp_socket, &xt.xt_socket);
#endif
			error = SYSCTL_OUT(req, &xt, sizeof xt);
		}
		INP_UNLOCK(inp);
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		s = splnet();
		INP_INFO_RLOCK(&tcbinfo);
		xig.xig_gen = tcbinfo.ipi_gencnt;
#if 0
		xig.xig_sogen = so_gencnt;
#endif
		xig.xig_count = tcbinfo.ipi_count;
		INP_INFO_RUNLOCK(&tcbinfo);
		splx(s);
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	free(inp_list, M_TEMP);
	return error;
}
---|
565 | |
---|
566 | SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, |
---|
567 | tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); |
---|
568 | |
---|
/*
 * Control input for TCP: translate an ICMP-derived command into the
 * appropriate per-PCB notification and fan it out via in_pcbnotify().
 */
void
tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
	struct ip *ip = vip;
	struct tcphdr *th;
	void (*notify)(struct inpcb *, int) = tcp_notify;

	/* Pick a specialized handler for quench and MTU-discovery events. */
	if (cmd == PRC_QUENCH)
		notify = tcp_quench;
#if 1
	else if (cmd == PRC_MSGSIZE)
		notify = tcp_mtudisc;
#endif
	else if (!PRC_IS_REDIRECT(cmd) &&
		 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
		return;
	if (ip != NULL) {
		/* Locate the embedded TCP header after the IP header. */
#ifdef _IP_VHL
		th = (struct tcphdr *)((caddr_t)ip
				       + (IP_VHL_HL(ip->ip_vhl) << 2));
#else
		th = (struct tcphdr *)((caddr_t)ip
				       + (ip->ip_hl << 2));
#endif
		in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
			cmd, notify);
	} else
		/* No packet context: notify every matching PCB. */
		in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
}
---|
598 | |
---|
599 | /* |
---|
600 | * When a source quench is received, close congestion window |
---|
601 | * to one segment. We will gradually open it again as we proceed. |
---|
602 | */ |
---|
603 | void |
---|
604 | tcp_quench( struct inpcb *inp, int errnum) |
---|
605 | { |
---|
606 | struct tcpcb *tp = intotcpcb(inp); |
---|
607 | |
---|
608 | if (tp) |
---|
609 | tp->snd_cwnd = tp->t_maxseg; |
---|
610 | } |
---|
611 | |
---|
/*
 * When `need fragmentation' ICMP is received, update our idea of the MSS
 * based on the new value in the route.  Also nudge TCP to send something,
 * since we know the packet we just sent was dropped.
 * This duplicates some code in the tcp_mss() function in tcp_input.c.
 */
void
tcp_mtudisc(struct inpcb *inp, int errnum)
{
	struct tcpcb *tp = intotcpcb(inp);
	struct rtentry *rt;
	struct rmxp_tao *taop;
	struct socket *so = inp->inp_socket;
	int offered;
	int mss;

	if (tp) {
		rt = tcp_rtlookup(inp);
		if (!rt || !rt->rt_rmx.rmx_mtu) {
			/* No route or no MTU metric: fall back to default. */
			tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
			return;
		}
		taop = rmx_taop(rt->rt_rmx);
		offered = taop->tao_mssopt;
		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
		/* Never exceed what the peer offered, if known. */
		if (offered)
			mss = min(mss, offered);
		/*
		 * XXX - The above conditional probably violates the TCP
		 * spec.  The problem is that, since we don't know the
		 * other end's MSS, we are supposed to use a conservative
		 * default.  But, if we do that, then MTU discovery will
		 * never actually take place, because the conservative
		 * default is much less than the MTUs typically seen
		 * on the Internet today.  For the moment, we'll sweep
		 * this under the carpet.
		 *
		 * The conservative default might not actually be a problem
		 * if the only case this occurs is when sending an initial
		 * SYN with options and data to a host we've never talked
		 * to before.  Then, they will reply with an MSS value which
		 * will get recorded and the new parameters should get
		 * recomputed.  For Further Study.
		 */
		if (tp->t_maxopd <= mss)
			return;
		tp->t_maxopd = mss;

		/* Subtract space consumed by negotiated TCP options. */
		if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
		    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
			mss -= TCPOLEN_TSTAMP_APPA;
		if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
		    (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
			mss -= TCPOLEN_CC_APPA;
		/* Round down to a multiple of the mbuf cluster size. */
#if (MCLBYTES & (MCLBYTES - 1)) == 0
		if (mss > MCLBYTES)
			mss &= ~(MCLBYTES-1);
#else
		if (mss > MCLBYTES)
			mss = mss / MCLBYTES * MCLBYTES;
#endif
		/* Don't exceed what the send buffer can hold. */
		if (so->so_snd.sb_hiwat < mss)
			mss = so->so_snd.sb_hiwat;

		tp->t_maxseg = mss;

		/* Retransmit from snd_una now that the MSS shrank. */
		tcpstat.tcps_mturesent++;
		tp->t_rtt = 0;
		tp->snd_nxt = tp->snd_una;
		tcp_output(tp);
	}
}
---|
684 | |
---|
685 | /* |
---|
686 | * Look-up the routing entry to the peer of this inpcb. If no route |
---|
687 | * is found and it cannot be allocated, then return NULL. This routine |
---|
688 | * is called by TCP routines that access the rmx structure and by tcp_mss |
---|
689 | * to get the interface MTU. |
---|
690 | */ |
---|
691 | struct rtentry * |
---|
692 | tcp_rtlookup(struct inpcb *inp) |
---|
693 | { |
---|
694 | struct route *ro; |
---|
695 | struct rtentry *rt; |
---|
696 | |
---|
697 | ro = &inp->inp_route; |
---|
698 | rt = ro->ro_rt; |
---|
699 | if (rt == NULL || !(rt->rt_flags & RTF_UP)) { |
---|
700 | /* No route yet, so try to acquire one */ |
---|
701 | if (inp->inp_faddr.s_addr != INADDR_ANY) { |
---|
702 | ro->ro_dst.sa_family = AF_INET; |
---|
703 | ro->ro_dst.sa_len = sizeof(ro->ro_dst); |
---|
704 | ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = |
---|
705 | inp->inp_faddr; |
---|
706 | rtalloc(ro); |
---|
707 | rt = ro->ro_rt; |
---|
708 | } |
---|
709 | } |
---|
710 | return rt; |
---|
711 | } |
---|
712 | |
---|
713 | /* |
---|
714 | * Return a pointer to the cached information about the remote host. |
---|
715 | * The cached information is stored in the protocol specific part of |
---|
716 | * the route metrics. |
---|
717 | */ |
---|
718 | struct rmxp_tao * |
---|
719 | tcp_gettaocache(struct inpcb *inp) |
---|
720 | { |
---|
721 | struct rtentry *rt = tcp_rtlookup(inp); |
---|
722 | |
---|
723 | /* Make sure this is a host route and is up. */ |
---|
724 | if (rt == NULL || |
---|
725 | (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) |
---|
726 | return NULL; |
---|
727 | |
---|
728 | return rmx_taop(rt->rt_rmx); |
---|
729 | } |
---|