1 | /* |
---|
2 | * Copyright (c) 1982, 1986, 1993, 1994, 1995 |
---|
3 | * The Regents of the University of California. All rights reserved. |
---|
4 | * |
---|
5 | * Redistribution and use in source and binary forms, with or without |
---|
6 | * modification, are permitted provided that the following conditions |
---|
7 | * are met: |
---|
8 | * 1. Redistributions of source code must retain the above copyright |
---|
9 | * notice, this list of conditions and the following disclaimer. |
---|
10 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
11 | * notice, this list of conditions and the following disclaimer in the |
---|
12 | * documentation and/or other materials provided with the distribution. |
---|
13 | * 3. All advertising materials mentioning features or use of this software |
---|
14 | * must display the following acknowledgement: |
---|
15 | * This product includes software developed by the University of |
---|
16 | * California, Berkeley and its contributors. |
---|
17 | * 4. Neither the name of the University nor the names of its contributors |
---|
18 | * may be used to endorse or promote products derived from this software |
---|
19 | * without specific prior written permission. |
---|
20 | * |
---|
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
---|
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
---|
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
31 | * SUCH DAMAGE. |
---|
32 | * |
---|
33 | * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 |
---|
34 | * $Id$ |
---|
35 | */ |
---|
36 | |
---|
37 | #ifndef _NETINET_TCP_VAR_H_ |
---|
38 | #define _NETINET_TCP_VAR_H_ |
---|
39 | /* |
---|
40 | * Kernel variables for tcp. |
---|
41 | */ |
---|
42 | |
---|
43 | /* |
---|
44 | * Tcp control block, one per tcp; fields: |
---|
45 | */ |
---|
46 | struct tcpcb { |
---|
47 | struct tcpiphdr *seg_next; /* sequencing queue */ |
---|
48 | struct tcpiphdr *seg_prev; |
---|
49 | int t_state; /* state of this connection */ |
---|
50 | int t_timer[TCPT_NTIMERS]; /* tcp timers */ |
---|
51 | int t_rxtshift; /* log(2) of rexmt exp. backoff */ |
---|
52 | int t_rxtcur; /* current retransmit value */ |
---|
53 | int t_dupacks; /* consecutive dup acks recd */ |
---|
54 | u_int t_maxseg; /* maximum segment size */ |
---|
55 | u_int t_maxopd; /* mss plus options */ |
---|
56 | int t_force; /* 1 if forcing out a byte */ |
---|
57 | u_int t_flags; |
---|
58 | #define TF_ACKNOW 0x0001 /* ack peer immediately */ |
---|
59 | #define TF_DELACK 0x0002 /* ack, but try to delay it */ |
---|
60 | #define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ |
---|
61 | #define TF_NOOPT 0x0008 /* don't use tcp options */ |
---|
62 | #define TF_SENTFIN 0x0010 /* have sent FIN */ |
---|
63 | #define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ |
---|
64 | #define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ |
---|
65 | #define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ |
---|
66 | #define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ |
---|
67 | #define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ |
---|
68 | #define TF_NEEDSYN 0x0400 /* send SYN (implicit state) */ |
---|
69 | #define TF_NEEDFIN 0x0800 /* send FIN (implicit state) */ |
---|
70 | #define TF_NOPUSH 0x1000 /* don't push */ |
---|
71 | #define TF_REQ_CC 0x2000 /* have/will request CC */ |
---|
72 | #define TF_RCVD_CC 0x4000 /* a CC was received in SYN */ |
---|
73 | #define TF_SENDCCNEW 0x8000 /* send CCnew instead of CC in SYN */ |
---|
74 | |
---|
75 | struct tcpiphdr *t_template; /* skeletal packet for transmit */ |
---|
76 | struct inpcb *t_inpcb; /* back pointer to internet pcb */ |
---|
77 | /* |
---|
78 | * The following fields are used as in the protocol specification. |
---|
79 | * See RFC783, Dec. 1981, page 21. |
---|
80 | */ |
---|
81 | /* send sequence variables */ |
---|
82 | tcp_seq snd_una; /* send unacknowledged */ |
---|
83 | tcp_seq snd_nxt; /* send next */ |
---|
84 | tcp_seq snd_up; /* send urgent pointer */ |
---|
85 | tcp_seq snd_wl1; /* window update seg seq number */ |
---|
86 | tcp_seq snd_wl2; /* window update seg ack number */ |
---|
87 | tcp_seq iss; /* initial send sequence number */ |
---|
88 | u_long snd_wnd; /* send window */ |
---|
89 | /* receive sequence variables */ |
---|
90 | u_long rcv_wnd; /* receive window */ |
---|
91 | tcp_seq rcv_nxt; /* receive next */ |
---|
92 | tcp_seq rcv_up; /* receive urgent pointer */ |
---|
93 | tcp_seq irs; /* initial receive sequence number */ |
---|
94 | /* |
---|
95 | * Additional variables for this implementation. |
---|
96 | */ |
---|
97 | /* receive variables */ |
---|
98 | tcp_seq rcv_adv; /* advertised window */ |
---|
99 | /* retransmit variables */ |
---|
100 | tcp_seq snd_max; /* highest sequence number sent; |
---|
101 | * used to recognize retransmits |
---|
102 | */ |
---|
103 | /* congestion control (for slow start, source quench, retransmit after loss) */ |
---|
104 | u_long snd_cwnd; /* congestion-controlled window */ |
---|
105 | u_long snd_ssthresh; /* snd_cwnd size threshold for |
---|
106 | * for slow start exponential to |
---|
107 | * linear switch |
---|
108 | */ |
---|
109 | /* |
---|
110 | * transmit timing stuff. See below for scale of srtt and rttvar. |
---|
111 | * "Variance" is actually smoothed difference. |
---|
112 | */ |
---|
113 | u_int t_idle; /* inactivity time */ |
---|
114 | int t_rtt; /* round trip time */ |
---|
115 | tcp_seq t_rtseq; /* sequence number being timed */ |
---|
116 | int t_srtt; /* smoothed round-trip time */ |
---|
117 | int t_rttvar; /* variance in round-trip time */ |
---|
118 | u_int t_rttmin; /* minimum rtt allowed */ |
---|
119 | u_long max_sndwnd; /* largest window peer has offered */ |
---|
120 | |
---|
121 | /* out-of-band data */ |
---|
122 | char t_oobflags; /* have some */ |
---|
123 | char t_iobc; /* input character */ |
---|
124 | #define TCPOOB_HAVEDATA 0x01 |
---|
125 | #define TCPOOB_HADDATA 0x02 |
---|
126 | int t_softerror; /* possible error not yet reported */ |
---|
127 | |
---|
128 | /* RFC 1323 variables */ |
---|
129 | u_char snd_scale; /* window scaling for send window */ |
---|
130 | u_char rcv_scale; /* window scaling for recv window */ |
---|
131 | u_char request_r_scale; /* pending window scaling */ |
---|
132 | u_char requested_s_scale; |
---|
133 | u_long ts_recent; /* timestamp echo data */ |
---|
134 | u_long ts_recent_age; /* when last updated */ |
---|
135 | tcp_seq last_ack_sent; |
---|
136 | /* RFC 1644 variables */ |
---|
137 | tcp_cc cc_send; /* send connection count */ |
---|
138 | tcp_cc cc_recv; /* receive connection count */ |
---|
139 | u_long t_duration; /* connection duration */ |
---|
140 | |
---|
141 | /* TUBA stuff */ |
---|
142 | caddr_t t_tuba_pcb; /* next level down pcb for TCP over z */ |
---|
143 | /* More RTT stuff */ |
---|
144 | u_long t_rttupdated; /* number of times rtt sampled */ |
---|
145 | }; |
---|
146 | |
---|
147 | /* |
---|
148 | * Structure to hold TCP options that are only used during segment |
---|
149 | * processing (in tcp_input), but not held in the tcpcb. |
---|
150 | * It's basically used to reduce the number of parameters |
---|
151 | * to tcp_dooptions. |
---|
152 | */ |
---|
153 | struct tcpopt { |
---|
154 | u_long to_flag; /* which options are present */ |
---|
155 | #define TOF_TS 0x0001 /* timestamp */ |
---|
156 | #define TOF_CC 0x0002 /* CC and CCnew are exclusive */ |
---|
157 | #define TOF_CCNEW 0x0004 |
---|
158 | #define TOF_CCECHO 0x0008 |
---|
159 | u_long to_tsval; |
---|
160 | u_long to_tsecr; |
---|
161 | tcp_cc to_cc; /* holds CC or CCnew */ |
---|
162 | tcp_cc to_ccecho; |
---|
163 | }; |
---|
164 | |
---|
165 | /* |
---|
166 | * The TAO cache entry which is stored in the protocol family specific |
---|
167 | * portion of the route metrics. |
---|
168 | */ |
---|
169 | struct rmxp_tao { |
---|
170 | tcp_cc tao_cc; /* latest CC in valid SYN */ |
---|
171 | tcp_cc tao_ccsent; /* latest CC sent to peer */ |
---|
172 | u_short tao_mssopt; /* peer's cached MSS */ |
---|
173 | #ifdef notyet |
---|
174 | u_short tao_flags; /* cache status flags */ |
---|
175 | #define TAOF_DONT 0x0001 /* peer doesn't understand rfc1644 */ |
---|
176 | #define TAOF_OK 0x0002 /* peer does understand rfc1644 */ |
---|
177 | #define TAOF_UNDEF 0 /* we don't know yet */ |
---|
178 | #endif /* notyet */ |
---|
179 | }; |
---|
/*
 * View the rmx_filler member of route metrics `r' as a TAO cache entry.
 * `r' must be the metrics object itself (not a pointer to it): the
 * macro applies `.rmx_filler' to it and casts the result to
 * struct rmxp_tao *.
 */
#define	rmx_taop(r)	((struct rmxp_tao *)(r).rmx_filler)

/*
 * Converters to the TCP control block.
 * intotcpcb: takes a pointer `ip' and yields its inp_ppcb member cast
 *	to struct tcpcb *.
 * sototcpcb: same, starting from `so' by way of sotoinpcb() (which is
 *	defined outside this header).
 */
#define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
#define	sototcpcb(so)	(intotcpcb(sotoinpcb(so)))

/*
 * The smoothed round-trip time and estimated variance
 * are stored as fixed point numbers scaled by the values below.
 *
 * srtt is kept with TCP_RTT_SHIFT (5) bits to the right of the binary
 * point and rttvar with TCP_RTTVAR_SHIFT (4) bits.  TCP_DELTA_SHIFT (2)
 * of those bits are extra precision; discounting them leaves 3 and 2
 * bits, which is where the traditionally quoted smoothing gains come
 * from ("ALPHA" of 0.875 for srtt and 0.75 for rttvar).  The smoothing
 * code itself lives in tcp_input.c and is not visible from this header.
 *
 * Each *_SCALE is derived from its *_SHIFT so the two cannot drift apart.
 */
#define	TCP_RTT_SHIFT		5	/* shift for srtt; 5 bits frac. */
#define	TCP_RTT_SCALE		(1 << TCP_RTT_SHIFT)	/* multiplier for srtt; 32 */
#define	TCP_RTTVAR_SHIFT	4	/* shift for rttvar; 4 bits frac. */
#define	TCP_RTTVAR_SCALE	(1 << TCP_RTTVAR_SHIFT)	/* multiplier for rttvar; 16 */
#define	TCP_DELTA_SHIFT		2	/* see tcp_input.c */

/*
 * The initial retransmission should happen at rtt + 4 * rttvar.
 * Because of the way we do the smoothing, srtt and rttvar
 * will each average +1/2 tick of bias.  When we compute
 * the retransmit timer, we want 1/2 tick of rounding and
 * 1 extra tick because of +-1/2 tick uncertainty in the
 * firing of the timer.  The bias will give us exactly the
 * 1.5 tick we need.  But, because the bias is
 * statistical, we have to test that we don't drop below
 * the minimum feasible timer (which is 2 ticks).
 * This version of the macro adapted from a paper by Lawrence
 * Brakmo and Larry Peterson which outlines a problem caused
 * by insufficient precision in the original implementation,
 * which results in inappropriately large RTO values for very
 * fast networks.
 *
 * How the expression works: t_srtt is first reduced to
 * TCP_DELTA_SHIFT fractional bits; t_rttvar (TCP_RTTVAR_SHIFT
 * fractional bits) read at that same scale is worth 4 * rttvar; the
 * sum is then shifted down to whole ticks.
 *
 * Notes for callers:
 *  - The macro does NOT apply the minimum mentioned above; clamping
 *    the result is left to the caller.
 *  - `tp' is evaluated twice, so it must not have side effects.
 */
#define	TCP_REXMTVAL(tp) \
	((((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \
	  + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)

/* XXX
 * We want to avoid doing m_pullup on incoming packets but that
 * means avoiding dtom on the tcp reassembly code.  That in turn means
 * keeping an mbuf pointer in the reassembly queue (since we might
 * have a cluster).  As a quick hack, the source & destination
 * port numbers (which are no longer needed once we've located the
 * tcpcb) are overlaid with an mbuf pointer.
 *
 * REASS_MBUF(ti) reinterprets the storage starting at ti->ti_t as a
 * `struct mbuf *' and yields it as an lvalue, so it can be both read
 * and assigned.
 */
#define	REASS_MBUF(ti)	(*(struct mbuf **)&((ti)->ti_t))

230 | /* |
---|
231 | * TCP statistics. |
---|
232 | * Many of these should be kept per connection, |
---|
233 | * but that's inconvenient at the moment. |
---|
234 | */ |
---|
235 | struct tcpstat { |
---|
236 | u_long tcps_connattempt; /* connections initiated */ |
---|
237 | u_long tcps_accepts; /* connections accepted */ |
---|
238 | u_long tcps_connects; /* connections established */ |
---|
239 | u_long tcps_drops; /* connections dropped */ |
---|
240 | u_long tcps_conndrops; /* embryonic connections dropped */ |
---|
241 | u_long tcps_closed; /* conn. closed (includes drops) */ |
---|
242 | u_long tcps_segstimed; /* segs where we tried to get rtt */ |
---|
243 | u_long tcps_rttupdated; /* times we succeeded */ |
---|
244 | u_long tcps_delack; /* delayed acks sent */ |
---|
245 | u_long tcps_timeoutdrop; /* conn. dropped in rxmt timeout */ |
---|
246 | u_long tcps_rexmttimeo; /* retransmit timeouts */ |
---|
247 | u_long tcps_persisttimeo; /* persist timeouts */ |
---|
248 | u_long tcps_keeptimeo; /* keepalive timeouts */ |
---|
249 | u_long tcps_keepprobe; /* keepalive probes sent */ |
---|
250 | u_long tcps_keepdrops; /* connections dropped in keepalive */ |
---|
251 | |
---|
252 | u_long tcps_sndtotal; /* total packets sent */ |
---|
253 | u_long tcps_sndpack; /* data packets sent */ |
---|
254 | u_long tcps_sndbyte; /* data bytes sent */ |
---|
255 | u_long tcps_sndrexmitpack; /* data packets retransmitted */ |
---|
256 | u_long tcps_sndrexmitbyte; /* data bytes retransmitted */ |
---|
257 | u_long tcps_sndacks; /* ack-only packets sent */ |
---|
258 | u_long tcps_sndprobe; /* window probes sent */ |
---|
259 | u_long tcps_sndurg; /* packets sent with URG only */ |
---|
260 | u_long tcps_sndwinup; /* window update-only packets sent */ |
---|
261 | u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ |
---|
262 | |
---|
263 | u_long tcps_rcvtotal; /* total packets received */ |
---|
264 | u_long tcps_rcvpack; /* packets received in sequence */ |
---|
265 | u_long tcps_rcvbyte; /* bytes received in sequence */ |
---|
266 | u_long tcps_rcvbadsum; /* packets received with ccksum errs */ |
---|
267 | u_long tcps_rcvbadoff; /* packets received with bad offset */ |
---|
268 | u_long tcps_rcvshort; /* packets received too short */ |
---|
269 | u_long tcps_rcvduppack; /* duplicate-only packets received */ |
---|
270 | u_long tcps_rcvdupbyte; /* duplicate-only bytes received */ |
---|
271 | u_long tcps_rcvpartduppack; /* packets with some duplicate data */ |
---|
272 | u_long tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */ |
---|
273 | u_long tcps_rcvoopack; /* out-of-order packets received */ |
---|
274 | u_long tcps_rcvoobyte; /* out-of-order bytes received */ |
---|
275 | u_long tcps_rcvpackafterwin; /* packets with data after window */ |
---|
276 | u_long tcps_rcvbyteafterwin; /* bytes rcvd after window */ |
---|
277 | u_long tcps_rcvafterclose; /* packets rcvd after "close" */ |
---|
278 | u_long tcps_rcvwinprobe; /* rcvd window probe packets */ |
---|
279 | u_long tcps_rcvdupack; /* rcvd duplicate acks */ |
---|
280 | u_long tcps_rcvacktoomuch; /* rcvd acks for unsent data */ |
---|
281 | u_long tcps_rcvackpack; /* rcvd ack packets */ |
---|
282 | u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */ |
---|
283 | u_long tcps_rcvwinupd; /* rcvd window update packets */ |
---|
284 | u_long tcps_pawsdrop; /* segments dropped due to PAWS */ |
---|
285 | u_long tcps_predack; /* times hdr predict ok for acks */ |
---|
286 | u_long tcps_preddat; /* times hdr predict ok for data pkts */ |
---|
287 | u_long tcps_pcbcachemiss; |
---|
288 | u_long tcps_cachedrtt; /* times cached RTT in route updated */ |
---|
289 | u_long tcps_cachedrttvar; /* times cached rttvar updated */ |
---|
290 | u_long tcps_cachedssthresh; /* times cached ssthresh updated */ |
---|
291 | u_long tcps_usedrtt; /* times RTT initialized from route */ |
---|
292 | u_long tcps_usedrttvar; /* times RTTVAR initialized from rt */ |
---|
293 | u_long tcps_usedssthresh; /* times ssthresh initialized from rt*/ |
---|
294 | u_long tcps_persistdrop; /* timeout in persist state */ |
---|
295 | u_long tcps_badsyn; /* bogus SYN, e.g. premature ACK */ |
---|
296 | u_long tcps_mturesent; /* resends due to MTU discovery */ |
---|
297 | u_long tcps_listendrop; /* listen queue overflows */ |
---|
298 | }; |
---|
299 | |
---|
/*
 * Names for TCP sysctl objects
 *
 * The identifiers are consecutive starting at 1; TCPCTL_MAXID is one
 * past the highest identifier.  When adding an identifier, bump
 * TCPCTL_MAXID and append the matching entry to TCPCTL_NAMES.
 */
#define	TCPCTL_DO_RFC1323	1	/* use RFC-1323 extensions */
#define	TCPCTL_DO_RFC1644	2	/* use RFC-1644 extensions */
#define	TCPCTL_MSSDFLT		3	/* MSS default */
#define	TCPCTL_STATS		4	/* statistics (read-only) */
#define	TCPCTL_RTTDFLT		5	/* default RTT estimate */
#define	TCPCTL_KEEPIDLE		6	/* keepalive idle timer */
#define	TCPCTL_KEEPINTVL	7	/* interval to send keepalives */
#define	TCPCTL_SENDSPACE	8	/* send buffer space */
#define	TCPCTL_RECVSPACE	9	/* receive buffer space */
/*
 * NOTE(review): this entry used to carry a copy of the RECVSPACE
 * comment.  The description below is inferred from the "keepinit"
 * name -- confirm against the code that services this sysctl.
 */
#define	TCPCTL_KEEPINIT		10	/* connection-establishment timer */
#define	TCPCTL_MAXID		11

/*
 * Initializer for a table of { name, type } pairs indexed by the
 * TCPCTL_* identifiers above.  Slot 0 is an empty placeholder because
 * the identifiers start at 1, giving TCPCTL_MAXID entries in total.
 * The CTLTYPE_* constants are expected to come from the sysctl header.
 */
#define	TCPCTL_NAMES { \
	{ 0, 0 }, \
	{ "rfc1323", CTLTYPE_INT }, \
	{ "rfc1644", CTLTYPE_INT }, \
	{ "mssdflt", CTLTYPE_INT }, \
	{ "stats", CTLTYPE_STRUCT }, \
	{ "rttdflt", CTLTYPE_INT }, \
	{ "keepidle", CTLTYPE_INT }, \
	{ "keepintvl", CTLTYPE_INT }, \
	{ "sendspace", CTLTYPE_INT }, \
	{ "recvspace", CTLTYPE_INT }, \
	{ "keepinit", CTLTYPE_INT }, \
}

/*
 * Kernel-only declarations: the TCP globals and the entry points of
 * the TCP implementation.  Nothing here is visible unless KERNEL is
 * defined.  Prototypes use the __P(()) wrapper, following the
 * convention of the rest of this header.
 */
#ifdef KERNEL
extern	struct inpcbhead tcb;		/* head of queue of active tcpcb's */
extern	struct inpcbinfo tcbinfo;
extern	struct tcpstat tcpstat;	/* tcp statistics */
extern	int tcp_mssdflt;	/* XXX */
extern	u_long tcp_now;		/* for RFC 1323 timestamps */

void	 tcp_canceltimers __P((struct tcpcb *));
struct tcpcb *
	 tcp_close __P((struct tcpcb *));
void	 tcp_ctlinput __P((int, struct sockaddr *, void *));
int	 tcp_ctloutput __P((int, struct socket *, int, int, struct mbuf **));
struct tcpcb *
	 tcp_drop __P((struct tcpcb *, int));
void	 tcp_drain __P((void));
void	 tcp_fasttimo __P((void));
struct rmxp_tao *
	 tcp_gettaocache __P((struct inpcb *));
void	 tcp_init __P((void));
void	 tcp_input __P((struct mbuf *, int));
void	 tcp_mss __P((struct tcpcb *, int));
int	 tcp_mssopt __P((struct tcpcb *));
void	 tcp_mtudisc __P((struct inpcb *, int));
struct tcpcb *
	 tcp_newtcpcb __P((struct inpcb *));
int	 tcp_output __P((struct tcpcb *));
void	 tcp_quench __P((struct inpcb *, int));
void	 tcp_respond __P((struct tcpcb *,
	    struct tcpiphdr *, struct mbuf *, u_long, u_long, int));
struct rtentry *
	 tcp_rtlookup __P((struct inpcb *));
void	 tcp_setpersist __P((struct tcpcb *));
void	 tcp_slowtimo __P((void));
struct tcpiphdr *
	 tcp_template __P((struct tcpcb *));
struct tcpcb *
	 tcp_timers __P((struct tcpcb *, int));
void	 tcp_trace __P((int, int, struct tcpcb *, struct tcpiphdr *, int));

extern	struct pr_usrreqs tcp_usrreqs;
extern	u_long tcp_sendspace;
extern	u_long tcp_recvspace;

#endif /* KERNEL */
373 | |
---|
374 | #endif /* _NETINET_TCP_VAR_H_ */ |
---|