blob: f9e56272e4284e1b9c762cf2e30b4432b8376a72 [file] [log] [blame]
/*
 * Functions operating on SOCK_STREAM and buffers.
 *
 * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17
18#include <sys/socket.h>
19#include <sys/stat.h>
20#include <sys/types.h>
21
Willy Tarreau2dd0d472006-06-29 17:53:05 +020022#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020023#include <common/config.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020024#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020025
Willy Tarreaubaaee002006-06-26 02:48:02 +020026#include <types/buffers.h>
27#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020028#include <types/polling.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020029
30#include <proto/client.h>
31#include <proto/fd.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020032#include <proto/stream_sock.h>
33#include <proto/task.h>
34
35
36/*
Willy Tarreaud7971282006-07-29 18:36:34 +020037 * this function is called on a read event from a stream socket.
Willy Tarreaubaaee002006-06-26 02:48:02 +020038 * It returns 0.
39 */
Willy Tarreaud7971282006-07-29 18:36:34 +020040int stream_sock_read(int fd) {
Willy Tarreau54469402006-07-29 16:59:06 +020041 struct buffer *b = fdtab[fd].cb[DIR_RD].b;
Willy Tarreaubaaee002006-06-26 02:48:02 +020042 int ret, max;
Willy Tarreaub8949f12007-03-23 22:39:59 +010043 int read_poll = MAX_READ_POLL_LOOPS;
Willy Tarreaubaaee002006-06-26 02:48:02 +020044
45#ifdef DEBUG_FULL
Willy Tarreaud7971282006-07-29 18:36:34 +020046 fprintf(stderr,"stream_sock_read : fd=%d, owner=%p\n", fd, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +020047#endif
48
49 if (fdtab[fd].state != FD_STERROR) {
Willy Tarreaub8949f12007-03-23 22:39:59 +010050 while (read_poll-- > 0)
Willy Tarreaubaaee002006-06-26 02:48:02 +020051 {
52 if (b->l == 0) { /* let's realign the buffer to optimize I/O */
Willy Tarreaue09e0ce2007-03-18 16:31:29 +010053 b->r = b->w = b->lr = b->data;
Willy Tarreaubaaee002006-06-26 02:48:02 +020054 max = b->rlim - b->data;
55 }
56 else if (b->r > b->w) {
57 max = b->rlim - b->r;
58 }
59 else {
60 max = b->w - b->r;
61 /* FIXME: theorically, if w>0, we shouldn't have rlim < data+size anymore
62 * since it means that the rewrite protection has been removed. This
63 * implies that the if statement can be removed.
64 */
65 if (max > b->rlim - b->data)
66 max = b->rlim - b->data;
67 }
68
69 if (max == 0) { /* not anymore room to store data */
Willy Tarreau2a429502006-10-15 14:52:29 +020070 MY_FD_CLR(fd, StaticReadEvent);
Willy Tarreaubaaee002006-06-26 02:48:02 +020071 break;
72 }
73
74#ifndef MSG_NOSIGNAL
75 {
76 int skerr;
77 socklen_t lskerr = sizeof(skerr);
78
Willy Tarreauc6423482006-10-15 14:59:03 +020079 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
80 if (ret == -1 || skerr)
Willy Tarreaubaaee002006-06-26 02:48:02 +020081 ret = -1;
82 else
83 ret = recv(fd, b->r, max, 0);
84 }
85#else
86 ret = recv(fd, b->r, max, MSG_NOSIGNAL);
87#endif
88 if (ret > 0) {
89 b->r += ret;
90 b->l += ret;
Willy Tarreau0f9f5052006-07-29 17:39:25 +020091 b->flags |= BF_PARTIAL_READ;
Willy Tarreaubaaee002006-06-26 02:48:02 +020092
93 if (b->r == b->data + BUFSIZE) {
94 b->r = b->data; /* wrap around the buffer */
95 }
96
97 b->total += ret;
Willy Tarreau9641e8f2007-03-23 23:02:09 +010098
99 /* generally if we read something smaller than the 1 or 2 MSS,
100 * it means that it's not worth trying to read again.
101 */
102 if (ret < MIN_RET_FOR_READ_LOOP)
103 break;
104 if (!read_poll)
105 break;
106
Willy Tarreaubaaee002006-06-26 02:48:02 +0200107 /* we hope to read more data or to get a close on next round */
108 continue;
109 }
110 else if (ret == 0) {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200111 b->flags |= BF_READ_NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200112 break;
113 }
114 else if (errno == EAGAIN) {/* ignore EAGAIN */
115 break;
116 }
117 else {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200118 b->flags |= BF_READ_ERROR;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200119 fdtab[fd].state = FD_STERROR;
120 break;
121 }
122 } /* while(1) */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200123 }
124 else {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200125 b->flags |= BF_READ_ERROR;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200126 fdtab[fd].state = FD_STERROR;
127 }
128
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200129 if (b->flags & BF_READ_STATUS) {
Willy Tarreau2a429502006-10-15 14:52:29 +0200130 if (b->rto && MY_FD_ISSET(fd, StaticReadEvent))
Willy Tarreaud7971282006-07-29 18:36:34 +0200131 tv_delayfrom(&b->rex, &now, b->rto);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200132 else
Willy Tarreaud7971282006-07-29 18:36:34 +0200133 tv_eternity(&b->rex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200134
Willy Tarreaud7971282006-07-29 18:36:34 +0200135 task_wakeup(&rq, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200136 }
137
138 return 0;
139}
140
141
142/*
Willy Tarreauf8306d52006-07-29 19:01:31 +0200143 * this function is called on a write event from a stream socket.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200144 * It returns 0.
145 */
Willy Tarreauf8306d52006-07-29 19:01:31 +0200146int stream_sock_write(int fd) {
Willy Tarreau54469402006-07-29 16:59:06 +0200147 struct buffer *b = fdtab[fd].cb[DIR_WR].b;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200148 int ret, max;
149
150#ifdef DEBUG_FULL
Willy Tarreauf8306d52006-07-29 19:01:31 +0200151 fprintf(stderr,"stream_sock_write : fd=%d, owner=%p\n", fd, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200152#endif
153
154 if (b->l == 0) { /* let's realign the buffer to optimize I/O */
Willy Tarreaue09e0ce2007-03-18 16:31:29 +0100155 b->r = b->w = b->lr = b->data;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200156 max = 0;
157 }
158 else if (b->r > b->w) {
159 max = b->r - b->w;
160 }
161 else
162 max = b->data + BUFSIZE - b->w;
163
164 if (fdtab[fd].state != FD_STERROR) {
165 if (max == 0) {
Willy Tarreauf8306d52006-07-29 19:01:31 +0200166 /* may be we have received a connection acknowledgement in TCP mode without data */
167 if (fdtab[fd].state == FD_STCONN) {
168 int skerr;
169 socklen_t lskerr = sizeof(skerr);
Willy Tarreauc6423482006-10-15 14:59:03 +0200170 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
171 if (ret == -1 || skerr) {
Willy Tarreauf8306d52006-07-29 19:01:31 +0200172 b->flags |= BF_WRITE_ERROR;
173 fdtab[fd].state = FD_STERROR;
174 task_wakeup(&rq, fdtab[fd].owner);
175 tv_eternity(&b->wex);
Willy Tarreau2a429502006-10-15 14:52:29 +0200176 MY_FD_CLR(fd, StaticWriteEvent);
Willy Tarreauf8306d52006-07-29 19:01:31 +0200177 return 0;
178 }
179 }
180
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200181 b->flags |= BF_WRITE_NULL;
Willy Tarreauf8306d52006-07-29 19:01:31 +0200182 task_wakeup(&rq, fdtab[fd].owner);
183 fdtab[fd].state = FD_STREADY;
Willy Tarreaud7971282006-07-29 18:36:34 +0200184 tv_eternity(&b->wex);
Willy Tarreau2a429502006-10-15 14:52:29 +0200185 MY_FD_CLR(fd, StaticWriteEvent);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200186 return 0;
187 }
188
189#ifndef MSG_NOSIGNAL
190 {
191 int skerr;
192 socklen_t lskerr = sizeof(skerr);
193
Willy Tarreauc6423482006-10-15 14:59:03 +0200194 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
195 if (ret == -1 || skerr)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200196 ret = -1;
197 else
198 ret = send(fd, b->w, max, MSG_DONTWAIT);
199 }
200#else
201 ret = send(fd, b->w, max, MSG_DONTWAIT | MSG_NOSIGNAL);
202#endif
203
204 if (ret > 0) {
205 b->l -= ret;
206 b->w += ret;
207
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200208 b->flags |= BF_PARTIAL_WRITE;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200209
210 if (b->w == b->data + BUFSIZE) {
211 b->w = b->data; /* wrap around the buffer */
212 }
213 }
214 else if (ret == 0) {
Willy Tarreauf8306d52006-07-29 19:01:31 +0200215 /* nothing written, just pretend we were never called */
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200216 // b->flags |= BF_WRITE_NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200217 return 0;
218 }
219 else if (errno == EAGAIN) /* ignore EAGAIN */
220 return 0;
221 else {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200222 b->flags |= BF_WRITE_ERROR;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200223 fdtab[fd].state = FD_STERROR;
224 }
225 }
226 else {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200227 b->flags |= BF_WRITE_ERROR;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200228 fdtab[fd].state = FD_STERROR;
229 }
230
Willy Tarreaud7971282006-07-29 18:36:34 +0200231 if (b->wto) {
232 tv_delayfrom(&b->wex, &now, b->wto);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200233 /* FIXME: to prevent the client from expiring read timeouts during writes,
234 * we refresh it. A solution would be to merge read+write timeouts into a
235 * unique one, although that needs some study particularly on full-duplex
236 * TCP connections. */
Willy Tarreaud7971282006-07-29 18:36:34 +0200237 b->rex = b->wex;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200238 }
239 else
Willy Tarreaud7971282006-07-29 18:36:34 +0200240 tv_eternity(&b->wex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200241
Willy Tarreauf8306d52006-07-29 19:01:31 +0200242 task_wakeup(&rq, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200243 return 0;
244}
245
Willy Tarreaubaaee002006-06-26 02:48:02 +0200246
247
/*
 * Local variables:
 * c-indent-level: 8
 * c-basic-offset: 8
 * End:
 */