blob: 4e081172500819f5f1388e14e5b562c0816bb064 [file] [log] [blame]
/*
 * Functions operating on SOCK_STREAM and buffers.
 *
 * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17
18#include <sys/socket.h>
19#include <sys/stat.h>
20#include <sys/types.h>
21
Willy Tarreau2dd0d472006-06-29 17:53:05 +020022#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020023#include <common/config.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020024#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020025
Willy Tarreaubaaee002006-06-26 02:48:02 +020026#include <types/buffers.h>
27#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020028#include <types/polling.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020029
30#include <proto/client.h>
31#include <proto/fd.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020032#include <proto/stream_sock.h>
33#include <proto/task.h>
34
35
36/*
Willy Tarreaud7971282006-07-29 18:36:34 +020037 * this function is called on a read event from a stream socket.
Willy Tarreaubaaee002006-06-26 02:48:02 +020038 * It returns 0.
39 */
Willy Tarreaud7971282006-07-29 18:36:34 +020040int stream_sock_read(int fd) {
Willy Tarreau54469402006-07-29 16:59:06 +020041 struct buffer *b = fdtab[fd].cb[DIR_RD].b;
Willy Tarreaubaaee002006-06-26 02:48:02 +020042 int ret, max;
Willy Tarreaub8949f12007-03-23 22:39:59 +010043 int read_poll = MAX_READ_POLL_LOOPS;
Willy Tarreaubaaee002006-06-26 02:48:02 +020044
45#ifdef DEBUG_FULL
Willy Tarreaud7971282006-07-29 18:36:34 +020046 fprintf(stderr,"stream_sock_read : fd=%d, owner=%p\n", fd, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +020047#endif
48
49 if (fdtab[fd].state != FD_STERROR) {
Willy Tarreaub8949f12007-03-23 22:39:59 +010050 while (read_poll-- > 0)
Willy Tarreaubaaee002006-06-26 02:48:02 +020051 {
52 if (b->l == 0) { /* let's realign the buffer to optimize I/O */
Willy Tarreaue09e0ce2007-03-18 16:31:29 +010053 b->r = b->w = b->lr = b->data;
Willy Tarreaubaaee002006-06-26 02:48:02 +020054 max = b->rlim - b->data;
55 }
56 else if (b->r > b->w) {
57 max = b->rlim - b->r;
58 }
59 else {
60 max = b->w - b->r;
61 /* FIXME: theorically, if w>0, we shouldn't have rlim < data+size anymore
62 * since it means that the rewrite protection has been removed. This
63 * implies that the if statement can be removed.
64 */
65 if (max > b->rlim - b->data)
66 max = b->rlim - b->data;
67 }
68
69 if (max == 0) { /* not anymore room to store data */
Willy Tarreau2a429502006-10-15 14:52:29 +020070 MY_FD_CLR(fd, StaticReadEvent);
Willy Tarreaubaaee002006-06-26 02:48:02 +020071 break;
72 }
73
74#ifndef MSG_NOSIGNAL
75 {
76 int skerr;
77 socklen_t lskerr = sizeof(skerr);
78
Willy Tarreauc6423482006-10-15 14:59:03 +020079 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
80 if (ret == -1 || skerr)
Willy Tarreaubaaee002006-06-26 02:48:02 +020081 ret = -1;
82 else
83 ret = recv(fd, b->r, max, 0);
84 }
85#else
86 ret = recv(fd, b->r, max, MSG_NOSIGNAL);
87#endif
88 if (ret > 0) {
89 b->r += ret;
90 b->l += ret;
Willy Tarreau0f9f5052006-07-29 17:39:25 +020091 b->flags |= BF_PARTIAL_READ;
Willy Tarreaubaaee002006-06-26 02:48:02 +020092
93 if (b->r == b->data + BUFSIZE) {
94 b->r = b->data; /* wrap around the buffer */
95 }
96
97 b->total += ret;
98 /* we hope to read more data or to get a close on next round */
99 continue;
100 }
101 else if (ret == 0) {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200102 b->flags |= BF_READ_NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200103 break;
104 }
105 else if (errno == EAGAIN) {/* ignore EAGAIN */
106 break;
107 }
108 else {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200109 b->flags |= BF_READ_ERROR;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200110 fdtab[fd].state = FD_STERROR;
111 break;
112 }
113 } /* while(1) */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200114 }
115 else {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200116 b->flags |= BF_READ_ERROR;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200117 fdtab[fd].state = FD_STERROR;
118 }
119
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200120 if (b->flags & BF_READ_STATUS) {
Willy Tarreau2a429502006-10-15 14:52:29 +0200121 if (b->rto && MY_FD_ISSET(fd, StaticReadEvent))
Willy Tarreaud7971282006-07-29 18:36:34 +0200122 tv_delayfrom(&b->rex, &now, b->rto);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200123 else
Willy Tarreaud7971282006-07-29 18:36:34 +0200124 tv_eternity(&b->rex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200125
Willy Tarreaud7971282006-07-29 18:36:34 +0200126 task_wakeup(&rq, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200127 }
128
129 return 0;
130}
131
132
133/*
Willy Tarreauf8306d52006-07-29 19:01:31 +0200134 * this function is called on a write event from a stream socket.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200135 * It returns 0.
136 */
Willy Tarreauf8306d52006-07-29 19:01:31 +0200137int stream_sock_write(int fd) {
Willy Tarreau54469402006-07-29 16:59:06 +0200138 struct buffer *b = fdtab[fd].cb[DIR_WR].b;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200139 int ret, max;
140
141#ifdef DEBUG_FULL
Willy Tarreauf8306d52006-07-29 19:01:31 +0200142 fprintf(stderr,"stream_sock_write : fd=%d, owner=%p\n", fd, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200143#endif
144
145 if (b->l == 0) { /* let's realign the buffer to optimize I/O */
Willy Tarreaue09e0ce2007-03-18 16:31:29 +0100146 b->r = b->w = b->lr = b->data;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200147 max = 0;
148 }
149 else if (b->r > b->w) {
150 max = b->r - b->w;
151 }
152 else
153 max = b->data + BUFSIZE - b->w;
154
155 if (fdtab[fd].state != FD_STERROR) {
156 if (max == 0) {
Willy Tarreauf8306d52006-07-29 19:01:31 +0200157 /* may be we have received a connection acknowledgement in TCP mode without data */
158 if (fdtab[fd].state == FD_STCONN) {
159 int skerr;
160 socklen_t lskerr = sizeof(skerr);
Willy Tarreauc6423482006-10-15 14:59:03 +0200161 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
162 if (ret == -1 || skerr) {
Willy Tarreauf8306d52006-07-29 19:01:31 +0200163 b->flags |= BF_WRITE_ERROR;
164 fdtab[fd].state = FD_STERROR;
165 task_wakeup(&rq, fdtab[fd].owner);
166 tv_eternity(&b->wex);
Willy Tarreau2a429502006-10-15 14:52:29 +0200167 MY_FD_CLR(fd, StaticWriteEvent);
Willy Tarreauf8306d52006-07-29 19:01:31 +0200168 return 0;
169 }
170 }
171
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200172 b->flags |= BF_WRITE_NULL;
Willy Tarreauf8306d52006-07-29 19:01:31 +0200173 task_wakeup(&rq, fdtab[fd].owner);
174 fdtab[fd].state = FD_STREADY;
Willy Tarreaud7971282006-07-29 18:36:34 +0200175 tv_eternity(&b->wex);
Willy Tarreau2a429502006-10-15 14:52:29 +0200176 MY_FD_CLR(fd, StaticWriteEvent);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200177 return 0;
178 }
179
180#ifndef MSG_NOSIGNAL
181 {
182 int skerr;
183 socklen_t lskerr = sizeof(skerr);
184
Willy Tarreauc6423482006-10-15 14:59:03 +0200185 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
186 if (ret == -1 || skerr)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200187 ret = -1;
188 else
189 ret = send(fd, b->w, max, MSG_DONTWAIT);
190 }
191#else
192 ret = send(fd, b->w, max, MSG_DONTWAIT | MSG_NOSIGNAL);
193#endif
194
195 if (ret > 0) {
196 b->l -= ret;
197 b->w += ret;
198
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200199 b->flags |= BF_PARTIAL_WRITE;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200200
201 if (b->w == b->data + BUFSIZE) {
202 b->w = b->data; /* wrap around the buffer */
203 }
204 }
205 else if (ret == 0) {
Willy Tarreauf8306d52006-07-29 19:01:31 +0200206 /* nothing written, just pretend we were never called */
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200207 // b->flags |= BF_WRITE_NULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200208 return 0;
209 }
210 else if (errno == EAGAIN) /* ignore EAGAIN */
211 return 0;
212 else {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200213 b->flags |= BF_WRITE_ERROR;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200214 fdtab[fd].state = FD_STERROR;
215 }
216 }
217 else {
Willy Tarreau0f9f5052006-07-29 17:39:25 +0200218 b->flags |= BF_WRITE_ERROR;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200219 fdtab[fd].state = FD_STERROR;
220 }
221
Willy Tarreaud7971282006-07-29 18:36:34 +0200222 if (b->wto) {
223 tv_delayfrom(&b->wex, &now, b->wto);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200224 /* FIXME: to prevent the client from expiring read timeouts during writes,
225 * we refresh it. A solution would be to merge read+write timeouts into a
226 * unique one, although that needs some study particularly on full-duplex
227 * TCP connections. */
Willy Tarreaud7971282006-07-29 18:36:34 +0200228 b->rex = b->wex;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200229 }
230 else
Willy Tarreaud7971282006-07-29 18:36:34 +0200231 tv_eternity(&b->wex);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200232
Willy Tarreauf8306d52006-07-29 19:01:31 +0200233 task_wakeup(&rq, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200234 return 0;
235}
236
Willy Tarreaubaaee002006-06-26 02:48:02 +0200237
238
239/*
240 * Local variables:
241 * c-indent-level: 8
242 * c-basic-offset: 8
243 * End:
244 */