blob: 13918a73f593a1dd9fb37664ecaa4e3c7d0f5569 [file] [log] [blame]
/*
 * fast fgets() replacement for log parsing
 *
 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * This function manages its own buffer and returns a pointer to that buffer
 * in order to avoid expensive memory copies. It also checks for line breaks
 * 32 bits at a time. It could be improved a lot using mmap() but we would
 * not be allowed to replace trailing \n with zeroes and we would be limited
 * to small log files on 32-bit machines.
 *
 */
18
19#include <stdlib.h>
20#include <string.h>
21#include <stdio.h>
22#include <unistd.h>
23
/* Tells whether at least one of the four low-order bytes of <x> is zero.
 * Returns 1 if so, 0 otherwise.
 */
static inline unsigned int has_zero(unsigned int x)
{
	/* Subtracting 1 from every byte raises the top bit of the bytes that
	 * were 0x00 (or that were above 0x80, or that lent a borrow to a zero
	 * byte below them). Masking with ~x only keeps bytes whose top bit was
	 * initially clear, so a bit survives if and only if some byte was zero.
	 */
	return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}
34
35
#define FGETS2_BUFSIZE		(256*1024)

/* Returns non-zero if one of the 4 bytes starting at <p> is a line feed.
 * The word is fetched with memcpy() so that the access is valid whatever the
 * alignment of <p> and does not read a char array through an integer lvalue.
 * Like has_zero(), this assumes a 32-bit unsigned int.
 */
static inline unsigned int fgets2_word_has_lf(const char *p)
{
	unsigned int word;

	memcpy(&word, p, sizeof(word));
	return has_zero(word ^ 0x0A0A0A0AU);
}

/* Reads the next line from <stream> and returns a pointer to it, or NULL when
 * nothing more can be returned.
 *
 * The returned string lives in an internal static buffer: the trailing LF is
 * replaced with a NUL and the pointer is only valid until the next call.
 * Data are fetched with read() on the stream's file descriptor, so stdio
 * buffering on <stream> is bypassed. All the state is static, so the function
 * is not reentrant and can only follow one stream at a time.
 *
 * NULL is returned:
 *   - when read() reports end of file or an error and no data is pending;
 *   - when the buffer is full (FGETS2_BUFSIZE bytes) and holds no LF, i.e. the
 *     line is too long. This state is permanent: later calls return NULL too.
 *
 * A last line which is not terminated by an LF is returned exactly once, and
 * the following call returns NULL.
 */
const char *fgets2(FILE *stream)
{
	static char buffer[FGETS2_BUFSIZE + 5];
	static char *end = buffer;	/* one past the last byte read so far */
	static char *line = buffer;	/* start of the next line to return */

	char *next;
	ssize_t ret;

	next = line;

	while (1) {
		/* this is a speed-up, we read 32 bits at once and check for an
		 * LF character there. We stop if found then continue one at a
		 * time. First reach a 4-byte boundary (or an LF, or the end).
		 */
		while (next < end && (((unsigned long)next) & 3) && *next != '\n')
			next++;

		/* Check 8 words per round as long as 32 bytes are available.
		 * The distance is compared instead of computing end-32, which
		 * could point before the beginning of the buffer.
		 */
		while (end - next >= 32) {
			if (fgets2_word_has_lf(next))
				break;
			next += 4;
			if (fgets2_word_has_lf(next))
				break;
			next += 4;
			if (fgets2_word_has_lf(next))
				break;
			next += 4;
			if (fgets2_word_has_lf(next))
				break;
			next += 4;
			if (fgets2_word_has_lf(next))
				break;
			next += 4;
			if (fgets2_word_has_lf(next))
				break;
			next += 4;
			if (fgets2_word_has_lf(next))
				break;
			next += 4;
			if (fgets2_word_has_lf(next))
				break;
			next += 4;
		}

		/* finish one byte at a time: either <next> is in a word which
		 * contains an LF, or less than 32 bytes remain.
		 */
		while (next < end) {
			if (*next == '\n') {
				const char *start = line;

				*next = '\0';
				line = next + 1;
				return start;
			}
			next++;
		}

		/* we found an incomplete line. First, let's move the
		 * remaining part of the buffer to the beginning, then
		 * try to complete the buffer with a new read.
		 */
		if (line > buffer) {
			if (end != line)
				memmove(buffer, line, end - line);
			end = buffer + (end - line);
			next = end;
			line = buffer;
		} else {
			/* the line already starts at the beginning of the
			 * buffer: if the buffer is full, the line is too long.
			 */
			if (end == buffer + FGETS2_BUFSIZE)
				return NULL;
		}

		ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);

		if (ret <= 0) {
			if (end == line)
				return NULL;

			/* return the unterminated last line, and mark the
			 * buffer empty so that the next call reports the end
			 * instead of returning this same line again forever.
			 */
			*end = '\0';
			end = line;
			return line;
		}

		end += ret;
		/* search for '\n' again */
	}
}
127
#ifdef BENCHMARK
/* Benchmark entry point: counts the lines that fgets2() returns for stdin
 * and prints the total. Always returns 0.
 */
int main(void)
{
	unsigned int lines = 0;

	while (fgets2(stdin))
		lines++;
	/* <lines> is unsigned, so it must be printed with %u, not %d */
	printf("lines=%u\n", lines);
	return 0;
}
#endif
138#endif