blob: 409d6026a35e64d208e554bc9430f221f1464db4 [file] [log] [blame]
Willy Tarreau214c2032009-02-20 11:02:32 +01001/*
2 * fast fgets() replacement for log parsing
3 *
4 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
 * This function manages its own buffer and returns a pointer into that buffer
 * in order to avoid expensive memory copies. It also avoids searching for line
 * breaks one character at a time wherever it can. It could be improved a lot
 * using mmap(), but then we would not be allowed to replace the trailing \n
 * with zeroes and we would be limited to small log files on 32-bit machines.
16 *
17 */
18
19#include <stdlib.h>
20#include <string.h>
21#include <stdio.h>
22#include <unistd.h>
23
/*
 * Tells whether one of the four low-order bytes of <x> is null.
 * Returns 1 if at least one of them is zero, otherwise 0.
 */
static inline unsigned int has_zero(unsigned int x)
{
	unsigned int shift;

	/* bring each byte in turn down to the low 8 bits and test it */
	for (shift = 0; shift < 32; shift += 8) {
		if (((x >> shift) & 0xFFU) == 0)
			return 1;
	}
	return 0;
}
34
/*
 * Tells whether one of the eight low-order bytes of <x> is null.
 * Returns 1 if at least one of them is zero, otherwise 0.
 *
 * Subtracting 1 from every byte sets the top bit of each byte that was zero
 * or larger than 0x80, or that had to lend to a zero byte below it. Masking
 * with ~x keeps only the bytes whose top bit was initially clear. A byte can
 * only be flagged by mistake when a real zero byte sits below it, so the
 * "any zero byte" answer is exact.
 */
static inline unsigned int has_zero64(unsigned long long x)
{
	const unsigned long long ones  = 0x0101010101010101ULL;
	const unsigned long long highs = 0x8080808080808080ULL;

	return ((x - ones) & ~x & highs) ? 1 : 0;
}
56
#define FGETS2_BUFSIZE (256*1024)

/*
 * Returns the next line available on <stream>, or NULL when there is nothing
 * left to return.
 *
 * The line is not copied: the returned pointer designates a static buffer
 * owned by this function, in which the trailing '\n' has been replaced with a
 * '\0'. Its contents may be moved or overwritten by the next call. All the
 * state (buffer and positions) is static, so the function is not reentrant
 * and is meant to be used on a single stream at a time.
 *
 * Data are fetched with read() on the stream's file descriptor, so whatever
 * stdio may have buffered for <stream> is not seen here.
 *
 * NULL is returned when read() reports end of input or an error and no data
 * is pending, and also when a single line does not fit in FGETS2_BUFSIZE
 * bytes. A last line which is not terminated by '\n' is returned exactly
 * once, then NULL is returned.
 */
const char *fgets2(FILE *stream)
{
	/* a few spare bytes are kept past the area that read() may fill */
	static char buffer[FGETS2_BUFSIZE + 5];
	static char *end = buffer;	/* one past the last valid byte */
	static char *line = buffer;	/* start of the next line to hand out */

	char *next = line;		/* where the search for '\n' resumes */
	char *lf;
	ssize_t ret;

	while (1) {
		/* memchr() does the bulk search for us. It needs neither
		 * aligned nor type-punned loads on the char buffer, and never
		 * looks outside of [next, end).
		 */
		lf = memchr(next, '\n', (size_t)(end - next));
		if (lf) {
			const char *start = line;

			*lf = '\0';
			line = lf + 1;
			return start;
		}

		/* we found an incomplete line. First, let's move the
		 * remaining part of the buffer to the beginning, then
		 * try to complete the buffer with a new read.
		 */
		if (line > buffer) {
			if (end != line)
				memmove(buffer, line, (size_t)(end - line));
			end = buffer + (end - line);
			line = buffer;
		} else if (end == buffer + FGETS2_BUFSIZE) {
			/* the buffer is full and holds no line break */
			return NULL;
		}

		/* everything before <end> was already searched */
		next = end;

		ret = read(fileno(stream), end, (size_t)(buffer + FGETS2_BUFSIZE - end));

		if (ret <= 0) {
			if (end == line)
				return NULL;

			/* last line without a trailing '\n': return it once
			 * and mark the buffer empty so that the next call
			 * reports the end instead of the same line again.
			 */
			*end = '\0';
			end = line;
			return line;
		}

		end += ret;
		/* search for '\n' again */
	}
}
136
#ifdef BENCHMARK
/*
 * Benchmark entry point, only built when BENCHMARK is defined: counts the
 * lines that fgets2() extracts from the standard input and prints the total
 * as "lines=<count>". Always returns 0.
 */
int main(void)
{
	unsigned int lines = 0;

	while (fgets2(stdin) != NULL)
		lines++;

	/* <lines> is unsigned, so it has to be printed with %u, not %d */
	printf("lines=%u\n", lines);
	return 0;
}
#endif