blob: 45eec7576b55a4f04d81d1d0732ae1062478cff8 [file] [log] [blame]
Willy Tarreau72c28532009-01-22 18:56:50 +01001/*
Willy Tarreaud8fc1102010-09-12 17:56:16 +02002 * haproxy log statistics reporter
Willy Tarreau72c28532009-01-22 18:56:50 +01003 *
Willy Tarreau8a09b662012-10-10 10:26:22 +02004 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
Willy Tarreau72c28532009-01-22 18:56:50 +01005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau72c28532009-01-22 18:56:50 +010013#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
18#include <string.h>
19#include <unistd.h>
20#include <ctype.h>
Olivier Burgarde97b9042014-05-22 16:44:59 +020021#include <time.h>
Willy Tarreau72c28532009-01-22 18:56:50 +010022
Willy Tarreaue9f4d672021-11-08 10:02:52 +010023#include <haproxy/compiler.h>
24
Willy Tarreau8d2b7772020-05-27 10:58:19 +020025#include <import/eb32tree.h>
26#include <import/eb64tree.h>
27#include <import/ebistree.h>
28#include <import/ebsttree.h>
Willy Tarreau72c28532009-01-22 18:56:50 +010029
/* 1-based positions of the fields of interest within a log line. They are
 * presumably handed to field_start() after adding the number of skipped
 * leading fields -- confirm against the callers.
 */
#define SOURCE_FIELD      5
#define ACCEPT_FIELD      6
#define SERVER_FIELD      8
#define TIME_FIELD        9
#define STATUS_FIELD      10
#define BYTES_SENT_FIELD  11
#define TERM_CODES_FIELD  14
#define CONN_FIELD        15
#define QUEUE_LEN_FIELD   16
#define METH_FIELD        17
#define URL_FIELD         18

/* presumably the maximum length of an input line -- confirm in fgets2() */
#define MAXLINE           16384

/* number of most significant bits kept when quantifying timer values */
#define QBITS             4
43
/* Non-zero when byte <c> is a separator, i.e. any value up to and including
 * the space character (space, control characters, NUL).
 */
#define SEP(c) ((unsigned char)(c) <= ' ')

/* Advance pointer <p> until character <c> has been consumed (<p> then points
 * just past it), or until a separator (any byte <= ' ') is met, in which case
 * <p> is left pointing at that separator. <p> must be a modifiable lvalue and
 * is evaluated several times; <c> is evaluated once per visited character.
 */
#define SKIP_CHAR(p, c) do {						\
		while (1) {						\
			int skip_ch_ = (unsigned char)*(p)++;		\
			if (skip_ch_ == (c))				\
				break;					\
			if (skip_ch_ <= ' ') {				\
				(p)--;					\
				break;					\
			}						\
		}							\
	} while (0)
Willy Tarreau72c28532009-01-22 18:56:50 +010046
47/* [0] = err/date, [1] = req, [2] = conn, [3] = resp, [4] = data */
48static struct eb_root timers[5] = {
49 EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
50 EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
51};
52
53struct timer {
54 struct eb32_node node;
55 unsigned int count;
56};
57
Willy Tarreaud2201062010-05-27 18:17:30 +020058struct srv_st {
59 unsigned int st_cnt[6]; /* 0xx to 5xx */
60 unsigned int nb_ct, nb_rt, nb_ok;
61 unsigned long long cum_ct, cum_rt;
62 struct ebmb_node node;
63 /* don't put anything else here, the server name will be there */
64};
Willy Tarreau72c28532009-01-22 18:56:50 +010065
Willy Tarreauabe45b62010-10-28 20:33:46 +020066struct url_stat {
67 union {
68 struct ebpt_node url;
69 struct eb64_node val;
70 } node;
71 char *url;
72 unsigned long long total_time; /* sum(all reqs' times) */
73 unsigned long long total_time_ok; /* sum(all OK reqs' times) */
Baptiste61aaad02012-09-08 23:10:03 +020074 unsigned long long total_bytes_sent; /* sum(all bytes sent) */
Willy Tarreauabe45b62010-10-28 20:33:46 +020075 unsigned int nb_err, nb_req;
76};
77
/* Bits of the global <filter> word. The option named next to a flag is the
 * command line switch which sets it in main().
 */
#define FILT_COUNT_ONLY            0x00000001	/* -c */
#define FILT_INVERT                0x00000002
#define FILT_QUIET                 0x00000004	/* -q */
#define FILT_ERRORS_ONLY           0x00000008	/* -e, -E */
#define FILT_ACC_DELAY             0x00000010	/* -ad */
#define FILT_ACC_COUNT             0x00000020	/* -ac */
#define FILT_GRAPH_TIMERS          0x00000040	/* -gt */
#define FILT_PERCENTILE            0x00000080	/* -pct */
#define FILT_TIME_RESP             0x00000100	/* -rt, -RT */

#define FILT_INVERT_ERRORS         0x00000200	/* -E */
#define FILT_INVERT_TIME_RESP      0x00000400	/* -RT */

#define FILT_COUNT_STATUS          0x00000800	/* -st */
#define FILT_COUNT_SRV_STATUS      0x00001000	/* -srv */
#define FILT_COUNT_TERM_CODES      0x00002000	/* -tc */

#define FILT_COUNT_URL_ONLY        0x00004000	/* -u */
#define FILT_COUNT_URL_COUNT       0x00008000	/* -uc */
#define FILT_COUNT_URL_ERR         0x00010000	/* -ue */
#define FILT_COUNT_URL_TTOT        0x00020000	/* -ut */
#define FILT_COUNT_URL_TAVG        0x00040000	/* -ua */
#define FILT_COUNT_URL_TTOTO       0x00080000	/* -uto */
#define FILT_COUNT_URL_TAVGO       0x00100000	/* -uao */

#define FILT_HTTP_ONLY             0x00200000	/* -H */
#define FILT_TERM_CODE_NAME        0x00400000	/* -tcn, -TCN */
#define FILT_INVERT_TERM_CODE_NAME 0x00800000	/* -TCN */

#define FILT_HTTP_STATUS           0x01000000	/* -hs, -HS */
#define FILT_INVERT_HTTP_STATUS    0x02000000	/* -HS */
#define FILT_QUEUE_ONLY            0x04000000	/* -Q */
#define FILT_QUEUE_SRV_ONLY        0x08000000	/* -QS */

#define FILT_COUNT_URL_BAVG        0x10000000	/* -uba */
#define FILT_COUNT_URL_BTOT        0x20000000	/* -ubt */

/* any of the per-URL output formats */
#define FILT_COUNT_URL_ANY \
	(FILT_COUNT_URL_ONLY | FILT_COUNT_URL_COUNT | FILT_COUNT_URL_ERR | \
	 FILT_COUNT_URL_TTOT | FILT_COUNT_URL_TAVG | FILT_COUNT_URL_TTOTO | \
	 FILT_COUNT_URL_TAVGO | FILT_COUNT_URL_BAVG | FILT_COUNT_URL_BTOT)

#define FILT_COUNT_COOK_CODES      0x40000000	/* -cc */
#define FILT_COUNT_IP_COUNT        0x80000000	/* -ic */

/* Bits of the global <filter2> word */
#define FILT2_TIMESTAMP            0x00000001	/* -time */
#define FILT2_PRESERVE_QUERY       0x00000002	/* -query */
#define FILT2_EXTRACT_CAPTURE      0x00000004	/* -hdr */
Olivier Burgarde97b9042014-05-22 16:44:59 +0200125
/* Global state shared by the option parser and the filters */
unsigned int filter = 0;	/* FILT_* bits */
unsigned int filter2 = 0;	/* FILT2_* bits */
unsigned int filter_invert = 0;	/* toggled by each -v on the command line */
const char *line;		/* presumably the line being processed -- confirm */
int linenum = 0;
int parse_err = 0;
int lines_out = 0;
int lines_max = -1;		/* set by -m; -1 unless the option is given */
Willy Tarreau72c28532009-01-22 18:56:50 +0100134
const char *fgets2(FILE *stream);

/* Output handlers. All of them except filter_extract_capture() share the
 * signature of the <line_filter> callback selected in main().
 */
void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr);
/* <block> and <field> presumably carry the two numbers given to -hdr -- confirm against the definition */
void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int block, unsigned int field);
void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr);
147
/* Write the optional message <msg> (may be NULL) followed by the short
 * usage summary to stream <output>.
 */
void usage(FILE *output, const char *msg)
{
	static const char synopsis[] =
		"Usage:\n"
		" halog [-h|--help] for long help\n"
		" halog [input_filters]* [modifiers]* [output_format] < log\n"
		" inp = [-e|-E] [-H] [-Q|-QS] [-rt|-RT <time>] [-ad <delay>] [-ac <count>]\n"
		" [-hs|-HS [min][:[max]]] [-tcn|-TCN <termcode>] [-time [min][:[max]]]\n"
		" mod = [-q] [-v] [-m <lines>] [-s <skipflds>] [-query]\n"
		" out = {-c|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-hdr <block>:<field>|\n"
		" -cc|-gt|-pct|-st|-tc|-srv|-ic}\n"
		"\n";

	if (!msg)
		msg = "";

	fprintf(output, "%s%s", msg, synopsis);
}
164
/* Report <msg> followed by the usage summary on stderr, then terminate the
 * program with exit status 1. Never returns.
 */
void die(const char *msg)
{
	usage(stderr, msg);
	exit(1);
}
170
Willy Tarreau615674c2012-01-23 08:15:51 +0100171void help()
172{
173 usage(stdout, NULL);
174 printf(
Willy Tarreau87e7eaf2021-11-08 08:37:40 +0100175 "Input filters - several filters may be combined\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100176 " -H only match lines containing HTTP logs (ignore TCP)\n"
177 " -E only match lines without any error (no 5xx status)\n"
178 " -e only match lines with errors (status 5xx or negative)\n"
179 " -rt|-RT <time> only match response times larger|smaller than <time>\n"
180 " -Q|-QS only match queued requests (any queue|server queue)\n"
181 " -tcn|-TCN <code> only match requests with/without termination code <code>\n"
182 " -hs|-HS <[min][:][max]> only match requests with HTTP status codes within/not\n"
183 " within min..max. Any of them may be omitted. Exact\n"
184 " code is checked for if no ':' is specified.\n"
Olivier Burgarde97b9042014-05-22 16:44:59 +0200185 " -time <[min][:max]> only match requests recorded between timestamps.\n"
186 " Any of them may be omitted.\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100187 "Modifiers\n"
188 " -v invert the input filtering condition\n"
189 " -q don't report errors/warnings\n"
Willy Tarreau667c9052012-10-10 16:49:28 +0200190 " -m <lines> limit output to the first <lines> lines\n"
Tim Duesterhuse0992582021-10-28 15:55:49 +0200191 " -s <skip_n_fields> skip n fields from the beginning of a line (default %d)\n"
192 " you can also use -n to start from earlier then field %d\n"
Tim Duesterhus385338b2021-10-28 16:36:03 +0200193 " -query preserve the query string for per-URL (-u*) statistics\n"
Tim Duesterhuse0992582021-10-28 15:55:49 +0200194 "\n"
Willy Tarreau87e7eaf2021-11-08 08:37:40 +0100195 "Output format - only one may be used at a time\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100196 " -c only report the number of lines that would have been printed\n"
197 " -pct output connect and response times percentiles\n"
198 " -st output number of requests per HTTP status code\n"
Willy Tarreau8a09b662012-10-10 10:26:22 +0200199 " -cc output number of requests per cookie code (2 chars)\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100200 " -tc output number of requests per termination code (2 chars)\n"
201 " -srv output statistics per server (time, requests, errors)\n"
Aleksandar Lazi6112f5c2020-05-15 22:58:30 +0200202 " -ic output statistics per ip count (time, requests, errors)\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100203 " -u* output statistics per URL (time, requests, errors)\n"
204 " Additional characters indicate the output sorting key :\n"
205 " -u : by URL, -uc : request count, -ue : error count\n"
Willy Tarreau4201df72012-10-10 14:57:35 +0200206 " -ua : average response time, -ut : average total time\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100207 " -uao, -uto: average times computed on valid ('OK') requests\n"
Tim Duesterhus66255f72021-10-28 17:24:02 +0200208 " -uba, -ubt: average bytes returned, total bytes returned\n"
209 " -hdr output captured header at the given <block>:<field>\n",
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +0200210 (int)SOURCE_FIELD, (int)SOURCE_FIELD
Willy Tarreau615674c2012-01-23 08:15:51 +0100211 );
212 exit(0);
213}
214
Willy Tarreau72c28532009-01-22 18:56:50 +0100215
/* Return a pointer to the first space or terminating NUL found at or after
 * <p>, i.e. to the first character which is not part of the field starting
 * at <p>. Other control characters (tabs included) do not end a field.
 */

#if defined(__i386__)
/* this one is always faster on 32-bits */
static inline const char *field_stop(const char *p)
{
	asm(
	    /* Look for spaces */
	    "4: \n\t"
	    "inc %0 \n\t"
	    "cmpb $0x20, -1(%0) \n\t"
	    "ja 4b \n\t"
	    "jz 3f \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb $0x0, -1(%0) \n\t"
	    "jnz 4b \n\t"

	    /* return %0-1 = position of the last char we checked */
	    "3: \n\t"
	    "dec %0 \n\t"
	    : "=r" (p)
	    : "0" (p)
	    );
	return p;
}
#else
const char *field_stop(const char *p)
{
	for (;; p++) {
		unsigned char ch = *p;

		/* only a space or the end of the string terminates a field */
		if (ch == ' ' || ch == '\0')
			return p;
	}
}
#endif
Willy Tarreau72c28532009-01-22 18:56:50 +0100257
/* Return non-zero if at least one of the 8 bytes of <x> is zero, otherwise
 * zero. Subtracting 1 from every byte makes a null byte borrow and raise its
 * top bit; masking with ~x then discards the bytes whose top bit was already
 * set before the subtraction, and only the top bit of each byte is kept.
 */
static inline __attribute__((unused)) unsigned long long has_zero64(unsigned long long x)
{
	const unsigned long long ones  = 0x0101010101010101ULL;
	const unsigned long long highs = 0x8080808080808080ULL;

	return (x - ones) & ~x & highs;
}
267
/* Return a pointer to field number <field> (counting from 1) of string <p>.
 * Fields are delimited by runs of one or more spaces; tabs and other control
 * characters are treated as regular field characters. When the line holds
 * fewer than <field> fields, a pointer to the terminating NUL is returned.
 * Roughly equivalent to awk '{print $field}' for space-delimited input.
 */
const char *field_start(const char *p, int field)
{
#ifndef PREFER_ASM
	unsigned char ch;

	for (;;) {
		/* swallow the spaces located before the next field */
		do {
			ch = *p++;
		} while (ch == ' ');

		if (ch == '\0')		/* end of line */
			return p - 1;

		/* <p - 1> is the first char of a field: is it the wanted one? */
		if (--field == 0)
			return p - 1;

		/* no: skip this field including the space which ends it */
		for (;;) {
#if defined(HA_UNALIGNED_LE64)
			/* fast path: look at 8 bytes at once as long as none
			 * of them is a NUL.
			 */
			unsigned long long word = *(unsigned long long *)p;

			if (!has_zero64(word)) {
				if (!has_zero64(word ^ 0x2020202020202020)) {
					/* no space there either */
					p += 8;
					continue;
				}
				/* There is at least one space among these 8
				 * bytes. Locating it from the bit mask was
				 * measured slower than testing each byte,
				 * probably due to the numerous short fields.
				 */
				while (*p++ != ' ')
					;
				break;
			}
#endif
			ch = *p++;
			if (ch == '\0')
				return p - 1;
			if (ch == ' ')
				break;
		}
	}
#else
	/* This version works optimally on i386 and x86_64 but the code above
	 * shows similar performance. However, depending on the version of GCC
	 * used, inlining rules change and it may have difficulties to make
	 * efficient use of this code at other locations and could result in
	 * worse performance (eg: gcc 4.4). You may want to experiment.
	 *
	 * NOTE(review): %1 (<field>) is declared as an input-only operand but
	 * is decremented by the code below -- confirm the constraints.
	 */
	asm(
	    /* skip spaces */
	    "1: \n\t"
	    "inc %0 \n\t"
	    "cmpb $0x20, -1(%0) \n\t"
	    "ja 2f \n\t"
	    "jz 1b \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb $0x0, -1(%0) \n\t"
	    "jz 3f \n\t"

	    /* start of field at [%0-1]. Check if we need to skip more fields */
	    "2: \n\t"
	    "dec %1 \n\t"
	    "jz 3f \n\t"

	    /* Look for spaces */
	    "4: \n\t"
	    "inc %0 \n\t"
	    "cmpb $0x20, -1(%0) \n\t"
	    "jz 1b \n\t"
	    "ja 4b \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb $0x0, -1(%0) \n\t"
	    "jnz 4b \n\t"

	    /* return %0-1 = position of the last char we checked */
	    "3: \n\t"
	    "dec %0 \n\t"
	    : "=r" (p)
	    : "r" (field), "0" (p)
	    );
	return p;
#endif
}
366
/* Keep only the <bits> most significant bits of <i>, counting from its
 * highest set bit, and clear all the lower ones. Returns 0 when <bits> is 0,
 * and <i> unchanged when it holds no more than <bits> significant bits.
 */
static inline unsigned int quantify_u32(unsigned int i, int bits)
{
	int high;

	if (!bits)
		return 0;

	high = i ? fls_auto(i) : 0;	// 1 to 32, or 0 when <i> is null

	if (high <= bits)
		return i;

	/* The mask is built from an unsigned 1: (high - bits) may reach 31
	 * and shifting a signed 1 into the sign bit is undefined.
	 */
	return i & ~((1U << (high - bits)) - 1);
}
385
/* Keep only the <bits> most significant bits of the absolute value of <i>,
 * and preserve its sign.
 */
static inline int quantify(int i, int bits)
{
	if (i >= 0)
		return quantify_u32(i, bits);

	/* The absolute value is computed in unsigned arithmetic so that
	 * INT_MIN does not overflow when negated.
	 */
	return -quantify_u32(0U - (unsigned int)i, bits);
}
395
396/* Insert timer value <v> into tree <r>. A pre-allocated node must be passed
397 * in <alloc>. It may be NULL, in which case the function will allocate it
398 * itself. It will be reset to NULL once consumed. The caller is responsible
399 * for freeing the node once not used anymore. The node where the value was
400 * inserted is returned.
401 */
402struct timer *insert_timer(struct eb_root *r, struct timer **alloc, int v)
403{
404 struct timer *t = *alloc;
405 struct eb32_node *n;
406
407 if (!t) {
408 t = calloc(sizeof(*t), 1);
409 if (unlikely(!t)) {
410 fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
411 exit(1);
412 }
413 }
414 t->node.key = quantify(v, QBITS); // keep only the higher QBITS bits
415
416 n = eb32i_insert(r, &t->node);
417 if (n == &t->node)
418 t = NULL; /* node inserted, will malloc next time */
419
420 *alloc = t;
421 return container_of(n, struct timer, node);
422}
423
424/* Insert value value <v> into tree <r>. A pre-allocated node must be passed
425 * in <alloc>. It may be NULL, in which case the function will allocate it
426 * itself. It will be reset to NULL once consumed. The caller is responsible
427 * for freeing the node once not used anymore. The node where the value was
428 * inserted is returned.
429 */
430struct timer *insert_value(struct eb_root *r, struct timer **alloc, int v)
431{
432 struct timer *t = *alloc;
433 struct eb32_node *n;
434
435 if (!t) {
436 t = calloc(sizeof(*t), 1);
437 if (unlikely(!t)) {
438 fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
439 exit(1);
440 }
441 }
442 t->node.key = v;
443
444 n = eb32i_insert(r, &t->node);
445 if (n == &t->node)
446 t = NULL; /* node inserted, will malloc next time */
447
448 *alloc = t;
449 return container_of(n, struct timer, node);
450}
451
/* Convert the decimal number at the beginning of <s> to an int. A single
 * leading '-' is accepted, no other prefix is (no '+', no spaces). Parsing
 * stops at the first non-digit character; 0 is returned when no digit is
 * found. No overflow check is performed.
 */
int str2ic(const char *s)
{
	int sign = 1;
	int value = 0;
	unsigned int digit;

	if (*s == '-') {
		sign = -1;
		s++;
	}

	/* negative numbers are accumulated downwards so that the most
	 * negative int can be represented.
	 */
	while ((digit = (unsigned int)(*s++ - '0')) <= 9)
		value = value * 10 + sign * (int)digit;

	return value;
}
480
481
/* Convert a date such as "[04/Dec/2008:09:49:40.555]" to the time of the day
 * in milliseconds. Everything up to the first ':' is skipped, then hours,
 * minutes and seconds are read as decimal numbers, each followed by one
 * delimiter which is not checked. Milliseconds are read only when the
 * seconds are followed by a '.'. Returns -1 when the string ends before the
 * delimiter following one of these numbers.
 */
int convert_date(const char *field)
{
	const char *cur = field;
	unsigned int hms[3] = { 0, 0, 0 };	/* hours, minutes, seconds */
	unsigned int msec = 0;
	unsigned char ch = 0;
	int idx;

	/* skip the date */
	do {
		ch = *cur++;
		if (!ch)
			return -1;
	} while (ch != ':');

	/* hour + ':', minute + ':', second + '.' or ']' */
	for (idx = 0; idx < 3; idx++) {
		while ((ch = *cur++ - '0') <= 9)
			hms[idx] = hms[idx] * 10 + ch;

		/* the string ended where a delimiter was expected */
		if (ch == (unsigned char)(0 - '0'))
			return -1;
	}

	/* if there's a '.', we have milliseconds, followed by ']' */
	if (ch == (unsigned char)('.' - '0')) {
		while ((ch = *cur++ - '0') <= 9)
			msec = msec * 10 + ch;

		if (ch == (unsigned char)(0 - '0'))
			return -1;
	}

	return (((hms[0] * 60) + hms[1]) * 60 + hms[2]) * 1000 + msec;
}
550
/* Return the month number (1..12) matching the three-letter English
 * abbreviation found at <s> ("Jan", "Feb", ...), or 0 if there is none.
 * Characters are checked from left to right and the scan stops at the first
 * mismatch, so a string which ends early is never read past its end.
 */
static inline unsigned int month_from_abbrev(const char *s)
{
	switch (s[0]) {
	case 'J':
		if (s[1] == 'a')
			return (s[2] == 'n') ? 1 : 0;
		if (s[1] == 'u')
			return (s[2] == 'n') ? 6 : (s[2] == 'l') ? 7 : 0;
		return 0;
	case 'F':
		return (s[1] == 'e' && s[2] == 'b') ? 2 : 0;
	case 'M':
		if (s[1] != 'a')
			return 0;
		return (s[2] == 'r') ? 3 : (s[2] == 'y') ? 5 : 0;
	case 'A':
		if (s[1] == 'p')
			return (s[2] == 'r') ? 4 : 0;
		if (s[1] == 'u')
			return (s[2] == 'g') ? 8 : 0;
		return 0;
	case 'S':
		return (s[1] == 'e' && s[2] == 'p') ? 9 : 0;
	case 'O':
		return (s[1] == 'c' && s[2] == 't') ? 10 : 0;
	case 'N':
		return (s[1] == 'o' && s[2] == 'v') ? 11 : 0;
	case 'D':
		return (s[1] == 'e' && s[2] == 'c') ? 12 : 0;
	default:
		return 0;
	}
}

/* Convert "[04/Dec/2008:09:49:40.555]" to a unix timestamp in seconds, the
 * date being interpreted in the local time zone. The first character is
 * skipped whatever it is, and the delimiter following each number is not
 * checked. Returns -1 when the string ends before the seconds are reached,
 * when the month is not a known three-letter abbreviation, or when the local
 * time cannot be obtained.
 *
 * The result of mktime() is cached together with the broken-down time it was
 * computed from, so that consecutive lines falling within the same minute
 * only cost a few comparisons. The result is an int, like the cached value.
 */
int convert_date_to_timestamp(const char *field)
{
	unsigned int d, mo, y, h, m, s;
	unsigned char c;
	const char *e;
	time_t rawtime;
	static struct tm *timeinfo;
	static int last_res;

	d = y = h = m = s = 0;
	e = field;

	if (!*e)
		goto out_err;
	e++; // remove '['

	/* day + '/' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		d = d * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* month + '/': the name is fully checked, and the delimiter only has
	 * to exist, like those following the numeric fields.
	 */
	mo = month_from_abbrev(e);
	if (!mo || !e[3])
		goto out_err;
	e += 4;

	/* year + ':' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		y = y * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* hour + ':' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		h = h * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* minute + ':' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		m = m * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* second + '.' or ']'. Nothing is read past this point so the string
	 * is allowed to end right after the seconds.
	 */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		s = s * 10 + c;
	}

	if (!timeinfo) {
		/* first call: get a struct tm to work on */
		time(&rawtime);
		timeinfo = localtime(&rawtime);
		if (!timeinfo)
			goto out_err;
	}
	else if ((unsigned)timeinfo->tm_min == m &&
		 (unsigned)timeinfo->tm_hour == h &&
		 (unsigned)timeinfo->tm_mday == d &&
		 (unsigned)timeinfo->tm_mon == mo - 1 &&
		 (unsigned)timeinfo->tm_year == y - 1900) {
		/* same minute as the previous call */
		return last_res + s;
	}

	timeinfo->tm_sec = 0;
	timeinfo->tm_min = m;
	timeinfo->tm_hour = h;
	timeinfo->tm_mday = d;
	timeinfo->tm_mon = mo - 1;
	timeinfo->tm_year = y - 1900;
	/* let mktime() find out whether DST applies to this very date instead
	 * of inheriting the flag of the current time or of the previous line.
	 */
	timeinfo->tm_isdst = -1;
	last_res = mktime(timeinfo);

	return last_res + s;
 out_err:
	return -1;
}
699
Willy Tarreau72c28532009-01-22 18:56:50 +0100700void truncated_line(int linenum, const char *line)
701{
702 if (!(filter & FILT_QUIET))
703 fprintf(stderr, "Truncated line %d: %s\n", linenum, line);
704}
705
706int main(int argc, char **argv)
707{
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600708 const char *b, *p, *time_field, *accept_field, *source_field;
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200709 const char *filter_term_code_name = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100710 const char *output_file = NULL;
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600711 int f, last;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200712 struct timer *t = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100713 struct eb32_node *n;
Willy Tarreauabe45b62010-10-28 20:33:46 +0200714 struct url_stat *ustat = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100715 int val, test;
Willy Tarreauc8746532014-05-28 23:05:07 +0200716 unsigned int uval;
Willy Tarreau03ca6052020-12-21 08:40:04 +0100717 unsigned int filter_acc_delay = 0, filter_acc_count = 0;
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200718 int filter_time_resp = 0;
Willy Tarreaud3007ff2011-09-05 02:07:23 +0200719 int filt_http_status_low = 0, filt_http_status_high = 0;
Willy Tarreau03ca6052020-12-21 08:40:04 +0100720 unsigned int filt2_timestamp_low = 0, filt2_timestamp_high = 0;
Tim Duesterhus66255f72021-10-28 17:24:02 +0200721 unsigned int filt2_capture_block = 0, filt2_capture_field = 0;
Willy Tarreau72c28532009-01-22 18:56:50 +0100722 int skip_fields = 1;
723
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200724 void (*line_filter)(const char *accept_field, const char *time_field, struct timer **tptr) = NULL;
725
Willy Tarreau72c28532009-01-22 18:56:50 +0100726 argc--; argv++;
727 while (argc > 0) {
728 if (*argv[0] != '-')
729 break;
730
731 if (strcmp(argv[0], "-ad") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200732 if (argc < 2) die("missing option for -ad\n");
Willy Tarreau72c28532009-01-22 18:56:50 +0100733 argc--; argv++;
734 filter |= FILT_ACC_DELAY;
735 filter_acc_delay = atol(*argv);
736 }
737 else if (strcmp(argv[0], "-ac") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200738 if (argc < 2) die("missing option for -ac\n");
Willy Tarreau72c28532009-01-22 18:56:50 +0100739 argc--; argv++;
740 filter |= FILT_ACC_COUNT;
741 filter_acc_count = atol(*argv);
742 }
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200743 else if (strcmp(argv[0], "-rt") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200744 if (argc < 2) die("missing option for -rt\n");
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200745 argc--; argv++;
746 filter |= FILT_TIME_RESP;
747 filter_time_resp = atol(*argv);
748 }
749 else if (strcmp(argv[0], "-RT") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200750 if (argc < 2) die("missing option for -RT\n");
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200751 argc--; argv++;
752 filter |= FILT_TIME_RESP | FILT_INVERT_TIME_RESP;
753 filter_time_resp = atol(*argv);
754 }
Willy Tarreau72c28532009-01-22 18:56:50 +0100755 else if (strcmp(argv[0], "-s") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200756 if (argc < 2) die("missing option for -s\n");
Willy Tarreau72c28532009-01-22 18:56:50 +0100757 argc--; argv++;
758 skip_fields = atol(*argv);
759 }
Willy Tarreau667c9052012-10-10 16:49:28 +0200760 else if (strcmp(argv[0], "-m") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200761 if (argc < 2) die("missing option for -m\n");
Willy Tarreau667c9052012-10-10 16:49:28 +0200762 argc--; argv++;
763 lines_max = atol(*argv);
764 }
Willy Tarreau72c28532009-01-22 18:56:50 +0100765 else if (strcmp(argv[0], "-e") == 0)
766 filter |= FILT_ERRORS_ONLY;
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200767 else if (strcmp(argv[0], "-E") == 0)
768 filter |= FILT_ERRORS_ONLY | FILT_INVERT_ERRORS;
Willy Tarreau70c428f2011-07-10 17:27:40 +0200769 else if (strcmp(argv[0], "-H") == 0)
770 filter |= FILT_HTTP_ONLY;
Willy Tarreau08911ff2011-10-13 13:28:36 +0200771 else if (strcmp(argv[0], "-Q") == 0)
772 filter |= FILT_QUEUE_ONLY;
773 else if (strcmp(argv[0], "-QS") == 0)
774 filter |= FILT_QUEUE_SRV_ONLY;
Willy Tarreau72c28532009-01-22 18:56:50 +0100775 else if (strcmp(argv[0], "-c") == 0)
776 filter |= FILT_COUNT_ONLY;
777 else if (strcmp(argv[0], "-q") == 0)
778 filter |= FILT_QUIET;
779 else if (strcmp(argv[0], "-v") == 0)
780 filter_invert = !filter_invert;
781 else if (strcmp(argv[0], "-gt") == 0)
782 filter |= FILT_GRAPH_TIMERS;
Willy Tarreau214c2032009-02-20 11:02:32 +0100783 else if (strcmp(argv[0], "-pct") == 0)
784 filter |= FILT_PERCENTILE;
Willy Tarreau0f423a72010-05-03 10:50:54 +0200785 else if (strcmp(argv[0], "-st") == 0)
786 filter |= FILT_COUNT_STATUS;
Willy Tarreaud2201062010-05-27 18:17:30 +0200787 else if (strcmp(argv[0], "-srv") == 0)
788 filter |= FILT_COUNT_SRV_STATUS;
Willy Tarreau8a09b662012-10-10 10:26:22 +0200789 else if (strcmp(argv[0], "-cc") == 0)
790 filter |= FILT_COUNT_COOK_CODES;
Willy Tarreaud8fc1102010-09-12 17:56:16 +0200791 else if (strcmp(argv[0], "-tc") == 0)
792 filter |= FILT_COUNT_TERM_CODES;
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200793 else if (strcmp(argv[0], "-tcn") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200794 if (argc < 2) die("missing option for -tcn\n");
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200795 argc--; argv++;
796 filter |= FILT_TERM_CODE_NAME;
797 filter_term_code_name = *argv;
798 }
799 else if (strcmp(argv[0], "-TCN") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200800 if (argc < 2) die("missing option for -TCN\n");
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200801 argc--; argv++;
802 filter |= FILT_TERM_CODE_NAME | FILT_INVERT_TERM_CODE_NAME;
803 filter_term_code_name = *argv;
804 }
Willy Tarreaud3007ff2011-09-05 02:07:23 +0200805 else if (strcmp(argv[0], "-hs") == 0 || strcmp(argv[0], "-HS") == 0) {
806 char *sep, *str;
807
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200808 if (argc < 2) die("missing option for -hs/-HS ([min]:[max])\n");
Willy Tarreaud3007ff2011-09-05 02:07:23 +0200809 filter |= FILT_HTTP_STATUS;
810 if (argv[0][1] == 'H')
811 filter |= FILT_INVERT_HTTP_STATUS;
812
813 argc--; argv++;
814 str = *argv;
815 sep = strchr(str, ':'); /* [min]:[max] */
816 if (!sep)
817 sep = str; /* make max point to min */
818 else
819 *sep++ = 0;
820 filt_http_status_low = *str ? atol(str) : 0;
821 filt_http_status_high = *sep ? atol(sep) : 65535;
822 }
Olivier Burgarde97b9042014-05-22 16:44:59 +0200823 else if (strcmp(argv[0], "-time") == 0) {
824 char *sep, *str;
825
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200826 if (argc < 2) die("missing option for -time ([min]:[max])\n");
Olivier Burgarde97b9042014-05-22 16:44:59 +0200827 filter2 |= FILT2_TIMESTAMP;
828
829 argc--; argv++;
830 str = *argv;
831 sep = strchr(str, ':'); /* [min]:[max] */
832 filt2_timestamp_low = *str ? atol(str) : 0;
833 if (!sep)
834 filt2_timestamp_high = 0xFFFFFFFF;
835 else
836 filt2_timestamp_high = atol(++sep);
837 }
Willy Tarreauabe45b62010-10-28 20:33:46 +0200838 else if (strcmp(argv[0], "-u") == 0)
839 filter |= FILT_COUNT_URL_ONLY;
840 else if (strcmp(argv[0], "-uc") == 0)
841 filter |= FILT_COUNT_URL_COUNT;
842 else if (strcmp(argv[0], "-ue") == 0)
843 filter |= FILT_COUNT_URL_ERR;
844 else if (strcmp(argv[0], "-ua") == 0)
845 filter |= FILT_COUNT_URL_TAVG;
846 else if (strcmp(argv[0], "-ut") == 0)
847 filter |= FILT_COUNT_URL_TTOT;
848 else if (strcmp(argv[0], "-uao") == 0)
849 filter |= FILT_COUNT_URL_TAVGO;
850 else if (strcmp(argv[0], "-uto") == 0)
851 filter |= FILT_COUNT_URL_TTOTO;
Baptiste61aaad02012-09-08 23:10:03 +0200852 else if (strcmp(argv[0], "-uba") == 0)
853 filter |= FILT_COUNT_URL_BAVG;
854 else if (strcmp(argv[0], "-ubt") == 0)
855 filter |= FILT_COUNT_URL_BTOT;
Tim Duesterhus385338b2021-10-28 16:36:03 +0200856 else if (strcmp(argv[0], "-query") == 0)
Tim Duesterhus24b8d692021-10-18 12:12:02 +0200857 filter2 |= FILT2_PRESERVE_QUERY;
Willy Tarreau7cf479c2013-02-16 23:49:04 +0100858 else if (strcmp(argv[0], "-ic") == 0)
859 filter |= FILT_COUNT_IP_COUNT;
Tim Duesterhus66255f72021-10-28 17:24:02 +0200860 else if (strcmp(argv[0], "-hdr") == 0) {
861 char *sep, *str;
862
863 if (argc < 2) die("missing option for -hdr (<block>:<field>)\n");
864 filter2 |= FILT2_EXTRACT_CAPTURE;
865
866 argc--; argv++;
867 str = *argv;
868 sep = strchr(str, ':');
869 if (!sep)
870 die("missing colon in -hdr (<block>:<field>)\n");
871 else
872 *sep++ = 0;
873
874 filt2_capture_block = *str ? atol(str) : 1;
875 filt2_capture_field = *sep ? atol(sep) : 1;
876
877 if (filt2_capture_block < 1 || filt2_capture_field < 1)
878 die("block and field must be at least 1 for -hdr (<block>:<field>)\n");
879 }
Willy Tarreau72c28532009-01-22 18:56:50 +0100880 else if (strcmp(argv[0], "-o") == 0) {
881 if (output_file)
882 die("Fatal: output file name already specified.\n");
883 if (argc < 2)
884 die("Fatal: missing output file name.\n");
885 output_file = argv[1];
886 }
Willy Tarreau615674c2012-01-23 08:15:51 +0100887 else if (strcmp(argv[0], "-h") == 0 || strcmp(argv[0], "--help") == 0)
888 help();
Willy Tarreau72c28532009-01-22 18:56:50 +0100889 argc--;
890 argv++;
891 }
892
Tim Duesterhus66255f72021-10-28 17:24:02 +0200893 if (!filter && !filter2)
Willy Tarreau72c28532009-01-22 18:56:50 +0100894 die("No action specified.\n");
895
896 if (filter & FILT_ACC_COUNT && !filter_acc_count)
897 filter_acc_count=1;
898
899 if (filter & FILT_ACC_DELAY && !filter_acc_delay)
900 filter_acc_delay = 1;
901
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200902
903 /* by default, all lines are printed */
904 line_filter = filter_output_line;
905 if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY))
906 line_filter = filter_accept_holes;
907 else if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE))
908 line_filter = filter_graphs;
909 else if (filter & FILT_COUNT_STATUS)
910 line_filter = filter_count_status;
Willy Tarreau8a09b662012-10-10 10:26:22 +0200911 else if (filter & FILT_COUNT_COOK_CODES)
912 line_filter = filter_count_cook_codes;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200913 else if (filter & FILT_COUNT_TERM_CODES)
914 line_filter = filter_count_term_codes;
915 else if (filter & FILT_COUNT_SRV_STATUS)
916 line_filter = filter_count_srv_status;
917 else if (filter & FILT_COUNT_URL_ANY)
918 line_filter = filter_count_url;
919 else if (filter & FILT_COUNT_ONLY)
920 line_filter = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100921
Willy Tarreauf8c95d22012-06-12 09:16:56 +0200922#if defined(POSIX_FADV_SEQUENTIAL)
923 /* around 20% performance improvement is observed on Linux with this
Joseph Herlant42172bd2018-11-09 18:02:35 -0800924 * on cold-cache. Surprisingly, WILLNEED is less performant. Don't
Willy Tarreauf8c95d22012-06-12 09:16:56 +0200925 * use NOREUSE as it flushes the cache and prevents easy data
926 * manipulation on logs!
927 */
928 posix_fadvise(0, 0, 0, POSIX_FADV_SEQUENTIAL);
929#endif
930
Willy Tarreaua1629a52012-11-13 20:48:15 +0100931 if (!line_filter && /* FILT_COUNT_ONLY ( see above), and no input filter (see below) */
Olivier Burgarde97b9042014-05-22 16:44:59 +0200932 !(filter & (FILT_HTTP_ONLY|FILT_TIME_RESP|FILT_ERRORS_ONLY|FILT_HTTP_STATUS|FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY|FILT_TERM_CODE_NAME)) &&
933 !(filter2 & (FILT2_TIMESTAMP))) {
Willy Tarreaua1629a52012-11-13 20:48:15 +0100934 /* read the whole file at once first, ignore it if inverted output */
Willy Tarreaue1a908c2012-01-03 09:23:03 +0100935 if (!filter_invert)
Willy Tarreaua1629a52012-11-13 20:48:15 +0100936 while ((lines_max < 0 || lines_out < lines_max) && fgets2(stdin) != NULL)
Willy Tarreaue1a908c2012-01-03 09:23:03 +0100937 lines_out++;
938
939 goto skip_filters;
940 }
941
Willy Tarreau214c2032009-02-20 11:02:32 +0100942 while ((line = fgets2(stdin)) != NULL) {
Willy Tarreau72c28532009-01-22 18:56:50 +0100943 linenum++;
Willy Tarreau26deaf52011-07-10 19:47:48 +0200944 time_field = NULL; accept_field = NULL;
Willy Tarreau7cf479c2013-02-16 23:49:04 +0100945 source_field = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100946
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200947 test = 1;
Willy Tarreau26deaf52011-07-10 19:47:48 +0200948
949 /* for any line we process, we first ensure that there is a field
950 * looking like the accept date field (beginning with a '[').
951 */
Willy Tarreau7cf479c2013-02-16 23:49:04 +0100952 if (filter & FILT_COUNT_IP_COUNT) {
953 /* we need the IP first */
954 source_field = field_start(line, SOURCE_FIELD + skip_fields);
955 accept_field = field_start(source_field, ACCEPT_FIELD - SOURCE_FIELD + 1);
956 }
957 else
958 accept_field = field_start(line, ACCEPT_FIELD + skip_fields);
959
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200960 if (unlikely(*accept_field != '[')) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200961 parse_err++;
962 continue;
963 }
964
965 /* the day of month field is begin 01 and 31 */
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200966 if (accept_field[1] < '0' || accept_field[1] > '3') {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200967 parse_err++;
968 continue;
969 }
970
Olivier Burgarde97b9042014-05-22 16:44:59 +0200971 if (filter2 & FILT2_TIMESTAMP) {
972 uval = convert_date_to_timestamp(accept_field);
973 test &= (uval>=filt2_timestamp_low && uval<=filt2_timestamp_high) ;
974 }
975
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200976 if (filter & FILT_HTTP_ONLY) {
Willy Tarreau70c428f2011-07-10 17:27:40 +0200977 /* only report lines with at least 4 timers */
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200978 if (!time_field) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200979 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200980 if (unlikely(!*time_field)) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200981 truncated_line(linenum, line);
982 continue;
983 }
Willy Tarreau70c428f2011-07-10 17:27:40 +0200984 }
985
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600986 field_stop(time_field + 1);
Willy Tarreau758a6ea2011-07-10 18:53:44 +0200987 /* we have field TIME_FIELD in [time_field]..[e-1] */
988 p = time_field;
Willy Tarreau70c428f2011-07-10 17:27:40 +0200989 f = 0;
Willy Tarreaudf6f0d12011-07-10 18:15:08 +0200990 while (!SEP(*p)) {
Willy Tarreau70c428f2011-07-10 17:27:40 +0200991 if (++f == 4)
992 break;
993 SKIP_CHAR(p, '/');
994 }
995 test &= (f >= 4);
996 }
997
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200998 if (filter & FILT_TIME_RESP) {
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200999 int tps;
1000
1001 /* only report lines with response times larger than filter_time_resp */
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001002 if (!time_field) {
Willy Tarreau26deaf52011-07-10 19:47:48 +02001003 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001004 if (unlikely(!*time_field)) {
Willy Tarreau26deaf52011-07-10 19:47:48 +02001005 truncated_line(linenum, line);
1006 continue;
1007 }
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001008 }
1009
Ryan O'Hara8cb99932017-12-15 10:21:39 -06001010 field_stop(time_field + 1);
Willy Tarreau758a6ea2011-07-10 18:53:44 +02001011 /* we have field TIME_FIELD in [time_field]..[e-1], let's check only the response time */
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001012
Willy Tarreau758a6ea2011-07-10 18:53:44 +02001013 p = time_field;
Willy Tarreau24bcb4f2010-10-28 20:39:50 +02001014 f = 0;
Willy Tarreaudf6f0d12011-07-10 18:15:08 +02001015 while (!SEP(*p)) {
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001016 tps = str2ic(p);
1017 if (tps < 0) {
1018 tps = -1;
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001019 }
Willy Tarreau24bcb4f2010-10-28 20:39:50 +02001020 if (++f == 4)
1021 break;
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001022 SKIP_CHAR(p, '/');
1023 }
1024
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001025 if (unlikely(f < 4)) {
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001026 parse_err++;
1027 continue;
1028 }
1029
1030 test &= (tps >= filter_time_resp) ^ !!(filter & FILT_INVERT_TIME_RESP);
1031 }
1032
Willy Tarreaud3007ff2011-09-05 02:07:23 +02001033 if (filter & (FILT_ERRORS_ONLY | FILT_HTTP_STATUS)) {
1034 /* Check both error codes (-1, 5xx) and status code ranges */
Willy Tarreau26deaf52011-07-10 19:47:48 +02001035 if (time_field)
1036 b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
1037 else
1038 b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
1039
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001040 if (unlikely(!*b)) {
Willy Tarreau72c28532009-01-22 18:56:50 +01001041 truncated_line(linenum, line);
1042 continue;
1043 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001044
Willy Tarreaud3007ff2011-09-05 02:07:23 +02001045 val = str2ic(b);
1046 if (filter & FILT_ERRORS_ONLY)
1047 test &= (val < 0 || (val >= 500 && val <= 599)) ^ !!(filter & FILT_INVERT_ERRORS);
1048
1049 if (filter & FILT_HTTP_STATUS)
1050 test &= (val >= filt_http_status_low && val <= filt_http_status_high) ^ !!(filter & FILT_INVERT_HTTP_STATUS);
Willy Tarreau72c28532009-01-22 18:56:50 +01001051 }
1052
Willy Tarreau08911ff2011-10-13 13:28:36 +02001053 if (filter & (FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY)) {
1054 /* Check if the server's queue is non-nul */
1055 if (time_field)
1056 b = field_start(time_field, QUEUE_LEN_FIELD - TIME_FIELD + 1);
1057 else
1058 b = field_start(accept_field, QUEUE_LEN_FIELD - ACCEPT_FIELD + 1);
1059
1060 if (unlikely(!*b)) {
1061 truncated_line(linenum, line);
1062 continue;
1063 }
1064
1065 if (*b == '0') {
1066 if (filter & FILT_QUEUE_SRV_ONLY) {
1067 test = 0;
1068 }
1069 else {
1070 do {
1071 b++;
1072 if (*b == '/') {
1073 b++;
1074 break;
1075 }
1076 } while (*b);
1077 test &= ((unsigned char)(*b - '1') < 9);
1078 }
1079 }
1080 }
1081
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +02001082 if (filter & FILT_TERM_CODE_NAME) {
1083 /* only report corresponding termination code name */
1084 if (time_field)
1085 b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1086 else
1087 b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1088
1089 if (unlikely(!*b)) {
1090 truncated_line(linenum, line);
1091 continue;
1092 }
1093
1094 test &= (b[0] == filter_term_code_name[0] && b[1] == filter_term_code_name[1]) ^ !!(filter & FILT_INVERT_TERM_CODE_NAME);
1095 }
1096
1097
Willy Tarreau0f423a72010-05-03 10:50:54 +02001098 test ^= filter_invert;
1099 if (!test)
1100 continue;
1101
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001102 /************** here we process inputs *******************/
Willy Tarreau72c28532009-01-22 18:56:50 +01001103
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001104 if (line_filter) {
1105 if (filter & FILT_COUNT_IP_COUNT)
1106 filter_count_ip(source_field, accept_field, time_field, &t);
Tim Duesterhus66255f72021-10-28 17:24:02 +02001107 else if (filter2 & FILT2_EXTRACT_CAPTURE)
1108 filter_extract_capture(accept_field, time_field, filt2_capture_block, filt2_capture_field);
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001109 else
1110 line_filter(accept_field, time_field, &t);
1111 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001112 else
Willy Tarreaua1629a52012-11-13 20:48:15 +01001113 lines_out++; /* FILT_COUNT_ONLY was used, so we're just counting lines */
1114 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001115 break;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001116 }
Willy Tarreauabe45b62010-10-28 20:33:46 +02001117
Willy Tarreaue1a908c2012-01-03 09:23:03 +01001118 skip_filters:
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001119 /*****************************************************
1120 * Here we've finished reading all input. Depending on the
1121 * filters, we may still have some analysis to run on the
1122 * collected data and to output data in a new format.
1123 *************************************************** */
Willy Tarreau72c28532009-01-22 18:56:50 +01001124
1125 if (t)
1126 free(t);
1127
1128 if (filter & FILT_COUNT_ONLY) {
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001129 printf("%d\n", lines_out);
Willy Tarreau72c28532009-01-22 18:56:50 +01001130 exit(0);
1131 }
1132
Willy Tarreau72c28532009-01-22 18:56:50 +01001133 if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) {
1134 /* sort and count all timers. Output will look like this :
1135 * <accept_date> <delta_ms from previous one> <nb entries>
1136 */
1137 n = eb32_first(&timers[0]);
1138
1139 if (n)
1140 last = n->key;
1141 while (n) {
1142 unsigned int d, h, m, s, ms;
1143
1144 t = container_of(n, struct timer, node);
1145 h = n->key;
1146 d = h - last;
1147 last = h;
1148
1149 if (d >= filter_acc_delay && t->count >= filter_acc_count) {
1150 ms = h % 1000; h = h / 1000;
1151 s = h % 60; h = h / 60;
1152 m = h % 60; h = h / 60;
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001153 printf("%02u:%02u:%02u.%03u %d %u %u\n", h, m, s, ms, last, d, t->count);
Willy Tarreau667c9052012-10-10 16:49:28 +02001154 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001155 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001156 break;
Willy Tarreau72c28532009-01-22 18:56:50 +01001157 }
1158 n = eb32_next(n);
1159 }
1160 }
1161 else if (filter & FILT_GRAPH_TIMERS) {
1162 /* sort all timers */
1163 for (f = 0; f < 5; f++) {
1164 struct eb32_node *n;
Willy Tarreau72c28532009-01-22 18:56:50 +01001165
Willy Tarreau72c28532009-01-22 18:56:50 +01001166 n = eb32_first(&timers[f]);
1167 while (n) {
1168 int i;
1169 double d;
Tim Duesterhus785b84b2021-11-04 21:04:24 +01001170 int val;
Willy Tarreau72c28532009-01-22 18:56:50 +01001171
1172 t = container_of(n, struct timer, node);
1173 last = n->key;
1174 val = t->count;
1175
1176 i = (last < 0) ? -last : last;
1177 i = fls_auto(i) - QBITS;
1178
1179 if (i > 0)
1180 d = val / (double)(1 << i);
1181 else
1182 d = val;
1183
Willy Tarreaua1629a52012-11-13 20:48:15 +01001184 if (d > 0.0)
Willy Tarreau72c28532009-01-22 18:56:50 +01001185 printf("%d %d %f\n", f, last, d+1.0);
Willy Tarreau72c28532009-01-22 18:56:50 +01001186
1187 n = eb32_next(n);
1188 }
Willy Tarreau214c2032009-02-20 11:02:32 +01001189 }
1190 }
1191 else if (filter & FILT_PERCENTILE) {
1192 /* report timers by percentile :
1193 * <percent> <total> <max_req_time> <max_conn_time> <max_resp_time> <max_data_time>
1194 * We don't count errs.
1195 */
1196 struct eb32_node *n[5];
1197 unsigned long cum[5];
1198 double step;
1199
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001200 if (!lines_out)
Willy Tarreau910ba4b2009-11-17 10:16:19 +01001201 goto empty;
1202
Willy Tarreau214c2032009-02-20 11:02:32 +01001203 for (f = 1; f < 5; f++) {
1204 n[f] = eb32_first(&timers[f]);
1205 cum[f] = container_of(n[f], struct timer, node)->count;
1206 }
1207
1208 for (step = 1; step <= 1000;) {
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001209 unsigned int thres = lines_out * (step / 1000.0);
Willy Tarreau214c2032009-02-20 11:02:32 +01001210
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001211 printf("%3.1f %u ", step/10.0, thres);
Willy Tarreau214c2032009-02-20 11:02:32 +01001212 for (f = 1; f < 5; f++) {
1213 struct eb32_node *next;
1214 while (cum[f] < thres) {
1215 /* need to find other keys */
1216 next = eb32_next(n[f]);
1217 if (!next)
1218 break;
1219 n[f] = next;
1220 cum[f] += container_of(next, struct timer, node)->count;
1221 }
1222
1223 /* value still within $step % of total */
1224 printf("%d ", n[f]->key);
1225 }
1226 putchar('\n');
1227 if (step >= 100 && step < 900)
1228 step += 50; // jump 5% by 5% between those steps.
1229 else if (step >= 20 && step < 980)
1230 step += 10;
1231 else
1232 step += 1;
Willy Tarreau72c28532009-01-22 18:56:50 +01001233 }
1234 }
Willy Tarreau0f423a72010-05-03 10:50:54 +02001235 else if (filter & FILT_COUNT_STATUS) {
1236 /* output all statuses in the form of <status> <occurrences> */
1237 n = eb32_first(&timers[0]);
1238 while (n) {
1239 t = container_of(n, struct timer, node);
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001240 printf("%d %u\n", n->key, t->count);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001241 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001242 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001243 break;
Willy Tarreau0f423a72010-05-03 10:50:54 +02001244 n = eb32_next(n);
1245 }
1246 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001247 else if (filter & FILT_COUNT_SRV_STATUS) {
Willy Tarreaud2201062010-05-27 18:17:30 +02001248 struct ebmb_node *srv_node;
1249 struct srv_st *srv;
1250
1251 printf("#srv_name 1xx 2xx 3xx 4xx 5xx other tot_req req_ok pct_ok avg_ct avg_rt\n");
1252
1253 srv_node = ebmb_first(&timers[0]);
1254 while (srv_node) {
1255 int tot_rq;
1256
1257 srv = container_of(srv_node, struct srv_st, node);
1258
1259 tot_rq = 0;
1260 for (f = 0; f <= 5; f++)
1261 tot_rq += srv->st_cnt[f];
1262
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001263 printf("%s %u %u %u %u %u %u %d %u %.1f %d %d\n",
Willy Tarreaud2201062010-05-27 18:17:30 +02001264 srv_node->key, srv->st_cnt[1], srv->st_cnt[2],
1265 srv->st_cnt[3], srv->st_cnt[4], srv->st_cnt[5], srv->st_cnt[0],
1266 tot_rq,
1267 srv->nb_ok, (double)srv->nb_ok * 100.0 / (tot_rq?tot_rq:1),
1268 (int)(srv->cum_ct / (srv->nb_ct?srv->nb_ct:1)), (int)(srv->cum_rt / (srv->nb_rt?srv->nb_rt:1)));
1269 srv_node = ebmb_next(srv_node);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001270 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001271 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001272 break;
Willy Tarreaud2201062010-05-27 18:17:30 +02001273 }
1274 }
Willy Tarreau8a09b662012-10-10 10:26:22 +02001275 else if (filter & (FILT_COUNT_TERM_CODES|FILT_COUNT_COOK_CODES)) {
Willy Tarreaud8fc1102010-09-12 17:56:16 +02001276 /* output all statuses in the form of <code> <occurrences> */
1277 n = eb32_first(&timers[0]);
1278 while (n) {
1279 t = container_of(n, struct timer, node);
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001280 printf("%c%c %u\n", (n->key >> 8), (n->key) & 255, t->count);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001281 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001282 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001283 break;
Willy Tarreaud8fc1102010-09-12 17:56:16 +02001284 n = eb32_next(n);
1285 }
1286 }
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001287 else if (filter & (FILT_COUNT_URL_ANY|FILT_COUNT_IP_COUNT)) {
Willy Tarreauabe45b62010-10-28 20:33:46 +02001288 struct eb_node *node, *next;
1289
1290 if (!(filter & FILT_COUNT_URL_ONLY)) {
1291 /* we have to sort on another criterion. We'll use timers[1] for the
1292 * destination tree.
1293 */
1294
1295 timers[1] = EB_ROOT; /* reconfigure to accept duplicates */
1296 for (node = eb_first(&timers[0]); node; node = next) {
1297 next = eb_next(node);
1298 eb_delete(node);
1299
1300 ustat = container_of(node, struct url_stat, node.url.node);
1301
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001302 if (filter & (FILT_COUNT_URL_COUNT|FILT_COUNT_IP_COUNT))
Willy Tarreauabe45b62010-10-28 20:33:46 +02001303 ustat->node.val.key = ustat->nb_req;
1304 else if (filter & FILT_COUNT_URL_ERR)
1305 ustat->node.val.key = ustat->nb_err;
1306 else if (filter & FILT_COUNT_URL_TTOT)
1307 ustat->node.val.key = ustat->total_time;
1308 else if (filter & FILT_COUNT_URL_TAVG)
1309 ustat->node.val.key = ustat->nb_req ? ustat->total_time / ustat->nb_req : 0;
1310 else if (filter & FILT_COUNT_URL_TTOTO)
1311 ustat->node.val.key = ustat->total_time_ok;
1312 else if (filter & FILT_COUNT_URL_TAVGO)
1313 ustat->node.val.key = (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0;
Baptiste61aaad02012-09-08 23:10:03 +02001314 else if (filter & FILT_COUNT_URL_BAVG)
1315 ustat->node.val.key = ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0;
1316 else if (filter & FILT_COUNT_URL_BTOT)
1317 ustat->node.val.key = ustat->total_bytes_sent;
Willy Tarreauabe45b62010-10-28 20:33:46 +02001318 else
1319 ustat->node.val.key = 0;
1320
1321 eb64_insert(&timers[1], &ustat->node.val);
1322 }
1323 /* switch trees */
1324 timers[0] = timers[1];
1325 }
1326
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001327 if (FILT_COUNT_IP_COUNT)
1328 printf("#req err ttot tavg oktot okavg bavg btot src\n");
1329 else
1330 printf("#req err ttot tavg oktot okavg bavg btot url\n");
Willy Tarreauabe45b62010-10-28 20:33:46 +02001331
1332 /* scan the tree in its reverse sorting order */
1333 node = eb_last(&timers[0]);
1334 while (node) {
1335 ustat = container_of(node, struct url_stat, node.url.node);
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001336 printf("%u %u %llu %llu %llu %llu %llu %llu %s\n",
Willy Tarreauabe45b62010-10-28 20:33:46 +02001337 ustat->nb_req,
1338 ustat->nb_err,
1339 ustat->total_time,
1340 ustat->nb_req ? ustat->total_time / ustat->nb_req : 0,
1341 ustat->total_time_ok,
1342 (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0,
Baptiste61aaad02012-09-08 23:10:03 +02001343 ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0,
1344 ustat->total_bytes_sent,
Willy Tarreauabe45b62010-10-28 20:33:46 +02001345 ustat->url);
1346
1347 node = eb_prev(node);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001348 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001349 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001350 break;
Willy Tarreauabe45b62010-10-28 20:33:46 +02001351 }
1352 }
Willy Tarreaud2201062010-05-27 18:17:30 +02001353
Willy Tarreau910ba4b2009-11-17 10:16:19 +01001354 empty:
Willy Tarreau72c28532009-01-22 18:56:50 +01001355 if (!(filter & FILT_QUIET))
1356 fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n",
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001357 linenum, lines_out, parse_err);
Willy Tarreau72c28532009-01-22 18:56:50 +01001358 exit(0);
1359}
1360
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001361void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr)
1362{
1363 puts(line);
1364 lines_out++;
1365}
1366
Tim Duesterhus66255f72021-10-28 17:24:02 +02001367void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int block, unsigned int field)
1368{
1369 const char *e, *f;
1370
1371 if (time_field)
1372 e = field_start(time_field, METH_FIELD - TIME_FIELD + 1);
1373 else
1374 e = field_start(accept_field, METH_FIELD - ACCEPT_FIELD + 1);
1375
1376 while (block-- > 0) {
1377 /* Scan until the start of a capture block ('{') until the URL ('"'). */
1378 while ((*e != '"' && *e != '{') && *e) {
1379 /* Note: some syslog servers escape quotes ! */
1380 if (*e == '\\' && e[1] == '"')
1381 break;
1382
1383 e = field_start(e, 2);
1384 }
1385
1386 if (unlikely(!*e)) {
1387 truncated_line(linenum, line);
1388 return;
1389 }
1390
1391 /* We reached the URL, no more captures will follow. */
1392 if (*e != '{') {
1393 puts("");
1394 lines_out++;
1395 return;
1396 }
1397
1398 /* e points the the opening brace of the capture block. */
1399
1400 e++;
1401 }
1402
1403 /* We are in the first field of the selected capture block. */
1404
1405 while (--field > 0) {
1406 while ((*e != '|' && *e != '}') && *e)
1407 e++;
1408
1409 if (unlikely(!*e)) {
1410 truncated_line(linenum, line);
1411 return;
1412 }
1413
1414 if (*e != '|') {
1415 puts("");
1416 lines_out++;
1417 return;
1418 }
1419
1420 /* e points to the pipe. */
1421
1422 e++;
1423 }
1424
1425 f = e;
1426
1427 while ((*f != '|' && *f != '}') && *f)
1428 f++;
1429
1430 if (unlikely(!*f)) {
1431 truncated_line(linenum, line);
1432 return;
1433 }
1434
1435 fwrite(e, f - e, 1, stdout);
1436 putchar('\n');
1437 lines_out++;
1438}
1439
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001440void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr)
1441{
1442 struct timer *t2;
1443 int val;
1444
1445 val = convert_date(accept_field);
1446 if (unlikely(val < 0)) {
1447 truncated_line(linenum, line);
1448 return;
1449 }
1450
1451 t2 = insert_value(&timers[0], tptr, val);
1452 t2->count++;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001453 return;
1454}
1455
1456void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr)
1457{
1458 struct timer *t2;
1459 const char *b;
1460 int val;
1461
1462 if (time_field)
1463 b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
1464 else
1465 b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
1466
1467 if (unlikely(!*b)) {
1468 truncated_line(linenum, line);
1469 return;
1470 }
1471
1472 val = str2ic(b);
1473
1474 t2 = insert_value(&timers[0], tptr, val);
1475 t2->count++;
1476}
1477
Willy Tarreau8a09b662012-10-10 10:26:22 +02001478void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr)
1479{
1480 struct timer *t2;
1481 const char *b;
1482 int val;
1483
1484 if (time_field)
1485 b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1486 else
1487 b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1488
1489 if (unlikely(!*b)) {
1490 truncated_line(linenum, line);
1491 return;
1492 }
1493
1494 val = 256 * b[2] + b[3];
1495
1496 t2 = insert_value(&timers[0], tptr, val);
1497 t2->count++;
1498}
1499
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001500void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr)
1501{
1502 struct timer *t2;
1503 const char *b;
1504 int val;
1505
1506 if (time_field)
1507 b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1508 else
1509 b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1510
1511 if (unlikely(!*b)) {
1512 truncated_line(linenum, line);
1513 return;
1514 }
1515
1516 val = 256 * b[0] + b[1];
1517
1518 t2 = insert_value(&timers[0], tptr, val);
1519 t2->count++;
1520}
1521
1522void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr)
1523{
1524 const char *b, *e, *p;
1525 int f, err, array[5];
1526 struct ebmb_node *srv_node;
1527 struct srv_st *srv;
1528 int val;
1529
1530 /* the server field is before the status field, so let's
1531 * parse them in the proper order.
1532 */
1533 b = field_start(accept_field, SERVER_FIELD - ACCEPT_FIELD + 1);
1534 if (unlikely(!*b)) {
1535 truncated_line(linenum, line);
1536 return;
1537 }
1538
1539 e = field_stop(b + 1); /* we have the server name in [b]..[e-1] */
1540
1541 /* the chance that a server name already exists is extremely high,
1542 * so let's perform a normal lookup first.
1543 */
1544 srv_node = ebst_lookup_len(&timers[0], b, e - b);
1545 srv = container_of(srv_node, struct srv_st, node);
1546
1547 if (!srv_node) {
1548 /* server not yet in the tree, let's create it */
1549 srv = (void *)calloc(1, sizeof(struct srv_st) + e - b + 1);
1550 srv_node = &srv->node;
1551 memcpy(&srv_node->key, b, e - b);
1552 srv_node->key[e - b] = '\0';
1553 ebst_insert(&timers[0], srv_node);
1554 }
1555
1556 /* let's collect the connect and response times */
1557 if (!time_field) {
1558 time_field = field_start(e, TIME_FIELD - SERVER_FIELD);
1559 if (unlikely(!*time_field)) {
1560 truncated_line(linenum, line);
1561 return;
1562 }
1563 }
1564
1565 e = field_stop(time_field + 1);
1566 /* we have field TIME_FIELD in [time_field]..[e-1] */
1567
1568 p = time_field;
1569 err = 0;
1570 f = 0;
1571 while (!SEP(*p)) {
1572 array[f] = str2ic(p);
1573 if (array[f] < 0) {
1574 array[f] = -1;
1575 err = 1;
1576 }
1577 if (++f == 5)
1578 break;
1579 SKIP_CHAR(p, '/');
1580 }
1581
1582 if (unlikely(f < 5)){
1583 parse_err++;
1584 return;
1585 }
1586
1587 /* OK we have our timers in array[2,3] */
1588 if (!err)
1589 srv->nb_ok++;
1590
1591 if (array[2] >= 0) {
1592 srv->cum_ct += array[2];
1593 srv->nb_ct++;
1594 }
1595
1596 if (array[3] >= 0) {
1597 srv->cum_rt += array[3];
1598 srv->nb_rt++;
1599 }
1600
1601 /* we're interested in the 5 HTTP status classes (1xx ... 5xx), and
1602 * the invalid ones which will be reported as 0.
1603 */
1604 b = field_start(e, STATUS_FIELD - TIME_FIELD);
1605 if (unlikely(!*b)) {
1606 truncated_line(linenum, line);
1607 return;
1608 }
1609
1610 val = 0;
1611 if (*b >= '1' && *b <= '5')
1612 val = *b - '0';
1613
1614 srv->st_cnt[val]++;
1615}
1616
1617void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr)
1618{
1619 struct url_stat *ustat = NULL;
1620 struct ebpt_node *ebpt_old;
1621 const char *b, *e;
1622 int f, err, array[5];
Baptiste61aaad02012-09-08 23:10:03 +02001623 int val;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001624
1625 /* let's collect the response time */
1626 if (!time_field) {
1627 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); // avg 115 ns per line
1628 if (unlikely(!*time_field)) {
1629 truncated_line(linenum, line);
1630 return;
1631 }
1632 }
1633
1634 /* we have the field TIME_FIELD starting at <time_field>. We'll
1635 * parse the 5 timers to detect errors, it takes avg 55 ns per line.
1636 */
1637 e = time_field; err = 0; f = 0;
1638 while (!SEP(*e)) {
1639 array[f] = str2ic(e);
1640 if (array[f] < 0) {
1641 array[f] = -1;
1642 err = 1;
1643 }
1644 if (++f == 5)
1645 break;
1646 SKIP_CHAR(e, '/');
1647 }
1648 if (f < 5) {
1649 parse_err++;
1650 return;
1651 }
1652
1653 /* OK we have our timers in array[3], and err is >0 if at
1654 * least one -1 was seen. <e> points to the first char of
1655 * the last timer. Let's prepare a new node with that.
1656 */
1657 if (unlikely(!ustat))
1658 ustat = calloc(1, sizeof(*ustat));
1659
1660 ustat->nb_err = err;
1661 ustat->nb_req = 1;
1662
1663 /* use array[4] = total time in case of error */
1664 ustat->total_time = (array[3] >= 0) ? array[3] : array[4];
1665 ustat->total_time_ok = (array[3] >= 0) ? array[3] : 0;
1666
Baptiste61aaad02012-09-08 23:10:03 +02001667 e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
1668 val = str2ic(e);
1669 ustat->total_bytes_sent = val;
1670
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001671 /* the line may be truncated because of a bad request or anything like this,
1672 * without a method. Also, if it does not begin with an quote, let's skip to
1673 * the next field because it's a capture. Let's fall back to the "method" itself
1674 * if there's nothing else.
1675 */
Baptiste61aaad02012-09-08 23:10:03 +02001676 e = field_start(e, METH_FIELD - BYTES_SENT_FIELD + 1);
Willy Tarreau61a40c72011-09-06 08:11:27 +02001677 while (*e != '"' && *e) {
1678 /* Note: some syslog servers escape quotes ! */
1679 if (*e == '\\' && e[1] == '"')
1680 break;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001681 e = field_start(e, 2);
Willy Tarreau61a40c72011-09-06 08:11:27 +02001682 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001683
1684 if (unlikely(!*e)) {
1685 truncated_line(linenum, line);
Ilya Shipitsin4473a2e2017-09-22 22:33:16 +05001686 free(ustat);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001687 return;
1688 }
1689
1690 b = field_start(e, URL_FIELD - METH_FIELD + 1); // avg 40 ns per line
1691 if (!*b)
1692 b = e;
1693
1694 /* stop at end of field or first ';' or '?', takes avg 64 ns per line */
1695 e = b;
1696 do {
Tim Duesterhus24b8d692021-10-18 12:12:02 +02001697 if (*e == ' '||
1698 (!(filter2 & FILT2_PRESERVE_QUERY) && (*e == '?' || *e == ';'))) {
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001699 *(char *)e = 0;
1700 break;
1701 }
1702 e++;
1703 } while (*e);
1704
1705 /* now instead of copying the URL for a simple lookup, we'll link
1706 * to it from the node we're trying to insert. If it returns a
1707 * different value, it was already there. Otherwise we just have
1708 * to dynamically realloc an entry using strdup().
1709 */
1710 ustat->node.url.key = (char *)b;
1711 ebpt_old = ebis_insert(&timers[0], &ustat->node.url);
1712
1713 if (ebpt_old != &ustat->node.url) {
1714 struct url_stat *ustat_old;
1715 /* node was already there, let's update previous one */
1716 ustat_old = container_of(ebpt_old, struct url_stat, node.url);
1717 ustat_old->nb_req ++;
1718 ustat_old->nb_err += ustat->nb_err;
1719 ustat_old->total_time += ustat->total_time;
1720 ustat_old->total_time_ok += ustat->total_time_ok;
Baptiste61aaad02012-09-08 23:10:03 +02001721 ustat_old->total_bytes_sent += ustat->total_bytes_sent;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001722 } else {
1723 ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
1724 ustat = NULL; /* node was used */
1725 }
1726}
1727
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001728void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr)
1729{
1730 struct url_stat *ustat = NULL;
1731 struct ebpt_node *ebpt_old;
1732 const char *b, *e;
1733 int f, err, array[5];
1734 int val;
1735
1736 /* let's collect the response time */
1737 if (!time_field) {
1738 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); // avg 115 ns per line
1739 if (unlikely(!*time_field)) {
1740 truncated_line(linenum, line);
1741 return;
1742 }
1743 }
1744
1745 /* we have the field TIME_FIELD starting at <time_field>. We'll
1746 * parse the 5 timers to detect errors, it takes avg 55 ns per line.
1747 */
1748 e = time_field; err = 0; f = 0;
1749 while (!SEP(*e)) {
1750 if (f == 0 || f == 4) {
1751 array[f] = str2ic(e);
1752 if (array[f] < 0) {
1753 array[f] = -1;
1754 err = 1;
1755 }
1756 }
1757 if (++f == 5)
1758 break;
1759 SKIP_CHAR(e, '/');
1760 }
1761 if (f < 5) {
1762 parse_err++;
1763 return;
1764 }
1765
1766 /* OK we have our timers in array[0], and err is >0 if at
1767 * least one -1 was seen. <e> points to the first char of
1768 * the last timer. Let's prepare a new node with that.
1769 */
1770 if (unlikely(!ustat))
1771 ustat = calloc(1, sizeof(*ustat));
1772
1773 ustat->nb_err = err;
1774 ustat->nb_req = 1;
1775
1776 /* use array[4] = total time in case of error */
1777 ustat->total_time = (array[0] >= 0) ? array[0] : array[4];
1778 ustat->total_time_ok = (array[0] >= 0) ? array[0] : 0;
1779
1780 e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
1781 val = str2ic(e);
1782 ustat->total_bytes_sent = val;
1783
1784 /* the source might be IPv4 or IPv6, so we always strip the port by
1785 * removing the last colon.
1786 */
1787 b = source_field;
1788 e = field_stop(b + 1);
1789 while (e > b && e[-1] != ':')
1790 e--;
1791 *(char *)(e - 1) = '\0';
1792
1793 /* now instead of copying the src for a simple lookup, we'll link
1794 * to it from the node we're trying to insert. If it returns a
1795 * different value, it was already there. Otherwise we just have
1796 * to dynamically realloc an entry using strdup(). We're using the
1797 * <url> field of the node to store the source address.
1798 */
1799 ustat->node.url.key = (char *)b;
1800 ebpt_old = ebis_insert(&timers[0], &ustat->node.url);
1801
1802 if (ebpt_old != &ustat->node.url) {
1803 struct url_stat *ustat_old;
1804 /* node was already there, let's update previous one */
1805 ustat_old = container_of(ebpt_old, struct url_stat, node.url);
1806 ustat_old->nb_req ++;
1807 ustat_old->nb_err += ustat->nb_err;
1808 ustat_old->total_time += ustat->total_time;
1809 ustat_old->total_time_ok += ustat->total_time_ok;
1810 ustat_old->total_bytes_sent += ustat->total_bytes_sent;
1811 } else {
1812 ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
1813 ustat = NULL; /* node was used */
1814 }
1815}
1816
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001817void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr)
1818{
1819 struct timer *t2;
Ryan O'Hara8cb99932017-12-15 10:21:39 -06001820 const char *p;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001821 int f, err, array[5];
1822
1823 if (!time_field) {
1824 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
1825 if (unlikely(!*time_field)) {
1826 truncated_line(linenum, line);
1827 return;
1828 }
1829 }
1830
Ryan O'Hara8cb99932017-12-15 10:21:39 -06001831 field_stop(time_field + 1);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001832 /* we have field TIME_FIELD in [time_field]..[e-1] */
1833
1834 p = time_field;
1835 err = 0;
1836 f = 0;
1837 while (!SEP(*p)) {
1838 array[f] = str2ic(p);
1839 if (array[f] < 0) {
1840 array[f] = -1;
1841 err = 1;
1842 }
1843 if (++f == 5)
1844 break;
1845 SKIP_CHAR(p, '/');
1846 }
1847
1848 if (unlikely(f < 5)) {
1849 parse_err++;
1850 return;
1851 }
1852
1853 /* if we find at least one negative time, we count one error
1854 * with a time equal to the total session time. This will
1855 * emphasize quantum timing effects associated to known
1856 * timeouts. Note that on some buggy machines, it is possible
1857 * that the total time is negative, hence the reason to reset
1858 * it.
1859 */
1860
1861 if (filter & FILT_GRAPH_TIMERS) {
1862 if (err) {
1863 if (array[4] < 0)
1864 array[4] = -1;
1865 t2 = insert_timer(&timers[0], tptr, array[4]); // total time
1866 t2->count++;
1867 } else {
1868 int v;
1869
1870 t2 = insert_timer(&timers[1], tptr, array[0]); t2->count++; // req
1871 t2 = insert_timer(&timers[2], tptr, array[2]); t2->count++; // conn
1872 t2 = insert_timer(&timers[3], tptr, array[3]); t2->count++; // resp
1873
1874 v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
1875 if (v < 0 && !(filter & FILT_QUIET))
1876 fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
1877 line, array[0], array[1], array[2], array[3], array[4], v);
1878 t2 = insert_timer(&timers[4], tptr, v); t2->count++;
1879 lines_out++;
1880 }
1881 } else { /* percentile */
1882 if (err) {
1883 if (array[4] < 0)
1884 array[4] = -1;
1885 t2 = insert_value(&timers[0], tptr, array[4]); // total time
1886 t2->count++;
1887 } else {
1888 int v;
1889
1890 t2 = insert_value(&timers[1], tptr, array[0]); t2->count++; // req
1891 t2 = insert_value(&timers[2], tptr, array[2]); t2->count++; // conn
1892 t2 = insert_value(&timers[3], tptr, array[3]); t2->count++; // resp
1893
1894 v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
1895 if (v < 0 && !(filter & FILT_QUIET))
1896 fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
1897 line, array[0], array[1], array[2], array[3], array[4], v);
1898 t2 = insert_value(&timers[4], tptr, v); t2->count++;
1899 lines_out++;
1900 }
1901 }
1902}
1903
1904
Willy Tarreau72c28532009-01-22 18:56:50 +01001905/*
1906 * Local variables:
1907 * c-indent-level: 8
1908 * c-basic-offset: 8
1909 * End:
1910 */