blob: f2b9dd7e70b61754ae515d19aa30f84371e213c7 [file] [log] [blame]
Willy Tarreau72c28532009-01-22 18:56:50 +01001/*
Willy Tarreaud8fc1102010-09-12 17:56:16 +02002 * haproxy log statistics reporter
Willy Tarreau72c28532009-01-22 18:56:50 +01003 *
Willy Tarreau8a09b662012-10-10 10:26:22 +02004 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
Willy Tarreau72c28532009-01-22 18:56:50 +01005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau72c28532009-01-22 18:56:50 +010013#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
18#include <string.h>
19#include <unistd.h>
20#include <ctype.h>
Olivier Burgarde97b9042014-05-22 16:44:59 +020021#include <time.h>
Willy Tarreau72c28532009-01-22 18:56:50 +010022
Willy Tarreaue9f4d672021-11-08 10:02:52 +010023#include <haproxy/compiler.h>
24
Willy Tarreau8d2b7772020-05-27 10:58:19 +020025#include <import/eb32tree.h>
26#include <import/eb64tree.h>
27#include <import/ebistree.h>
28#include <import/ebsttree.h>
Willy Tarreau72c28532009-01-22 18:56:50 +010029
/* Numbers of the log line fields holding each value of interest. They are
 * meant to be passed to field_start(), which counts fields starting from 1.
 */
#define SOURCE_FIELD      5
#define ACCEPT_FIELD      6
#define SERVER_FIELD      8
#define TIME_FIELD        9
#define STATUS_FIELD     10
#define BYTES_SENT_FIELD 11
#define TERM_CODES_FIELD 14
#define CONN_FIELD       15
#define QUEUE_LEN_FIELD  16
#define METH_FIELD       17
#define URL_FIELD        18

/* MAXLINE: presumably the size of the line buffer -- confirm against fgets2().
 * QBITS: number of most significant bits kept by insert_timer() when it
 * quantifies a timer value.
 */
#define MAXLINE 16384
#define QBITS   4
43
/* SEP(c): non-zero when <c> is a separator, i.e. a space or any byte below it
 * (control characters and the terminating NUL).
 */
#define SEP(c) ((unsigned char)(c) <= ' ')

/* SKIP_CHAR(p, c): advance pointer <p> just past the next occurrence of
 * character <c>. If a separator (byte <= ' ') other than <c> is met first,
 * stop there and leave <p> pointing to it. <p> must be a modifiable lvalue
 * and is evaluated several times. Both arguments are parenthesized so that
 * an expression may safely be passed as <c>.
 */
#define SKIP_CHAR(p, c) do {						\
		while (1) {						\
			int skip_char_cur = (unsigned char)*(p)++;	\
			if (skip_char_cur == (c))			\
				break;					\
			if (skip_char_cur <= ' ') {			\
				(p)--;					\
				break;					\
			}						\
		}							\
	} while (0)
Willy Tarreau72c28532009-01-22 18:56:50 +010046
47/* [0] = err/date, [1] = req, [2] = conn, [3] = resp, [4] = data */
48static struct eb_root timers[5] = {
49 EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
50 EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
51};
52
53struct timer {
54 struct eb32_node node;
55 unsigned int count;
56};
57
Willy Tarreaud2201062010-05-27 18:17:30 +020058struct srv_st {
59 unsigned int st_cnt[6]; /* 0xx to 5xx */
60 unsigned int nb_ct, nb_rt, nb_ok;
61 unsigned long long cum_ct, cum_rt;
62 struct ebmb_node node;
63 /* don't put anything else here, the server name will be there */
64};
Willy Tarreau72c28532009-01-22 18:56:50 +010065
Willy Tarreauabe45b62010-10-28 20:33:46 +020066struct url_stat {
67 union {
68 struct ebpt_node url;
69 struct eb64_node val;
70 } node;
71 char *url;
72 unsigned long long total_time; /* sum(all reqs' times) */
73 unsigned long long total_time_ok; /* sum(all OK reqs' times) */
Baptiste61aaad02012-09-08 23:10:03 +020074 unsigned long long total_bytes_sent; /* sum(all bytes sent) */
Willy Tarreauabe45b62010-10-28 20:33:46 +020075 unsigned int nb_err, nb_req;
76};
77
/* Bits of the global <filter> word. The option mentioned next to a flag is
 * the command line argument which sets it in main().
 */
#define FILT_COUNT_ONLY            0x01       /* -c */
#define FILT_INVERT                0x02
#define FILT_QUIET                 0x04       /* -q */
#define FILT_ERRORS_ONLY           0x08       /* -e, -E */
#define FILT_ACC_DELAY             0x10       /* -ad */
#define FILT_ACC_COUNT             0x20       /* -ac */
#define FILT_GRAPH_TIMERS          0x40       /* -gt */
#define FILT_PERCENTILE            0x80       /* -pct */
#define FILT_TIME_RESP             0x100      /* -rt, -RT */

#define FILT_INVERT_ERRORS         0x200      /* -E */
#define FILT_INVERT_TIME_RESP      0x400      /* -RT */

#define FILT_COUNT_STATUS          0x800      /* -st */
#define FILT_COUNT_SRV_STATUS      0x1000     /* -srv */
#define FILT_COUNT_TERM_CODES      0x2000     /* -tc */

#define FILT_COUNT_URL_ONLY        0x004000   /* -u */
#define FILT_COUNT_URL_COUNT       0x008000   /* -uc */
#define FILT_COUNT_URL_ERR         0x010000   /* -ue */
#define FILT_COUNT_URL_TTOT        0x020000   /* -ut */
#define FILT_COUNT_URL_TAVG        0x040000   /* -ua */
#define FILT_COUNT_URL_TTOTO       0x080000   /* -uto */
#define FILT_COUNT_URL_TAVGO       0x100000   /* -uao */

#define FILT_HTTP_ONLY             0x200000   /* -H */
#define FILT_TERM_CODE_NAME        0x400000   /* -tcn, -TCN */
#define FILT_INVERT_TERM_CODE_NAME 0x800000   /* -TCN */

#define FILT_HTTP_STATUS           0x1000000  /* -hs, -HS */
#define FILT_INVERT_HTTP_STATUS    0x2000000  /* -HS */
#define FILT_QUEUE_ONLY            0x4000000  /* -Q */
#define FILT_QUEUE_SRV_ONLY        0x8000000  /* -QS */

#define FILT_COUNT_URL_BAVG        0x10000000 /* -uba */
#define FILT_COUNT_URL_BTOT        0x20000000 /* -ubt */

/* any of the per-URL output modes */
#define FILT_COUNT_URL_ANY   (FILT_COUNT_URL_ONLY|FILT_COUNT_URL_COUNT|FILT_COUNT_URL_ERR| \
			      FILT_COUNT_URL_TTOT|FILT_COUNT_URL_TAVG|FILT_COUNT_URL_TTOTO|FILT_COUNT_URL_TAVGO| \
			      FILT_COUNT_URL_BAVG|FILT_COUNT_URL_BTOT)

#define FILT_COUNT_COOK_CODES      0x40000000 /* -cc */
#define FILT_COUNT_IP_COUNT        0x80000000 /* -ic */

/* Bits of the global <filter2> word */
#define FILT2_TIMESTAMP            0x01       /* -time */
#define FILT2_PRESERVE_QUERY       0x02       /* -query */
#define FILT2_EXTRACT_CAPTURE      0x04       /* presumably -hdr -- confirm in main() */
Olivier Burgarde97b9042014-05-22 16:44:59 +0200125
/* Global state shared by main() and the line filters */
unsigned int filter = 0;        /* FILT_* flags */
unsigned int filter2 = 0;       /* FILT2_* flags */
unsigned int filter_invert = 0; /* toggled by each -v on the command line */
const char *line;
int linenum = 0;
int parse_err = 0;
int lines_out = 0;
int lines_max = -1;             /* set by -m; -1 by default */
Willy Tarreau72c28532009-01-22 18:56:50 +0100134
/* line reader, defined further down in this file */
const char *fgets2(FILE *stream);

/* Per-line output handlers, defined further down in this file. All but
 * filter_extract_capture() share the signature of the <line_filter> function
 * pointer declared in main().
 */
void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int, unsigned int);
void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr);
147
/* Write the short usage summary to <output>, preceded by <msg> when it is
 * not NULL.
 */
void usage(FILE *output, const char *msg)
{
	const char *prefix = "";

	if (msg)
		prefix = msg;

	fprintf(output,
		"%s"
		"Usage:\n"
		" halog [-h|--help] for long help\n"
		" halog [input_filters]* [modifiers]* [output_format] < log\n"
		" inp = [-e|-E] [-H] [-Q|-QS] [-rt|-RT <time>] [-ad <delay>] [-ac <count>]\n"
		" [-hs|-HS [min][:[max]]] [-tcn|-TCN <termcode>] [-time [min][:[max]]]\n"
		" mod = [-q] [-v] [-m <lines>] [-s <skipflds>] [-query]\n"
		" out = {-c|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-hdr <block>:<field>|\n"
		" -cc|-gt|-pct|-st|-tc|-srv|-ic}\n"
		"\n",
		prefix
		);
}
164
/* Print <msg> followed by the usage summary on stderr, then terminate the
 * program with exit status 1. Never returns.
 */
void die(const char *msg)
{
	usage(stderr, msg);
	exit(1);
}
170
Willy Tarreau615674c2012-01-23 08:15:51 +0100171void help()
172{
173 usage(stdout, NULL);
174 printf(
Willy Tarreau87e7eaf2021-11-08 08:37:40 +0100175 "Input filters - several filters may be combined\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100176 " -H only match lines containing HTTP logs (ignore TCP)\n"
177 " -E only match lines without any error (no 5xx status)\n"
178 " -e only match lines with errors (status 5xx or negative)\n"
179 " -rt|-RT <time> only match response times larger|smaller than <time>\n"
180 " -Q|-QS only match queued requests (any queue|server queue)\n"
181 " -tcn|-TCN <code> only match requests with/without termination code <code>\n"
182 " -hs|-HS <[min][:][max]> only match requests with HTTP status codes within/not\n"
183 " within min..max. Any of them may be omitted. Exact\n"
184 " code is checked for if no ':' is specified.\n"
Olivier Burgarde97b9042014-05-22 16:44:59 +0200185 " -time <[min][:max]> only match requests recorded between timestamps.\n"
186 " Any of them may be omitted.\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100187 "Modifiers\n"
188 " -v invert the input filtering condition\n"
189 " -q don't report errors/warnings\n"
Willy Tarreau667c9052012-10-10 16:49:28 +0200190 " -m <lines> limit output to the first <lines> lines\n"
Tim Duesterhuse0992582021-10-28 15:55:49 +0200191 " -s <skip_n_fields> skip n fields from the beginning of a line (default %d)\n"
192 " you can also use -n to start from earlier then field %d\n"
Tim Duesterhus385338b2021-10-28 16:36:03 +0200193 " -query preserve the query string for per-URL (-u*) statistics\n"
Tim Duesterhuse0992582021-10-28 15:55:49 +0200194 "\n"
Willy Tarreau87e7eaf2021-11-08 08:37:40 +0100195 "Output format - only one may be used at a time\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100196 " -c only report the number of lines that would have been printed\n"
197 " -pct output connect and response times percentiles\n"
198 " -st output number of requests per HTTP status code\n"
Willy Tarreau8a09b662012-10-10 10:26:22 +0200199 " -cc output number of requests per cookie code (2 chars)\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100200 " -tc output number of requests per termination code (2 chars)\n"
201 " -srv output statistics per server (time, requests, errors)\n"
Aleksandar Lazi6112f5c2020-05-15 22:58:30 +0200202 " -ic output statistics per ip count (time, requests, errors)\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100203 " -u* output statistics per URL (time, requests, errors)\n"
204 " Additional characters indicate the output sorting key :\n"
205 " -u : by URL, -uc : request count, -ue : error count\n"
Willy Tarreau4201df72012-10-10 14:57:35 +0200206 " -ua : average response time, -ut : average total time\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100207 " -uao, -uto: average times computed on valid ('OK') requests\n"
Tim Duesterhus66255f72021-10-28 17:24:02 +0200208 " -uba, -ubt: average bytes returned, total bytes returned\n"
Willy Tarreau8be893d2022-11-25 11:10:19 +0100209 " -hdr output captured header at the given <block>:<field>\n"
210 " -ac <count> -ad <delay>:\n"
Willy Tarreau286199c2022-11-25 09:40:06 +0100211 " Report periods corresponding to a grouped accept of <count> requests at\n"
212 " the same millisecond after a delay of at least <ad> milliseconds with no\n"
213 " incoming accept (used to spot network outages). Output format contains:\n"
Willy Tarreau8be893d2022-11-25 11:10:19 +0100214 " <accept_date> <date_ms> <delta_ms from previous one> <nb_entries>\n",
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +0200215 (int)SOURCE_FIELD, (int)SOURCE_FIELD
Willy Tarreau615674c2012-01-23 08:15:51 +0100216 );
217 exit(0);
218}
219
Willy Tarreau72c28532009-01-22 18:56:50 +0100220
/* Return a pointer to the first character which is not part of the field
 * starting at <p>, that is, the first space or the terminating NUL. Other
 * control characters are considered part of the field.
 */

#if defined(__i386__)
/* this one is always faster on 32-bits */
static inline const char *field_stop(const char *p)
{
	asm(
	    /* Look for spaces */
	    "4: \n\t"
	    "inc %0 \n\t"
	    "cmpb $0x20, -1(%0) \n\t"
	    "ja 4b \n\t"
	    "jz 3f \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb $0x0, -1(%0) \n\t"
	    "jnz 4b \n\t"

	    /* return %0-1 = position of the last char we checked */
	    "3: \n\t"
	    "dec %0 \n\t"
	    : "=r" (p)
	    : "0" (p)
	    );
	return p;
}
#else
const char *field_stop(const char *p)
{
	/* only a space or the end of the string terminates a field */
	for (;; p++) {
		unsigned char cur = *p;

		if (cur == ' ' || cur == '\0')
			return p;
	}
}
#endif
Willy Tarreau72c28532009-01-22 18:56:50 +0100262
/* Return non-zero if at least one of the eight bytes of <x> is zero, zero
 * otherwise. Subtracting 1 from each byte generates a borrow which sets the
 * byte's top bit when that byte was zero; masking with ~x then discards the
 * top bits that were already set in <x>. Callers should only test the result
 * against zero.
 */
static inline __attribute__((unused)) unsigned long long has_zero64(unsigned long long x)
{
	const unsigned long long low_bits  = 0x0101010101010101ULL;
	const unsigned long long high_bits = 0x8080808080808080ULL;

	return (x - low_bits) & ~x & high_bits;
}
272
/* Return a pointer to the beginning of field number <field> (counted from 1)
 * in string <p>. Fields are delimited by spaces only, and any run of
 * contiguous spaces counts as a single delimiter. When the line holds fewer
 * fields than requested, the returned pointer designates the terminating NUL.
 */
const char *field_start(const char *p, int field)
{
#ifndef PREFER_ASM
	for (;;) {
		/* swallow the spaces preceding the next field */
		while (*p == ' ')
			p++;

		/* end of line met before the requested field */
		if (*p == '\0')
			return p;

		/* <p> is on the first char of a field: is it the one we want ? */
		if (--field == 0)
			return p;

		/* no: walk past this field, up to and including the space
		 * which ends it.
		 */
		p++;
		for (;;) {
#if defined(HA_UNALIGNED_LE64)
			unsigned long long l = *(unsigned long long *)p;
			if (!has_zero64(l)) {
				/* no NUL in the next 8 bytes, look for a space */
				l ^= 0x2020202020202020;
				l = has_zero64(l);
				if (!l) {
					p += 8;
					continue;
				}
				/* there is at least one space, find it and
				 * skip it now. The lowest byte in <l> with
				 * a 0x80 is the right one, but checking for
				 * it remains slower than testing each byte,
				 * probably due to the numerous short fields.
				 */
				while (*(p++) != ' ')
					;
				break;
			}
#endif
			if (*p == '\0')
				return p;
			if (*p++ == ' ')
				break;
		}
	}
#else
	/* This version works optimally on i386 and x86_64 but the code above
	 * shows similar performance. However, depending on the version of GCC
	 * used, inlining rules change and it may have difficulties to make
	 * efficient use of this code at other locations and could result in
	 * worse performance (eg: gcc 4.4). You may want to experience.
	 */
	asm(
	    /* skip spaces */
	    "1: \n\t"
	    "inc %0 \n\t"
	    "cmpb $0x20, -1(%0) \n\t"
	    "ja 2f \n\t"
	    "jz 1b \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb $0x0, -1(%0) \n\t"
	    "jz 3f \n\t"

	    /* start of field at [%0-1]. Check if we need to skip more fields */
	    "2: \n\t"
	    "dec %1 \n\t"
	    "jz 3f \n\t"

	    /* Look for spaces */
	    "4: \n\t"
	    "inc %0 \n\t"
	    "cmpb $0x20, -1(%0) \n\t"
	    "jz 1b \n\t"
	    "ja 4b \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb $0x0, -1(%0) \n\t"
	    "jnz 4b \n\t"

	    /* return %0-1 = position of the last char we checked */
	    "3: \n\t"
	    "dec %0 \n\t"
	    : "=r" (p)
	    : "r" (field), "0" (p)
	    );
	return p;
#endif
}
371
/* Keep only the <bits> most significant bits of <i>, starting from its
 * highest set bit, and clear all the lower ones. Returns 0 when <bits> is 0
 * and <i> unchanged when it does not use more than <bits> bits. <bits> is
 * expected to be between 0 and 32.
 */
static inline unsigned int quantify_u32(unsigned int i, int bits)
{
	int high;

	if (!bits)
		return 0;

	/* position of the highest set bit: 1 to 32, or 0 when <i> is null */
	high = i ? fls_auto(i) : 0;

	if (high <= bits)
		return i;

	/* the mask is built from an unsigned constant because the shift count
	 * may reach 31, which is undefined for a signed int.
	 */
	return i & ~((1U << (high - bits)) - 1);
}
390
/* Keep only the <bits> most significant bits of the absolute value of <i>,
 * and preserve its sign.
 */
static inline int quantify(int i, int bits)
{
	if (i >= 0)
		return quantify_u32(i, bits);

	/* The absolute value is computed in the unsigned domain because -i
	 * overflows when <i> is INT_MIN.
	 */
	return -quantify_u32(0U - (unsigned int)i, bits);
}
400
401/* Insert timer value <v> into tree <r>. A pre-allocated node must be passed
402 * in <alloc>. It may be NULL, in which case the function will allocate it
403 * itself. It will be reset to NULL once consumed. The caller is responsible
404 * for freeing the node once not used anymore. The node where the value was
405 * inserted is returned.
406 */
407struct timer *insert_timer(struct eb_root *r, struct timer **alloc, int v)
408{
409 struct timer *t = *alloc;
410 struct eb32_node *n;
411
412 if (!t) {
413 t = calloc(sizeof(*t), 1);
414 if (unlikely(!t)) {
415 fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
416 exit(1);
417 }
418 }
419 t->node.key = quantify(v, QBITS); // keep only the higher QBITS bits
420
421 n = eb32i_insert(r, &t->node);
422 if (n == &t->node)
423 t = NULL; /* node inserted, will malloc next time */
424
425 *alloc = t;
426 return container_of(n, struct timer, node);
427}
428
429/* Insert value value <v> into tree <r>. A pre-allocated node must be passed
430 * in <alloc>. It may be NULL, in which case the function will allocate it
431 * itself. It will be reset to NULL once consumed. The caller is responsible
432 * for freeing the node once not used anymore. The node where the value was
433 * inserted is returned.
434 */
435struct timer *insert_value(struct eb_root *r, struct timer **alloc, int v)
436{
437 struct timer *t = *alloc;
438 struct eb32_node *n;
439
440 if (!t) {
441 t = calloc(sizeof(*t), 1);
442 if (unlikely(!t)) {
443 fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
444 exit(1);
445 }
446 }
447 t->node.key = v;
448
449 n = eb32i_insert(r, &t->node);
450 if (n == &t->node)
451 t = NULL; /* node inserted, will malloc next time */
452
453 *alloc = t;
454 return container_of(n, struct timer, node);
455}
456
/* Convert the decimal number at the beginning of <s> to an int. An optional
 * leading '-' makes the result negative. Parsing stops at the first character
 * which is not a digit; 0 is returned when there is no digit at all. No
 * overflow check is performed: the value is accumulated in an unsigned int so
 * that out-of-range inputs wrap instead of triggering undefined behavior.
 */
int str2ic(const char *s)
{
	unsigned int acc = 0;
	int negative = (*s == '-');

	if (negative)
		s++;

	while (1) {
		/* anything but '0'..'9' yields a value above 9 */
		unsigned int digit = (unsigned int)((unsigned char)*s++ - '0');

		if (digit > 9)
			break;
		acc = acc * 10 + digit;
	}

	/* negate in the unsigned domain so that INT_MIN can be produced */
	return negative ? (int)(0U - acc) : (int)acc;
}
485
486
/* Accumulate the decimal digits found at <*pos> into <*acc>, and leave <*pos>
 * just after the first character which is not a digit. That character is
 * returned.
 */
static inline unsigned char convert_date_digits(const char **pos, unsigned int *acc)
{
	unsigned char ch;

	for (;;) {
		ch = (unsigned char)*(*pos)++;
		if ((unsigned char)(ch - '0') > 9)
			return ch;
		*acc = *acc * 10 + (ch - '0');
	}
}

/* Convert "[04/Dec/2008:09:49:40.555]" to the time of the day expressed in
 * milliseconds. The date part is ignored, and the milliseconds are optional.
 * Returns -1 when the string ends before the time is complete.
 */
int convert_date(const char *field)
{
	unsigned int hour = 0, min = 0, sec = 0, msec = 0;
	const char *cur = field;
	unsigned char stop;

	/* the date part ends at the first ':' */
	while (*cur != ':') {
		if (*cur == '\0')
			return -1;
		cur++;
	}
	cur++;

	/* hour + ':' */
	if (convert_date_digits(&cur, &hour) == '\0')
		return -1;

	/* minute + ':' */
	if (convert_date_digits(&cur, &min) == '\0')
		return -1;

	/* second + '.' or ']' */
	stop = convert_date_digits(&cur, &sec);
	if (stop == '\0')
		return -1;

	/* milliseconds + ']' are only present after a '.' */
	if (stop == '.' && convert_date_digits(&cur, &msec) == '\0')
		return -1;

	return (((hour * 60) + min) * 60 + sec) * 1000 + msec;
}
555
/* Convert "[04/Dec/2008:09:49:40.555]" to a unix timestamp expressed in local
 * time. Returns -1 for unparsable values: empty or truncated string, unknown
 * month name, or a date which mktime() cannot represent. As in the original
 * parser, a month is recognized from the minimal set of letters which makes
 * it unambiguous.
 *
 * The timestamp of the last converted minute is cached, so that consecutive
 * lines logged during the same minute do not call mktime() again. This makes
 * the function non-reentrant.
 */
int convert_date_to_timestamp(const char *field)
{
	/* date of the cached minute and its timestamp */
	static unsigned int last_d, last_mo, last_y, last_h, last_m;
	static int last_valid;
	static int last_res;
	unsigned int d, mo, y, h, m, s;
	unsigned char c;
	const char *e;

	d = mo = y = h = m = s = 0;
	e = field;

	if (!*e)
		goto out_err;
	e++; // remove '['

	/* day + '/' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		d = d * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* month + '/' : three letters and a delimiter must be present, which
	 * is verified before skipping them so that we never step over the
	 * end of the string.
	 */
	if (!e[0] || !e[1] || !e[2] || !e[3])
		goto out_err;

	switch (e[0]) {
	case 'J':
		if (e[1] == 'a')
			mo = 1;
		else if (e[1] == 'u' && e[2] == 'n')
			mo = 6;
		else if (e[1] == 'u' && e[2] == 'l')
			mo = 7;
		break;
	case 'F':
		mo = 2;
		break;
	case 'M':
		if (e[2] == 'r')
			mo = 3;
		else if (e[2] == 'y')
			mo = 5;
		break;
	case 'A':
		if (e[1] == 'p')
			mo = 4;
		else if (e[1] == 'u')
			mo = 8;
		break;
	case 'S':
		mo = 9;
		break;
	case 'O':
		mo = 10;
		break;
	case 'N':
		mo = 11;
		break;
	case 'D':
		mo = 12;
		break;
	default:
		break;
	}
	if (!mo)
		goto out_err;
	e += 4;

	/* year + ':' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		y = y * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* hour + ':' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		h = h * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* minute + ':' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		m = m * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* second + '.' or ']' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		s = s * 10 + c;
	}

	if (!last_valid || last_m != m || last_h != h ||
	    last_d != d || last_mo != mo || last_y != y) {
		struct tm tm = { 0 };
		time_t base;

		tm.tm_sec   = 0;
		tm.tm_min   = m;
		tm.tm_hour  = h;
		tm.tm_mday  = d;
		tm.tm_mon   = mo - 1;
		tm.tm_year  = (int)(y - 1900);
		tm.tm_isdst = -1; /* let mktime() find out whether DST applied at that date */

		base = mktime(&tm);
		if (base == (time_t)-1)
			goto out_err;

		last_res = base;
		last_d = d; last_mo = mo; last_y = y;
		last_h = h; last_m = m;
		last_valid = 1;
	}

	return last_res + s;
 out_err:
	return -1;
}
704
Willy Tarreau72c28532009-01-22 18:56:50 +0100705void truncated_line(int linenum, const char *line)
706{
707 if (!(filter & FILT_QUIET))
708 fprintf(stderr, "Truncated line %d: %s\n", linenum, line);
709}
710
711int main(int argc, char **argv)
712{
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600713 const char *b, *p, *time_field, *accept_field, *source_field;
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200714 const char *filter_term_code_name = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100715 const char *output_file = NULL;
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600716 int f, last;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200717 struct timer *t = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100718 struct eb32_node *n;
Willy Tarreauabe45b62010-10-28 20:33:46 +0200719 struct url_stat *ustat = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100720 int val, test;
Willy Tarreauc8746532014-05-28 23:05:07 +0200721 unsigned int uval;
Willy Tarreau03ca6052020-12-21 08:40:04 +0100722 unsigned int filter_acc_delay = 0, filter_acc_count = 0;
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200723 int filter_time_resp = 0;
Willy Tarreaud3007ff2011-09-05 02:07:23 +0200724 int filt_http_status_low = 0, filt_http_status_high = 0;
Willy Tarreau03ca6052020-12-21 08:40:04 +0100725 unsigned int filt2_timestamp_low = 0, filt2_timestamp_high = 0;
Tim Duesterhus66255f72021-10-28 17:24:02 +0200726 unsigned int filt2_capture_block = 0, filt2_capture_field = 0;
Willy Tarreau72c28532009-01-22 18:56:50 +0100727 int skip_fields = 1;
728
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200729 void (*line_filter)(const char *accept_field, const char *time_field, struct timer **tptr) = NULL;
730
Willy Tarreau72c28532009-01-22 18:56:50 +0100731 argc--; argv++;
732 while (argc > 0) {
733 if (*argv[0] != '-')
734 break;
735
736 if (strcmp(argv[0], "-ad") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200737 if (argc < 2) die("missing option for -ad\n");
Willy Tarreau72c28532009-01-22 18:56:50 +0100738 argc--; argv++;
739 filter |= FILT_ACC_DELAY;
740 filter_acc_delay = atol(*argv);
741 }
742 else if (strcmp(argv[0], "-ac") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200743 if (argc < 2) die("missing option for -ac\n");
Willy Tarreau72c28532009-01-22 18:56:50 +0100744 argc--; argv++;
745 filter |= FILT_ACC_COUNT;
746 filter_acc_count = atol(*argv);
747 }
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200748 else if (strcmp(argv[0], "-rt") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200749 if (argc < 2) die("missing option for -rt\n");
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200750 argc--; argv++;
751 filter |= FILT_TIME_RESP;
752 filter_time_resp = atol(*argv);
753 }
754 else if (strcmp(argv[0], "-RT") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200755 if (argc < 2) die("missing option for -RT\n");
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200756 argc--; argv++;
757 filter |= FILT_TIME_RESP | FILT_INVERT_TIME_RESP;
758 filter_time_resp = atol(*argv);
759 }
Willy Tarreau72c28532009-01-22 18:56:50 +0100760 else if (strcmp(argv[0], "-s") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200761 if (argc < 2) die("missing option for -s\n");
Willy Tarreau72c28532009-01-22 18:56:50 +0100762 argc--; argv++;
763 skip_fields = atol(*argv);
764 }
Willy Tarreau667c9052012-10-10 16:49:28 +0200765 else if (strcmp(argv[0], "-m") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200766 if (argc < 2) die("missing option for -m\n");
Willy Tarreau667c9052012-10-10 16:49:28 +0200767 argc--; argv++;
768 lines_max = atol(*argv);
769 }
Willy Tarreau72c28532009-01-22 18:56:50 +0100770 else if (strcmp(argv[0], "-e") == 0)
771 filter |= FILT_ERRORS_ONLY;
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200772 else if (strcmp(argv[0], "-E") == 0)
773 filter |= FILT_ERRORS_ONLY | FILT_INVERT_ERRORS;
Willy Tarreau70c428f2011-07-10 17:27:40 +0200774 else if (strcmp(argv[0], "-H") == 0)
775 filter |= FILT_HTTP_ONLY;
Willy Tarreau08911ff2011-10-13 13:28:36 +0200776 else if (strcmp(argv[0], "-Q") == 0)
777 filter |= FILT_QUEUE_ONLY;
778 else if (strcmp(argv[0], "-QS") == 0)
779 filter |= FILT_QUEUE_SRV_ONLY;
Willy Tarreau72c28532009-01-22 18:56:50 +0100780 else if (strcmp(argv[0], "-c") == 0)
781 filter |= FILT_COUNT_ONLY;
782 else if (strcmp(argv[0], "-q") == 0)
783 filter |= FILT_QUIET;
784 else if (strcmp(argv[0], "-v") == 0)
785 filter_invert = !filter_invert;
786 else if (strcmp(argv[0], "-gt") == 0)
787 filter |= FILT_GRAPH_TIMERS;
Willy Tarreau214c2032009-02-20 11:02:32 +0100788 else if (strcmp(argv[0], "-pct") == 0)
789 filter |= FILT_PERCENTILE;
Willy Tarreau0f423a72010-05-03 10:50:54 +0200790 else if (strcmp(argv[0], "-st") == 0)
791 filter |= FILT_COUNT_STATUS;
Willy Tarreaud2201062010-05-27 18:17:30 +0200792 else if (strcmp(argv[0], "-srv") == 0)
793 filter |= FILT_COUNT_SRV_STATUS;
Willy Tarreau8a09b662012-10-10 10:26:22 +0200794 else if (strcmp(argv[0], "-cc") == 0)
795 filter |= FILT_COUNT_COOK_CODES;
Willy Tarreaud8fc1102010-09-12 17:56:16 +0200796 else if (strcmp(argv[0], "-tc") == 0)
797 filter |= FILT_COUNT_TERM_CODES;
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200798 else if (strcmp(argv[0], "-tcn") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200799 if (argc < 2) die("missing option for -tcn\n");
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200800 argc--; argv++;
801 filter |= FILT_TERM_CODE_NAME;
802 filter_term_code_name = *argv;
803 }
804 else if (strcmp(argv[0], "-TCN") == 0) {
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200805 if (argc < 2) die("missing option for -TCN\n");
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200806 argc--; argv++;
807 filter |= FILT_TERM_CODE_NAME | FILT_INVERT_TERM_CODE_NAME;
808 filter_term_code_name = *argv;
809 }
Willy Tarreaud3007ff2011-09-05 02:07:23 +0200810 else if (strcmp(argv[0], "-hs") == 0 || strcmp(argv[0], "-HS") == 0) {
811 char *sep, *str;
812
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200813 if (argc < 2) die("missing option for -hs/-HS ([min]:[max])\n");
Willy Tarreaud3007ff2011-09-05 02:07:23 +0200814 filter |= FILT_HTTP_STATUS;
815 if (argv[0][1] == 'H')
816 filter |= FILT_INVERT_HTTP_STATUS;
817
818 argc--; argv++;
819 str = *argv;
820 sep = strchr(str, ':'); /* [min]:[max] */
821 if (!sep)
822 sep = str; /* make max point to min */
823 else
824 *sep++ = 0;
825 filt_http_status_low = *str ? atol(str) : 0;
826 filt_http_status_high = *sep ? atol(sep) : 65535;
827 }
Olivier Burgarde97b9042014-05-22 16:44:59 +0200828 else if (strcmp(argv[0], "-time") == 0) {
829 char *sep, *str;
830
Tim Duesterhusa02c7b82021-10-28 17:06:23 +0200831 if (argc < 2) die("missing option for -time ([min]:[max])\n");
Olivier Burgarde97b9042014-05-22 16:44:59 +0200832 filter2 |= FILT2_TIMESTAMP;
833
834 argc--; argv++;
835 str = *argv;
836 sep = strchr(str, ':'); /* [min]:[max] */
837 filt2_timestamp_low = *str ? atol(str) : 0;
838 if (!sep)
839 filt2_timestamp_high = 0xFFFFFFFF;
840 else
841 filt2_timestamp_high = atol(++sep);
842 }
Willy Tarreauabe45b62010-10-28 20:33:46 +0200843 else if (strcmp(argv[0], "-u") == 0)
844 filter |= FILT_COUNT_URL_ONLY;
845 else if (strcmp(argv[0], "-uc") == 0)
846 filter |= FILT_COUNT_URL_COUNT;
847 else if (strcmp(argv[0], "-ue") == 0)
848 filter |= FILT_COUNT_URL_ERR;
849 else if (strcmp(argv[0], "-ua") == 0)
850 filter |= FILT_COUNT_URL_TAVG;
851 else if (strcmp(argv[0], "-ut") == 0)
852 filter |= FILT_COUNT_URL_TTOT;
853 else if (strcmp(argv[0], "-uao") == 0)
854 filter |= FILT_COUNT_URL_TAVGO;
855 else if (strcmp(argv[0], "-uto") == 0)
856 filter |= FILT_COUNT_URL_TTOTO;
Baptiste61aaad02012-09-08 23:10:03 +0200857 else if (strcmp(argv[0], "-uba") == 0)
858 filter |= FILT_COUNT_URL_BAVG;
859 else if (strcmp(argv[0], "-ubt") == 0)
860 filter |= FILT_COUNT_URL_BTOT;
Tim Duesterhus385338b2021-10-28 16:36:03 +0200861 else if (strcmp(argv[0], "-query") == 0)
Tim Duesterhus24b8d692021-10-18 12:12:02 +0200862 filter2 |= FILT2_PRESERVE_QUERY;
Willy Tarreau7cf479c2013-02-16 23:49:04 +0100863 else if (strcmp(argv[0], "-ic") == 0)
864 filter |= FILT_COUNT_IP_COUNT;
Tim Duesterhus66255f72021-10-28 17:24:02 +0200865 else if (strcmp(argv[0], "-hdr") == 0) {
866 char *sep, *str;
867
868 if (argc < 2) die("missing option for -hdr (<block>:<field>)\n");
869 filter2 |= FILT2_EXTRACT_CAPTURE;
870
871 argc--; argv++;
872 str = *argv;
873 sep = strchr(str, ':');
874 if (!sep)
875 die("missing colon in -hdr (<block>:<field>)\n");
876 else
877 *sep++ = 0;
878
879 filt2_capture_block = *str ? atol(str) : 1;
880 filt2_capture_field = *sep ? atol(sep) : 1;
881
882 if (filt2_capture_block < 1 || filt2_capture_field < 1)
883 die("block and field must be at least 1 for -hdr (<block>:<field>)\n");
884 }
Willy Tarreau72c28532009-01-22 18:56:50 +0100885 else if (strcmp(argv[0], "-o") == 0) {
886 if (output_file)
887 die("Fatal: output file name already specified.\n");
888 if (argc < 2)
889 die("Fatal: missing output file name.\n");
890 output_file = argv[1];
891 }
Willy Tarreau615674c2012-01-23 08:15:51 +0100892 else if (strcmp(argv[0], "-h") == 0 || strcmp(argv[0], "--help") == 0)
893 help();
Willy Tarreau72c28532009-01-22 18:56:50 +0100894 argc--;
895 argv++;
896 }
897
Tim Duesterhus66255f72021-10-28 17:24:02 +0200898 if (!filter && !filter2)
Willy Tarreau72c28532009-01-22 18:56:50 +0100899 die("No action specified.\n");
900
901 if (filter & FILT_ACC_COUNT && !filter_acc_count)
902 filter_acc_count=1;
903
904 if (filter & FILT_ACC_DELAY && !filter_acc_delay)
905 filter_acc_delay = 1;
906
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200907
908 /* by default, all lines are printed */
909 line_filter = filter_output_line;
910 if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY))
911 line_filter = filter_accept_holes;
912 else if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE))
913 line_filter = filter_graphs;
914 else if (filter & FILT_COUNT_STATUS)
915 line_filter = filter_count_status;
Willy Tarreau8a09b662012-10-10 10:26:22 +0200916 else if (filter & FILT_COUNT_COOK_CODES)
917 line_filter = filter_count_cook_codes;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200918 else if (filter & FILT_COUNT_TERM_CODES)
919 line_filter = filter_count_term_codes;
920 else if (filter & FILT_COUNT_SRV_STATUS)
921 line_filter = filter_count_srv_status;
922 else if (filter & FILT_COUNT_URL_ANY)
923 line_filter = filter_count_url;
924 else if (filter & FILT_COUNT_ONLY)
925 line_filter = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100926
Willy Tarreauf8c95d22012-06-12 09:16:56 +0200927#if defined(POSIX_FADV_SEQUENTIAL)
928 /* around 20% performance improvement is observed on Linux with this
Joseph Herlant42172bd2018-11-09 18:02:35 -0800929 * on cold-cache. Surprisingly, WILLNEED is less performant. Don't
Willy Tarreauf8c95d22012-06-12 09:16:56 +0200930 * use NOREUSE as it flushes the cache and prevents easy data
931 * manipulation on logs!
932 */
933 posix_fadvise(0, 0, 0, POSIX_FADV_SEQUENTIAL);
934#endif
935
Willy Tarreaua1629a52012-11-13 20:48:15 +0100936 if (!line_filter && /* FILT_COUNT_ONLY ( see above), and no input filter (see below) */
Olivier Burgarde97b9042014-05-22 16:44:59 +0200937 !(filter & (FILT_HTTP_ONLY|FILT_TIME_RESP|FILT_ERRORS_ONLY|FILT_HTTP_STATUS|FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY|FILT_TERM_CODE_NAME)) &&
938 !(filter2 & (FILT2_TIMESTAMP))) {
Willy Tarreaua1629a52012-11-13 20:48:15 +0100939 /* read the whole file at once first, ignore it if inverted output */
Willy Tarreaue1a908c2012-01-03 09:23:03 +0100940 if (!filter_invert)
Willy Tarreaua1629a52012-11-13 20:48:15 +0100941 while ((lines_max < 0 || lines_out < lines_max) && fgets2(stdin) != NULL)
Willy Tarreaue1a908c2012-01-03 09:23:03 +0100942 lines_out++;
943
944 goto skip_filters;
945 }
946
Willy Tarreau214c2032009-02-20 11:02:32 +0100947 while ((line = fgets2(stdin)) != NULL) {
Willy Tarreau72c28532009-01-22 18:56:50 +0100948 linenum++;
Willy Tarreau26deaf52011-07-10 19:47:48 +0200949 time_field = NULL; accept_field = NULL;
Willy Tarreau7cf479c2013-02-16 23:49:04 +0100950 source_field = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100951
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200952 test = 1;
Willy Tarreau26deaf52011-07-10 19:47:48 +0200953
954 /* for any line we process, we first ensure that there is a field
955 * looking like the accept date field (beginning with a '[').
956 */
Willy Tarreau7cf479c2013-02-16 23:49:04 +0100957 if (filter & FILT_COUNT_IP_COUNT) {
958 /* we need the IP first */
959 source_field = field_start(line, SOURCE_FIELD + skip_fields);
960 accept_field = field_start(source_field, ACCEPT_FIELD - SOURCE_FIELD + 1);
961 }
962 else
963 accept_field = field_start(line, ACCEPT_FIELD + skip_fields);
964
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200965 if (unlikely(*accept_field != '[')) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200966 parse_err++;
967 continue;
968 }
969
970 /* the day of month field is begin 01 and 31 */
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200971 if (accept_field[1] < '0' || accept_field[1] > '3') {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200972 parse_err++;
973 continue;
974 }
975
Olivier Burgarde97b9042014-05-22 16:44:59 +0200976 if (filter2 & FILT2_TIMESTAMP) {
977 uval = convert_date_to_timestamp(accept_field);
978 test &= (uval>=filt2_timestamp_low && uval<=filt2_timestamp_high) ;
979 }
980
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200981 if (filter & FILT_HTTP_ONLY) {
Willy Tarreau70c428f2011-07-10 17:27:40 +0200982 /* only report lines with at least 4 timers */
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200983 if (!time_field) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200984 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200985 if (unlikely(!*time_field)) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200986 truncated_line(linenum, line);
987 continue;
988 }
Willy Tarreau70c428f2011-07-10 17:27:40 +0200989 }
990
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600991 field_stop(time_field + 1);
Willy Tarreau758a6ea2011-07-10 18:53:44 +0200992 /* we have field TIME_FIELD in [time_field]..[e-1] */
993 p = time_field;
Willy Tarreau70c428f2011-07-10 17:27:40 +0200994 f = 0;
Willy Tarreaudf6f0d12011-07-10 18:15:08 +0200995 while (!SEP(*p)) {
Willy Tarreau70c428f2011-07-10 17:27:40 +0200996 if (++f == 4)
997 break;
998 SKIP_CHAR(p, '/');
999 }
1000 test &= (f >= 4);
1001 }
1002
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001003 if (filter & FILT_TIME_RESP) {
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001004 int tps;
1005
1006 /* only report lines with response times larger than filter_time_resp */
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001007 if (!time_field) {
Willy Tarreau26deaf52011-07-10 19:47:48 +02001008 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001009 if (unlikely(!*time_field)) {
Willy Tarreau26deaf52011-07-10 19:47:48 +02001010 truncated_line(linenum, line);
1011 continue;
1012 }
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001013 }
1014
Ryan O'Hara8cb99932017-12-15 10:21:39 -06001015 field_stop(time_field + 1);
Willy Tarreau758a6ea2011-07-10 18:53:44 +02001016 /* we have field TIME_FIELD in [time_field]..[e-1], let's check only the response time */
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001017
Willy Tarreau758a6ea2011-07-10 18:53:44 +02001018 p = time_field;
Willy Tarreau24bcb4f2010-10-28 20:39:50 +02001019 f = 0;
Willy Tarreaudf6f0d12011-07-10 18:15:08 +02001020 while (!SEP(*p)) {
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001021 tps = str2ic(p);
1022 if (tps < 0) {
1023 tps = -1;
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001024 }
Willy Tarreau24bcb4f2010-10-28 20:39:50 +02001025 if (++f == 4)
1026 break;
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001027 SKIP_CHAR(p, '/');
1028 }
1029
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001030 if (unlikely(f < 4)) {
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001031 parse_err++;
1032 continue;
1033 }
1034
1035 test &= (tps >= filter_time_resp) ^ !!(filter & FILT_INVERT_TIME_RESP);
1036 }
1037
Willy Tarreaud3007ff2011-09-05 02:07:23 +02001038 if (filter & (FILT_ERRORS_ONLY | FILT_HTTP_STATUS)) {
1039 /* Check both error codes (-1, 5xx) and status code ranges */
Willy Tarreau26deaf52011-07-10 19:47:48 +02001040 if (time_field)
1041 b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
1042 else
1043 b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
1044
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001045 if (unlikely(!*b)) {
Willy Tarreau72c28532009-01-22 18:56:50 +01001046 truncated_line(linenum, line);
1047 continue;
1048 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001049
Willy Tarreaud3007ff2011-09-05 02:07:23 +02001050 val = str2ic(b);
1051 if (filter & FILT_ERRORS_ONLY)
1052 test &= (val < 0 || (val >= 500 && val <= 599)) ^ !!(filter & FILT_INVERT_ERRORS);
1053
1054 if (filter & FILT_HTTP_STATUS)
1055 test &= (val >= filt_http_status_low && val <= filt_http_status_high) ^ !!(filter & FILT_INVERT_HTTP_STATUS);
Willy Tarreau72c28532009-01-22 18:56:50 +01001056 }
1057
Willy Tarreau08911ff2011-10-13 13:28:36 +02001058 if (filter & (FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY)) {
1059 /* Check if the server's queue is non-nul */
1060 if (time_field)
1061 b = field_start(time_field, QUEUE_LEN_FIELD - TIME_FIELD + 1);
1062 else
1063 b = field_start(accept_field, QUEUE_LEN_FIELD - ACCEPT_FIELD + 1);
1064
1065 if (unlikely(!*b)) {
1066 truncated_line(linenum, line);
1067 continue;
1068 }
1069
1070 if (*b == '0') {
1071 if (filter & FILT_QUEUE_SRV_ONLY) {
1072 test = 0;
1073 }
1074 else {
1075 do {
1076 b++;
1077 if (*b == '/') {
1078 b++;
1079 break;
1080 }
1081 } while (*b);
1082 test &= ((unsigned char)(*b - '1') < 9);
1083 }
1084 }
1085 }
1086
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +02001087 if (filter & FILT_TERM_CODE_NAME) {
1088 /* only report corresponding termination code name */
1089 if (time_field)
1090 b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1091 else
1092 b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1093
1094 if (unlikely(!*b)) {
1095 truncated_line(linenum, line);
1096 continue;
1097 }
1098
1099 test &= (b[0] == filter_term_code_name[0] && b[1] == filter_term_code_name[1]) ^ !!(filter & FILT_INVERT_TERM_CODE_NAME);
1100 }
1101
1102
Willy Tarreau0f423a72010-05-03 10:50:54 +02001103 test ^= filter_invert;
1104 if (!test)
1105 continue;
1106
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001107 /************** here we process inputs *******************/
Willy Tarreau72c28532009-01-22 18:56:50 +01001108
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001109 if (line_filter) {
1110 if (filter & FILT_COUNT_IP_COUNT)
1111 filter_count_ip(source_field, accept_field, time_field, &t);
Tim Duesterhus66255f72021-10-28 17:24:02 +02001112 else if (filter2 & FILT2_EXTRACT_CAPTURE)
1113 filter_extract_capture(accept_field, time_field, filt2_capture_block, filt2_capture_field);
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001114 else
1115 line_filter(accept_field, time_field, &t);
1116 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001117 else
Willy Tarreaua1629a52012-11-13 20:48:15 +01001118 lines_out++; /* FILT_COUNT_ONLY was used, so we're just counting lines */
1119 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001120 break;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001121 }
Willy Tarreauabe45b62010-10-28 20:33:46 +02001122
Willy Tarreaue1a908c2012-01-03 09:23:03 +01001123 skip_filters:
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001124 /*****************************************************
1125 * Here we've finished reading all input. Depending on the
1126 * filters, we may still have some analysis to run on the
1127 * collected data and to output data in a new format.
1128 *************************************************** */
Willy Tarreau72c28532009-01-22 18:56:50 +01001129
1130 if (t)
1131 free(t);
1132
1133 if (filter & FILT_COUNT_ONLY) {
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001134 printf("%d\n", lines_out);
Willy Tarreau72c28532009-01-22 18:56:50 +01001135 exit(0);
1136 }
1137
Willy Tarreau72c28532009-01-22 18:56:50 +01001138 if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) {
1139 /* sort and count all timers. Output will look like this :
1140 * <accept_date> <delta_ms from previous one> <nb entries>
1141 */
1142 n = eb32_first(&timers[0]);
1143
1144 if (n)
1145 last = n->key;
1146 while (n) {
1147 unsigned int d, h, m, s, ms;
1148
1149 t = container_of(n, struct timer, node);
1150 h = n->key;
1151 d = h - last;
1152 last = h;
1153
1154 if (d >= filter_acc_delay && t->count >= filter_acc_count) {
1155 ms = h % 1000; h = h / 1000;
1156 s = h % 60; h = h / 60;
1157 m = h % 60; h = h / 60;
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001158 printf("%02u:%02u:%02u.%03u %d %u %u\n", h, m, s, ms, last, d, t->count);
Willy Tarreau667c9052012-10-10 16:49:28 +02001159 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001160 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001161 break;
Willy Tarreau72c28532009-01-22 18:56:50 +01001162 }
1163 n = eb32_next(n);
1164 }
1165 }
1166 else if (filter & FILT_GRAPH_TIMERS) {
1167 /* sort all timers */
1168 for (f = 0; f < 5; f++) {
1169 struct eb32_node *n;
Willy Tarreau72c28532009-01-22 18:56:50 +01001170
Willy Tarreau72c28532009-01-22 18:56:50 +01001171 n = eb32_first(&timers[f]);
1172 while (n) {
1173 int i;
1174 double d;
Tim Duesterhus785b84b2021-11-04 21:04:24 +01001175 int val;
Willy Tarreau72c28532009-01-22 18:56:50 +01001176
1177 t = container_of(n, struct timer, node);
1178 last = n->key;
1179 val = t->count;
1180
1181 i = (last < 0) ? -last : last;
1182 i = fls_auto(i) - QBITS;
1183
1184 if (i > 0)
1185 d = val / (double)(1 << i);
1186 else
1187 d = val;
1188
Willy Tarreaua1629a52012-11-13 20:48:15 +01001189 if (d > 0.0)
Willy Tarreau72c28532009-01-22 18:56:50 +01001190 printf("%d %d %f\n", f, last, d+1.0);
Willy Tarreau72c28532009-01-22 18:56:50 +01001191
1192 n = eb32_next(n);
1193 }
Willy Tarreau214c2032009-02-20 11:02:32 +01001194 }
1195 }
1196 else if (filter & FILT_PERCENTILE) {
1197 /* report timers by percentile :
1198 * <percent> <total> <max_req_time> <max_conn_time> <max_resp_time> <max_data_time>
1199 * We don't count errs.
1200 */
1201 struct eb32_node *n[5];
1202 unsigned long cum[5];
1203 double step;
1204
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001205 if (!lines_out)
Willy Tarreau910ba4b2009-11-17 10:16:19 +01001206 goto empty;
1207
Willy Tarreau214c2032009-02-20 11:02:32 +01001208 for (f = 1; f < 5; f++) {
1209 n[f] = eb32_first(&timers[f]);
1210 cum[f] = container_of(n[f], struct timer, node)->count;
1211 }
1212
1213 for (step = 1; step <= 1000;) {
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001214 unsigned int thres = lines_out * (step / 1000.0);
Willy Tarreau214c2032009-02-20 11:02:32 +01001215
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001216 printf("%3.1f %u ", step/10.0, thres);
Willy Tarreau214c2032009-02-20 11:02:32 +01001217 for (f = 1; f < 5; f++) {
1218 struct eb32_node *next;
1219 while (cum[f] < thres) {
1220 /* need to find other keys */
1221 next = eb32_next(n[f]);
1222 if (!next)
1223 break;
1224 n[f] = next;
1225 cum[f] += container_of(next, struct timer, node)->count;
1226 }
1227
1228 /* value still within $step % of total */
1229 printf("%d ", n[f]->key);
1230 }
1231 putchar('\n');
1232 if (step >= 100 && step < 900)
1233 step += 50; // jump 5% by 5% between those steps.
1234 else if (step >= 20 && step < 980)
1235 step += 10;
1236 else
1237 step += 1;
Willy Tarreau72c28532009-01-22 18:56:50 +01001238 }
1239 }
Willy Tarreau0f423a72010-05-03 10:50:54 +02001240 else if (filter & FILT_COUNT_STATUS) {
1241 /* output all statuses in the form of <status> <occurrences> */
1242 n = eb32_first(&timers[0]);
1243 while (n) {
1244 t = container_of(n, struct timer, node);
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001245 printf("%d %u\n", n->key, t->count);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001246 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001247 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001248 break;
Willy Tarreau0f423a72010-05-03 10:50:54 +02001249 n = eb32_next(n);
1250 }
1251 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001252 else if (filter & FILT_COUNT_SRV_STATUS) {
Willy Tarreaud2201062010-05-27 18:17:30 +02001253 struct ebmb_node *srv_node;
1254 struct srv_st *srv;
1255
1256 printf("#srv_name 1xx 2xx 3xx 4xx 5xx other tot_req req_ok pct_ok avg_ct avg_rt\n");
1257
1258 srv_node = ebmb_first(&timers[0]);
1259 while (srv_node) {
1260 int tot_rq;
1261
1262 srv = container_of(srv_node, struct srv_st, node);
1263
1264 tot_rq = 0;
1265 for (f = 0; f <= 5; f++)
1266 tot_rq += srv->st_cnt[f];
1267
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001268 printf("%s %u %u %u %u %u %u %d %u %.1f %d %d\n",
Willy Tarreaud2201062010-05-27 18:17:30 +02001269 srv_node->key, srv->st_cnt[1], srv->st_cnt[2],
1270 srv->st_cnt[3], srv->st_cnt[4], srv->st_cnt[5], srv->st_cnt[0],
1271 tot_rq,
1272 srv->nb_ok, (double)srv->nb_ok * 100.0 / (tot_rq?tot_rq:1),
1273 (int)(srv->cum_ct / (srv->nb_ct?srv->nb_ct:1)), (int)(srv->cum_rt / (srv->nb_rt?srv->nb_rt:1)));
1274 srv_node = ebmb_next(srv_node);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001275 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001276 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001277 break;
Willy Tarreaud2201062010-05-27 18:17:30 +02001278 }
1279 }
Willy Tarreau8a09b662012-10-10 10:26:22 +02001280 else if (filter & (FILT_COUNT_TERM_CODES|FILT_COUNT_COOK_CODES)) {
Willy Tarreaud8fc1102010-09-12 17:56:16 +02001281 /* output all statuses in the form of <code> <occurrences> */
1282 n = eb32_first(&timers[0]);
1283 while (n) {
1284 t = container_of(n, struct timer, node);
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001285 printf("%c%c %u\n", (n->key >> 8), (n->key) & 255, t->count);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001286 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001287 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001288 break;
Willy Tarreaud8fc1102010-09-12 17:56:16 +02001289 n = eb32_next(n);
1290 }
1291 }
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001292 else if (filter & (FILT_COUNT_URL_ANY|FILT_COUNT_IP_COUNT)) {
Willy Tarreauabe45b62010-10-28 20:33:46 +02001293 struct eb_node *node, *next;
1294
1295 if (!(filter & FILT_COUNT_URL_ONLY)) {
1296 /* we have to sort on another criterion. We'll use timers[1] for the
1297 * destination tree.
1298 */
1299
1300 timers[1] = EB_ROOT; /* reconfigure to accept duplicates */
1301 for (node = eb_first(&timers[0]); node; node = next) {
1302 next = eb_next(node);
1303 eb_delete(node);
1304
1305 ustat = container_of(node, struct url_stat, node.url.node);
1306
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001307 if (filter & (FILT_COUNT_URL_COUNT|FILT_COUNT_IP_COUNT))
Willy Tarreauabe45b62010-10-28 20:33:46 +02001308 ustat->node.val.key = ustat->nb_req;
1309 else if (filter & FILT_COUNT_URL_ERR)
1310 ustat->node.val.key = ustat->nb_err;
1311 else if (filter & FILT_COUNT_URL_TTOT)
1312 ustat->node.val.key = ustat->total_time;
1313 else if (filter & FILT_COUNT_URL_TAVG)
1314 ustat->node.val.key = ustat->nb_req ? ustat->total_time / ustat->nb_req : 0;
1315 else if (filter & FILT_COUNT_URL_TTOTO)
1316 ustat->node.val.key = ustat->total_time_ok;
1317 else if (filter & FILT_COUNT_URL_TAVGO)
1318 ustat->node.val.key = (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0;
Baptiste61aaad02012-09-08 23:10:03 +02001319 else if (filter & FILT_COUNT_URL_BAVG)
1320 ustat->node.val.key = ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0;
1321 else if (filter & FILT_COUNT_URL_BTOT)
1322 ustat->node.val.key = ustat->total_bytes_sent;
Willy Tarreauabe45b62010-10-28 20:33:46 +02001323 else
1324 ustat->node.val.key = 0;
1325
1326 eb64_insert(&timers[1], &ustat->node.val);
1327 }
1328 /* switch trees */
1329 timers[0] = timers[1];
1330 }
1331
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001332 if (FILT_COUNT_IP_COUNT)
1333 printf("#req err ttot tavg oktot okavg bavg btot src\n");
1334 else
1335 printf("#req err ttot tavg oktot okavg bavg btot url\n");
Willy Tarreauabe45b62010-10-28 20:33:46 +02001336
1337 /* scan the tree in its reverse sorting order */
1338 node = eb_last(&timers[0]);
1339 while (node) {
1340 ustat = container_of(node, struct url_stat, node.url.node);
Willy Tarreaue0b3a8b2022-04-12 08:37:22 +02001341 printf("%u %u %llu %llu %llu %llu %llu %llu %s\n",
Willy Tarreauabe45b62010-10-28 20:33:46 +02001342 ustat->nb_req,
1343 ustat->nb_err,
1344 ustat->total_time,
1345 ustat->nb_req ? ustat->total_time / ustat->nb_req : 0,
1346 ustat->total_time_ok,
1347 (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0,
Baptiste61aaad02012-09-08 23:10:03 +02001348 ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0,
1349 ustat->total_bytes_sent,
Willy Tarreauabe45b62010-10-28 20:33:46 +02001350 ustat->url);
1351
1352 node = eb_prev(node);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001353 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001354 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001355 break;
Willy Tarreauabe45b62010-10-28 20:33:46 +02001356 }
1357 }
Willy Tarreaud2201062010-05-27 18:17:30 +02001358
Willy Tarreau910ba4b2009-11-17 10:16:19 +01001359 empty:
Willy Tarreau72c28532009-01-22 18:56:50 +01001360 if (!(filter & FILT_QUIET))
1361 fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n",
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001362 linenum, lines_out, parse_err);
Willy Tarreau72c28532009-01-22 18:56:50 +01001363 exit(0);
1364}
1365
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001366void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr)
1367{
1368 puts(line);
1369 lines_out++;
1370}
1371
Tim Duesterhus66255f72021-10-28 17:24:02 +02001372void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int block, unsigned int field)
1373{
1374 const char *e, *f;
1375
1376 if (time_field)
1377 e = field_start(time_field, METH_FIELD - TIME_FIELD + 1);
1378 else
1379 e = field_start(accept_field, METH_FIELD - ACCEPT_FIELD + 1);
1380
1381 while (block-- > 0) {
1382 /* Scan until the start of a capture block ('{') until the URL ('"'). */
1383 while ((*e != '"' && *e != '{') && *e) {
1384 /* Note: some syslog servers escape quotes ! */
1385 if (*e == '\\' && e[1] == '"')
1386 break;
1387
1388 e = field_start(e, 2);
1389 }
1390
1391 if (unlikely(!*e)) {
1392 truncated_line(linenum, line);
1393 return;
1394 }
1395
1396 /* We reached the URL, no more captures will follow. */
1397 if (*e != '{') {
1398 puts("");
1399 lines_out++;
1400 return;
1401 }
1402
1403 /* e points the the opening brace of the capture block. */
1404
1405 e++;
1406 }
1407
1408 /* We are in the first field of the selected capture block. */
1409
1410 while (--field > 0) {
1411 while ((*e != '|' && *e != '}') && *e)
1412 e++;
1413
1414 if (unlikely(!*e)) {
1415 truncated_line(linenum, line);
1416 return;
1417 }
1418
1419 if (*e != '|') {
1420 puts("");
1421 lines_out++;
1422 return;
1423 }
1424
1425 /* e points to the pipe. */
1426
1427 e++;
1428 }
1429
1430 f = e;
1431
1432 while ((*f != '|' && *f != '}') && *f)
1433 f++;
1434
1435 if (unlikely(!*f)) {
1436 truncated_line(linenum, line);
1437 return;
1438 }
1439
1440 fwrite(e, f - e, 1, stdout);
1441 putchar('\n');
1442 lines_out++;
1443}
1444
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001445void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr)
1446{
1447 struct timer *t2;
1448 int val;
1449
1450 val = convert_date(accept_field);
1451 if (unlikely(val < 0)) {
1452 truncated_line(linenum, line);
1453 return;
1454 }
1455
1456 t2 = insert_value(&timers[0], tptr, val);
1457 t2->count++;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001458 return;
1459}
1460
1461void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr)
1462{
1463 struct timer *t2;
1464 const char *b;
1465 int val;
1466
1467 if (time_field)
1468 b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
1469 else
1470 b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
1471
1472 if (unlikely(!*b)) {
1473 truncated_line(linenum, line);
1474 return;
1475 }
1476
1477 val = str2ic(b);
1478
1479 t2 = insert_value(&timers[0], tptr, val);
1480 t2->count++;
1481}
1482
Willy Tarreau8a09b662012-10-10 10:26:22 +02001483void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr)
1484{
1485 struct timer *t2;
1486 const char *b;
1487 int val;
1488
1489 if (time_field)
1490 b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1491 else
1492 b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1493
1494 if (unlikely(!*b)) {
1495 truncated_line(linenum, line);
1496 return;
1497 }
1498
1499 val = 256 * b[2] + b[3];
1500
1501 t2 = insert_value(&timers[0], tptr, val);
1502 t2->count++;
1503}
1504
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001505void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr)
1506{
1507 struct timer *t2;
1508 const char *b;
1509 int val;
1510
1511 if (time_field)
1512 b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1513 else
1514 b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1515
1516 if (unlikely(!*b)) {
1517 truncated_line(linenum, line);
1518 return;
1519 }
1520
1521 val = 256 * b[0] + b[1];
1522
1523 t2 = insert_value(&timers[0], tptr, val);
1524 t2->count++;
1525}
1526
1527void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr)
1528{
1529 const char *b, *e, *p;
1530 int f, err, array[5];
1531 struct ebmb_node *srv_node;
1532 struct srv_st *srv;
1533 int val;
1534
1535 /* the server field is before the status field, so let's
1536 * parse them in the proper order.
1537 */
1538 b = field_start(accept_field, SERVER_FIELD - ACCEPT_FIELD + 1);
1539 if (unlikely(!*b)) {
1540 truncated_line(linenum, line);
1541 return;
1542 }
1543
1544 e = field_stop(b + 1); /* we have the server name in [b]..[e-1] */
1545
1546 /* the chance that a server name already exists is extremely high,
1547 * so let's perform a normal lookup first.
1548 */
1549 srv_node = ebst_lookup_len(&timers[0], b, e - b);
1550 srv = container_of(srv_node, struct srv_st, node);
1551
1552 if (!srv_node) {
1553 /* server not yet in the tree, let's create it */
1554 srv = (void *)calloc(1, sizeof(struct srv_st) + e - b + 1);
1555 srv_node = &srv->node;
1556 memcpy(&srv_node->key, b, e - b);
1557 srv_node->key[e - b] = '\0';
1558 ebst_insert(&timers[0], srv_node);
1559 }
1560
1561 /* let's collect the connect and response times */
1562 if (!time_field) {
1563 time_field = field_start(e, TIME_FIELD - SERVER_FIELD);
1564 if (unlikely(!*time_field)) {
1565 truncated_line(linenum, line);
1566 return;
1567 }
1568 }
1569
1570 e = field_stop(time_field + 1);
1571 /* we have field TIME_FIELD in [time_field]..[e-1] */
1572
1573 p = time_field;
1574 err = 0;
1575 f = 0;
1576 while (!SEP(*p)) {
1577 array[f] = str2ic(p);
1578 if (array[f] < 0) {
1579 array[f] = -1;
1580 err = 1;
1581 }
1582 if (++f == 5)
1583 break;
1584 SKIP_CHAR(p, '/');
1585 }
1586
1587 if (unlikely(f < 5)){
1588 parse_err++;
1589 return;
1590 }
1591
1592 /* OK we have our timers in array[2,3] */
1593 if (!err)
1594 srv->nb_ok++;
1595
1596 if (array[2] >= 0) {
1597 srv->cum_ct += array[2];
1598 srv->nb_ct++;
1599 }
1600
1601 if (array[3] >= 0) {
1602 srv->cum_rt += array[3];
1603 srv->nb_rt++;
1604 }
1605
1606 /* we're interested in the 5 HTTP status classes (1xx ... 5xx), and
1607 * the invalid ones which will be reported as 0.
1608 */
1609 b = field_start(e, STATUS_FIELD - TIME_FIELD);
1610 if (unlikely(!*b)) {
1611 truncated_line(linenum, line);
1612 return;
1613 }
1614
1615 val = 0;
1616 if (*b >= '1' && *b <= '5')
1617 val = *b - '0';
1618
1619 srv->st_cnt[val]++;
1620}
1621
1622void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr)
1623{
1624 struct url_stat *ustat = NULL;
1625 struct ebpt_node *ebpt_old;
1626 const char *b, *e;
1627 int f, err, array[5];
Baptiste61aaad02012-09-08 23:10:03 +02001628 int val;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001629
1630 /* let's collect the response time */
1631 if (!time_field) {
1632 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); // avg 115 ns per line
1633 if (unlikely(!*time_field)) {
1634 truncated_line(linenum, line);
1635 return;
1636 }
1637 }
1638
1639 /* we have the field TIME_FIELD starting at <time_field>. We'll
1640 * parse the 5 timers to detect errors, it takes avg 55 ns per line.
1641 */
1642 e = time_field; err = 0; f = 0;
1643 while (!SEP(*e)) {
1644 array[f] = str2ic(e);
1645 if (array[f] < 0) {
1646 array[f] = -1;
1647 err = 1;
1648 }
1649 if (++f == 5)
1650 break;
1651 SKIP_CHAR(e, '/');
1652 }
1653 if (f < 5) {
1654 parse_err++;
1655 return;
1656 }
1657
1658 /* OK we have our timers in array[3], and err is >0 if at
1659 * least one -1 was seen. <e> points to the first char of
1660 * the last timer. Let's prepare a new node with that.
1661 */
1662 if (unlikely(!ustat))
1663 ustat = calloc(1, sizeof(*ustat));
1664
1665 ustat->nb_err = err;
1666 ustat->nb_req = 1;
1667
1668 /* use array[4] = total time in case of error */
1669 ustat->total_time = (array[3] >= 0) ? array[3] : array[4];
1670 ustat->total_time_ok = (array[3] >= 0) ? array[3] : 0;
1671
Baptiste61aaad02012-09-08 23:10:03 +02001672 e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
1673 val = str2ic(e);
1674 ustat->total_bytes_sent = val;
1675
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001676 /* the line may be truncated because of a bad request or anything like this,
1677 * without a method. Also, if it does not begin with an quote, let's skip to
1678 * the next field because it's a capture. Let's fall back to the "method" itself
1679 * if there's nothing else.
1680 */
Baptiste61aaad02012-09-08 23:10:03 +02001681 e = field_start(e, METH_FIELD - BYTES_SENT_FIELD + 1);
Willy Tarreau61a40c72011-09-06 08:11:27 +02001682 while (*e != '"' && *e) {
1683 /* Note: some syslog servers escape quotes ! */
1684 if (*e == '\\' && e[1] == '"')
1685 break;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001686 e = field_start(e, 2);
Willy Tarreau61a40c72011-09-06 08:11:27 +02001687 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001688
1689 if (unlikely(!*e)) {
1690 truncated_line(linenum, line);
Ilya Shipitsin4473a2e2017-09-22 22:33:16 +05001691 free(ustat);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001692 return;
1693 }
1694
1695 b = field_start(e, URL_FIELD - METH_FIELD + 1); // avg 40 ns per line
1696 if (!*b)
1697 b = e;
1698
1699 /* stop at end of field or first ';' or '?', takes avg 64 ns per line */
1700 e = b;
1701 do {
Tim Duesterhus24b8d692021-10-18 12:12:02 +02001702 if (*e == ' '||
1703 (!(filter2 & FILT2_PRESERVE_QUERY) && (*e == '?' || *e == ';'))) {
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001704 *(char *)e = 0;
1705 break;
1706 }
1707 e++;
1708 } while (*e);
1709
1710 /* now instead of copying the URL for a simple lookup, we'll link
1711 * to it from the node we're trying to insert. If it returns a
1712 * different value, it was already there. Otherwise we just have
1713 * to dynamically realloc an entry using strdup().
1714 */
1715 ustat->node.url.key = (char *)b;
1716 ebpt_old = ebis_insert(&timers[0], &ustat->node.url);
1717
1718 if (ebpt_old != &ustat->node.url) {
1719 struct url_stat *ustat_old;
1720 /* node was already there, let's update previous one */
1721 ustat_old = container_of(ebpt_old, struct url_stat, node.url);
1722 ustat_old->nb_req ++;
1723 ustat_old->nb_err += ustat->nb_err;
1724 ustat_old->total_time += ustat->total_time;
1725 ustat_old->total_time_ok += ustat->total_time_ok;
Baptiste61aaad02012-09-08 23:10:03 +02001726 ustat_old->total_bytes_sent += ustat->total_bytes_sent;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001727 } else {
1728 ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
1729 ustat = NULL; /* node was used */
1730 }
1731}
1732
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001733void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr)
1734{
1735 struct url_stat *ustat = NULL;
1736 struct ebpt_node *ebpt_old;
1737 const char *b, *e;
1738 int f, err, array[5];
1739 int val;
1740
1741 /* let's collect the response time */
1742 if (!time_field) {
1743 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); // avg 115 ns per line
1744 if (unlikely(!*time_field)) {
1745 truncated_line(linenum, line);
1746 return;
1747 }
1748 }
1749
1750 /* we have the field TIME_FIELD starting at <time_field>. We'll
1751 * parse the 5 timers to detect errors, it takes avg 55 ns per line.
1752 */
1753 e = time_field; err = 0; f = 0;
1754 while (!SEP(*e)) {
1755 if (f == 0 || f == 4) {
1756 array[f] = str2ic(e);
1757 if (array[f] < 0) {
1758 array[f] = -1;
1759 err = 1;
1760 }
1761 }
1762 if (++f == 5)
1763 break;
1764 SKIP_CHAR(e, '/');
1765 }
1766 if (f < 5) {
1767 parse_err++;
1768 return;
1769 }
1770
1771 /* OK we have our timers in array[0], and err is >0 if at
1772 * least one -1 was seen. <e> points to the first char of
1773 * the last timer. Let's prepare a new node with that.
1774 */
1775 if (unlikely(!ustat))
1776 ustat = calloc(1, sizeof(*ustat));
1777
1778 ustat->nb_err = err;
1779 ustat->nb_req = 1;
1780
1781 /* use array[4] = total time in case of error */
1782 ustat->total_time = (array[0] >= 0) ? array[0] : array[4];
1783 ustat->total_time_ok = (array[0] >= 0) ? array[0] : 0;
1784
1785 e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
1786 val = str2ic(e);
1787 ustat->total_bytes_sent = val;
1788
1789 /* the source might be IPv4 or IPv6, so we always strip the port by
1790 * removing the last colon.
1791 */
1792 b = source_field;
1793 e = field_stop(b + 1);
1794 while (e > b && e[-1] != ':')
1795 e--;
1796 *(char *)(e - 1) = '\0';
1797
1798 /* now instead of copying the src for a simple lookup, we'll link
1799 * to it from the node we're trying to insert. If it returns a
1800 * different value, it was already there. Otherwise we just have
1801 * to dynamically realloc an entry using strdup(). We're using the
1802 * <url> field of the node to store the source address.
1803 */
1804 ustat->node.url.key = (char *)b;
1805 ebpt_old = ebis_insert(&timers[0], &ustat->node.url);
1806
1807 if (ebpt_old != &ustat->node.url) {
1808 struct url_stat *ustat_old;
1809 /* node was already there, let's update previous one */
1810 ustat_old = container_of(ebpt_old, struct url_stat, node.url);
1811 ustat_old->nb_req ++;
1812 ustat_old->nb_err += ustat->nb_err;
1813 ustat_old->total_time += ustat->total_time;
1814 ustat_old->total_time_ok += ustat->total_time_ok;
1815 ustat_old->total_bytes_sent += ustat->total_bytes_sent;
1816 } else {
1817 ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
1818 ustat = NULL; /* node was used */
1819 }
1820}
1821
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001822void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr)
1823{
1824 struct timer *t2;
Ryan O'Hara8cb99932017-12-15 10:21:39 -06001825 const char *p;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001826 int f, err, array[5];
1827
1828 if (!time_field) {
1829 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
1830 if (unlikely(!*time_field)) {
1831 truncated_line(linenum, line);
1832 return;
1833 }
1834 }
1835
Ryan O'Hara8cb99932017-12-15 10:21:39 -06001836 field_stop(time_field + 1);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001837 /* we have field TIME_FIELD in [time_field]..[e-1] */
1838
1839 p = time_field;
1840 err = 0;
1841 f = 0;
1842 while (!SEP(*p)) {
1843 array[f] = str2ic(p);
1844 if (array[f] < 0) {
1845 array[f] = -1;
1846 err = 1;
1847 }
1848 if (++f == 5)
1849 break;
1850 SKIP_CHAR(p, '/');
1851 }
1852
1853 if (unlikely(f < 5)) {
1854 parse_err++;
1855 return;
1856 }
1857
1858 /* if we find at least one negative time, we count one error
1859 * with a time equal to the total session time. This will
1860 * emphasize quantum timing effects associated to known
1861 * timeouts. Note that on some buggy machines, it is possible
1862 * that the total time is negative, hence the reason to reset
1863 * it.
1864 */
1865
1866 if (filter & FILT_GRAPH_TIMERS) {
1867 if (err) {
1868 if (array[4] < 0)
1869 array[4] = -1;
1870 t2 = insert_timer(&timers[0], tptr, array[4]); // total time
1871 t2->count++;
1872 } else {
1873 int v;
1874
1875 t2 = insert_timer(&timers[1], tptr, array[0]); t2->count++; // req
1876 t2 = insert_timer(&timers[2], tptr, array[2]); t2->count++; // conn
1877 t2 = insert_timer(&timers[3], tptr, array[3]); t2->count++; // resp
1878
1879 v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
1880 if (v < 0 && !(filter & FILT_QUIET))
1881 fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
1882 line, array[0], array[1], array[2], array[3], array[4], v);
1883 t2 = insert_timer(&timers[4], tptr, v); t2->count++;
1884 lines_out++;
1885 }
1886 } else { /* percentile */
1887 if (err) {
1888 if (array[4] < 0)
1889 array[4] = -1;
1890 t2 = insert_value(&timers[0], tptr, array[4]); // total time
1891 t2->count++;
1892 } else {
1893 int v;
1894
1895 t2 = insert_value(&timers[1], tptr, array[0]); t2->count++; // req
1896 t2 = insert_value(&timers[2], tptr, array[2]); t2->count++; // conn
1897 t2 = insert_value(&timers[3], tptr, array[3]); t2->count++; // resp
1898
1899 v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
1900 if (v < 0 && !(filter & FILT_QUIET))
1901 fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
1902 line, array[0], array[1], array[2], array[3], array[4], v);
1903 t2 = insert_value(&timers[4], tptr, v); t2->count++;
1904 lines_out++;
1905 }
1906 }
1907}
1908
1909
Willy Tarreau72c28532009-01-22 18:56:50 +01001910/*
1911 * Local variables:
1912 * c-indent-level: 8
1913 * c-basic-offset: 8
1914 * End:
1915 */