blob: 900cf5d46c6a675128f1ee9a030076d173a7ca70 [file] [log] [blame]
Willy Tarreau72c28532009-01-22 18:56:50 +01001/*
Willy Tarreaud8fc1102010-09-12 17:56:16 +02002 * haproxy log statistics reporter
Willy Tarreau72c28532009-01-22 18:56:50 +01003 *
Willy Tarreau8a09b662012-10-10 10:26:22 +02004 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
Willy Tarreau72c28532009-01-22 18:56:50 +01005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau72c28532009-01-22 18:56:50 +010013#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <syslog.h>
18#include <string.h>
19#include <unistd.h>
20#include <ctype.h>
Olivier Burgarde97b9042014-05-22 16:44:59 +020021#include <time.h>
Willy Tarreau72c28532009-01-22 18:56:50 +010022
Willy Tarreau8d2b7772020-05-27 10:58:19 +020023#include <import/eb32tree.h>
24#include <import/eb64tree.h>
25#include <import/ebistree.h>
26#include <import/ebsttree.h>
Willy Tarreau72c28532009-01-22 18:56:50 +010027
/* Numbers of the log line fields this tool looks at.
 * NOTE(review): these look like 1-based positions in the sense of
 * field_start(), counted after the skipped leading fields - confirm against
 * the callers.
 */
#define SOURCE_FIELD       5
#define ACCEPT_FIELD       6
#define SERVER_FIELD       8
#define TIME_FIELD         9
#define STATUS_FIELD      10
#define BYTES_SENT_FIELD  11
#define TERM_CODES_FIELD  14
#define CONN_FIELD        15
#define QUEUE_LEN_FIELD   16
#define METH_FIELD        17
#define URL_FIELD         18

#define MAXLINE 16384	/* NOTE(review): presumably the input line buffer size */
#define QBITS   4	/* number of high bits kept when quantifying timers */
41
/* SEP(c): true when <c> is a space or any control character (including the
 * trailing zero).
 */
#define SEP(c) ((unsigned char)(c) <= ' ')

/* SKIP_CHAR(p, c): advance pointer <p> until just after the first occurrence
 * of character <c>. If a space or control character (including the trailing
 * zero) is met first, <p> is left pointing to it instead. <p> must be a
 * modifiable lvalue and is evaluated several times; <c> may be any expression.
 */
#define SKIP_CHAR(p, c) do {						\
		while (1) {						\
			int skip_ch_ = (unsigned char)*(p)++;		\
			if (skip_ch_ == (c))				\
				break;					\
			if (skip_ch_ <= ' ') {				\
				(p)--;					\
				break;					\
			}						\
		}							\
	} while (0)
Willy Tarreau72c28532009-01-22 18:56:50 +010044
45/* [0] = err/date, [1] = req, [2] = conn, [3] = resp, [4] = data */
46static struct eb_root timers[5] = {
47 EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
48 EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
49};
50
51struct timer {
52 struct eb32_node node;
53 unsigned int count;
54};
55
Willy Tarreaud2201062010-05-27 18:17:30 +020056struct srv_st {
57 unsigned int st_cnt[6]; /* 0xx to 5xx */
58 unsigned int nb_ct, nb_rt, nb_ok;
59 unsigned long long cum_ct, cum_rt;
60 struct ebmb_node node;
61 /* don't put anything else here, the server name will be there */
62};
Willy Tarreau72c28532009-01-22 18:56:50 +010063
Willy Tarreauabe45b62010-10-28 20:33:46 +020064struct url_stat {
65 union {
66 struct ebpt_node url;
67 struct eb64_node val;
68 } node;
69 char *url;
70 unsigned long long total_time; /* sum(all reqs' times) */
71 unsigned long long total_time_ok; /* sum(all OK reqs' times) */
Baptiste61aaad02012-09-08 23:10:03 +020072 unsigned long long total_bytes_sent; /* sum(all bytes sent) */
Willy Tarreauabe45b62010-10-28 20:33:46 +020073 unsigned int nb_err, nb_req;
74};
75
/* Bits of the global <filter> variable. The option which sets each bit on the
 * command line is reminded on the right when there is one.
 */
#define FILT_COUNT_ONLY             0x00000001	/* -c */
#define FILT_INVERT                 0x00000002
#define FILT_QUIET                  0x00000004	/* -q */
#define FILT_ERRORS_ONLY            0x00000008	/* -e, -E */
#define FILT_ACC_DELAY              0x00000010	/* -ad */
#define FILT_ACC_COUNT              0x00000020	/* -ac */
#define FILT_GRAPH_TIMERS           0x00000040	/* -gt */
#define FILT_PERCENTILE             0x00000080	/* -pct */
#define FILT_TIME_RESP              0x00000100	/* -rt, -RT */

#define FILT_INVERT_ERRORS          0x00000200	/* -E */
#define FILT_INVERT_TIME_RESP       0x00000400	/* -RT */

#define FILT_COUNT_STATUS           0x00000800	/* -st */
#define FILT_COUNT_SRV_STATUS       0x00001000	/* -srv */
#define FILT_COUNT_TERM_CODES       0x00002000	/* -tc */

#define FILT_COUNT_URL_ONLY         0x00004000	/* -u */
#define FILT_COUNT_URL_COUNT        0x00008000	/* -uc */
#define FILT_COUNT_URL_ERR          0x00010000	/* -ue */
#define FILT_COUNT_URL_TTOT         0x00020000	/* -ut */
#define FILT_COUNT_URL_TAVG         0x00040000	/* -ua */
#define FILT_COUNT_URL_TTOTO        0x00080000	/* -uto */
#define FILT_COUNT_URL_TAVGO        0x00100000	/* -uao */

#define FILT_HTTP_ONLY              0x00200000	/* -H */
#define FILT_TERM_CODE_NAME         0x00400000	/* -tcn, -TCN */
#define FILT_INVERT_TERM_CODE_NAME  0x00800000	/* -TCN */

#define FILT_HTTP_STATUS            0x01000000	/* -hs, -HS */
#define FILT_INVERT_HTTP_STATUS     0x02000000	/* -HS */
#define FILT_QUEUE_ONLY             0x04000000	/* -Q */
#define FILT_QUEUE_SRV_ONLY         0x08000000	/* -QS */

#define FILT_COUNT_URL_BAVG         0x10000000	/* -uba */
#define FILT_COUNT_URL_BTOT         0x20000000	/* -ubt */

/* any of the per-URL outputs */
#define FILT_COUNT_URL_ANY \
	(FILT_COUNT_URL_ONLY  | FILT_COUNT_URL_COUNT | FILT_COUNT_URL_ERR   | \
	 FILT_COUNT_URL_TTOT  | FILT_COUNT_URL_TAVG  | FILT_COUNT_URL_TTOTO | \
	 FILT_COUNT_URL_TAVGO | FILT_COUNT_URL_BAVG  | FILT_COUNT_URL_BTOT)

#define FILT_COUNT_COOK_CODES       0x40000000	/* -cc */
#define FILT_COUNT_IP_COUNT         0x80000000	/* -ic */

/* Bits of the global <filter2> variable */
#define FILT2_TIMESTAMP             0x01	/* -time */
#define FILT2_PRESERVE_QUERY        0x02	/* -query */
#define FILT2_EXTRACT_CAPTURE       0x04	/* -hdr */
Olivier Burgarde97b9042014-05-22 16:44:59 +0200123
/* filtering and output selection, set while parsing the command line */
unsigned int filter = 0;	/* FILT_* bits */
unsigned int filter2 = 0;	/* FILT2_* bits */
unsigned int filter_invert = 0;	/* toggled by each -v */

/* NOTE(review): presumably the line being processed and its number */
const char *line;
int linenum = 0;

int parse_err = 0;
int lines_out = 0;
int lines_max = -1;		/* value passed to -m, -1 when not set */
Willy Tarreau72c28532009-01-22 18:56:50 +0100132
const char *fgets2(FILE *stream);

/* Line processing functions. All of them but filter_count_ip() and
 * filter_extract_capture() share the signature of the <line_filter> pointer
 * used in main().
 */
void filter_count_url(const char *accept_field, const char *time_field,
                      struct timer **tptr);
void filter_count_ip(const char *source_field, const char *accept_field,
                     const char *time_field, struct timer **tptr);
void filter_count_srv_status(const char *accept_field, const char *time_field,
                             struct timer **tptr);
void filter_count_cook_codes(const char *accept_field, const char *time_field,
                             struct timer **tptr);
void filter_count_term_codes(const char *accept_field, const char *time_field,
                             struct timer **tptr);
void filter_count_status(const char *accept_field, const char *time_field,
                         struct timer **tptr);
void filter_graphs(const char *accept_field, const char *time_field,
                   struct timer **tptr);
void filter_output_line(const char *accept_field, const char *time_field,
                        struct timer **tptr);
/* NOTE(review): the two integers are presumably the -hdr <block> and <field> */
void filter_extract_capture(const char *accept_field, const char *time_field,
                            unsigned int, unsigned int);
void filter_accept_holes(const char *accept_field, const char *time_field,
                         struct timer **tptr);
145
/* Print <msg> (nothing if NULL) followed by the short command line synopsis
 * on <output>.
 */
void usage(FILE *output, const char *msg)
{
	fprintf(output,
		"%s"
		"Usage: halog [-h|--help] for long help\n"
		" halog [-q] [-c] [-m <lines>]\n"
		" {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic\n"
		" |-hdr <block>:<field>\n"
		" }\n"
		" [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>] [-query]\n"
		" [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] ] <log\n"
		"\n",
		msg ? msg : ""
		);
}
161
/* Report <msg> followed by the short usage on stderr, then terminate the
 * program with exit status 1. Never returns.
 */
void die(const char *msg)
{
	usage(stderr, msg);
	exit(1);
}
167
Willy Tarreau615674c2012-01-23 08:15:51 +0100168void help()
169{
170 usage(stdout, NULL);
171 printf(
172 "Input filters (several filters may be combined) :\n"
173 " -H only match lines containing HTTP logs (ignore TCP)\n"
174 " -E only match lines without any error (no 5xx status)\n"
175 " -e only match lines with errors (status 5xx or negative)\n"
176 " -rt|-RT <time> only match response times larger|smaller than <time>\n"
177 " -Q|-QS only match queued requests (any queue|server queue)\n"
178 " -tcn|-TCN <code> only match requests with/without termination code <code>\n"
179 " -hs|-HS <[min][:][max]> only match requests with HTTP status codes within/not\n"
180 " within min..max. Any of them may be omitted. Exact\n"
181 " code is checked for if no ':' is specified.\n"
Olivier Burgarde97b9042014-05-22 16:44:59 +0200182 " -time <[min][:max]> only match requests recorded between timestamps.\n"
183 " Any of them may be omitted.\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100184 "Modifiers\n"
185 " -v invert the input filtering condition\n"
186 " -q don't report errors/warnings\n"
Willy Tarreau667c9052012-10-10 16:49:28 +0200187 " -m <lines> limit output to the first <lines> lines\n"
Tim Duesterhusaefb6302021-10-28 15:55:49 +0200188 " -s <skip_n_fields> skip n fields from the beginning of a line (default %d)\n"
189 " you can also use -n to start from earlier then field %d\n"
Tim Duesterhusa8826662021-10-28 16:36:03 +0200190 " -query preserve the query string for per-URL (-u*) statistics\n"
Tim Duesterhusaefb6302021-10-28 15:55:49 +0200191 "\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100192 "Output filters - only one may be used at a time\n"
193 " -c only report the number of lines that would have been printed\n"
194 " -pct output connect and response times percentiles\n"
195 " -st output number of requests per HTTP status code\n"
Willy Tarreau8a09b662012-10-10 10:26:22 +0200196 " -cc output number of requests per cookie code (2 chars)\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100197 " -tc output number of requests per termination code (2 chars)\n"
198 " -srv output statistics per server (time, requests, errors)\n"
Aleksandar Lazi6112f5c2020-05-15 22:58:30 +0200199 " -ic output statistics per ip count (time, requests, errors)\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100200 " -u* output statistics per URL (time, requests, errors)\n"
201 " Additional characters indicate the output sorting key :\n"
202 " -u : by URL, -uc : request count, -ue : error count\n"
Willy Tarreau4201df72012-10-10 14:57:35 +0200203 " -ua : average response time, -ut : average total time\n"
Willy Tarreau615674c2012-01-23 08:15:51 +0100204 " -uao, -uto: average times computed on valid ('OK') requests\n"
Tim Duesterhusa7e84852021-10-28 17:24:02 +0200205 " -uba, -ubt: average bytes returned, total bytes returned\n"
206 " -hdr output captured header at the given <block>:<field>\n",
Tim Duesterhusaefb6302021-10-28 15:55:49 +0200207 SOURCE_FIELD,SOURCE_FIELD
Willy Tarreau615674c2012-01-23 08:15:51 +0100208 );
209 exit(0);
210}
211
Willy Tarreau72c28532009-01-22 18:56:50 +0100212
/* Return a pointer to the first character which is not part of the field
 * starting at <p>, that is the first space or the trailing zero found from
 * <p>. Control characters other than the trailing zero belong to the field.
 */

#if defined(__i386__)
/* this one is always faster on 32-bits */
static inline const char *field_stop(const char *p)
{
	asm(
	    /* Look for spaces */
	    "4: \n\t"
	    "inc %0 \n\t"
	    "cmpb $0x20, -1(%0) \n\t"
	    "ja 4b \n\t"
	    "jz 3f \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb $0x0, -1(%0) \n\t"
	    "jnz 4b \n\t"

	    /* return %0-1 = position of the last char we checked */
	    "3: \n\t"
	    "dec %0 \n\t"
	    : "=r" (p)
	    : "0" (p)
	    );
	return p;
}
#else
const char *field_stop(const char *p)
{
	for (;; p++) {
		unsigned char ch = *p;

		/* only a space or the end of the string terminate a field */
		if (ch == ' ' || ch == '\0')
			return p;
	}
}
#endif
Willy Tarreau72c28532009-01-22 18:56:50 +0100254
/* Return a pointer to the beginning of field number <field> (starting from 1)
 * in string <p>. Fields are delimited by spaces only, and contiguous spaces
 * count as a single delimiter; tabs and other control characters are part of
 * the fields. When the field does not exist, the returned pointer points to
 * the trailing zero of the string. With <field> lower than 1 the end of the
 * string is returned as well.
 */
const char *field_start(const char *p, int field)
{
#ifndef PREFER_ASM
	while (1) {
		/* skip the spaces preceding the field */
		while (*p == ' ')
			p++;

		/* stop at the end of the line or at the start of the
		 * requested field
		 */
		if (!*p || --field == 0)
			return p;

		/* skip this field */
		while (*p != ' ' && *p != '\0')
			p++;
	}
#else
	/* This version works optimally on i386 and x86_64 but the code above
	 * shows similar performance. However, depending on the version of GCC
	 * used, inlining rules change and it may have difficulties to make
	 * efficient use of this code at other locations and could result in
	 * worse performance (eg: gcc 4.4). You may want to experiment.
	 *
	 * Both <p> and <field> are modified by the code below, so both have
	 * to be declared as read-write operands.
	 */
	asm(
	    /* skip spaces */
	    "1: \n\t"
	    "inc %0 \n\t"
	    "cmpb $0x20, -1(%0) \n\t"
	    "ja 2f \n\t"
	    "jz 1b \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb $0x0, -1(%0) \n\t"
	    "jz 3f \n\t"

	    /* start of field at [%0-1]. Check if we need to skip more fields */
	    "2: \n\t"
	    "dec %1 \n\t"
	    "jz 3f \n\t"

	    /* Look for spaces */
	    "4: \n\t"
	    "inc %0 \n\t"
	    "cmpb $0x20, -1(%0) \n\t"
	    "jz 1b \n\t"
	    "ja 4b \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb $0x0, -1(%0) \n\t"
	    "jnz 4b \n\t"

	    /* return %0-1 = position of the last char we checked */
	    "3: \n\t"
	    "dec %0 \n\t"
	    : "+r" (p), "+r" (field)
	    );
	return p;
#endif
}
337
/* Keep only the <bits> higher bits of <i>, counted from its most significant
 * set bit, and clear all the lower ones. Returns 0 when <bits> is zero, and
 * <i> unchanged when it does not have more than <bits> significant bits.
 */
static inline unsigned int quantify_u32(unsigned int i, int bits)
{
	int high;

	if (!bits)
		return 0;

	high = i ? fls_auto(i) : 0;	// 1 to 32, 0 for a null value

	if (high <= bits)
		return i;

	/* the mask is built from an unsigned constant so that the shift
	 * remains defined up to bit 31.
	 */
	return i & ~((1U << (high - bits)) - 1);
}
356
/* Keep only the <bits> higher bits of the absolute value of <i>, as well as
 * its sign. The absolute value is computed on an unsigned integer so that the
 * most negative value does not overflow.
 */
static inline int quantify(int i, int bits)
{
	if (i >= 0)
		return quantify_u32(i, bits);

	return -quantify_u32(-(unsigned int)i, bits);
}
366
367/* Insert timer value <v> into tree <r>. A pre-allocated node must be passed
368 * in <alloc>. It may be NULL, in which case the function will allocate it
369 * itself. It will be reset to NULL once consumed. The caller is responsible
370 * for freeing the node once not used anymore. The node where the value was
371 * inserted is returned.
372 */
373struct timer *insert_timer(struct eb_root *r, struct timer **alloc, int v)
374{
375 struct timer *t = *alloc;
376 struct eb32_node *n;
377
378 if (!t) {
379 t = calloc(sizeof(*t), 1);
380 if (unlikely(!t)) {
381 fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
382 exit(1);
383 }
384 }
385 t->node.key = quantify(v, QBITS); // keep only the higher QBITS bits
386
387 n = eb32i_insert(r, &t->node);
388 if (n == &t->node)
389 t = NULL; /* node inserted, will malloc next time */
390
391 *alloc = t;
392 return container_of(n, struct timer, node);
393}
394
395/* Insert value value <v> into tree <r>. A pre-allocated node must be passed
396 * in <alloc>. It may be NULL, in which case the function will allocate it
397 * itself. It will be reset to NULL once consumed. The caller is responsible
398 * for freeing the node once not used anymore. The node where the value was
399 * inserted is returned.
400 */
401struct timer *insert_value(struct eb_root *r, struct timer **alloc, int v)
402{
403 struct timer *t = *alloc;
404 struct eb32_node *n;
405
406 if (!t) {
407 t = calloc(sizeof(*t), 1);
408 if (unlikely(!t)) {
409 fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
410 exit(1);
411 }
412 }
413 t->node.key = v;
414
415 n = eb32i_insert(r, &t->node);
416 if (n == &t->node)
417 t = NULL; /* node inserted, will malloc next time */
418
419 *alloc = t;
420 return container_of(n, struct timer, node);
421}
422
/* Convert the decimal number at the beginning of string <s> to an int. An
 * optional leading '-' is supported. The conversion stops at the first
 * character which is not a digit, and 0 is returned when there is no digit at
 * all. No overflow check is performed: the value is accumulated on an unsigned
 * integer so that it silently wraps instead of overflowing a signed one.
 */
int str2ic(const char *s)
{
	unsigned int acc = 0;
	unsigned int digit;
	int negative = (*s == '-');

	if (negative)
		s++;

	/* any non-digit, including the trailing zero, yields a value above 9 */
	while ((digit = (unsigned char)*s++ - '0') <= 9)
		acc = acc * 10 + digit;

	return negative ? (int)(0U - acc) : (int)acc;
}
451
452
/* Convert the <len> first characters of <s> to an unsigned integer, a bit
 * like strtoul() with a length. All characters are assumed to be digits, no
 * check is performed on them. Returns 0 when <len> is not positive.
 */
static inline unsigned int __strl2ui(const char *s, int len)
{
	unsigned int result = 0;
	const char *end;

	if (len <= 0)
		return 0;

	for (end = s + len; s != end; s++)
		result = result * 10 + (unsigned char)*s - '0';

	return result;
}

/* Non-inlined version of __strl2ui() */
unsigned int strl2ui(const char *s, int len)
{
	return __strl2ui(s, len);
}
468
/* Read the decimal number at <*pos> into <*value>, and leave <*pos> just
 * after the first character which is not a digit. This character is returned,
 * it is zero when the end of the string was reached.
 */
static inline unsigned char cd_read_number(const char **pos, unsigned int *value)
{
	const char *cur = *pos;
	unsigned int acc = 0;
	unsigned char ch;

	for (ch = *cur++; (unsigned char)(ch - '0') <= 9; ch = *cur++)
		acc = acc * 10 + (unsigned char)(ch - '0');

	*pos = cur;
	*value = acc;
	return ch;
}

/* Convert "[04/Dec/2008:09:49:40.555]" to an integer equivalent to the time of
 * the day in milliseconds. Everything up to the first ':' is skipped, then
 * hours, minutes and seconds are read, each followed by one delimiter, and
 * the milliseconds are read as well when the seconds are followed by a '.'.
 * It returns -1 when the string ends before the last number is terminated.
 */
int convert_date(const char *field)
{
	unsigned int hours, minutes, seconds;
	unsigned int millis = 0;
	const char *cur = field;
	unsigned char stop;

	/* skip the date */
	while (*cur != ':') {
		if (!*cur)
			return -1;
		cur++;
	}
	cur++;

	/* hour + ':' */
	if (!cd_read_number(&cur, &hours))
		return -1;

	/* minute + ':' */
	if (!cd_read_number(&cur, &minutes))
		return -1;

	/* second + '.' or ']' */
	stop = cd_read_number(&cur, &seconds);
	if (!stop)
		return -1;

	/* if there's a '.', we have milliseconds + ']' */
	if (stop == '.' && !cd_read_number(&cur, &millis))
		return -1;

	return (((hours * 60) + minutes) * 60 + seconds) * 1000 + millis;
}
537
/* Read the decimal number at <*pos> into <*value>, and leave <*pos> just
 * after the first character which is not a digit. This character is returned,
 * it is zero when the end of the string was reached.
 */
static inline unsigned char ts_read_number(const char **pos, unsigned int *value)
{
	const char *cur = *pos;
	unsigned int acc = 0;
	unsigned char ch;

	for (ch = *cur++; (unsigned char)(ch - '0') <= 9; ch = *cur++)
		acc = acc * 10 + (unsigned char)(ch - '0');

	*pos = cur;
	*value = acc;
	return ch;
}

/* Convert "[04/Dec/2008:09:49:40.555]" to a unix timestamp, the date being
 * interpreted in the local time zone. The first character is skipped without
 * being checked, the month must be one of the 3-letter English abbreviations
 * followed by a '/', and the milliseconds are ignored. It returns -1 for all
 * unparsable values, which includes strings ending before the seconds and
 * dates that mktime() cannot represent.
 *
 * The result of the last conversion is kept in static variables so that
 * mktime() is only called when the date or the minute changes between two
 * calls. The function is thus not reentrant. The result is truncated to an
 * int as imposed by the return type.
 */
int convert_date_to_timestamp(const char *field)
{
	static const char months[12][4] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
	};
	/* last converted date, without the seconds, and its timestamp */
	static unsigned int last_d, last_mo, last_y, last_h, last_m;
	static int last_res;
	static int last_valid;

	unsigned int d, mo, y, h, m, s;
	const char *e = field;
	struct tm tm;
	time_t res;

	if (!*e)
		goto out_err;
	e++; // remove '['

	/* day + '/' */
	if (!ts_read_number(&e, &d))
		goto out_err;

	/* month + '/'. strncmp() stops on the trailing zero, so no character
	 * is read past the end of the string.
	 */
	for (mo = 0; mo < 12; mo++) {
		if (strncmp(e, months[mo], 3) == 0)
			break;
	}
	if (mo == 12 || e[3] != '/')
		goto out_err;
	mo++; /* 1..12 */
	e += 4;

	/* year + ':' */
	if (!ts_read_number(&e, &y))
		goto out_err;

	/* hour + ':' */
	if (!ts_read_number(&e, &h))
		goto out_err;

	/* minute + ':' */
	if (!ts_read_number(&e, &m))
		goto out_err;

	/* second + '.' or ']' */
	ts_read_number(&e, &s);

	if (last_valid &&
	    m == last_m && h == last_h &&
	    d == last_d && mo == last_mo && y == last_y)
		return last_res + s;

	memset(&tm, 0, sizeof(tm));
	tm.tm_min   = m;
	tm.tm_hour  = h;
	tm.tm_mday  = d;
	tm.tm_mon   = mo - 1;
	tm.tm_year  = (int)y - 1900;
	tm.tm_isdst = -1; /* let mktime() find out whether DST applies */

	res = mktime(&tm);
	if (res == (time_t)-1) {
		last_valid = 0;
		goto out_err;
	}

	last_d = d;
	last_mo = mo;
	last_y = y;
	last_h = h;
	last_m = m;
	last_res = (int)res;
	last_valid = 1;

	return last_res + s;
 out_err:
	return -1;
}
685
Willy Tarreau72c28532009-01-22 18:56:50 +0100686void truncated_line(int linenum, const char *line)
687{
688 if (!(filter & FILT_QUIET))
689 fprintf(stderr, "Truncated line %d: %s\n", linenum, line);
690}
691
692int main(int argc, char **argv)
693{
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600694 const char *b, *p, *time_field, *accept_field, *source_field;
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200695 const char *filter_term_code_name = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100696 const char *output_file = NULL;
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600697 int f, last;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200698 struct timer *t = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100699 struct eb32_node *n;
Willy Tarreauabe45b62010-10-28 20:33:46 +0200700 struct url_stat *ustat = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100701 int val, test;
Willy Tarreauc8746532014-05-28 23:05:07 +0200702 unsigned int uval;
Willy Tarreau03ca6052020-12-21 08:40:04 +0100703 unsigned int filter_acc_delay = 0, filter_acc_count = 0;
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200704 int filter_time_resp = 0;
Willy Tarreaud3007ff2011-09-05 02:07:23 +0200705 int filt_http_status_low = 0, filt_http_status_high = 0;
Willy Tarreau03ca6052020-12-21 08:40:04 +0100706 unsigned int filt2_timestamp_low = 0, filt2_timestamp_high = 0;
Tim Duesterhusa7e84852021-10-28 17:24:02 +0200707 unsigned int filt2_capture_block = 0, filt2_capture_field = 0;
Willy Tarreau72c28532009-01-22 18:56:50 +0100708 int skip_fields = 1;
709
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200710 void (*line_filter)(const char *accept_field, const char *time_field, struct timer **tptr) = NULL;
711
Willy Tarreau72c28532009-01-22 18:56:50 +0100712 argc--; argv++;
713 while (argc > 0) {
714 if (*argv[0] != '-')
715 break;
716
717 if (strcmp(argv[0], "-ad") == 0) {
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200718 if (argc < 2) die("missing option for -ad\n");
Willy Tarreau72c28532009-01-22 18:56:50 +0100719 argc--; argv++;
720 filter |= FILT_ACC_DELAY;
721 filter_acc_delay = atol(*argv);
722 }
723 else if (strcmp(argv[0], "-ac") == 0) {
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200724 if (argc < 2) die("missing option for -ac\n");
Willy Tarreau72c28532009-01-22 18:56:50 +0100725 argc--; argv++;
726 filter |= FILT_ACC_COUNT;
727 filter_acc_count = atol(*argv);
728 }
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200729 else if (strcmp(argv[0], "-rt") == 0) {
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200730 if (argc < 2) die("missing option for -rt\n");
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200731 argc--; argv++;
732 filter |= FILT_TIME_RESP;
733 filter_time_resp = atol(*argv);
734 }
735 else if (strcmp(argv[0], "-RT") == 0) {
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200736 if (argc < 2) die("missing option for -RT\n");
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200737 argc--; argv++;
738 filter |= FILT_TIME_RESP | FILT_INVERT_TIME_RESP;
739 filter_time_resp = atol(*argv);
740 }
Willy Tarreau72c28532009-01-22 18:56:50 +0100741 else if (strcmp(argv[0], "-s") == 0) {
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200742 if (argc < 2) die("missing option for -s\n");
Willy Tarreau72c28532009-01-22 18:56:50 +0100743 argc--; argv++;
744 skip_fields = atol(*argv);
745 }
Willy Tarreau667c9052012-10-10 16:49:28 +0200746 else if (strcmp(argv[0], "-m") == 0) {
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200747 if (argc < 2) die("missing option for -m\n");
Willy Tarreau667c9052012-10-10 16:49:28 +0200748 argc--; argv++;
749 lines_max = atol(*argv);
750 }
Willy Tarreau72c28532009-01-22 18:56:50 +0100751 else if (strcmp(argv[0], "-e") == 0)
752 filter |= FILT_ERRORS_ONLY;
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200753 else if (strcmp(argv[0], "-E") == 0)
754 filter |= FILT_ERRORS_ONLY | FILT_INVERT_ERRORS;
Willy Tarreau70c428f2011-07-10 17:27:40 +0200755 else if (strcmp(argv[0], "-H") == 0)
756 filter |= FILT_HTTP_ONLY;
Willy Tarreau08911ff2011-10-13 13:28:36 +0200757 else if (strcmp(argv[0], "-Q") == 0)
758 filter |= FILT_QUEUE_ONLY;
759 else if (strcmp(argv[0], "-QS") == 0)
760 filter |= FILT_QUEUE_SRV_ONLY;
Willy Tarreau72c28532009-01-22 18:56:50 +0100761 else if (strcmp(argv[0], "-c") == 0)
762 filter |= FILT_COUNT_ONLY;
763 else if (strcmp(argv[0], "-q") == 0)
764 filter |= FILT_QUIET;
765 else if (strcmp(argv[0], "-v") == 0)
766 filter_invert = !filter_invert;
767 else if (strcmp(argv[0], "-gt") == 0)
768 filter |= FILT_GRAPH_TIMERS;
Willy Tarreau214c2032009-02-20 11:02:32 +0100769 else if (strcmp(argv[0], "-pct") == 0)
770 filter |= FILT_PERCENTILE;
Willy Tarreau0f423a72010-05-03 10:50:54 +0200771 else if (strcmp(argv[0], "-st") == 0)
772 filter |= FILT_COUNT_STATUS;
Willy Tarreaud2201062010-05-27 18:17:30 +0200773 else if (strcmp(argv[0], "-srv") == 0)
774 filter |= FILT_COUNT_SRV_STATUS;
Willy Tarreau8a09b662012-10-10 10:26:22 +0200775 else if (strcmp(argv[0], "-cc") == 0)
776 filter |= FILT_COUNT_COOK_CODES;
Willy Tarreaud8fc1102010-09-12 17:56:16 +0200777 else if (strcmp(argv[0], "-tc") == 0)
778 filter |= FILT_COUNT_TERM_CODES;
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200779 else if (strcmp(argv[0], "-tcn") == 0) {
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200780 if (argc < 2) die("missing option for -tcn\n");
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200781 argc--; argv++;
782 filter |= FILT_TERM_CODE_NAME;
783 filter_term_code_name = *argv;
784 }
785 else if (strcmp(argv[0], "-TCN") == 0) {
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200786 if (argc < 2) die("missing option for -TCN\n");
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +0200787 argc--; argv++;
788 filter |= FILT_TERM_CODE_NAME | FILT_INVERT_TERM_CODE_NAME;
789 filter_term_code_name = *argv;
790 }
Willy Tarreaud3007ff2011-09-05 02:07:23 +0200791 else if (strcmp(argv[0], "-hs") == 0 || strcmp(argv[0], "-HS") == 0) {
792 char *sep, *str;
793
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200794 if (argc < 2) die("missing option for -hs/-HS ([min]:[max])\n");
Willy Tarreaud3007ff2011-09-05 02:07:23 +0200795 filter |= FILT_HTTP_STATUS;
796 if (argv[0][1] == 'H')
797 filter |= FILT_INVERT_HTTP_STATUS;
798
799 argc--; argv++;
800 str = *argv;
801 sep = strchr(str, ':'); /* [min]:[max] */
802 if (!sep)
803 sep = str; /* make max point to min */
804 else
805 *sep++ = 0;
806 filt_http_status_low = *str ? atol(str) : 0;
807 filt_http_status_high = *sep ? atol(sep) : 65535;
808 }
Olivier Burgarde97b9042014-05-22 16:44:59 +0200809 else if (strcmp(argv[0], "-time") == 0) {
810 char *sep, *str;
811
Tim Duesterhus75a0a1f2021-10-28 17:06:23 +0200812 if (argc < 2) die("missing option for -time ([min]:[max])\n");
Olivier Burgarde97b9042014-05-22 16:44:59 +0200813 filter2 |= FILT2_TIMESTAMP;
814
815 argc--; argv++;
816 str = *argv;
817 sep = strchr(str, ':'); /* [min]:[max] */
818 filt2_timestamp_low = *str ? atol(str) : 0;
819 if (!sep)
820 filt2_timestamp_high = 0xFFFFFFFF;
821 else
822 filt2_timestamp_high = atol(++sep);
823 }
Willy Tarreauabe45b62010-10-28 20:33:46 +0200824 else if (strcmp(argv[0], "-u") == 0)
825 filter |= FILT_COUNT_URL_ONLY;
826 else if (strcmp(argv[0], "-uc") == 0)
827 filter |= FILT_COUNT_URL_COUNT;
828 else if (strcmp(argv[0], "-ue") == 0)
829 filter |= FILT_COUNT_URL_ERR;
830 else if (strcmp(argv[0], "-ua") == 0)
831 filter |= FILT_COUNT_URL_TAVG;
832 else if (strcmp(argv[0], "-ut") == 0)
833 filter |= FILT_COUNT_URL_TTOT;
834 else if (strcmp(argv[0], "-uao") == 0)
835 filter |= FILT_COUNT_URL_TAVGO;
836 else if (strcmp(argv[0], "-uto") == 0)
837 filter |= FILT_COUNT_URL_TTOTO;
Baptiste61aaad02012-09-08 23:10:03 +0200838 else if (strcmp(argv[0], "-uba") == 0)
839 filter |= FILT_COUNT_URL_BAVG;
840 else if (strcmp(argv[0], "-ubt") == 0)
841 filter |= FILT_COUNT_URL_BTOT;
Tim Duesterhusa8826662021-10-28 16:36:03 +0200842 else if (strcmp(argv[0], "-query") == 0)
Tim Duesterhusb09bdee2021-10-18 12:12:02 +0200843 filter2 |= FILT2_PRESERVE_QUERY;
Willy Tarreau7cf479c2013-02-16 23:49:04 +0100844 else if (strcmp(argv[0], "-ic") == 0)
845 filter |= FILT_COUNT_IP_COUNT;
Tim Duesterhusa7e84852021-10-28 17:24:02 +0200846 else if (strcmp(argv[0], "-hdr") == 0) {
847 char *sep, *str;
848
849 if (argc < 2) die("missing option for -hdr (<block>:<field>)\n");
850 filter2 |= FILT2_EXTRACT_CAPTURE;
851
852 argc--; argv++;
853 str = *argv;
854 sep = strchr(str, ':');
855 if (!sep)
856 die("missing colon in -hdr (<block>:<field>)\n");
857 else
858 *sep++ = 0;
859
860 filt2_capture_block = *str ? atol(str) : 1;
861 filt2_capture_field = *sep ? atol(sep) : 1;
862
863 if (filt2_capture_block < 1 || filt2_capture_field < 1)
864 die("block and field must be at least 1 for -hdr (<block>:<field>)\n");
865 }
Willy Tarreau72c28532009-01-22 18:56:50 +0100866 else if (strcmp(argv[0], "-o") == 0) {
867 if (output_file)
868 die("Fatal: output file name already specified.\n");
869 if (argc < 2)
870 die("Fatal: missing output file name.\n");
871 output_file = argv[1];
872 }
Willy Tarreau615674c2012-01-23 08:15:51 +0100873 else if (strcmp(argv[0], "-h") == 0 || strcmp(argv[0], "--help") == 0)
874 help();
Willy Tarreau72c28532009-01-22 18:56:50 +0100875 argc--;
876 argv++;
877 }
878
Tim Duesterhusa7e84852021-10-28 17:24:02 +0200879 if (!filter && !filter2)
Willy Tarreau72c28532009-01-22 18:56:50 +0100880 die("No action specified.\n");
881
882 if (filter & FILT_ACC_COUNT && !filter_acc_count)
883 filter_acc_count=1;
884
885 if (filter & FILT_ACC_DELAY && !filter_acc_delay)
886 filter_acc_delay = 1;
887
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200888
889 /* by default, all lines are printed */
890 line_filter = filter_output_line;
891 if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY))
892 line_filter = filter_accept_holes;
893 else if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE))
894 line_filter = filter_graphs;
895 else if (filter & FILT_COUNT_STATUS)
896 line_filter = filter_count_status;
Willy Tarreau8a09b662012-10-10 10:26:22 +0200897 else if (filter & FILT_COUNT_COOK_CODES)
898 line_filter = filter_count_cook_codes;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200899 else if (filter & FILT_COUNT_TERM_CODES)
900 line_filter = filter_count_term_codes;
901 else if (filter & FILT_COUNT_SRV_STATUS)
902 line_filter = filter_count_srv_status;
903 else if (filter & FILT_COUNT_URL_ANY)
904 line_filter = filter_count_url;
905 else if (filter & FILT_COUNT_ONLY)
906 line_filter = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100907
Willy Tarreauf8c95d22012-06-12 09:16:56 +0200908#if defined(POSIX_FADV_SEQUENTIAL)
909 /* around 20% performance improvement is observed on Linux with this
Joseph Herlant42172bd2018-11-09 18:02:35 -0800910 * on cold-cache. Surprisingly, WILLNEED is less performant. Don't
Willy Tarreauf8c95d22012-06-12 09:16:56 +0200911 * use NOREUSE as it flushes the cache and prevents easy data
912 * manipulation on logs!
913 */
914 posix_fadvise(0, 0, 0, POSIX_FADV_SEQUENTIAL);
915#endif
916
Willy Tarreaua1629a52012-11-13 20:48:15 +0100917 if (!line_filter && /* FILT_COUNT_ONLY ( see above), and no input filter (see below) */
Olivier Burgarde97b9042014-05-22 16:44:59 +0200918 !(filter & (FILT_HTTP_ONLY|FILT_TIME_RESP|FILT_ERRORS_ONLY|FILT_HTTP_STATUS|FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY|FILT_TERM_CODE_NAME)) &&
919 !(filter2 & (FILT2_TIMESTAMP))) {
Willy Tarreaua1629a52012-11-13 20:48:15 +0100920 /* read the whole file at once first, ignore it if inverted output */
Willy Tarreaue1a908c2012-01-03 09:23:03 +0100921 if (!filter_invert)
Willy Tarreaua1629a52012-11-13 20:48:15 +0100922 while ((lines_max < 0 || lines_out < lines_max) && fgets2(stdin) != NULL)
Willy Tarreaue1a908c2012-01-03 09:23:03 +0100923 lines_out++;
924
925 goto skip_filters;
926 }
927
Willy Tarreau214c2032009-02-20 11:02:32 +0100928 while ((line = fgets2(stdin)) != NULL) {
Willy Tarreau72c28532009-01-22 18:56:50 +0100929 linenum++;
Willy Tarreau26deaf52011-07-10 19:47:48 +0200930 time_field = NULL; accept_field = NULL;
Willy Tarreau7cf479c2013-02-16 23:49:04 +0100931 source_field = NULL;
Willy Tarreau72c28532009-01-22 18:56:50 +0100932
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200933 test = 1;
Willy Tarreau26deaf52011-07-10 19:47:48 +0200934
935 /* for any line we process, we first ensure that there is a field
936 * looking like the accept date field (beginning with a '[').
937 */
Willy Tarreau7cf479c2013-02-16 23:49:04 +0100938 if (filter & FILT_COUNT_IP_COUNT) {
939 /* we need the IP first */
940 source_field = field_start(line, SOURCE_FIELD + skip_fields);
941 accept_field = field_start(source_field, ACCEPT_FIELD - SOURCE_FIELD + 1);
942 }
943 else
944 accept_field = field_start(line, ACCEPT_FIELD + skip_fields);
945
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200946 if (unlikely(*accept_field != '[')) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200947 parse_err++;
948 continue;
949 }
950
951 /* the day of month field is begin 01 and 31 */
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200952 if (accept_field[1] < '0' || accept_field[1] > '3') {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200953 parse_err++;
954 continue;
955 }
956
Olivier Burgarde97b9042014-05-22 16:44:59 +0200957 if (filter2 & FILT2_TIMESTAMP) {
958 uval = convert_date_to_timestamp(accept_field);
959 test &= (uval>=filt2_timestamp_low && uval<=filt2_timestamp_high) ;
960 }
961
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200962 if (filter & FILT_HTTP_ONLY) {
Willy Tarreau70c428f2011-07-10 17:27:40 +0200963 /* only report lines with at least 4 timers */
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200964 if (!time_field) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200965 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200966 if (unlikely(!*time_field)) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200967 truncated_line(linenum, line);
968 continue;
969 }
Willy Tarreau70c428f2011-07-10 17:27:40 +0200970 }
971
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600972 field_stop(time_field + 1);
Willy Tarreau758a6ea2011-07-10 18:53:44 +0200973 /* we have field TIME_FIELD in [time_field]..[e-1] */
974 p = time_field;
Willy Tarreau70c428f2011-07-10 17:27:40 +0200975 f = 0;
Willy Tarreaudf6f0d12011-07-10 18:15:08 +0200976 while (!SEP(*p)) {
Willy Tarreau70c428f2011-07-10 17:27:40 +0200977 if (++f == 4)
978 break;
979 SKIP_CHAR(p, '/');
980 }
981 test &= (f >= 4);
982 }
983
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200984 if (filter & FILT_TIME_RESP) {
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200985 int tps;
986
987 /* only report lines with response times larger than filter_time_resp */
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200988 if (!time_field) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200989 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +0200990 if (unlikely(!*time_field)) {
Willy Tarreau26deaf52011-07-10 19:47:48 +0200991 truncated_line(linenum, line);
992 continue;
993 }
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200994 }
995
Ryan O'Hara8cb99932017-12-15 10:21:39 -0600996 field_stop(time_field + 1);
Willy Tarreau758a6ea2011-07-10 18:53:44 +0200997 /* we have field TIME_FIELD in [time_field]..[e-1], let's check only the response time */
Willy Tarreau5bdfd962009-10-14 15:16:29 +0200998
Willy Tarreau758a6ea2011-07-10 18:53:44 +0200999 p = time_field;
Willy Tarreau24bcb4f2010-10-28 20:39:50 +02001000 f = 0;
Willy Tarreaudf6f0d12011-07-10 18:15:08 +02001001 while (!SEP(*p)) {
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001002 tps = str2ic(p);
1003 if (tps < 0) {
1004 tps = -1;
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001005 }
Willy Tarreau24bcb4f2010-10-28 20:39:50 +02001006 if (++f == 4)
1007 break;
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001008 SKIP_CHAR(p, '/');
1009 }
1010
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001011 if (unlikely(f < 4)) {
Willy Tarreau5bdfd962009-10-14 15:16:29 +02001012 parse_err++;
1013 continue;
1014 }
1015
1016 test &= (tps >= filter_time_resp) ^ !!(filter & FILT_INVERT_TIME_RESP);
1017 }
1018
Willy Tarreaud3007ff2011-09-05 02:07:23 +02001019 if (filter & (FILT_ERRORS_ONLY | FILT_HTTP_STATUS)) {
1020 /* Check both error codes (-1, 5xx) and status code ranges */
Willy Tarreau26deaf52011-07-10 19:47:48 +02001021 if (time_field)
1022 b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
1023 else
1024 b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
1025
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001026 if (unlikely(!*b)) {
Willy Tarreau72c28532009-01-22 18:56:50 +01001027 truncated_line(linenum, line);
1028 continue;
1029 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001030
Willy Tarreaud3007ff2011-09-05 02:07:23 +02001031 val = str2ic(b);
1032 if (filter & FILT_ERRORS_ONLY)
1033 test &= (val < 0 || (val >= 500 && val <= 599)) ^ !!(filter & FILT_INVERT_ERRORS);
1034
1035 if (filter & FILT_HTTP_STATUS)
1036 test &= (val >= filt_http_status_low && val <= filt_http_status_high) ^ !!(filter & FILT_INVERT_HTTP_STATUS);
Willy Tarreau72c28532009-01-22 18:56:50 +01001037 }
1038
Willy Tarreau08911ff2011-10-13 13:28:36 +02001039 if (filter & (FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY)) {
1040 /* Check if the server's queue is non-nul */
1041 if (time_field)
1042 b = field_start(time_field, QUEUE_LEN_FIELD - TIME_FIELD + 1);
1043 else
1044 b = field_start(accept_field, QUEUE_LEN_FIELD - ACCEPT_FIELD + 1);
1045
1046 if (unlikely(!*b)) {
1047 truncated_line(linenum, line);
1048 continue;
1049 }
1050
1051 if (*b == '0') {
1052 if (filter & FILT_QUEUE_SRV_ONLY) {
1053 test = 0;
1054 }
1055 else {
1056 do {
1057 b++;
1058 if (*b == '/') {
1059 b++;
1060 break;
1061 }
1062 } while (*b);
1063 test &= ((unsigned char)(*b - '1') < 9);
1064 }
1065 }
1066 }
1067
Hervé COMMOWICK927cddd2011-08-10 17:42:41 +02001068 if (filter & FILT_TERM_CODE_NAME) {
1069 /* only report corresponding termination code name */
1070 if (time_field)
1071 b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1072 else
1073 b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1074
1075 if (unlikely(!*b)) {
1076 truncated_line(linenum, line);
1077 continue;
1078 }
1079
1080 test &= (b[0] == filter_term_code_name[0] && b[1] == filter_term_code_name[1]) ^ !!(filter & FILT_INVERT_TERM_CODE_NAME);
1081 }
1082
1083
Willy Tarreau0f423a72010-05-03 10:50:54 +02001084 test ^= filter_invert;
1085 if (!test)
1086 continue;
1087
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001088 /************** here we process inputs *******************/
Willy Tarreau72c28532009-01-22 18:56:50 +01001089
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001090 if (line_filter) {
1091 if (filter & FILT_COUNT_IP_COUNT)
1092 filter_count_ip(source_field, accept_field, time_field, &t);
Tim Duesterhusa7e84852021-10-28 17:24:02 +02001093 else if (filter2 & FILT2_EXTRACT_CAPTURE)
1094 filter_extract_capture(accept_field, time_field, filt2_capture_block, filt2_capture_field);
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001095 else
1096 line_filter(accept_field, time_field, &t);
1097 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001098 else
Willy Tarreaua1629a52012-11-13 20:48:15 +01001099 lines_out++; /* FILT_COUNT_ONLY was used, so we're just counting lines */
1100 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001101 break;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001102 }
Willy Tarreauabe45b62010-10-28 20:33:46 +02001103
Willy Tarreaue1a908c2012-01-03 09:23:03 +01001104 skip_filters:
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001105 /*****************************************************
1106 * Here we've finished reading all input. Depending on the
1107 * filters, we may still have some analysis to run on the
1108 * collected data and to output data in a new format.
1109 *************************************************** */
Willy Tarreau72c28532009-01-22 18:56:50 +01001110
1111 if (t)
1112 free(t);
1113
1114 if (filter & FILT_COUNT_ONLY) {
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001115 printf("%d\n", lines_out);
Willy Tarreau72c28532009-01-22 18:56:50 +01001116 exit(0);
1117 }
1118
Willy Tarreau72c28532009-01-22 18:56:50 +01001119 if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) {
1120 /* sort and count all timers. Output will look like this :
1121 * <accept_date> <delta_ms from previous one> <nb entries>
1122 */
1123 n = eb32_first(&timers[0]);
1124
1125 if (n)
1126 last = n->key;
1127 while (n) {
1128 unsigned int d, h, m, s, ms;
1129
1130 t = container_of(n, struct timer, node);
1131 h = n->key;
1132 d = h - last;
1133 last = h;
1134
1135 if (d >= filter_acc_delay && t->count >= filter_acc_count) {
1136 ms = h % 1000; h = h / 1000;
1137 s = h % 60; h = h / 60;
1138 m = h % 60; h = h / 60;
Willy Tarreau72c28532009-01-22 18:56:50 +01001139 printf("%02d:%02d:%02d.%03d %d %d %d\n", h, m, s, ms, last, d, t->count);
Willy Tarreau667c9052012-10-10 16:49:28 +02001140 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001141 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001142 break;
Willy Tarreau72c28532009-01-22 18:56:50 +01001143 }
1144 n = eb32_next(n);
1145 }
1146 }
1147 else if (filter & FILT_GRAPH_TIMERS) {
1148 /* sort all timers */
1149 for (f = 0; f < 5; f++) {
1150 struct eb32_node *n;
1151 int val;
1152
1153 val = 0;
1154 n = eb32_first(&timers[f]);
1155 while (n) {
1156 int i;
1157 double d;
1158
1159 t = container_of(n, struct timer, node);
1160 last = n->key;
1161 val = t->count;
1162
1163 i = (last < 0) ? -last : last;
1164 i = fls_auto(i) - QBITS;
1165
1166 if (i > 0)
1167 d = val / (double)(1 << i);
1168 else
1169 d = val;
1170
Willy Tarreaua1629a52012-11-13 20:48:15 +01001171 if (d > 0.0)
Willy Tarreau72c28532009-01-22 18:56:50 +01001172 printf("%d %d %f\n", f, last, d+1.0);
Willy Tarreau72c28532009-01-22 18:56:50 +01001173
1174 n = eb32_next(n);
1175 }
Willy Tarreau214c2032009-02-20 11:02:32 +01001176 }
1177 }
1178 else if (filter & FILT_PERCENTILE) {
1179 /* report timers by percentile :
1180 * <percent> <total> <max_req_time> <max_conn_time> <max_resp_time> <max_data_time>
1181 * We don't count errs.
1182 */
1183 struct eb32_node *n[5];
1184 unsigned long cum[5];
1185 double step;
1186
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001187 if (!lines_out)
Willy Tarreau910ba4b2009-11-17 10:16:19 +01001188 goto empty;
1189
Willy Tarreau214c2032009-02-20 11:02:32 +01001190 for (f = 1; f < 5; f++) {
1191 n[f] = eb32_first(&timers[f]);
1192 cum[f] = container_of(n[f], struct timer, node)->count;
1193 }
1194
1195 for (step = 1; step <= 1000;) {
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001196 unsigned int thres = lines_out * (step / 1000.0);
Willy Tarreau214c2032009-02-20 11:02:32 +01001197
1198 printf("%3.1f %d ", step/10.0, thres);
1199 for (f = 1; f < 5; f++) {
1200 struct eb32_node *next;
1201 while (cum[f] < thres) {
1202 /* need to find other keys */
1203 next = eb32_next(n[f]);
1204 if (!next)
1205 break;
1206 n[f] = next;
1207 cum[f] += container_of(next, struct timer, node)->count;
1208 }
1209
1210 /* value still within $step % of total */
1211 printf("%d ", n[f]->key);
1212 }
1213 putchar('\n');
1214 if (step >= 100 && step < 900)
1215 step += 50; // jump 5% by 5% between those steps.
1216 else if (step >= 20 && step < 980)
1217 step += 10;
1218 else
1219 step += 1;
Willy Tarreau72c28532009-01-22 18:56:50 +01001220 }
1221 }
Willy Tarreau0f423a72010-05-03 10:50:54 +02001222 else if (filter & FILT_COUNT_STATUS) {
1223 /* output all statuses in the form of <status> <occurrences> */
1224 n = eb32_first(&timers[0]);
1225 while (n) {
1226 t = container_of(n, struct timer, node);
1227 printf("%d %d\n", n->key, t->count);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001228 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001229 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001230 break;
Willy Tarreau0f423a72010-05-03 10:50:54 +02001231 n = eb32_next(n);
1232 }
1233 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001234 else if (filter & FILT_COUNT_SRV_STATUS) {
Willy Tarreaud2201062010-05-27 18:17:30 +02001235 struct ebmb_node *srv_node;
1236 struct srv_st *srv;
1237
1238 printf("#srv_name 1xx 2xx 3xx 4xx 5xx other tot_req req_ok pct_ok avg_ct avg_rt\n");
1239
1240 srv_node = ebmb_first(&timers[0]);
1241 while (srv_node) {
1242 int tot_rq;
1243
1244 srv = container_of(srv_node, struct srv_st, node);
1245
1246 tot_rq = 0;
1247 for (f = 0; f <= 5; f++)
1248 tot_rq += srv->st_cnt[f];
1249
1250 printf("%s %d %d %d %d %d %d %d %d %.1f %d %d\n",
1251 srv_node->key, srv->st_cnt[1], srv->st_cnt[2],
1252 srv->st_cnt[3], srv->st_cnt[4], srv->st_cnt[5], srv->st_cnt[0],
1253 tot_rq,
1254 srv->nb_ok, (double)srv->nb_ok * 100.0 / (tot_rq?tot_rq:1),
1255 (int)(srv->cum_ct / (srv->nb_ct?srv->nb_ct:1)), (int)(srv->cum_rt / (srv->nb_rt?srv->nb_rt:1)));
1256 srv_node = ebmb_next(srv_node);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001257 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001258 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001259 break;
Willy Tarreaud2201062010-05-27 18:17:30 +02001260 }
1261 }
Willy Tarreau8a09b662012-10-10 10:26:22 +02001262 else if (filter & (FILT_COUNT_TERM_CODES|FILT_COUNT_COOK_CODES)) {
Willy Tarreaud8fc1102010-09-12 17:56:16 +02001263 /* output all statuses in the form of <code> <occurrences> */
1264 n = eb32_first(&timers[0]);
1265 while (n) {
1266 t = container_of(n, struct timer, node);
1267 printf("%c%c %d\n", (n->key >> 8), (n->key) & 255, t->count);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001268 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001269 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001270 break;
Willy Tarreaud8fc1102010-09-12 17:56:16 +02001271 n = eb32_next(n);
1272 }
1273 }
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001274 else if (filter & (FILT_COUNT_URL_ANY|FILT_COUNT_IP_COUNT)) {
Willy Tarreauabe45b62010-10-28 20:33:46 +02001275 struct eb_node *node, *next;
1276
1277 if (!(filter & FILT_COUNT_URL_ONLY)) {
1278 /* we have to sort on another criterion. We'll use timers[1] for the
1279 * destination tree.
1280 */
1281
1282 timers[1] = EB_ROOT; /* reconfigure to accept duplicates */
1283 for (node = eb_first(&timers[0]); node; node = next) {
1284 next = eb_next(node);
1285 eb_delete(node);
1286
1287 ustat = container_of(node, struct url_stat, node.url.node);
1288
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001289 if (filter & (FILT_COUNT_URL_COUNT|FILT_COUNT_IP_COUNT))
Willy Tarreauabe45b62010-10-28 20:33:46 +02001290 ustat->node.val.key = ustat->nb_req;
1291 else if (filter & FILT_COUNT_URL_ERR)
1292 ustat->node.val.key = ustat->nb_err;
1293 else if (filter & FILT_COUNT_URL_TTOT)
1294 ustat->node.val.key = ustat->total_time;
1295 else if (filter & FILT_COUNT_URL_TAVG)
1296 ustat->node.val.key = ustat->nb_req ? ustat->total_time / ustat->nb_req : 0;
1297 else if (filter & FILT_COUNT_URL_TTOTO)
1298 ustat->node.val.key = ustat->total_time_ok;
1299 else if (filter & FILT_COUNT_URL_TAVGO)
1300 ustat->node.val.key = (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0;
Baptiste61aaad02012-09-08 23:10:03 +02001301 else if (filter & FILT_COUNT_URL_BAVG)
1302 ustat->node.val.key = ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0;
1303 else if (filter & FILT_COUNT_URL_BTOT)
1304 ustat->node.val.key = ustat->total_bytes_sent;
Willy Tarreauabe45b62010-10-28 20:33:46 +02001305 else
1306 ustat->node.val.key = 0;
1307
1308 eb64_insert(&timers[1], &ustat->node.val);
1309 }
1310 /* switch trees */
1311 timers[0] = timers[1];
1312 }
1313
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001314 if (FILT_COUNT_IP_COUNT)
1315 printf("#req err ttot tavg oktot okavg bavg btot src\n");
1316 else
1317 printf("#req err ttot tavg oktot okavg bavg btot url\n");
Willy Tarreauabe45b62010-10-28 20:33:46 +02001318
1319 /* scan the tree in its reverse sorting order */
1320 node = eb_last(&timers[0]);
1321 while (node) {
1322 ustat = container_of(node, struct url_stat, node.url.node);
Willy Tarreau2df860c2020-12-21 08:29:09 +01001323 printf("%d %d %llu %llu %llu %llu %llu %llu %s\n",
Willy Tarreauabe45b62010-10-28 20:33:46 +02001324 ustat->nb_req,
1325 ustat->nb_err,
1326 ustat->total_time,
1327 ustat->nb_req ? ustat->total_time / ustat->nb_req : 0,
1328 ustat->total_time_ok,
1329 (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0,
Baptiste61aaad02012-09-08 23:10:03 +02001330 ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0,
1331 ustat->total_bytes_sent,
Willy Tarreauabe45b62010-10-28 20:33:46 +02001332 ustat->url);
1333
1334 node = eb_prev(node);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001335 lines_out++;
Willy Tarreaua1629a52012-11-13 20:48:15 +01001336 if (lines_max >= 0 && lines_out >= lines_max)
Willy Tarreau667c9052012-10-10 16:49:28 +02001337 break;
Willy Tarreauabe45b62010-10-28 20:33:46 +02001338 }
1339 }
Willy Tarreaud2201062010-05-27 18:17:30 +02001340
Willy Tarreau910ba4b2009-11-17 10:16:19 +01001341 empty:
Willy Tarreau72c28532009-01-22 18:56:50 +01001342 if (!(filter & FILT_QUIET))
1343 fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n",
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001344 linenum, lines_out, parse_err);
Willy Tarreau72c28532009-01-22 18:56:50 +01001345 exit(0);
1346}
1347
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001348void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr)
1349{
1350 puts(line);
1351 lines_out++;
1352}
1353
Tim Duesterhusa7e84852021-10-28 17:24:02 +02001354void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int block, unsigned int field)
1355{
1356 const char *e, *f;
1357
1358 if (time_field)
1359 e = field_start(time_field, METH_FIELD - TIME_FIELD + 1);
1360 else
1361 e = field_start(accept_field, METH_FIELD - ACCEPT_FIELD + 1);
1362
1363 while (block-- > 0) {
1364 /* Scan until the start of a capture block ('{') until the URL ('"'). */
1365 while ((*e != '"' && *e != '{') && *e) {
1366 /* Note: some syslog servers escape quotes ! */
1367 if (*e == '\\' && e[1] == '"')
1368 break;
1369
1370 e = field_start(e, 2);
1371 }
1372
1373 if (unlikely(!*e)) {
1374 truncated_line(linenum, line);
1375 return;
1376 }
1377
1378 /* We reached the URL, no more captures will follow. */
1379 if (*e != '{') {
1380 puts("");
1381 lines_out++;
1382 return;
1383 }
1384
1385 /* e points the the opening brace of the capture block. */
1386
1387 e++;
1388 }
1389
1390 /* We are in the first field of the selected capture block. */
1391
1392 while (--field > 0) {
1393 while ((*e != '|' && *e != '}') && *e)
1394 e++;
1395
1396 if (unlikely(!*e)) {
1397 truncated_line(linenum, line);
1398 return;
1399 }
1400
1401 if (*e != '|') {
1402 puts("");
1403 lines_out++;
1404 return;
1405 }
1406
1407 /* e points to the pipe. */
1408
1409 e++;
1410 }
1411
1412 f = e;
1413
1414 while ((*f != '|' && *f != '}') && *f)
1415 f++;
1416
1417 if (unlikely(!*f)) {
1418 truncated_line(linenum, line);
1419 return;
1420 }
1421
1422 fwrite(e, f - e, 1, stdout);
1423 putchar('\n');
1424 lines_out++;
1425}
1426
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001427void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr)
1428{
1429 struct timer *t2;
1430 int val;
1431
1432 val = convert_date(accept_field);
1433 if (unlikely(val < 0)) {
1434 truncated_line(linenum, line);
1435 return;
1436 }
1437
1438 t2 = insert_value(&timers[0], tptr, val);
1439 t2->count++;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001440 return;
1441}
1442
1443void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr)
1444{
1445 struct timer *t2;
1446 const char *b;
1447 int val;
1448
1449 if (time_field)
1450 b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
1451 else
1452 b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
1453
1454 if (unlikely(!*b)) {
1455 truncated_line(linenum, line);
1456 return;
1457 }
1458
1459 val = str2ic(b);
1460
1461 t2 = insert_value(&timers[0], tptr, val);
1462 t2->count++;
1463}
1464
Willy Tarreau8a09b662012-10-10 10:26:22 +02001465void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr)
1466{
1467 struct timer *t2;
1468 const char *b;
1469 int val;
1470
1471 if (time_field)
1472 b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1473 else
1474 b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1475
1476 if (unlikely(!*b)) {
1477 truncated_line(linenum, line);
1478 return;
1479 }
1480
1481 val = 256 * b[2] + b[3];
1482
1483 t2 = insert_value(&timers[0], tptr, val);
1484 t2->count++;
1485}
1486
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001487void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr)
1488{
1489 struct timer *t2;
1490 const char *b;
1491 int val;
1492
1493 if (time_field)
1494 b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1495 else
1496 b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1497
1498 if (unlikely(!*b)) {
1499 truncated_line(linenum, line);
1500 return;
1501 }
1502
1503 val = 256 * b[0] + b[1];
1504
1505 t2 = insert_value(&timers[0], tptr, val);
1506 t2->count++;
1507}
1508
1509void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr)
1510{
1511 const char *b, *e, *p;
1512 int f, err, array[5];
1513 struct ebmb_node *srv_node;
1514 struct srv_st *srv;
1515 int val;
1516
1517 /* the server field is before the status field, so let's
1518 * parse them in the proper order.
1519 */
1520 b = field_start(accept_field, SERVER_FIELD - ACCEPT_FIELD + 1);
1521 if (unlikely(!*b)) {
1522 truncated_line(linenum, line);
1523 return;
1524 }
1525
1526 e = field_stop(b + 1); /* we have the server name in [b]..[e-1] */
1527
1528 /* the chance that a server name already exists is extremely high,
1529 * so let's perform a normal lookup first.
1530 */
1531 srv_node = ebst_lookup_len(&timers[0], b, e - b);
1532 srv = container_of(srv_node, struct srv_st, node);
1533
1534 if (!srv_node) {
1535 /* server not yet in the tree, let's create it */
1536 srv = (void *)calloc(1, sizeof(struct srv_st) + e - b + 1);
1537 srv_node = &srv->node;
1538 memcpy(&srv_node->key, b, e - b);
1539 srv_node->key[e - b] = '\0';
1540 ebst_insert(&timers[0], srv_node);
1541 }
1542
1543 /* let's collect the connect and response times */
1544 if (!time_field) {
1545 time_field = field_start(e, TIME_FIELD - SERVER_FIELD);
1546 if (unlikely(!*time_field)) {
1547 truncated_line(linenum, line);
1548 return;
1549 }
1550 }
1551
1552 e = field_stop(time_field + 1);
1553 /* we have field TIME_FIELD in [time_field]..[e-1] */
1554
1555 p = time_field;
1556 err = 0;
1557 f = 0;
1558 while (!SEP(*p)) {
1559 array[f] = str2ic(p);
1560 if (array[f] < 0) {
1561 array[f] = -1;
1562 err = 1;
1563 }
1564 if (++f == 5)
1565 break;
1566 SKIP_CHAR(p, '/');
1567 }
1568
1569 if (unlikely(f < 5)){
1570 parse_err++;
1571 return;
1572 }
1573
1574 /* OK we have our timers in array[2,3] */
1575 if (!err)
1576 srv->nb_ok++;
1577
1578 if (array[2] >= 0) {
1579 srv->cum_ct += array[2];
1580 srv->nb_ct++;
1581 }
1582
1583 if (array[3] >= 0) {
1584 srv->cum_rt += array[3];
1585 srv->nb_rt++;
1586 }
1587
1588 /* we're interested in the 5 HTTP status classes (1xx ... 5xx), and
1589 * the invalid ones which will be reported as 0.
1590 */
1591 b = field_start(e, STATUS_FIELD - TIME_FIELD);
1592 if (unlikely(!*b)) {
1593 truncated_line(linenum, line);
1594 return;
1595 }
1596
1597 val = 0;
1598 if (*b >= '1' && *b <= '5')
1599 val = *b - '0';
1600
1601 srv->st_cnt[val]++;
1602}
1603
1604void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr)
1605{
1606 struct url_stat *ustat = NULL;
1607 struct ebpt_node *ebpt_old;
1608 const char *b, *e;
1609 int f, err, array[5];
Baptiste61aaad02012-09-08 23:10:03 +02001610 int val;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001611
1612 /* let's collect the response time */
1613 if (!time_field) {
1614 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); // avg 115 ns per line
1615 if (unlikely(!*time_field)) {
1616 truncated_line(linenum, line);
1617 return;
1618 }
1619 }
1620
1621 /* we have the field TIME_FIELD starting at <time_field>. We'll
1622 * parse the 5 timers to detect errors, it takes avg 55 ns per line.
1623 */
1624 e = time_field; err = 0; f = 0;
1625 while (!SEP(*e)) {
1626 array[f] = str2ic(e);
1627 if (array[f] < 0) {
1628 array[f] = -1;
1629 err = 1;
1630 }
1631 if (++f == 5)
1632 break;
1633 SKIP_CHAR(e, '/');
1634 }
1635 if (f < 5) {
1636 parse_err++;
1637 return;
1638 }
1639
1640 /* OK we have our timers in array[3], and err is >0 if at
1641 * least one -1 was seen. <e> points to the first char of
1642 * the last timer. Let's prepare a new node with that.
1643 */
1644 if (unlikely(!ustat))
1645 ustat = calloc(1, sizeof(*ustat));
1646
1647 ustat->nb_err = err;
1648 ustat->nb_req = 1;
1649
1650 /* use array[4] = total time in case of error */
1651 ustat->total_time = (array[3] >= 0) ? array[3] : array[4];
1652 ustat->total_time_ok = (array[3] >= 0) ? array[3] : 0;
1653
Baptiste61aaad02012-09-08 23:10:03 +02001654 e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
1655 val = str2ic(e);
1656 ustat->total_bytes_sent = val;
1657
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001658 /* the line may be truncated because of a bad request or anything like this,
1659 * without a method. Also, if it does not begin with an quote, let's skip to
1660 * the next field because it's a capture. Let's fall back to the "method" itself
1661 * if there's nothing else.
1662 */
Baptiste61aaad02012-09-08 23:10:03 +02001663 e = field_start(e, METH_FIELD - BYTES_SENT_FIELD + 1);
Willy Tarreau61a40c72011-09-06 08:11:27 +02001664 while (*e != '"' && *e) {
1665 /* Note: some syslog servers escape quotes ! */
1666 if (*e == '\\' && e[1] == '"')
1667 break;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001668 e = field_start(e, 2);
Willy Tarreau61a40c72011-09-06 08:11:27 +02001669 }
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001670
1671 if (unlikely(!*e)) {
1672 truncated_line(linenum, line);
Ilya Shipitsin4473a2e2017-09-22 22:33:16 +05001673 free(ustat);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001674 return;
1675 }
1676
1677 b = field_start(e, URL_FIELD - METH_FIELD + 1); // avg 40 ns per line
1678 if (!*b)
1679 b = e;
1680
1681 /* stop at end of field or first ';' or '?', takes avg 64 ns per line */
1682 e = b;
1683 do {
Tim Duesterhusb09bdee2021-10-18 12:12:02 +02001684 if (*e == ' '||
1685 (!(filter2 & FILT2_PRESERVE_QUERY) && (*e == '?' || *e == ';'))) {
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001686 *(char *)e = 0;
1687 break;
1688 }
1689 e++;
1690 } while (*e);
1691
1692 /* now instead of copying the URL for a simple lookup, we'll link
1693 * to it from the node we're trying to insert. If it returns a
1694 * different value, it was already there. Otherwise we just have
1695 * to dynamically realloc an entry using strdup().
1696 */
1697 ustat->node.url.key = (char *)b;
1698 ebpt_old = ebis_insert(&timers[0], &ustat->node.url);
1699
1700 if (ebpt_old != &ustat->node.url) {
1701 struct url_stat *ustat_old;
1702 /* node was already there, let's update previous one */
1703 ustat_old = container_of(ebpt_old, struct url_stat, node.url);
1704 ustat_old->nb_req ++;
1705 ustat_old->nb_err += ustat->nb_err;
1706 ustat_old->total_time += ustat->total_time;
1707 ustat_old->total_time_ok += ustat->total_time_ok;
Baptiste61aaad02012-09-08 23:10:03 +02001708 ustat_old->total_bytes_sent += ustat->total_bytes_sent;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001709 } else {
1710 ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
1711 ustat = NULL; /* node was used */
1712 }
1713}
1714
Willy Tarreau7cf479c2013-02-16 23:49:04 +01001715void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr)
1716{
1717 struct url_stat *ustat = NULL;
1718 struct ebpt_node *ebpt_old;
1719 const char *b, *e;
1720 int f, err, array[5];
1721 int val;
1722
1723 /* let's collect the response time */
1724 if (!time_field) {
1725 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); // avg 115 ns per line
1726 if (unlikely(!*time_field)) {
1727 truncated_line(linenum, line);
1728 return;
1729 }
1730 }
1731
1732 /* we have the field TIME_FIELD starting at <time_field>. We'll
1733 * parse the 5 timers to detect errors, it takes avg 55 ns per line.
1734 */
1735 e = time_field; err = 0; f = 0;
1736 while (!SEP(*e)) {
1737 if (f == 0 || f == 4) {
1738 array[f] = str2ic(e);
1739 if (array[f] < 0) {
1740 array[f] = -1;
1741 err = 1;
1742 }
1743 }
1744 if (++f == 5)
1745 break;
1746 SKIP_CHAR(e, '/');
1747 }
1748 if (f < 5) {
1749 parse_err++;
1750 return;
1751 }
1752
1753 /* OK we have our timers in array[0], and err is >0 if at
1754 * least one -1 was seen. <e> points to the first char of
1755 * the last timer. Let's prepare a new node with that.
1756 */
1757 if (unlikely(!ustat))
1758 ustat = calloc(1, sizeof(*ustat));
1759
1760 ustat->nb_err = err;
1761 ustat->nb_req = 1;
1762
1763 /* use array[4] = total time in case of error */
1764 ustat->total_time = (array[0] >= 0) ? array[0] : array[4];
1765 ustat->total_time_ok = (array[0] >= 0) ? array[0] : 0;
1766
1767 e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
1768 val = str2ic(e);
1769 ustat->total_bytes_sent = val;
1770
1771 /* the source might be IPv4 or IPv6, so we always strip the port by
1772 * removing the last colon.
1773 */
1774 b = source_field;
1775 e = field_stop(b + 1);
1776 while (e > b && e[-1] != ':')
1777 e--;
1778 *(char *)(e - 1) = '\0';
1779
1780 /* now instead of copying the src for a simple lookup, we'll link
1781 * to it from the node we're trying to insert. If it returns a
1782 * different value, it was already there. Otherwise we just have
1783 * to dynamically realloc an entry using strdup(). We're using the
1784 * <url> field of the node to store the source address.
1785 */
1786 ustat->node.url.key = (char *)b;
1787 ebpt_old = ebis_insert(&timers[0], &ustat->node.url);
1788
1789 if (ebpt_old != &ustat->node.url) {
1790 struct url_stat *ustat_old;
1791 /* node was already there, let's update previous one */
1792 ustat_old = container_of(ebpt_old, struct url_stat, node.url);
1793 ustat_old->nb_req ++;
1794 ustat_old->nb_err += ustat->nb_err;
1795 ustat_old->total_time += ustat->total_time;
1796 ustat_old->total_time_ok += ustat->total_time_ok;
1797 ustat_old->total_bytes_sent += ustat->total_bytes_sent;
1798 } else {
1799 ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
1800 ustat = NULL; /* node was used */
1801 }
1802}
1803
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001804void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr)
1805{
1806 struct timer *t2;
Ryan O'Hara8cb99932017-12-15 10:21:39 -06001807 const char *p;
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001808 int f, err, array[5];
1809
1810 if (!time_field) {
1811 time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
1812 if (unlikely(!*time_field)) {
1813 truncated_line(linenum, line);
1814 return;
1815 }
1816 }
1817
Ryan O'Hara8cb99932017-12-15 10:21:39 -06001818 field_stop(time_field + 1);
Willy Tarreaua2b39fb2011-07-10 21:39:35 +02001819 /* we have field TIME_FIELD in [time_field]..[e-1] */
1820
1821 p = time_field;
1822 err = 0;
1823 f = 0;
1824 while (!SEP(*p)) {
1825 array[f] = str2ic(p);
1826 if (array[f] < 0) {
1827 array[f] = -1;
1828 err = 1;
1829 }
1830 if (++f == 5)
1831 break;
1832 SKIP_CHAR(p, '/');
1833 }
1834
1835 if (unlikely(f < 5)) {
1836 parse_err++;
1837 return;
1838 }
1839
1840 /* if we find at least one negative time, we count one error
1841 * with a time equal to the total session time. This will
1842 * emphasize quantum timing effects associated to known
1843 * timeouts. Note that on some buggy machines, it is possible
1844 * that the total time is negative, hence the reason to reset
1845 * it.
1846 */
1847
1848 if (filter & FILT_GRAPH_TIMERS) {
1849 if (err) {
1850 if (array[4] < 0)
1851 array[4] = -1;
1852 t2 = insert_timer(&timers[0], tptr, array[4]); // total time
1853 t2->count++;
1854 } else {
1855 int v;
1856
1857 t2 = insert_timer(&timers[1], tptr, array[0]); t2->count++; // req
1858 t2 = insert_timer(&timers[2], tptr, array[2]); t2->count++; // conn
1859 t2 = insert_timer(&timers[3], tptr, array[3]); t2->count++; // resp
1860
1861 v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
1862 if (v < 0 && !(filter & FILT_QUIET))
1863 fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
1864 line, array[0], array[1], array[2], array[3], array[4], v);
1865 t2 = insert_timer(&timers[4], tptr, v); t2->count++;
1866 lines_out++;
1867 }
1868 } else { /* percentile */
1869 if (err) {
1870 if (array[4] < 0)
1871 array[4] = -1;
1872 t2 = insert_value(&timers[0], tptr, array[4]); // total time
1873 t2->count++;
1874 } else {
1875 int v;
1876
1877 t2 = insert_value(&timers[1], tptr, array[0]); t2->count++; // req
1878 t2 = insert_value(&timers[2], tptr, array[2]); t2->count++; // conn
1879 t2 = insert_value(&timers[3], tptr, array[3]); t2->count++; // resp
1880
1881 v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
1882 if (v < 0 && !(filter & FILT_QUIET))
1883 fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
1884 line, array[0], array[1], array[2], array[3], array[4], v);
1885 t2 = insert_value(&timers[4], tptr, v); t2->count++;
1886 lines_out++;
1887 }
1888 }
1889}
1890
1891
Willy Tarreau72c28532009-01-22 18:56:50 +01001892/*
1893 * Local variables:
1894 * c-indent-level: 8
1895 * c-basic-offset: 8
1896 * End:
1897 */