Blame - include/common/ist.h - haproxy

blob: 10c0b6c083d945c295c40be31c21c80f9d4f5c9b [file] [log] [blame]

Willy Tarreau	e11f727	2017-05-30 17:49:36 +0200	[diff] [blame]	1	/*
				2	* include/common/ist.h
				3	* Very simple indirect string manipulation functions.
				4	*
				5	* Copyright (C) 2014-2017 Willy Tarreau - w@1wt.eu
				6	*
				7	* Permission is hereby granted, free of charge, to any person obtaining
				8	* a copy of this software and associated documentation files (the
				9	* "Software"), to deal in the Software without restriction, including
				10	* without limitation the rights to use, copy, modify, merge, publish,
				11	* distribute, sublicense, and/or sell copies of the Software, and to
				12	* permit persons to whom the Software is furnished to do so, subject to
				13	* the following conditions:
				14	*
				15	* The above copyright notice and this permission notice shall be
				16	* included in all copies or substantial portions of the Software.
				17	*
				18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
				19	* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
				20	* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
				21	* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
				22	* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
				23	* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				24	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
				25	* OTHER DEALINGS IN THE SOFTWARE.
				26	*/
				27
				28	#ifndef _COMMON_IST_H
				29	#define _COMMON_IST_H
				30
Christopher Faulet	2076145	2018-06-06 16:33:53 +0200	[diff] [blame]	31	#include <ctype.h>
Willy Tarreau	e11f727	2017-05-30 17:49:36 +0200	[diff] [blame]	32	#include <string.h>
Willy Tarreau	a7280a1	2018-11-26 19:41:40 +0100	[diff] [blame]	33	#include <unistd.h>
Willy Tarreau	e11f727	2017-05-30 17:49:36 +0200	[diff] [blame]	34
				35	#include <common/config.h>
				36
/* ASCII lower case conversion table, usable as an array literal: entry <c>
 * holds the lower case form of byte <c>. Only 'A'..'Z' (0x41..0x5a) are
 * remapped, all other byte values map to themselves. Each row below covers
 * 16 consecutive byte values.
 */
#define _IST_LC ((const unsigned char[256]){ \
	/* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, \
	/* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, \
	/* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, \
	/* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, \
	/* 0x40 */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, \
	/* 0x50 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, \
	/* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, \
	/* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, \
	/* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, \
	/* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, \
	/* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, \
	/* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, \
	/* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, \
	/* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, \
	/* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, \
	/* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, \
})
Willy Tarreau	d6735d6	2018-12-07 09:40:01 +0100	[diff] [blame]	72
				73	/* ASCII to upper case conversion table */
Willy Tarreau	0f35c59	2019-05-15 16:07:36 +0200	[diff] [blame]	74	#define _IST_UC ((const unsigned char[256]){ \
				75	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
				76	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, \
				77	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, \
				78	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, \
				79	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, \
				80	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, \
				81	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, \
				82	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, \
				83	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, \
				84	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, \
				85	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, \
				86	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, \
				87	0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, \
				88	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, \
				89	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, \
				90	0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, \
				91	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, \
				92	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, \
				93	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, \
				94	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, \
				95	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, \
				96	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, \
				97	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, \
				98	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, \
				99	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, \
				100	0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, \
				101	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, \
				102	0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, \
				103	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, \
				104	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, \
				105	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, \
				106	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, \
				107	})
				108
				109	#ifdef USE_OBSOLETE_LINKER
				110	/* some old linkers and some non-ELF platforms have issues with the weak
				111	* attribute so we turn these arrays to literals there.
				112	*/
				113	#define ist_lc _IST_LC
				114	#define ist_uc _IST_UC
				115	#else
				116	const unsigned char ist_lc[256] __attribute__((weak)) = _IST_LC;
				117	const unsigned char ist_uc[256] __attribute__((weak)) = _IST_UC;
				118	#endif
Willy Tarreau	d6735d6	2018-12-07 09:40:01 +0100	[diff] [blame]	119
/* Indirect string: a pointer to the first byte plus a length in bytes, with
 * no storage attached. It is most often used to represent a read-only string
 * returned from a function. The layout matters: keeping exactly two native
 * machine words lets the compiler return the structure in a register pair on
 * modern architectures, without the caller having to allocate stack room.
 * This relies on -freg-struct-return, which is often enabled by default.
 */
struct ist {
	char *ptr;   /* first byte of the string, not necessarily zero-terminated */
	size_t len;  /* number of bytes starting at <ptr> */
};
				132
/* Builds a constant ist initializer from a string literal, for use in array
 * and static declarations. Concatenating <str> with "" makes the compiler
 * reject anything that is not a string literal; the length excludes the
 * trailing zero.
 */
#define IST(str) { .ptr = str "", .len = sizeof(str "") - 1 }
				135
/* Makes an ist from a regular zero-terminated string. A NULL pointer yields a
 * NULL <ptr> with length 0. Compile-time constants are detected and their
 * length is resolved by the compiler; other values are measured by a small
 * inline loop instead of strlen(), as it's much cheaper and inlinable on small
 * strings. The construct is complex because __builtin_strlen() must never be
 * called with a non-constant expression, otherwise it involves a real
 * measurement. <str> is evaluated more than once, so it must not have side
 * effects.
 */
#if __GNUC__ >= 4
// gcc >= 4 detects constant propagation of str through __x and resolves the
// length of constant strings easily.
#define ist(str) ({                                                    \
	char *__x = (void *)(str);                                     \
	(struct ist){                                                  \
		.ptr = __x,                                            \
		.len = __builtin_constant_p(str) ?                     \
			((void *)(str) == (void *)0) ? 0 :             \
			__builtin_strlen(__x) :                        \
			({                                             \
				size_t __l = 0;                        \
				if (__x) for (__l--; __x[++__l]; ) ;   \
				__l;                                   \
			})                                             \
	};                                                             \
})
#else
// gcc < 4 can't do this, and the side effect is a warning each time a NULL is
// passed to ist() due to the check on __builtin_strlen(). It doesn't have the
// ability to know that this code is never called.
#define ist(str) ({                                                    \
	char *__x = (void *)(str);                                     \
	(struct ist){                                                  \
		.ptr = __x,                                            \
		.len = __builtin_constant_p(str) ?                     \
			((void *)(str) == (void *)0) ? 0 :             \
			__builtin_strlen(str) :                        \
			({                                             \
				size_t __l = 0;                        \
				if (__x) for (__l--; __x[++__l]; ) ;   \
				__l;                                   \
			})                                             \
	};                                                             \
})
#endif
				179
				180	/* makes an ist struct from a string and a length */
				181	static inline struct ist ist2(const void *ptr, size_t len)
				182	{
				183	return (struct ist){ .ptr = (char *)ptr, .len = len };
				184	}
				185
Willy Tarreau	e67c4e5	2017-10-19 06:28:23 +0200	[diff] [blame]	186	/* This function MODIFIES the string to add a zero AFTER the end, and returns
				187	* the start pointer. The purpose is to use it on strings extracted by parsers
				188	* from larger strings cut with delimiters that are not important and can be
				189	* destroyed. It allows any such string to be used with regular string
				190	* functions. It's also convenient to use with printf() to show data extracted
				191	* from writable areas. The caller is obviously responsible for ensuring that
				192	* the string is valid and that the first byte past the end is writable. If
				193	* these conditions cannot be satisfied, use istpad() below instead.
				194	*/
				195	static inline char *ist0(struct ist ist)
				196	{
				197	ist.ptr[ist.len] = 0;
				198	return ist.ptr;
				199	}
				200
Willy Tarreau	e11f727	2017-05-30 17:49:36 +0200	[diff] [blame]	201	/* returns the length of the string */
				202	static inline size_t istlen(const struct ist ist)
				203	{
				204	return ist.len;
				205	}
				206
				207	/* skips to next character in the string, always stops at the end */
				208	static inline struct ist istnext(const struct ist ist)
				209	{
				210	struct ist ret = ist;
				211
				212	if (ret.len) {
				213	ret.len--;
				214	ret.ptr++;
				215	}
				216	return ret;
				217	}
				218
				219	/* copies the contents from string <ist> to buffer <buf> and adds a trailing
				220	* zero. The caller must ensure <buf> is large enough.
				221	*/
				222	static inline struct ist istpad(void *buf, const struct ist ist)
				223	{
				224	struct ist ret = { .ptr = buf, .len = ist.len };
				225
				226	for (ret.len = 0; ret.len < ist.len; ret.len++)
				227	ret.ptr[ret.len] = ist.ptr[ret.len];
				228
				229	ret.ptr[ret.len] = 0;
				230	return ret;
				231	}
				232
				233	/* trims string <ist> to no more than <size> characters. The string is
				234	* returned.
				235	*/
				236	static inline struct ist isttrim(const struct ist ist, size_t size)
				237	{
				238	struct ist ret = ist;
				239
				240	if (ret.len > size)
				241	ret.len = size;
				242	return ret;
				243	}
				244
				245	/* trims string <ist> to no more than <size>-1 characters and ensures that a
				246	* zero is placed after <ist.len> (possibly reduced by one) and before <size>,
				247	* unless <size> is already zero. The string is returned. This is mostly aimed
				248	* at building printable strings that need to be zero-terminated.
				249	*/
				250	static inline struct ist istzero(const struct ist ist, size_t size)
				251	{
				252	struct ist ret = ist;
				253
				254	if (!size)
				255	ret.len = 0;
				256	else {
				257	if (ret.len > size - 1)
				258	ret.len = size - 1;
				259	ret.ptr[ret.len] = 0;
				260	}
				261	return ret;
				262	}
				263
				264	/* returns the ordinal difference between two strings :
				265	* < 0 if ist1 < ist2
				266	* = 0 if ist1 == ist2
				267	* > 0 if ist1 > ist2
				268	*/
				269	static inline int istdiff(const struct ist ist1, const struct ist ist2)
				270	{
				271	struct ist l = ist1;
				272	struct ist r = ist2;
				273
				274	do {
				275	if (!l.len--)
				276	return -r.len;
				277	if (!r.len--)
				278	return 1;
				279	} while (l.ptr++ == r.ptr++);
				280
				281	return (unsigned char )(l.ptr - 1) - (unsigned char )(r.ptr - 1);
				282	}
				283
				284	/* returns non-zero if <ist1> starts like <ist2> (empty strings do match) */
				285	static inline int istmatch(const struct ist ist1, const struct ist ist2)
				286	{
				287	struct ist l = ist1;
				288	struct ist r = ist2;
				289
				290	if (l.len < r.len)
				291	return 0;
				292
				293	while (r.len--) {
				294	if (l.ptr++ != r.ptr++)
				295	return 0;
				296	}
				297	return 1;
				298	}
				299
				300	/* returns non-zero if <ist1> starts like <ist2> on the first <count>
				301	* characters (empty strings do match).
				302	*/
				303	static inline int istnmatch(const struct ist ist1, const struct ist ist2, size_t count)
				304	{
				305	struct ist l = ist1;
				306	struct ist r = ist2;
				307
				308	if (l.len > count)
				309	l.len = count;
				310	if (r.len > count)
				311	r.len = count;
				312	return istmatch(l, r);
				313	}
				314
				315	/* returns non-zero if <ist1> equals <ist2> (empty strings are equal) */
				316	static inline int isteq(const struct ist ist1, const struct ist ist2)
				317	{
				318	struct ist l = ist1;
				319	struct ist r = ist2;
				320
				321	if (l.len != r.len)
				322	return 0;
				323
				324	while (l.len--) {
				325	if (l.ptr++ != r.ptr++)
				326	return 0;
				327	}
				328	return 1;
				329	}
				330
Christopher Faulet	2076145	2018-06-06 16:33:53 +0200	[diff] [blame]	331	/* returns non-zero if <ist1> equals <ist2>, ignoring the case (empty strings are equal) */
				332	static inline int isteqi(const struct ist ist1, const struct ist ist2)
				333	{
				334	struct ist l = ist1;
				335	struct ist r = ist2;
				336
				337	if (l.len != r.len)
				338	return 0;
				339
				340	while (l.len--) {
Willy Tarreau	d6735d6	2018-12-07 09:40:01 +0100	[diff] [blame]	341	if (l.ptr != r.ptr &&
				342	ist_lc[(unsigned char)l.ptr] != ist_lc[(unsigned char)r.ptr])
Christopher Faulet	2076145	2018-06-06 16:33:53 +0200	[diff] [blame]	343	return 0;
Willy Tarreau	d6735d6	2018-12-07 09:40:01 +0100	[diff] [blame]	344
Christopher Faulet	2076145	2018-06-06 16:33:53 +0200	[diff] [blame]	345	l.ptr++;
				346	r.ptr++;
				347	}
				348	return 1;
				349	}
				350
Willy Tarreau	e11f727	2017-05-30 17:49:36 +0200	[diff] [blame]	351	/* returns non-zero if <ist1> equals <ist2> on the first <count> characters
				352	* (empty strings are equal).
				353	*/
				354	static inline int istneq(const struct ist ist1, const struct ist ist2, size_t count)
				355	{
				356	struct ist l = ist1;
				357	struct ist r = ist2;
				358
				359	if (l.len > count)
				360	l.len = count;
				361	if (r.len > count)
				362	r.len = count;
				363	return isteq(l, r);
				364	}
				365
				366	/* copies <src> over <dst> for a maximum of <count> bytes. Returns the number
				367	* of characters copied (src.len), or -1 if it does not fit. In all cases, the
				368	* contents are copied prior to reporting an error, so that the destination
				369	* at least contains a valid but truncated string.
				370	*/
				371	static inline ssize_t istcpy(struct ist *dst, const struct ist src, size_t count)
				372	{
				373	dst->len = 0;
				374
				375	if (count > src.len)
				376	count = src.len;
				377
				378	while (dst->len < count) {
				379	dst->ptr[dst->len] = src.ptr[dst->len];
				380	dst->len++;
				381	}
				382
				383	if (dst->len == src.len)
				384	return src.len;
				385
				386	return -1;
				387	}
				388
				389	/* copies <src> over <dst> for a maximum of <count> bytes. Returns the number
				390	* of characters copied, or -1 if it does not fit. A (possibly truncated) valid
				391	* copy of <src> is always left into <dst>, and a trailing \0 is appended as
				392	* long as <count> is not null, even if that results in reducing the string by
				393	* one character.
				394	*/
				395	static inline ssize_t istscpy(struct ist *dst, const struct ist src, size_t count)
				396	{
				397	dst->len = 0;
				398
				399	if (!count)
				400	goto fail;
				401
				402	if (count > src.len)
				403	count = src.len + 1;
				404
				405	while (dst->len < count - 1) {
				406	dst->ptr[dst->len] = src.ptr[dst->len];
				407	dst->len++;
				408	}
				409
				410	dst->ptr[dst->len] = 0;
				411	if (dst->len == src.len)
				412	return src.len;
				413	fail:
				414	return -1;
				415	}
				416
				417	/* appends <src> after <dst> for a maximum of <count> total bytes in <dst> after
				418	* the copy. <dst> is assumed to be <count> or less before the call. The new
				419	* string's length is returned, or -1 if a truncation happened. In all cases,
				420	* the contents are copied prior to reporting an error, so that the destination
				421	* at least contains a valid but truncated string.
				422	*/
				423	static inline ssize_t istcat(struct ist *dst, const struct ist src, size_t count)
				424	{
				425	const char *s = src.ptr;
				426
				427	while (dst->len < count && s != src.ptr + src.len)
				428	dst->ptr[dst->len++] = *s++;
				429
				430	if (s == src.ptr + src.len)
				431	return dst->len;
				432
				433	return -1;
				434	}
				435
				436	/* appends <src> after <dst> for a maximum of <count> total bytes in <dst> after
				437	* the copy. <dst> is assumed to be <count> or less before the call. The new
				438	* string's length is returned, or -1 if a truncation happened. In all cases,
				439	* the contents are copied prior to reporting an error, so that the destination
				440	* at least contains a valid but truncated string.
				441	*/
				442	static inline ssize_t istscat(struct ist *dst, const struct ist src, size_t count)
				443	{
				444	const char *s = src.ptr;
				445
				446	if (!count)
				447	goto fail;
				448
				449	while (dst->len < count - 1 && s != src.ptr + src.len) {
				450	dst->ptr[dst->len++] = *s++;
				451	}
				452
				453	dst->ptr[dst->len] = 0;
				454	if (s == src.ptr + src.len)
				455	return dst->len;
				456	fail:
				457	return -1;
				458	}
				459
Willy Tarreau	3f2d696	2018-12-07 08:35:07 +0100	[diff] [blame]	460	/* copies the entire <src> over <dst>, which must be allocated large enough to
				461	* hold the whole contents. No trailing zero is appended, this is mainly used
				462	* for protocol processing where the frame length has already been checked. An
				463	* ist made of the output and its length are returned. The destination is not
				464	* touched if src.len is null.
				465	*/
				466	static inline struct ist ist2bin(char *dst, const struct ist src)
				467	{
				468	size_t ofs = 0;
				469
				470	/* discourage the compiler from trying to optimize for large strings,
				471	* but tell it that most of our strings are not empty.
				472	*/
				473	if (__builtin_expect(ofs < src.len, 1)) {
				474	do {
				475	dst[ofs] = src.ptr[ofs];
				476	ofs++;
				477	} while (__builtin_expect(ofs < src.len, 0));
				478	}
				479	return ist2(dst, ofs);
				480	}
				481
				482	/* copies the entire <src> over <dst>, which must be allocated large enough to
				483	* hold the whole contents as well as a trailing zero which is always appended.
				484	* This is mainly used for protocol conversions where the frame length has
				485	* already been checked. An ist made of the output and its length (not counting
				486	* the trailing zero) are returned.
				487	*/
				488	static inline struct ist ist2str(char *dst, const struct ist src, size_t count)
				489	{
				490	size_t ofs = 0;
				491
				492	/* discourage the compiler from trying to optimize for large strings,
				493	* but tell it that most of our strings are not empty.
				494	*/
				495	if (__builtin_expect(ofs < src.len, 1)) {
				496	do {
				497	dst[ofs] = src.ptr[ofs];
				498	ofs++;
				499	} while (__builtin_expect(ofs < src.len, 0));
				500	}
				501	dst[ofs] = 0;
				502	return ist2(dst, ofs);
				503	}
				504
				505	/* makes a lower case copy of the entire <src> into <dst>, which must have been
				506	* allocated large enough to hold the whole contents. No trailing zero is
				507	* appended, this is mainly used for protocol processing where the frame length
				508	* has already been checked. An ist made of the output and its length are
				509	* returned. The destination is not touched if src.len is null.
				510	*/
				511	static inline struct ist ist2bin_lc(char *dst, const struct ist src)
				512	{
				513	size_t ofs = 0;
				514
				515	/* discourage the compiler from trying to optimize for large strings,
				516	* but tell it that most of our strings are not empty.
				517	*/
				518	if (__builtin_expect(ofs < src.len, 1)) {
				519	do {
Willy Tarreau	d6735d6	2018-12-07 09:40:01 +0100	[diff] [blame]	520	dst[ofs] = ist_lc[(unsigned char)src.ptr[ofs]];
Willy Tarreau	3f2d696	2018-12-07 08:35:07 +0100	[diff] [blame]	521	ofs++;
				522	} while (__builtin_expect(ofs < src.len, 0));
				523	}
				524	return ist2(dst, ofs);
				525	}
				526
				527	/* makes a lower case copy of the entire <src> into <dst>, which must have been
				528	* allocated large enough to hold the whole contents as well as a trailing zero
				529	* which is always appended. This is mainly used for protocol conversions where
				530	* the frame length has already been checked. An ist made of the output and its
				531	* length (not counting the trailing zero) are returned.
				532	*/
				533	static inline struct ist ist2str_lc(char *dst, const struct ist src, size_t count)
				534	{
				535	size_t ofs = 0;
				536
				537	/* discourage the compiler from trying to optimize for large strings,
				538	* but tell it that most of our strings are not empty.
				539	*/
				540	if (__builtin_expect(ofs < src.len, 1)) {
				541	do {
Willy Tarreau	d6735d6	2018-12-07 09:40:01 +0100	[diff] [blame]	542	dst[ofs] = ist_lc[(unsigned char)src.ptr[ofs]];
Willy Tarreau	3f2d696	2018-12-07 08:35:07 +0100	[diff] [blame]	543	ofs++;
				544	} while (__builtin_expect(ofs < src.len, 0));
				545	}
				546	dst[ofs] = 0;
				547	return ist2(dst, ofs);
				548	}
				549
				550	/* makes an upper case copy of the entire <src> into <dst>, which must have
				551	* been allocated large enough to hold the whole contents. No trailing zero is
				552	* appended, this is mainly used for protocol processing where the frame length
				553	* has already been checked. An ist made of the output and its length are
				554	* returned. The destination is not touched if src.len is null.
				555	*/
				556	static inline struct ist ist2bin_uc(char *dst, const struct ist src)
				557	{
				558	size_t ofs = 0;
				559
				560	/* discourage the compiler from trying to optimize for large strings,
				561	* but tell it that most of our strings are not empty.
				562	*/
				563	if (__builtin_expect(ofs < src.len, 1)) {
				564	do {
Willy Tarreau	d6735d6	2018-12-07 09:40:01 +0100	[diff] [blame]	565	dst[ofs] = ist_uc[(unsigned char)src.ptr[ofs]];
Willy Tarreau	3f2d696	2018-12-07 08:35:07 +0100	[diff] [blame]	566	ofs++;
				567	} while (__builtin_expect(ofs < src.len, 0));
				568	}
				569	return ist2(dst, ofs);
				570	}
				571
				572	/* makes an upper case copy of the entire <src> into <dst>, which must have been
				573	* allocated large enough to hold the whole contents as well as a trailing zero
				574	* which is always appended. This is mainly used for protocol conversions where
				575	* the frame length has already been checked. An ist made of the output and its
				576	* length (not counting the trailing zero) are returned.
				577	*/
				578	static inline struct ist ist2str_uc(char *dst, const struct ist src, size_t count)
				579	{
				580	size_t ofs = 0;
				581
				582	/* discourage the compiler from trying to optimize for large strings,
				583	* but tell it that most of our strings are not empty.
				584	*/
				585	if (__builtin_expect(ofs < src.len, 1)) {
				586	do {
Willy Tarreau	d6735d6	2018-12-07 09:40:01 +0100	[diff] [blame]	587	dst[ofs] = ist_uc[(unsigned char)src.ptr[ofs]];
Willy Tarreau	3f2d696	2018-12-07 08:35:07 +0100	[diff] [blame]	588	ofs++;
				589	} while (__builtin_expect(ofs < src.len, 0));
				590	}
				591	dst[ofs] = 0;
				592	return ist2(dst, ofs);
				593	}
				594
Willy Tarreau	e11f727	2017-05-30 17:49:36 +0200	[diff] [blame]	595	/* looks for first occurrence of character <chr> in string <ist>. Returns the
				596	* pointer if found, or NULL if not found.
				597	*/
				598	static inline char *istchr(const struct ist ist, char chr)
				599	{
				600	char *s = ist.ptr;
				601
				602	do {
				603	if (s >= ist.ptr + ist.len)
				604	return NULL;
				605	} while (*s++ != chr);
				606	return s - 1;
				607	}
				608
Willy Tarreau	8f3ce06	2019-11-22 15:58:53 +0100	[diff] [blame]	609	/* Returns a pointer to the first control character found in <ist>, or NULL if
				610	* none is present. A control character is defined as a byte whose value is
				611	* between 0x00 and 0x1F included. The function is optimized for strings having
				612	* no CTL chars by processing up to sizeof(long) bytes at once on architectures
				613	* supporting efficient unaligned accesses. Despite this it is not very fast
				614	* (~0.43 byte/cycle) and should mostly be used on low match probability when
				615	* it can save a call to a much slower function.
				616	*/
				617	static inline const char *ist_find_ctl(const struct ist ist)
				618	{
				619	const union { unsigned long v; } __attribute__((packed)) *u;
				620	const char curr = (void )ist.ptr - sizeof(long);
				621	const char *last = curr + ist.len;
				622	unsigned long l1, l2;
				623
				624	do {
				625	curr += sizeof(long);
				626	if (curr > last)
				627	break;
				628	u = (void *)curr;
				629	/* subtract 0x202020...20 to the value to generate a carry in
				630	* the lower byte if the byte contains a lower value. If we
				631	* generate a bit 7 that was not there, it means the byte was
				632	* within 0x00..0x1F.
				633	*/
				634	l2 = u->v;
				635	l1 = ~l2 & ((~0UL / 255) * 0x80); /* 0x808080...80 */
				636	l2 -= (~0UL / 255) * 0x20; /* 0x202020...20 */
				637	} while ((l1 & l2) == 0);
				638
				639	last += sizeof(long);
				640	if (__builtin_expect(curr < last, 0)) {
				641	do {
				642	if ((uint8_t)*curr < 0x20)
				643	return curr;
				644	curr++;
				645	} while (curr < last);
				646	}
				647	return NULL;
				648	}
				649
Willy Tarreau	e11f727	2017-05-30 17:49:36 +0200	[diff] [blame]	650	/* looks for first occurrence of character <chr> in string <ist> and returns
				651	* the tail of the string starting with this character, or (ist.end,0) if not
				652	* found.
				653	*/
				654	static inline struct ist istfind(const struct ist ist, char chr)
				655	{
				656	struct ist ret = ist;
				657
				658	while (ret.len--) {
				659	if (*ret.ptr++ == chr)
				660	return ist2(ret.ptr - 1, ret.len + 1);
				661	}
				662	return ist2(ret.ptr, 0);
				663	}
				664
				665	/* looks for first occurrence of character different from <chr> in string <ist>
				666	* and returns the tail of the string starting at this character, or (ist_end,0)
				667	* if not found.
				668	*/
				669	static inline struct ist istskip(const struct ist ist, char chr)
				670	{
				671	struct ist ret = ist;
				672
				673	while (ret.len--) {
				674	if (*ret.ptr++ != chr)
				675	return ist2(ret.ptr - 1, ret.len + 1);
				676	}
				677	return ist2(ret.ptr, 0);
				678	}
				679
				680	/* looks for first occurrence of string <pat> in string <ist> and returns the
				681	* tail of the string starting at this position, or (NULL,0) if not found. The
				682	* empty pattern is found everywhere.
				683	*/
				684	static inline struct ist istist(const struct ist ist, const struct ist pat)
				685	{
				686	struct ist ret = ist;
				687	size_t pos;
				688
				689	if (!pat.len)
				690	return ret;
				691
				692	while (1) {
				693	loop:
				694	ret = istfind(ret, *pat.ptr);
				695	if (ret.len < pat.len)
				696	break;
				697
				698	/* ret.len >= 1, pat.len >= 1 and ret.ptr == pat.ptr */
				699
				700	ret = istnext(ret);
				701	for (pos = 0; pos < pat.len - 1; ) {
				702	++pos;
				703	if (ret.ptr[pos - 1] != pat.ptr[pos])
				704	goto loop;
				705	}
				706	return ist2(ret.ptr - 1, ret.len + 1);
				707	}
				708	return ist2(NULL, 0);
				709	}
				710
				711	#endif