/* plock - progressive locks
 *
 * Copyright (C) 2012-2017 Willy Tarreau <w@1wt.eu>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "atomic-ops.h"

/* 64-bit lock word layout:
 *   bits  2..31 : R (shared read) counter, one holder = PLOCK64_RL_1
 *   bits 32..33 : S (seek) counter,        one holder = PLOCK64_SL_1
 *   bits 34..63 : W (write) counter,       one holder = PLOCK64_WL_1
 * Each *_ANY mask covers its whole field and is used to test whether at
 * least one holder of that kind is present. Bits 0 and 1 are not covered
 * by any of these masks.
 */
#define PLOCK64_RL_1   0x0000000000000004ULL
#define PLOCK64_RL_ANY 0x00000000FFFFFFFCULL
#define PLOCK64_SL_1   0x0000000100000000ULL
#define PLOCK64_SL_ANY 0x0000000300000000ULL
#define PLOCK64_WL_1   0x0000000400000000ULL
#define PLOCK64_WL_ANY 0xFFFFFFFC00000000ULL

/* 32-bit lock word layout:
 *   bits  2..15 : R (shared read) counter, one holder = PLOCK32_RL_1
 *   bits 16..17 : S (seek) counter,        one holder = PLOCK32_SL_1
 *   bits 18..31 : W (write) counter,       one holder = PLOCK32_WL_1
 * Each *_ANY mask covers its whole field and is used to test whether at
 * least one holder of that kind is present. Bits 0 and 1 are not covered
 * by any of these masks.
 */
#define PLOCK32_RL_1   0x00000004
#define PLOCK32_RL_ANY 0x0000FFFC
#define PLOCK32_SL_1   0x00010000
#define PLOCK32_SL_ANY 0x00030000
#define PLOCK32_WL_1   0x00040000
#define PLOCK32_WL_ANY 0xFFFC0000

/* Dereferences <*p> as unsigned long without causing aliasing issues: <p> is
 * converted through void * to a pointer to volatile unsigned long, and the
 * statement expression evaluates to the value loaded through it. <p> is
 * evaluated exactly once.
 */
#define pl_deref_long(p) ({ volatile unsigned long *__plock_l = (void *)(p); *__plock_l; })

/* Dereferences <*p> as unsigned int without causing aliasing issues: <p> is
 * converted through void * to a pointer to volatile unsigned int, and the
 * statement expression evaluates to the value loaded through it. <p> is
 * evaluated exactly once.
 */
#define pl_deref_int(p) ({ volatile unsigned int *__plock_i = (void *)(p); *__plock_i; })

/* Request shared read access (R); evaluates to non-zero on success, otherwise
 * 0. The lock word is first read without being modified, and a reader is only
 * added when no W bit is set. The value obtained from pl_xadd() is checked
 * again (it is used here as the value preceding the addition), and the reader
 * is removed if a W holder showed up in the meantime.
 *
 * <lock> must point to a 32-bit word, or to a 64-bit word when long is 64
 * bits. Any other size selects a call to a function that is only declared
 * here (presumably never defined so that the build fails -- to be confirmed).
 * <lock> is evaluated several times. The internal variable is named __pl_r so
 * that a <lock> expression mentioning a caller variable called "ret" is not
 * captured by the macro.
 */
#define pl_try_r(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_WL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_RL_1) & PLOCK64_WL_ANY; \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK64_RL_1); /* lost the race, undo */ \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_WL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_RL_1) & PLOCK32_WL_ANY; \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK32_RL_1); /* lost the race, undo */ \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_r__(char *,int); \
		__unsupported_argument_size_for_pl_try_r__(__FILE__,__LINE__); \
		0; \
	}) \
)

/* Request shared read access (R) and wait for it: pl_try_r() is retried,
 * with a call to pl_cpu_relax() after each failed attempt, until it reports
 * success. Success on the first attempt is hinted as the likely case.
 * <lock> is evaluated once per attempt.
 */
#define pl_take_r(lock) \
	do { \
		while (__builtin_expect(pl_try_r(lock), 1) == 0) \
			pl_cpu_relax(); \
	} while (0)

/* Release the read access (R) lock by subtracting one R unit from the lock
 * word with pl_sub(). <lock> must point to a 32-bit word, or to a 64-bit
 * word when long is 64 bits; any other size selects a call to a function
 * that is only declared here (presumably never defined so that the build
 * fails -- to be confirmed).
 */
#define pl_drop_r(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_RL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_RL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_drop_r__(char *,int); \
		__unsupported_argument_size_for_pl_drop_r__(__FILE__,__LINE__); \
	}) \
)

/* Request a seek access (S); evaluates to non-zero on success, otherwise 0.
 * The lock word is first read without being modified; if any S or W bit is
 * set the attempt fails immediately. Otherwise one S unit and one R unit are
 * added at once, and both are removed again if the value obtained from
 * pl_xadd() (used here as the value preceding the addition) shows that an S
 * or W holder arrived in the meantime.
 *
 * <lock> must point to a 32-bit word, or to a 64-bit word when long is 64
 * bits. <lock> is evaluated several times. The internal variable is named
 * __pl_r so that a <lock> expression mentioning a caller variable called
 * "ret" is not captured by the macro.
 */
#define pl_try_s(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_SL_1 | PLOCK64_RL_1) & \
			         (PLOCK64_WL_ANY | PLOCK64_SL_ANY); \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK64_SL_1 | PLOCK64_RL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_SL_1 | PLOCK32_RL_1) & \
			         (PLOCK32_WL_ANY | PLOCK32_SL_ANY); \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK32_SL_1 | PLOCK32_RL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_s__(char *,int); \
		__unsupported_argument_size_for_pl_try_s__(__FILE__,__LINE__); \
		0; \
	}) \
)

/* Request a seek access (S) and wait for it: pl_try_s() is retried, with a
 * call to pl_cpu_relax() after each failed attempt, until it reports
 * success. <lock> is evaluated once per attempt.
 */
#define pl_take_s(lock) \
	do { \
		while (__builtin_expect(pl_try_s(lock), 0) == 0) \
			pl_cpu_relax(); \
	} while (0)

/* Release the seek access (S) lock: one S unit and one R unit are subtracted
 * from the lock word in a single pl_sub() call, mirroring what pl_try_s()
 * adds. <lock> must point to a 32-bit word, or to a 64-bit word when long is
 * 64 bits.
 */
#define pl_drop_s(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_SL_1 + PLOCK64_RL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_SL_1 + PLOCK32_RL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_drop_s__(char *,int); \
		__unsupported_argument_size_for_pl_drop_s__(__FILE__,__LINE__); \
	}) \
)

/* Drop the S lock and go back to the R lock: only the S unit is subtracted
 * from the lock word, so the R unit that was taken together with S stays in
 * place. <lock> must point to a 32-bit word, or to a 64-bit word when long
 * is 64 bits.
 */
#define pl_stor(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_SL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_SL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_stor__(char *,int); \
		__unsupported_argument_size_for_pl_stor__(__FILE__,__LINE__); \
	}) \
)

/* Take the W lock under the S lock: one W unit is added to the lock word,
 * then the word is re-read in a loop until the R field holds exactly one
 * unit, i.e. until the only R unit left is the one the S holder itself
 * carries. The macro produces no value. There is no failure path: it spins
 * until the condition is met.
 *
 * <lock> must point to a 32-bit word, or to a 64-bit word when long is 64
 * bits. <lock> is evaluated several times. The internal variable is named
 * __pl_r so that a <lock> expression mentioning a caller variable called
 * "ret" is not captured by the macro.
 */
#define pl_stow(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_xadd((lock), PLOCK64_WL_1); \
		pl_barrier(); \
		while ((__pl_r & PLOCK64_RL_ANY) != PLOCK64_RL_1) \
			__pl_r = pl_deref_long(lock); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_xadd((lock), PLOCK32_WL_1); \
		pl_barrier(); \
		while ((__pl_r & PLOCK32_RL_ANY) != PLOCK32_RL_1) \
			__pl_r = pl_deref_int(lock); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_stow__(char *,int); \
		__unsupported_argument_size_for_pl_stow__(__FILE__,__LINE__); \
	}) \
)

/* Drop the W lock and go back to the S lock: only the W unit is subtracted
 * from the lock word, leaving the S and R units in place. <lock> must point
 * to a 32-bit word, or to a 64-bit word when long is 64 bits.
 */
#define pl_wtos(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_WL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_WL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_wtos__(char *,int); \
		__unsupported_argument_size_for_pl_wtos__(__FILE__,__LINE__); \
	}) \
)

/* Drop the W lock and go back to the R lock: the W unit and the S unit are
 * subtracted from the lock word in a single pl_sub() call, leaving only the
 * R unit in place. <lock> must point to a 32-bit word, or to a 64-bit word
 * when long is 64 bits.
 */
#define pl_wtor(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_WL_1 | PLOCK64_SL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_WL_1 | PLOCK32_SL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_wtor__(char *,int); \
		__unsupported_argument_size_for_pl_wtor__(__FILE__,__LINE__); \
	}) \
)

/* Request a write access (W); evaluates to non-zero on success, otherwise 0.
 * The attempt fails immediately if the lock word already shows an S or W
 * holder. Otherwise W, S and R are added at once; if the value obtained from
 * pl_xadd() (used here as the value preceding the addition) shows an S or W
 * holder, the three units are removed and the attempt fails. Otherwise the
 * macro spins, without giving up, until the lock word holds nothing but its
 * own W|S|R units, i.e. until all other readers have left.
 *
 * Below there is something important : by taking both W and S, we will cause
 * an overflow of W at 4/5 of the maximum value that can be stored into W due
 * to the fact that S is 2 bits, so we're effectively adding 5 to the word
 * composed by W:S. But for all words multiple of 4 bits, the maximum value is
 * multiple of 15 thus of 5. So the largest value we can store with all bits
 * set to one will be met by adding 5, and then adding 5 again will place value
 * 1 in W and value 0 in S, so we never leave W with 0. Also, even upon such an
 * overflow, there's no risk to confuse it with an atomic lock because R is not
 * null since it will not have overflown. For 32-bit locks, this situation
 * happens when exactly 13108 threads try to grab the lock at once, W=1, S=0
 * and R=13108. For 64-bit locks, it happens at 858993460 concurrent writers
 * where W=1, S=0 and R=858993460.
 *
 * <lock> is evaluated several times. The internal variable is named __pl_r
 * so that a <lock> expression mentioning a caller variable called "ret" is
 * not captured by the macro.
 */
#define pl_try_w(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
			if (__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
				/* a writer, seeker or atomic is present, let's leave */ \
				pl_sub((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
				__pl_r &= (PLOCK64_WL_ANY | PLOCK64_SL_ANY); /* return value */ \
			} else { \
				/* wait for all other readers to leave */ \
				while (__pl_r) \
					__pl_r = pl_deref_long(lock) - \
					         (PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
				__pl_r = 0; \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
			if (__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
				/* a writer, seeker or atomic is present, let's leave */ \
				pl_sub((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
				__pl_r &= (PLOCK32_WL_ANY | PLOCK32_SL_ANY); /* return value */ \
			} else { \
				/* wait for all other readers to leave */ \
				while (__pl_r) \
					__pl_r = pl_deref_int(lock) - \
					         (PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
				__pl_r = 0; \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_w__(char *,int); \
		__unsupported_argument_size_for_pl_try_w__(__FILE__,__LINE__); \
		0; \
	}) \
)

/* Request a write access (W) and wait for it: pl_try_w() is retried, with a
 * call to pl_cpu_relax() after each failed attempt, until it reports
 * success. <lock> is evaluated once per attempt.
 */
#define pl_take_w(lock) \
	do { \
		while (__builtin_expect(pl_try_w(lock), 0) == 0) \
			pl_cpu_relax(); \
	} while (0)

/* Drop the write (W) lock entirely: the W, S and R units that pl_try_w()
 * adds are subtracted from the lock word in a single pl_sub() call. <lock>
 * must point to a 32-bit word, or to a 64-bit word when long is 64 bits.
 */
#define pl_drop_w(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_drop_w__(char *,int); \
		__unsupported_argument_size_for_pl_drop_w__(__FILE__,__LINE__); \
	}) \
)

/* Try to upgrade from R to S, return non-zero on success, otherwise 0.
 * This lock will fail if S or W are already held. In case of failure to grab
 * the lock, it MUST NOT be retried without first dropping R, or it may never
 * complete due to S waiting for R to leave before upgrading to W.
 *
 * Only one S unit is added, since the caller already holds its R unit; the S
 * unit is removed again when the value obtained from pl_xadd() (used here as
 * the value preceding the addition) shows an S or W holder.
 *
 * <lock> is evaluated several times. The internal variable is named __pl_r
 * so that a <lock> expression mentioning a caller variable called "ret" is
 * not captured by the macro.
 */
#define pl_try_rtos(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_SL_1) & \
			         (PLOCK64_WL_ANY | PLOCK64_SL_ANY); \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK64_SL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_SL_1) & \
			         (PLOCK32_WL_ANY | PLOCK32_SL_ANY); \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK32_SL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_rtos__(char *,int); \
		__unsupported_argument_size_for_pl_try_rtos__(__FILE__,__LINE__); \
		0; \
	}) \
)


/* Request atomic write access (A), return non-zero on success, otherwise 0.
 * It's a bit tricky as we only use the W bits for this and want to distinguish
 * between other atomic users and regular lock users. We have to give up if an
 * S lock appears. It's possible that such a lock stays hidden in the W bits
 * after an overflow, but in this case R is still held, ensuring we stay in the
 * loop until we discover the conflict. The lock only returns successfully if
 * all readers are gone (or converted to A).
 *
 * The attempt fails immediately when an S bit is already set. Otherwise one W
 * unit is added and the lock word is re-read in a loop: the W unit is removed
 * and the attempt fails as soon as an S bit is seen, and the attempt succeeds
 * once the R field reads zero.
 *
 * <lock> is evaluated several times. The internal variable is named __pl_r
 * so that a <lock> expression mentioning a caller variable called "ret" is
 * not captured by the macro.
 */
#define pl_try_a(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_WL_1); \
			while (1) { \
				if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \
					pl_sub((lock), PLOCK64_WL_1); \
					break; /* return !__pl_r */ \
				} \
				__pl_r &= PLOCK64_RL_ANY; \
				if (!__builtin_expect(__pl_r, 0)) \
					break; /* return !__pl_r */ \
				__pl_r = pl_deref_long(lock); \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_WL_1); \
			while (1) { \
				if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \
					pl_sub((lock), PLOCK32_WL_1); \
					break; /* return !__pl_r */ \
				} \
				__pl_r &= PLOCK32_RL_ANY; \
				if (!__builtin_expect(__pl_r, 0)) \
					break; /* return !__pl_r */ \
				__pl_r = pl_deref_int(lock); \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_a__(char *,int); \
		__unsupported_argument_size_for_pl_try_a__(__FILE__,__LINE__); \
		0; \
	}) \
)

/* Request atomic write access (A) and wait for it: pl_try_a() is retried,
 * with a call to pl_cpu_relax() after each failed attempt, until it reports
 * success. Success on the first attempt is hinted as the likely case.
 * <lock> is evaluated once per attempt.
 */
#define pl_take_a(lock) \
	do { \
		while (__builtin_expect(pl_try_a(lock), 1) == 0) \
			pl_cpu_relax(); \
	} while (0)

/* Release atomic write access (A) lock: the single W unit that pl_try_a()
 * adds is subtracted from the lock word with pl_sub(). <lock> must point to
 * a 32-bit word, or to a 64-bit word when long is 64 bits.
 */
#define pl_drop_a(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_WL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_WL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_drop_a__(char *,int); \
		__unsupported_argument_size_for_pl_drop_a__(__FILE__,__LINE__); \
	}) \
)

/* Try to upgrade from R to A, return non-zero on success, otherwise 0.
 * This lock will fail if S is held or appears while waiting (typically due to
 * a previous grab that was disguised as a W due to an overflow). In case of
 * failure to grab the lock, it MUST NOT be retried without first dropping R,
 * or it may never complete due to S waiting for R to leave before upgrading
 * to W. The lock succeeds once there's no more R (ie all of them have either
 * completed or were turned to A).
 *
 * The conversion adds (W - R) in a single operation, i.e. it adds one W unit
 * and removes the caller's R unit at once. On failure the same quantity is
 * subtracted, which gives the caller its R unit back.
 *
 * <lock> is evaluated several times. The internal variable is named __pl_r
 * so that a <lock> expression mentioning a caller variable called "ret" is
 * not captured by the macro.
 */
#define pl_try_rtoa(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \
			while (1) { \
				if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \
					pl_sub((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \
					break; /* return !__pl_r */ \
				} \
				__pl_r &= PLOCK64_RL_ANY; \
				if (!__builtin_expect(__pl_r, 0)) \
					break; /* return !__pl_r */ \
				__pl_r = pl_deref_long(lock); \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \
			while (1) { \
				if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \
					pl_sub((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \
					break; /* return !__pl_r */ \
				} \
				__pl_r &= PLOCK32_RL_ANY; \
				if (!__builtin_expect(__pl_r, 0)) \
					break; /* return !__pl_r */ \
				__pl_r = pl_deref_int(lock); \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_rtoa__(char *,int); \
		__unsupported_argument_size_for_pl_try_rtoa__(__FILE__,__LINE__); \
		0; \
	}) \
)