/*
 * Fast system call support for x86 on Linux
 *
 * Copyright 2010 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Recent kernels support a faster syscall ABI on x86 using the VDSO page, but
 * some libc builds targeting CPUs older than i686 do not implement it.
 * This code bypasses the libc when the VDSO is detected. It should only be
 * used when it is certain that the libc really does not support the VDSO;
 * fixing the libc remains the preferred solution. Using the VDSO can improve
 * the overall performance by about 10%.
 */
18
19#if defined(__linux__) && defined(__i386__)
20/* Silently ignore other platforms to be friendly with distro packagers */
21
22#include <dlfcn.h>
23#include <sys/mman.h>
24
/* Default syscall entry point ("int $0x80" followed by "ret"). It is defined
 * in the top-level assembler code of this file, hence only declared here.
 */
void int80(void);

/* Address that the assembler wrappers call to enter the kernel. It starts as
 * int80 so that it is always valid, and the constructor may later replace it
 * with the VDSO entry point. The only reader is the top-level asm
 * ("call *vsyscall"), which the compiler cannot see: without the "used"
 * attribute an optimizing compiler is allowed to discard this write-only
 * static variable, which would leave the asm reference unresolved.
 */
static __attribute__((used)) void *vsyscall = &int80;

/* Slot where the assembler wrappers save the caller's %ebx before loading the
 * first syscall argument into it. Only referenced from the asm, hence "used".
 */
static __attribute__((used)) unsigned int back_ebx;
28
29/* now we redefine some frequently used syscalls. Epoll_create is defined too
30 * in order to replace old disabled implementations.
31 */
32asm
33(
34 "epoll_create: .GLOBL epoll_create\n"
35 " mov $0xfe, %eax\n"
36 " mov %ebx, back_ebx\n"
37 " mov 4(%esp), %ebx\n"
38 " jmp do_syscall\n"
39
40 "epoll_ctl: .GLOBL epoll_ctl\n"
41 " push %esi\n"
42 " mov $0xff, %eax\n"
43 " mov %ebx, back_ebx\n"
44 " mov 20(%esp), %esi\n"
45 " mov 16(%esp), %edx\n"
46 " mov 12(%esp), %ecx\n"
47 " mov 8(%esp), %ebx\n"
48 " call do_syscall\n"
49 " pop %esi\n"
50 " ret\n"
51
52 "epoll_wait: .GLOBL epoll_wait\n"
53 " push %esi\n"
54 " mov $0x100, %eax\n"
55 " mov %ebx, back_ebx\n"
56 " mov 20(%esp), %esi\n"
57 " mov 16(%esp), %edx\n"
58 " mov 12(%esp), %ecx\n"
59 " mov 8(%esp), %ebx\n"
60 " call do_syscall\n"
61 " pop %esi\n"
62 " ret\n"
63
64 "splice: .GLOBL splice\n"
65 " push %ebp\n"
66 " push %edi\n"
67 " push %esi\n"
68 " mov $0x139, %eax\n"
69 " mov %ebx, back_ebx\n"
70 " mov 36(%esp), %ebp\n"
71 " mov 32(%esp), %edi\n"
72 " mov 28(%esp), %esi\n"
73 " mov 24(%esp), %edx\n"
74 " mov 20(%esp), %ecx\n"
75 " mov 16(%esp), %ebx\n"
76 " call do_syscall\n"
77 " pop %esi\n"
78 " pop %edi\n"
79 " pop %ebp\n"
80 " ret\n"
81
82 "close: .GLOBL close\n"
83 " mov $0x06, %eax\n"
84 " mov %ebx, back_ebx\n"
85 " mov 4(%esp), %ebx\n"
86 " jmp do_syscall\n"
87
88 "gettimeofday: .GLOBL gettimeofday\n"
89 " mov $0x4e, %eax\n"
90 " mov %ebx, back_ebx\n"
91 " mov 8(%esp), %ecx\n"
92 " mov 4(%esp), %ebx\n"
93 " jmp do_syscall\n"
94
95 "fcntl: .GLOBL fcntl\n"
96 " mov $0xdd, %eax\n"
97 " mov %ebx, back_ebx\n"
98 " mov 12(%esp), %edx\n"
99 " mov 8(%esp), %ecx\n"
100 " mov 4(%esp), %ebx\n"
101 " jmp do_syscall\n"
102
103 "socket: .GLOBL socket\n"
104 " mov $0x01, %eax\n"
105 " jmp socketcall\n"
106
107 "bind: .GLOBL bind\n"
108 " mov $0x02, %eax\n"
109 " jmp socketcall\n"
110
111 "connect: .GLOBL connect\n"
112 " mov $0x03, %eax\n"
113 " jmp socketcall\n"
114
115 "listen: .GLOBL listen\n"
116 " mov $0x04, %eax\n"
117 " jmp socketcall\n"
118
119 "accept: .GLOBL accept\n"
120 " mov $0x05, %eax\n"
121 " jmp socketcall\n"
122
Willy Tarreau1bc4aab2012-10-08 20:11:03 +0200123 "accept4: .GLOBL accept4\n"
124 " mov $0x12, %eax\n"
125 " jmp socketcall\n"
126
Willy Tarreau48d84c12010-11-14 17:09:33 +0100127 "getsockname: .GLOBL getsockname\n"
128 " mov $0x06, %eax\n"
129 " jmp socketcall\n"
130
131 "send: .GLOBL send\n"
132 " mov $0x09, %eax\n"
133 " jmp socketcall\n"
134
135 "recv: .GLOBL recv\n"
136 " mov $0x0a, %eax\n"
137 " jmp socketcall\n"
138
139 "shutdown: .GLOBL shutdown\n"
140 " mov $0x0d, %eax\n"
141 " jmp socketcall\n"
142
143 "setsockopt: .GLOBL setsockopt\n"
144 " mov $0x0e, %eax\n"
145 " jmp socketcall\n"
146
147 "getsockopt: .GLOBL getsockopt\n"
148 " mov $0x0f, %eax\n"
149 " jmp socketcall\n"
150
151 "socketcall:\n"
152 " mov %ebx, back_ebx\n"
153 " mov %eax, %ebx\n"
154 " mov $0x66, %eax\n"
155 " lea 4(%esp), %ecx\n"
156 /* fall through */
157
158 "do_syscall:\n"
159 " call *vsyscall\n" // always valid, may be int80 or vsyscall
160 " mov back_ebx, %ebx\n"
161 " cmpl $0xfffff000, %eax\n" // consider -4096..-1 for errno
162 " jae 0f\n"
163 " ret\n"
164 "0:\n" // error handling
165 " neg %eax\n" // get errno value
166 " push %eax\n" // save it
167 " call __errno_location\n"
168 " popl (%eax)\n" // store the pushed errno into the proper location
169 " mov $-1, %eax\n" // and return -1
170 " ret\n"
171
172 "int80:\n" // default compatible calling convention
173 " int $0x80\n"
174 " ret\n"
175);
176
177__attribute__((constructor))
178static void __i386_linux_vsyscall_init(void)
179{
180 /* We can get the pointer by resolving the __kernel_vsyscall symbol
181 * from the "linux-gate.so.1" virtual shared object, but this requires
182 * libdl. Or we can also know that the vsyscall pointer is always
183 * located at 0xFFFFE018 when /proc/sys/abi/vsyscall32 contains the
184 * default value 2. So we can use that once we've checked that we can
185 * access it without faulting. The dlsym method will also work when
186 * vsyscall32 = 1, which randomizes the VDSO address.
187 */
188#ifdef USE_VSYSCALL_DLSYM
189 void *handle = dlopen("linux-gate.so.1", RTLD_NOW);
190 if (handle) {
Willy Tarreau5b88da22012-06-18 20:01:30 +0200191 void *ptr;
192
193 ptr = dlsym(handle, "__kernel_vsyscall_kml");
194 if (!ptr)
195 ptr = dlsym(handle, "__kernel_vsyscall");
Willy Tarreau48d84c12010-11-14 17:09:33 +0100196 if (ptr)
197 vsyscall = ptr;
Willy Tarreau5b88da22012-06-18 20:01:30 +0200198 dlclose(handle);
Willy Tarreau48d84c12010-11-14 17:09:33 +0100199 }
200#else
201 /* Heuristic: trying to mprotect() the VDSO area will only succeed if
202 * it is mapped.
203 */
204 if (mprotect((void *)0xffffe000, 4096, PROT_READ|PROT_EXEC) == 0) {
205 unsigned long ptr = *(unsigned long *)0xFFFFE018; /* VDSO is mapped */
206 if ((ptr & 0xFFFFE000) == 0xFFFFE000)
207 vsyscall = (void *)ptr;
208 }
209#endif
210}
211
212#endif /* defined(__linux__) && defined(__i386__) */