blob: a4927e2d20b6ad4c7d72f3224d9a5ab51a388b66 [file] [log] [blame]
Willy Tarreau48d84c12010-11-14 17:09:33 +01001/*
2 * Fast system call support for x86 on Linux
3 *
4 * Copyright 2010 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
 * Recent kernels support a faster syscall ABI on x86 using the VDSO page, but
 * some libc builds targeting CPUs earlier than i686 do not implement it.
 * This code bypasses the libc when the VDSO is detected. It should only be
 * used when it is certain that the libc really does not support the VDSO;
 * fixing the libc is the preferred solution. Using the VDSO can improve the
 * overall performance by about 10%.
17 */
18
19#if defined(__linux__) && defined(__i386__)
20/* Silently ignore other platforms to be friendly with distro packagers */
21
22#include <dlfcn.h>
23#include <sys/mman.h>
24
/* Default syscall entry point. Its label is defined in the top-level asm
 * statement of this file and consists of a bare "int $0x80 ; ret".
 */
void int80(void);

/* Entry point reached by the assembler stubs through "call *vsyscall". It
 * defaults to int80 so that it is always valid, and may be overwritten once
 * at startup when the VDSO entry point is found.
 *
 * From the compiler's point of view this variable is only ever written: the
 * sole reader is the top-level asm code, which the compiler cannot see. The
 * "used" attribute keeps it from being treated as a write-only static and
 * dropped by the optimizer, exactly as is already done for back_ebx below.
 */
static __attribute__((used)) void *vsyscall = &int80;

/* Slot where the assembler stubs save the caller's %ebx around a syscall.
 * It is never referenced from C code, hence the "used" attribute.
 */
static __attribute__((used)) unsigned int back_ebx;
28
29/* now we redefine some frequently used syscalls. Epoll_create is defined too
30 * in order to replace old disabled implementations.
31 */
32asm
33(
34 "epoll_create: .GLOBL epoll_create\n"
35 " mov $0xfe, %eax\n"
36 " mov %ebx, back_ebx\n"
37 " mov 4(%esp), %ebx\n"
38 " jmp do_syscall\n"
39
40 "epoll_ctl: .GLOBL epoll_ctl\n"
41 " push %esi\n"
42 " mov $0xff, %eax\n"
43 " mov %ebx, back_ebx\n"
44 " mov 20(%esp), %esi\n"
45 " mov 16(%esp), %edx\n"
46 " mov 12(%esp), %ecx\n"
47 " mov 8(%esp), %ebx\n"
48 " call do_syscall\n"
49 " pop %esi\n"
50 " ret\n"
51
52 "epoll_wait: .GLOBL epoll_wait\n"
53 " push %esi\n"
54 " mov $0x100, %eax\n"
55 " mov %ebx, back_ebx\n"
56 " mov 20(%esp), %esi\n"
57 " mov 16(%esp), %edx\n"
58 " mov 12(%esp), %ecx\n"
59 " mov 8(%esp), %ebx\n"
60 " call do_syscall\n"
61 " pop %esi\n"
62 " ret\n"
63
64 "splice: .GLOBL splice\n"
65 " push %ebp\n"
66 " push %edi\n"
67 " push %esi\n"
68 " mov $0x139, %eax\n"
69 " mov %ebx, back_ebx\n"
70 " mov 36(%esp), %ebp\n"
71 " mov 32(%esp), %edi\n"
72 " mov 28(%esp), %esi\n"
73 " mov 24(%esp), %edx\n"
74 " mov 20(%esp), %ecx\n"
75 " mov 16(%esp), %ebx\n"
76 " call do_syscall\n"
77 " pop %esi\n"
78 " pop %edi\n"
79 " pop %ebp\n"
80 " ret\n"
81
82 "close: .GLOBL close\n"
83 " mov $0x06, %eax\n"
84 " mov %ebx, back_ebx\n"
85 " mov 4(%esp), %ebx\n"
86 " jmp do_syscall\n"
87
88 "gettimeofday: .GLOBL gettimeofday\n"
89 " mov $0x4e, %eax\n"
90 " mov %ebx, back_ebx\n"
91 " mov 8(%esp), %ecx\n"
92 " mov 4(%esp), %ebx\n"
93 " jmp do_syscall\n"
94
95 "fcntl: .GLOBL fcntl\n"
96 " mov $0xdd, %eax\n"
97 " mov %ebx, back_ebx\n"
98 " mov 12(%esp), %edx\n"
99 " mov 8(%esp), %ecx\n"
100 " mov 4(%esp), %ebx\n"
101 " jmp do_syscall\n"
102
103 "socket: .GLOBL socket\n"
104 " mov $0x01, %eax\n"
105 " jmp socketcall\n"
106
107 "bind: .GLOBL bind\n"
108 " mov $0x02, %eax\n"
109 " jmp socketcall\n"
110
111 "connect: .GLOBL connect\n"
112 " mov $0x03, %eax\n"
113 " jmp socketcall\n"
114
115 "listen: .GLOBL listen\n"
116 " mov $0x04, %eax\n"
117 " jmp socketcall\n"
118
119 "accept: .GLOBL accept\n"
120 " mov $0x05, %eax\n"
121 " jmp socketcall\n"
122
123 "getsockname: .GLOBL getsockname\n"
124 " mov $0x06, %eax\n"
125 " jmp socketcall\n"
126
127 "send: .GLOBL send\n"
128 " mov $0x09, %eax\n"
129 " jmp socketcall\n"
130
131 "recv: .GLOBL recv\n"
132 " mov $0x0a, %eax\n"
133 " jmp socketcall\n"
134
135 "shutdown: .GLOBL shutdown\n"
136 " mov $0x0d, %eax\n"
137 " jmp socketcall\n"
138
139 "setsockopt: .GLOBL setsockopt\n"
140 " mov $0x0e, %eax\n"
141 " jmp socketcall\n"
142
143 "getsockopt: .GLOBL getsockopt\n"
144 " mov $0x0f, %eax\n"
145 " jmp socketcall\n"
146
147 "socketcall:\n"
148 " mov %ebx, back_ebx\n"
149 " mov %eax, %ebx\n"
150 " mov $0x66, %eax\n"
151 " lea 4(%esp), %ecx\n"
152 /* fall through */
153
154 "do_syscall:\n"
155 " call *vsyscall\n" // always valid, may be int80 or vsyscall
156 " mov back_ebx, %ebx\n"
157 " cmpl $0xfffff000, %eax\n" // consider -4096..-1 for errno
158 " jae 0f\n"
159 " ret\n"
160 "0:\n" // error handling
161 " neg %eax\n" // get errno value
162 " push %eax\n" // save it
163 " call __errno_location\n"
164 " popl (%eax)\n" // store the pushed errno into the proper location
165 " mov $-1, %eax\n" // and return -1
166 " ret\n"
167
168 "int80:\n" // default compatible calling convention
169 " int $0x80\n"
170 " ret\n"
171);
172
173__attribute__((constructor))
174static void __i386_linux_vsyscall_init(void)
175{
176 /* We can get the pointer by resolving the __kernel_vsyscall symbol
177 * from the "linux-gate.so.1" virtual shared object, but this requires
178 * libdl. Or we can also know that the vsyscall pointer is always
179 * located at 0xFFFFE018 when /proc/sys/abi/vsyscall32 contains the
180 * default value 2. So we can use that once we've checked that we can
181 * access it without faulting. The dlsym method will also work when
182 * vsyscall32 = 1, which randomizes the VDSO address.
183 */
184#ifdef USE_VSYSCALL_DLSYM
185 void *handle = dlopen("linux-gate.so.1", RTLD_NOW);
186 if (handle) {
187 void *ptr = dlsym(handle, "__kernel_vsyscall");
188 dlclose(handle);
189 if (ptr)
190 vsyscall = ptr;
191 }
192#else
193 /* Heuristic: trying to mprotect() the VDSO area will only succeed if
194 * it is mapped.
195 */
196 if (mprotect((void *)0xffffe000, 4096, PROT_READ|PROT_EXEC) == 0) {
197 unsigned long ptr = *(unsigned long *)0xFFFFE018; /* VDSO is mapped */
198 if ((ptr & 0xFFFFE000) == 0xFFFFE000)
199 vsyscall = (void *)ptr;
200 }
201#endif
202}
203
204#endif /* defined(__linux__) && defined(__i386__) */