blob: 2b6912c5092115c27cbf7fff6e14ee9aa7a77953 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0+
/*
 * EFI Unicode collation protocol
 *
 * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de>
 */
7
Heinrich Schuchardte59072a2018-09-04 19:34:58 +02008#include <charset.h>
9#include <cp1250.h>
10#include <cp437.h>
11#include <efi_loader.h>
12
/*
 * Characters that are not permitted in FAT 8.3 file names.
 * efi_str_to_fat() substitutes each of them by '_'.
 */
static const char illegal[] = "+,<=>:;\"/\\|?*[]\x7f";
Heinrich Schuchardte59072a2018-09-04 19:34:58 +020015
16/*
17 * EDK2 assumes codepage 1250 when creating FAT 8.3 file names.
18 * Linux defaults to codepage 437 for FAT 8.3 file names.
19 */
20#if CONFIG_FAT_DEFAULT_CODEPAGE == 1250
21/* Unicode code points for code page 1250 characters 0x80 - 0xff */
22static const u16 codepage[] = CP1250;
23#else
24/* Unicode code points for code page 437 characters 0x80 - 0xff */
Heinrich Schuchardta9ff07b2021-02-27 14:08:35 +010025static const u16 *codepage = codepage_437;
Heinrich Schuchardte59072a2018-09-04 19:34:58 +020026#endif
27
Heinrich Schuchardtb3258842019-05-16 07:52:58 +020028/* GUID of the EFI_UNICODE_COLLATION_PROTOCOL2 */
29const efi_guid_t efi_guid_unicode_collation_protocol2 =
Heinrich Schuchardte59072a2018-09-04 19:34:58 +020030 EFI_UNICODE_COLLATION_PROTOCOL2_GUID;
31
32/**
33 * efi_stri_coll() - compare utf-16 strings case-insenitively
34 *
35 * @this: unicode collation protocol instance
36 * @s1: first string
37 * @s2: second string
38 *
39 * This function implements the StriColl() service of the
Heinrich Schuchardt3bca2342021-01-16 09:58:06 +010040 * EFI_UNICODE_COLLATION_PROTOCOL2.
Heinrich Schuchardte59072a2018-09-04 19:34:58 +020041 *
42 * See the Unified Extensible Firmware Interface (UEFI) specification for
43 * details.
44 *
Heinrich Schuchardte59072a2018-09-04 19:34:58 +020045 * Return: 0: s1 == s2, > 0: s1 > s2, < 0: s1 < s2
46 */
47static efi_intn_t EFIAPI efi_stri_coll(
48 struct efi_unicode_collation_protocol *this, u16 *s1, u16 *s2)
49{
50 s32 c1, c2;
51 efi_intn_t ret = 0;
52
53 EFI_ENTRY("%p, %ls, %ls", this, s1, s2);
54 for (; *s1 | *s2; ++s1, ++s2) {
55 c1 = utf_to_upper(*s1);
56 c2 = utf_to_upper(*s2);
57 if (c1 < c2) {
58 ret = -1;
59 goto out;
60 } else if (c1 > c2) {
61 ret = 1;
62 goto out;
63 }
64 }
65out:
66 EFI_EXIT(EFI_SUCCESS);
67 return ret;
68}
69
70/**
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +020071 * next_lower() - get next codepoint converted to lower case
72 *
73 * @string: pointer to u16 string, on return advanced by one codepoint
74 * Return: first codepoint of string converted to lower case
75 */
76static s32 next_lower(const u16 **string)
77{
78 return utf_to_lower(utf16_get(string));
79}
80
81/**
Heinrich Schuchardte59072a2018-09-04 19:34:58 +020082 * metai_match() - compare utf-16 string with a pattern string case-insenitively
83 *
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +020084 * @string: string to compare
85 * @pattern: pattern string
Heinrich Schuchardte59072a2018-09-04 19:34:58 +020086 *
87 * The pattern string may use these:
88 * - * matches >= 0 characters
89 * - ? matches 1 character
90 * - [<char1><char2>...<charN>] match any character in the set
91 * - [<char1>-<char2>] matches any character in the range
92 *
93 * This function is called my efi_metai_match().
94 *
95 * For '*' pattern searches this function calls itself recursively.
96 * Performance-wise this is suboptimal, especially for multiple '*' wildcards.
97 * But it results in simple code.
98 *
99 * Return: true if the string is matched.
100 */
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200101static bool metai_match(const u16 *string, const u16 *pattern)
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200102{
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200103 s32 first, s, p;
104
105 for (; *string && *pattern;) {
106 const u16 *string_old = string;
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200107
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200108 s = next_lower(&string);
109 p = next_lower(&pattern);
110
111 switch (p) {
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200112 case '*':
113 /* Match 0 or more characters */
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200114 for (;; s = next_lower(&string)) {
115 if (metai_match(string_old, pattern))
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200116 return true;
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200117 if (!s)
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200118 return false;
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200119 string_old = string;
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200120 }
121 case '?':
122 /* Match any one character */
123 break;
124 case '[':
125 /* Match any character in the set */
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200126 p = next_lower(&pattern);
127 first = p;
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200128 if (first == ']')
129 /* Empty set */
130 return false;
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200131 p = next_lower(&pattern);
132 if (p == '-') {
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200133 /* Range */
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200134 p = next_lower(&pattern);
135 if (s < first || s > p)
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200136 return false;
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200137 p = next_lower(&pattern);
138 if (p != ']')
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200139 return false;
140 } else {
141 /* Set */
142 bool hit = false;
143
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200144 if (s == first)
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200145 hit = true;
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200146 for (; p && p != ']';
147 p = next_lower(&pattern)) {
148 if (p == s)
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200149 hit = true;
150 }
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200151 if (!hit || p != ']')
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200152 return false;
153 }
154 break;
155 default:
156 /* Match one character */
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200157 if (p != s)
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200158 return false;
159 }
160 }
Heinrich Schuchardte6e62e02019-06-12 19:18:24 +0200161 if (!*pattern && !*string)
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200162 return true;
163 return false;
164}
165
166/**
167 * efi_metai_match() - compare utf-16 string with a pattern string
168 * case-insenitively
169 *
170 * @this: unicode collation protocol instance
Heinrich Schuchardt41020f32020-04-10 17:39:23 +0200171 * @string: string to compare
172 * @pattern: pattern string
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200173 *
174 * The pattern string may use these:
175 * - * matches >= 0 characters
176 * - ? matches 1 character
177 * - [<char1><char2>...<charN>] match any character in the set
178 * - [<char1>-<char2>] matches any character in the range
179 *
180 * This function implements the MetaMatch() service of the
Heinrich Schuchardt3bca2342021-01-16 09:58:06 +0100181 * EFI_UNICODE_COLLATION_PROTOCOL2.
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200182 *
183 * Return: true if the string is matched.
184 */
185static bool EFIAPI efi_metai_match(struct efi_unicode_collation_protocol *this,
186 const u16 *string, const u16 *pattern)
187{
188 bool ret;
189
190 EFI_ENTRY("%p, %ls, %ls", this, string, pattern);
191 ret = metai_match(string, pattern);
192 EFI_EXIT(EFI_SUCCESS);
193 return ret;
194}
195
196/**
197 * efi_str_lwr() - convert to lower case
198 *
199 * @this: unicode collation protocol instance
200 * @string: string to convert
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200201 *
202 * The conversion is done in place. As long as upper and lower letters use the
203 * same number of words this does not pose a problem.
204 *
205 * This function implements the StrLwr() service of the
Heinrich Schuchardt3bca2342021-01-16 09:58:06 +0100206 * EFI_UNICODE_COLLATION_PROTOCOL2.
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200207 */
208static void EFIAPI efi_str_lwr(struct efi_unicode_collation_protocol *this,
209 u16 *string)
210{
211 EFI_ENTRY("%p, %ls", this, string);
212 for (; *string; ++string)
213 *string = utf_to_lower(*string);
214 EFI_EXIT(EFI_SUCCESS);
215}
216
217/**
218 * efi_str_upr() - convert to upper case
219 *
220 * @this: unicode collation protocol instance
221 * @string: string to convert
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200222 *
223 * The conversion is done in place. As long as upper and lower letters use the
224 * same number of words this does not pose a problem.
225 *
226 * This function implements the StrUpr() service of the
Heinrich Schuchardt3bca2342021-01-16 09:58:06 +0100227 * EFI_UNICODE_COLLATION_PROTOCOL2.
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200228 */
229static void EFIAPI efi_str_upr(struct efi_unicode_collation_protocol *this,
230 u16 *string)
231{
232 EFI_ENTRY("%p, %ls", this, string);
233 for (; *string; ++string)
234 *string = utf_to_upper(*string);
235 EFI_EXIT(EFI_SUCCESS);
236}
237
238/**
239 * efi_fat_to_str() - convert an 8.3 file name from an OEM codepage to Unicode
240 *
241 * @this: unicode collation protocol instance
242 * @fat_size: size of the string to convert
243 * @fat: string to convert
244 * @string: converted string
245 *
246 * This function implements the FatToStr() service of the
Heinrich Schuchardt3bca2342021-01-16 09:58:06 +0100247 * EFI_UNICODE_COLLATION_PROTOCOL2.
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200248 */
249static void EFIAPI efi_fat_to_str(struct efi_unicode_collation_protocol *this,
250 efi_uintn_t fat_size, char *fat, u16 *string)
251{
252 efi_uintn_t i;
253 u16 c;
254
255 EFI_ENTRY("%p, %zu, %s, %p", this, fat_size, fat, string);
256 for (i = 0; i < fat_size; ++i) {
257 c = (unsigned char)fat[i];
258 if (c > 0x80)
Mikhail Ilin5e19eae2022-11-22 10:33:24 +0300259 c = codepage[c - 0x80];
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200260 string[i] = c;
261 if (!c)
262 break;
263 }
264 string[i] = 0;
265 EFI_EXIT(EFI_SUCCESS);
266}
267
268/**
269 * efi_fat_to_str() - convert a utf-16 string to legal characters for a FAT
270 * file name in an OEM code page
271 *
272 * @this: unicode collation protocol instance
273 * @string: Unicode string to convert
274 * @fat_size: size of the target buffer
275 * @fat: converted string
276 *
277 * This function implements the StrToFat() service of the
Heinrich Schuchardt3bca2342021-01-16 09:58:06 +0100278 * EFI_UNICODE_COLLATION_PROTOCOL2.
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200279 *
280 * Return: true if an illegal character was substituted by '_'.
281 */
282static bool EFIAPI efi_str_to_fat(struct efi_unicode_collation_protocol *this,
283 const u16 *string, efi_uintn_t fat_size,
284 char *fat)
285{
286 efi_uintn_t i;
287 s32 c;
288 bool ret = false;
289
290 EFI_ENTRY("%p, %ls, %zu, %p", this, string, fat_size, fat);
291 for (i = 0; i < fat_size;) {
292 c = utf16_get(&string);
293 switch (c) {
294 /* Ignore period and space */
295 case '.':
296 case ' ':
297 continue;
298 case 0:
299 break;
300 }
301 c = utf_to_upper(c);
Heinrich Schuchardt91fb0892021-02-27 14:08:36 +0100302 if (utf_to_cp(&c, codepage) ||
303 (c && (c < 0x20 || strchr(illegal, c)))) {
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200304 ret = true;
Heinrich Schuchardt91fb0892021-02-27 14:08:36 +0100305 c = '_';
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200306 }
307
308 fat[i] = c;
309 if (!c)
310 break;
311 ++i;
312 }
313 EFI_EXIT(EFI_SUCCESS);
314 return ret;
315}
316
Heinrich Schuchardtb3258842019-05-16 07:52:58 +0200317const struct efi_unicode_collation_protocol efi_unicode_collation_protocol2 = {
Heinrich Schuchardte59072a2018-09-04 19:34:58 +0200318 .stri_coll = efi_stri_coll,
319 .metai_match = efi_metai_match,
320 .str_lwr = efi_str_lwr,
321 .str_upr = efi_str_upr,
322 .fat_to_str = efi_fat_to_str,
323 .str_to_fat = efi_str_to_fat,
324 .supported_languages = "en",
325};