blob: 673470c8d2c852f3f3a242cb8623e15a7d8fbf0b [file] [log] [blame]
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +02001// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Unit tests for Unicode functions
4 *
5 * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de>
6 */
7
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +02008#include <charset.h>
9#include <command.h>
Heinrich Schuchardt31805542020-10-30 12:23:59 +010010#include <efi_loader.h>
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +020011#include <errno.h>
Simon Glass0f2af882020-05-10 11:40:05 -060012#include <log.h>
Simon Glass9bc15642020-02-03 07:36:16 -070013#include <malloc.h>
Simon Glass38754db2024-11-02 13:37:00 -060014#include <test/lib.h>
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +020015#include <test/test.h>
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +020016#include <test/ut.h>
17
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +020018/* Constants c1-c4 and d1-d4 encode the same letters */
19
20/* Six characters translating to one utf-8 byte each. */
21static const u16 c1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00};
22/* One character translating to two utf-8 bytes */
23static const u16 c2[] = {0x6b, 0x61, 0x66, 0x62, 0xe1, 0x74, 0x75, 0x72, 0x00};
24/* Three characters translating to three utf-8 bytes each */
25static const u16 c3[] = {0x6f5c, 0x6c34, 0x8266, 0x00};
26/* Three letters translating to four utf-8 bytes each */
27static const u16 c4[] = {0xd801, 0xdc8d, 0xd801, 0xdc96, 0xd801, 0xdc87,
28 0x0000};
29
30/* Illegal utf-16 strings */
31static const u16 i1[] = {0x69, 0x31, 0xdc87, 0x6c, 0x00};
32static const u16 i2[] = {0x69, 0x32, 0xd801, 0xd801, 0x6c, 0x00};
33static const u16 i3[] = {0x69, 0x33, 0xd801, 0x00};
34
35/* Six characters translating to one utf-16 word each. */
36static const char d1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00};
37/* Eight characters translating to one utf-16 word each */
38static const char d2[] = {0x6b, 0x61, 0x66, 0x62, 0xc3, 0xa1, 0x74, 0x75,
39 0x72, 0x00};
40/* Three characters translating to one utf-16 word each */
41static const char d3[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89,
42 0xa6, 0x00};
43/* Three letters translating to two utf-16 word each */
44static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
45 0xf0, 0x90, 0x92, 0x87, 0x00};
Heinrich Schuchardt6aa77ac2021-02-27 14:08:38 +010046/* Letter not in code page 437 */
47static const char d5[] = {0xCE, 0x92, 0x20, 0x69, 0x73, 0x20, 0x6E, 0x6F,
48 0x74, 0x20, 0x42, 0x00};
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +020049
50/* Illegal utf-8 strings */
51static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
52static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00};
53static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00};
Heinrich Schuchardte3fa74b2021-02-27 14:08:37 +010054static const char j4[] = {0xa1, 0x00};
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +020055
Heinrich Schuchardt551983d2019-07-14 17:47:46 +020056static int unicode_test_u16_strlen(struct unit_test_state *uts)
57{
58 ut_asserteq(6, u16_strlen(c1));
59 ut_asserteq(8, u16_strlen(c2));
60 ut_asserteq(3, u16_strlen(c3));
61 ut_asserteq(6, u16_strlen(c4));
62 return 0;
63}
Simon Glass38754db2024-11-02 13:37:00 -060064LIB_TEST(unicode_test_u16_strlen, 0);
Heinrich Schuchardt551983d2019-07-14 17:47:46 +020065
Heinrich Schuchardt060edff2022-12-18 05:32:14 +000066static int unicode_test_u16_strnlen(struct unit_test_state *uts)
67{
68 ut_asserteq(0, u16_strnlen(c1, 0));
69 ut_asserteq(4, u16_strnlen(c1, 4));
70 ut_asserteq(6, u16_strnlen(c1, 6));
71 ut_asserteq(6, u16_strnlen(c1, 7));
72
73 return 0;
74}
Simon Glass38754db2024-11-02 13:37:00 -060075LIB_TEST(unicode_test_u16_strnlen, 0);
Heinrich Schuchardt060edff2022-12-18 05:32:14 +000076
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +010077static int unicode_test_u16_strdup(struct unit_test_state *uts)
Heinrich Schuchardt45fceca2018-12-14 22:00:37 +010078{
79 u16 *copy = u16_strdup(c4);
80
81 ut_assert(copy != c4);
Simon Glassa3186e62020-05-10 12:52:45 -060082 ut_asserteq_mem(copy, c4, sizeof(c4));
Heinrich Schuchardt45fceca2018-12-14 22:00:37 +010083 free(copy);
Simon Glassa3186e62020-05-10 12:52:45 -060084
Heinrich Schuchardt45fceca2018-12-14 22:00:37 +010085 return 0;
86}
Simon Glass38754db2024-11-02 13:37:00 -060087LIB_TEST(unicode_test_u16_strdup, 0);
Heinrich Schuchardt45fceca2018-12-14 22:00:37 +010088
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +010089static int unicode_test_u16_strcpy(struct unit_test_state *uts)
Heinrich Schuchardt45fceca2018-12-14 22:00:37 +010090{
91 u16 *r;
92 u16 copy[10];
93
94 r = u16_strcpy(copy, c1);
95 ut_assert(r == copy);
Simon Glassa3186e62020-05-10 12:52:45 -060096 ut_asserteq_mem(copy, c1, sizeof(c1));
97
Heinrich Schuchardt45fceca2018-12-14 22:00:37 +010098 return 0;
99}
Simon Glass38754db2024-11-02 13:37:00 -0600100LIB_TEST(unicode_test_u16_strcpy, 0);
Heinrich Schuchardt45fceca2018-12-14 22:00:37 +0100101
Heinrich Schuchardt485599d2018-08-31 21:31:30 +0200102/* U-Boot uses UTF-16 strings in the EFI context only. */
103#if CONFIG_IS_ENABLED(EFI_LOADER) && !defined(API_BUILD)
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100104static int unicode_test_string16(struct unit_test_state *uts)
Heinrich Schuchardt485599d2018-08-31 21:31:30 +0200105{
106 char buf[20];
Heinrich Schuchardt3c864672022-01-29 18:28:08 +0100107 int ret;
Heinrich Schuchardt485599d2018-08-31 21:31:30 +0200108
109 /* Test length and precision */
110 memset(buf, 0xff, sizeof(buf));
111 sprintf(buf, "%8.6ls", c2);
112 ut_asserteq(' ', buf[1]);
113 ut_assert(!strncmp(&buf[2], d2, 7));
114 ut_assert(!buf[9]);
115
116 memset(buf, 0xff, sizeof(buf));
117 sprintf(buf, "%8.6ls", c4);
118 ut_asserteq(' ', buf[4]);
119 ut_assert(!strncmp(&buf[5], d4, 12));
120 ut_assert(!buf[17]);
121
122 memset(buf, 0xff, sizeof(buf));
123 sprintf(buf, "%-8.2ls", c4);
124 ut_asserteq(' ', buf[8]);
125 ut_assert(!strncmp(buf, d4, 8));
126 ut_assert(!buf[14]);
127
128 /* Test handling of illegal utf-16 sequences */
129 memset(buf, 0xff, sizeof(buf));
130 sprintf(buf, "%ls", i1);
131 ut_asserteq_str("i1?l", buf);
132
133 memset(buf, 0xff, sizeof(buf));
134 sprintf(buf, "%ls", i2);
135 ut_asserteq_str("i2?l", buf);
136
137 memset(buf, 0xff, sizeof(buf));
138 sprintf(buf, "%ls", i3);
139 ut_asserteq_str("i3?", buf);
140
Heinrich Schuchardt3c864672022-01-29 18:28:08 +0100141 memset(buf, 0xff, sizeof(buf));
142 ret = snprintf(buf, 4, "%ls", c1);
143 ut_asserteq(6, ret);
144 ut_asserteq_str("U-B", buf);
145
146 memset(buf, 0xff, sizeof(buf));
147 ret = snprintf(buf, 6, "%ls", c2);
148 ut_asserteq_str("kafb", buf);
149 ut_asserteq(9, ret);
150
151 memset(buf, 0xff, sizeof(buf));
152 ret = snprintf(buf, 7, "%ls", c2);
153 ut_asserteq_str("kafb\xC3\xA1", buf);
154 ut_asserteq(9, ret);
155
156 memset(buf, 0xff, sizeof(buf));
157 ret = snprintf(buf, 8, "%ls", c3);
158 ut_asserteq_str("\xE6\xBD\x9C\xE6\xB0\xB4", buf);
159 ut_asserteq(9, ret);
160
161 memset(buf, 0xff, sizeof(buf));
162 ret = snprintf(buf, 11, "%ls", c4);
163 ut_asserteq_str("\xF0\x90\x92\x8D\xF0\x90\x92\x96", buf);
164 ut_asserteq(12, ret);
165
166 memset(buf, 0xff, sizeof(buf));
167 ret = snprintf(buf, 4, "%ls", c4);
168 ut_asserteq_str("", buf);
169 ut_asserteq(12, ret);
170
Heinrich Schuchardt485599d2018-08-31 21:31:30 +0200171 return 0;
172}
Simon Glass38754db2024-11-02 13:37:00 -0600173LIB_TEST(unicode_test_string16, 0);
Heinrich Schuchardt485599d2018-08-31 21:31:30 +0200174#endif
175
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100176static int unicode_test_utf8_get(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200177{
178 const char *s;
179 s32 code;
180 int i;
181
182 /* Check characters less than 0x800 */
183 s = d2;
184 for (i = 0; i < 8; ++i) {
185 code = utf8_get((const char **)&s);
186 /* c2 is the utf-8 encoding of d2 */
187 ut_asserteq(c2[i], code);
188 if (!code)
189 break;
190 }
Marek Vasut1531c4e2023-03-10 04:33:13 +0100191 ut_asserteq_ptr(s, d2 + 9);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200192
193 /* Check characters less than 0x10000 */
194 s = d3;
195 for (i = 0; i < 4; ++i) {
196 code = utf8_get((const char **)&s);
197 /* c3 is the utf-8 encoding of d3 */
198 ut_asserteq(c3[i], code);
199 if (!code)
200 break;
201 }
Marek Vasut1531c4e2023-03-10 04:33:13 +0100202 ut_asserteq_ptr(s, d3 + 9);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200203
204 /* Check character greater 0xffff */
205 s = d4;
206 code = utf8_get((const char **)&s);
207 ut_asserteq(0x0001048d, code);
208 ut_asserteq_ptr(s, d4 + 4);
209
Heinrich Schuchardte3fa74b2021-02-27 14:08:37 +0100210 /* Check illegal character */
211 s = j4;
212 code = utf8_get((const char **)&s);
213 ut_asserteq(-1, code);
214 ut_asserteq_ptr(j4 + 1, s);
215
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200216 return 0;
217}
Simon Glass38754db2024-11-02 13:37:00 -0600218LIB_TEST(unicode_test_utf8_get, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200219
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100220static int unicode_test_utf8_put(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200221{
222 char buffer[8] = { 0, };
223 char *pos;
224
225 /* Commercial at, translates to one character */
226 pos = buffer;
Marek Vasut1531c4e2023-03-10 04:33:13 +0100227 ut_assert(!utf8_put('@', &pos));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200228 ut_asserteq(1, pos - buffer);
229 ut_asserteq('@', buffer[0]);
230 ut_assert(!buffer[1]);
231
232 /* Latin letter G with acute, translates to two charactes */
233 pos = buffer;
234 ut_assert(!utf8_put(0x1f4, &pos));
235 ut_asserteq(2, pos - buffer);
236 ut_asserteq_str("\xc7\xb4", buffer);
237
238 /* Tagalog letter i, translates to three characters */
239 pos = buffer;
240 ut_assert(!utf8_put(0x1701, &pos));
241 ut_asserteq(3, pos - buffer);
242 ut_asserteq_str("\xe1\x9c\x81", buffer);
243
244 /* Hamster face, translates to four characters */
245 pos = buffer;
246 ut_assert(!utf8_put(0x1f439, &pos));
247 ut_asserteq(4, pos - buffer);
248 ut_asserteq_str("\xf0\x9f\x90\xb9", buffer);
249
250 /* Illegal code */
251 pos = buffer;
252 ut_asserteq(-1, utf8_put(0xd888, &pos));
253
254 return 0;
255}
Simon Glass38754db2024-11-02 13:37:00 -0600256LIB_TEST(unicode_test_utf8_put, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200257
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100258static int unicode_test_utf8_utf16_strlen(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200259{
260 ut_asserteq(6, utf8_utf16_strlen(d1));
261 ut_asserteq(8, utf8_utf16_strlen(d2));
262 ut_asserteq(3, utf8_utf16_strlen(d3));
263 ut_asserteq(6, utf8_utf16_strlen(d4));
264
265 /* illegal utf-8 sequences */
266 ut_asserteq(4, utf8_utf16_strlen(j1));
Heinrich Schuchardtfc5f1a12018-09-12 00:05:32 +0200267 ut_asserteq(4, utf8_utf16_strlen(j2));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200268 ut_asserteq(3, utf8_utf16_strlen(j3));
269
270 return 0;
271}
Simon Glass38754db2024-11-02 13:37:00 -0600272LIB_TEST(unicode_test_utf8_utf16_strlen, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200273
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100274static int unicode_test_utf8_utf16_strnlen(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200275{
276 ut_asserteq(3, utf8_utf16_strnlen(d1, 3));
277 ut_asserteq(6, utf8_utf16_strnlen(d1, 13));
278 ut_asserteq(6, utf8_utf16_strnlen(d2, 6));
279 ut_asserteq(2, utf8_utf16_strnlen(d3, 2));
280 ut_asserteq(4, utf8_utf16_strnlen(d4, 2));
281 ut_asserteq(6, utf8_utf16_strnlen(d4, 3));
282
283 /* illegal utf-8 sequences */
284 ut_asserteq(4, utf8_utf16_strnlen(j1, 16));
Heinrich Schuchardtfc5f1a12018-09-12 00:05:32 +0200285 ut_asserteq(4, utf8_utf16_strnlen(j2, 16));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200286 ut_asserteq(3, utf8_utf16_strnlen(j3, 16));
287
288 return 0;
289}
Simon Glass38754db2024-11-02 13:37:00 -0600290LIB_TEST(unicode_test_utf8_utf16_strnlen, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200291
292/**
293 * ut_u16_strcmp() - Compare to u16 strings.
294 *
295 * @a1: first string
296 * @a2: second string
297 * @count: number of u16 to compare
298 * Return: -1 if a1 < a2, 0 if a1 == a2, 1 if a1 > a2
299 */
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100300static int unicode_test_u16_strcmp(const u16 *a1, const u16 *a2, size_t count)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200301{
302 for (; (*a1 || *a2) && count; ++a1, ++a2, --count) {
303 if (*a1 < *a2)
304 return -1;
305 if (*a1 > *a2)
306 return 1;
307 }
308 return 0;
309}
310
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100311static int unicode_test_utf8_utf16_strcpy(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200312{
313 u16 buf[16];
314 u16 *pos;
315
316 pos = buf;
317 utf8_utf16_strcpy(&pos, d1);
318 ut_asserteq(6, pos - buf);
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100319 ut_assert(!unicode_test_u16_strcmp(buf, c1, SIZE_MAX));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200320
321 pos = buf;
322 utf8_utf16_strcpy(&pos, d2);
323 ut_asserteq(8, pos - buf);
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100324 ut_assert(!unicode_test_u16_strcmp(buf, c2, SIZE_MAX));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200325
326 pos = buf;
327 utf8_utf16_strcpy(&pos, d3);
328 ut_asserteq(3, pos - buf);
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100329 ut_assert(!unicode_test_u16_strcmp(buf, c3, SIZE_MAX));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200330
331 pos = buf;
332 utf8_utf16_strcpy(&pos, d4);
333 ut_asserteq(6, pos - buf);
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100334 ut_assert(!unicode_test_u16_strcmp(buf, c4, SIZE_MAX));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200335
336 /* Illegal utf-8 strings */
337 pos = buf;
338 utf8_utf16_strcpy(&pos, j1);
339 ut_asserteq(4, pos - buf);
Simon Glass276af332022-01-23 12:55:14 -0700340 ut_assert(!unicode_test_u16_strcmp(buf, u"j1?l", SIZE_MAX));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200341
342 pos = buf;
343 utf8_utf16_strcpy(&pos, j2);
Heinrich Schuchardtfc5f1a12018-09-12 00:05:32 +0200344 ut_asserteq(4, pos - buf);
Simon Glass276af332022-01-23 12:55:14 -0700345 ut_assert(!unicode_test_u16_strcmp(buf, u"j2?l", SIZE_MAX));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200346
347 pos = buf;
348 utf8_utf16_strcpy(&pos, j3);
349 ut_asserteq(3, pos - buf);
Simon Glass276af332022-01-23 12:55:14 -0700350 ut_assert(!unicode_test_u16_strcmp(buf, u"j3?", SIZE_MAX));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200351
352 return 0;
353}
Simon Glass38754db2024-11-02 13:37:00 -0600354LIB_TEST(unicode_test_utf8_utf16_strcpy, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200355
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100356static int unicode_test_utf8_utf16_strncpy(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200357{
358 u16 buf[16];
359 u16 *pos;
360
361 pos = buf;
362 memset(buf, 0, sizeof(buf));
363 utf8_utf16_strncpy(&pos, d1, 4);
364 ut_asserteq(4, pos - buf);
365 ut_assert(!buf[4]);
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100366 ut_assert(!unicode_test_u16_strcmp(buf, c1, 4));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200367
368 pos = buf;
369 memset(buf, 0, sizeof(buf));
370 utf8_utf16_strncpy(&pos, d2, 10);
371 ut_asserteq(8, pos - buf);
372 ut_assert(buf[4]);
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100373 ut_assert(!unicode_test_u16_strcmp(buf, c2, SIZE_MAX));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200374
375 pos = buf;
376 memset(buf, 0, sizeof(buf));
377 utf8_utf16_strncpy(&pos, d3, 2);
378 ut_asserteq(2, pos - buf);
379 ut_assert(!buf[2]);
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100380 ut_assert(!unicode_test_u16_strcmp(buf, c3, 2));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200381
382 pos = buf;
383 memset(buf, 0, sizeof(buf));
384 utf8_utf16_strncpy(&pos, d4, 2);
385 ut_asserteq(4, pos - buf);
386 ut_assert(!buf[4]);
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100387 ut_assert(!unicode_test_u16_strcmp(buf, c4, 4));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200388
389 pos = buf;
390 memset(buf, 0, sizeof(buf));
391 utf8_utf16_strncpy(&pos, d4, 10);
392 ut_asserteq(6, pos - buf);
393 ut_assert(buf[5]);
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100394 ut_assert(!unicode_test_u16_strcmp(buf, c4, SIZE_MAX));
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200395
396 return 0;
397}
Simon Glass38754db2024-11-02 13:37:00 -0600398LIB_TEST(unicode_test_utf8_utf16_strncpy, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200399
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100400static int unicode_test_utf16_get(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200401{
402 const u16 *s;
403 s32 code;
404 int i;
405
406 /* Check characters less than 0x10000 */
407 s = c2;
408 for (i = 0; i < 9; ++i) {
409 code = utf16_get((const u16 **)&s);
410 ut_asserteq(c2[i], code);
411 if (!code)
412 break;
413 }
414 ut_asserteq_ptr(c2 + 8, s);
415
416 /* Check character greater 0xffff */
417 s = c4;
418 code = utf16_get((const u16 **)&s);
419 ut_asserteq(0x0001048d, code);
420 ut_asserteq_ptr(c4 + 2, s);
421
422 return 0;
423}
Simon Glass38754db2024-11-02 13:37:00 -0600424LIB_TEST(unicode_test_utf16_get, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200425
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100426static int unicode_test_utf16_put(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200427{
428 u16 buffer[4] = { 0, };
429 u16 *pos;
430
431 /* Commercial at, translates to one word */
432 pos = buffer;
433 ut_assert(!utf16_put('@', &pos));
434 ut_asserteq(1, pos - buffer);
435 ut_asserteq((u16)'@', buffer[0]);
436 ut_assert(!buffer[1]);
437
438 /* Hamster face, translates to two words */
439 pos = buffer;
440 ut_assert(!utf16_put(0x1f439, &pos));
441 ut_asserteq(2, pos - buffer);
442 ut_asserteq((u16)0xd83d, buffer[0]);
443 ut_asserteq((u16)0xdc39, buffer[1]);
444 ut_assert(!buffer[2]);
445
446 /* Illegal code */
447 pos = buffer;
448 ut_asserteq(-1, utf16_put(0xd888, &pos));
449
450 return 0;
451}
Simon Glass38754db2024-11-02 13:37:00 -0600452LIB_TEST(unicode_test_utf16_put, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200453
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100454static int unicode_test_utf16_strnlen(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200455{
456 ut_asserteq(3, utf16_strnlen(c1, 3));
457 ut_asserteq(6, utf16_strnlen(c1, 13));
458 ut_asserteq(6, utf16_strnlen(c2, 6));
459 ut_asserteq(2, utf16_strnlen(c3, 2));
460 ut_asserteq(2, utf16_strnlen(c4, 2));
461 ut_asserteq(3, utf16_strnlen(c4, 3));
462
463 /* illegal utf-16 word sequences */
464 ut_asserteq(4, utf16_strnlen(i1, 16));
465 ut_asserteq(4, utf16_strnlen(i2, 16));
466 ut_asserteq(3, utf16_strnlen(i3, 16));
467
468 return 0;
469}
Simon Glass38754db2024-11-02 13:37:00 -0600470LIB_TEST(unicode_test_utf16_strnlen, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200471
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100472static int unicode_test_utf16_utf8_strlen(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200473{
474 ut_asserteq(6, utf16_utf8_strlen(c1));
475 ut_asserteq(9, utf16_utf8_strlen(c2));
476 ut_asserteq(9, utf16_utf8_strlen(c3));
477 ut_asserteq(12, utf16_utf8_strlen(c4));
478
479 /* illegal utf-16 word sequences */
480 ut_asserteq(4, utf16_utf8_strlen(i1));
481 ut_asserteq(4, utf16_utf8_strlen(i2));
482 ut_asserteq(3, utf16_utf8_strlen(i3));
483
484 return 0;
485}
Simon Glass38754db2024-11-02 13:37:00 -0600486LIB_TEST(unicode_test_utf16_utf8_strlen, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200487
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100488static int unicode_test_utf16_utf8_strnlen(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200489{
490 ut_asserteq(3, utf16_utf8_strnlen(c1, 3));
491 ut_asserteq(6, utf16_utf8_strnlen(c1, 13));
492 ut_asserteq(7, utf16_utf8_strnlen(c2, 6));
493 ut_asserteq(6, utf16_utf8_strnlen(c3, 2));
494 ut_asserteq(8, utf16_utf8_strnlen(c4, 2));
495 ut_asserteq(12, utf16_utf8_strnlen(c4, 3));
496 return 0;
497}
Simon Glass38754db2024-11-02 13:37:00 -0600498LIB_TEST(unicode_test_utf16_utf8_strnlen, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200499
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100500static int unicode_test_utf16_utf8_strcpy(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200501{
502 char buf[16];
503 char *pos;
504
505 pos = buf;
506 utf16_utf8_strcpy(&pos, c1);
507 ut_asserteq(6, pos - buf);
508 ut_asserteq_str(d1, buf);
509
510 pos = buf;
511 utf16_utf8_strcpy(&pos, c2);
512 ut_asserteq(9, pos - buf);
513 ut_asserteq_str(d2, buf);
514
515 pos = buf;
516 utf16_utf8_strcpy(&pos, c3);
517 ut_asserteq(9, pos - buf);
518 ut_asserteq_str(d3, buf);
519
520 pos = buf;
521 utf16_utf8_strcpy(&pos, c4);
522 ut_asserteq(12, pos - buf);
523 ut_asserteq_str(d4, buf);
524
525 /* Illegal utf-16 strings */
526 pos = buf;
527 utf16_utf8_strcpy(&pos, i1);
528 ut_asserteq(4, pos - buf);
529 ut_asserteq_str("i1?l", buf);
530
531 pos = buf;
532 utf16_utf8_strcpy(&pos, i2);
533 ut_asserteq(4, pos - buf);
534 ut_asserteq_str("i2?l", buf);
535
536 pos = buf;
537 utf16_utf8_strcpy(&pos, i3);
538 ut_asserteq(3, pos - buf);
539 ut_asserteq_str("i3?", buf);
540
541 return 0;
542}
Simon Glass38754db2024-11-02 13:37:00 -0600543LIB_TEST(unicode_test_utf16_utf8_strcpy, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200544
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100545static int unicode_test_utf16_utf8_strncpy(struct unit_test_state *uts)
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200546{
547 char buf[16];
548 char *pos;
549
550 pos = buf;
551 memset(buf, 0, sizeof(buf));
552 utf16_utf8_strncpy(&pos, c1, 4);
553 ut_asserteq(4, pos - buf);
554 ut_assert(!buf[4]);
555 ut_assert(!strncmp(buf, d1, 4));
556
557 pos = buf;
558 memset(buf, 0, sizeof(buf));
559 utf16_utf8_strncpy(&pos, c2, 10);
560 ut_asserteq(9, pos - buf);
561 ut_assert(buf[4]);
562 ut_assert(!strncmp(buf, d2, SIZE_MAX));
563
564 pos = buf;
565 memset(buf, 0, sizeof(buf));
566 utf16_utf8_strncpy(&pos, c3, 2);
567 ut_asserteq(6, pos - buf);
568 ut_assert(!buf[6]);
569 ut_assert(!strncmp(buf, d3, 6));
570
571 pos = buf;
572 memset(buf, 0, sizeof(buf));
573 utf16_utf8_strncpy(&pos, c4, 2);
574 ut_asserteq(8, pos - buf);
575 ut_assert(!buf[8]);
576 ut_assert(!strncmp(buf, d4, 8));
577
578 pos = buf;
579 memset(buf, 0, sizeof(buf));
580 utf16_utf8_strncpy(&pos, c4, 10);
581 ut_asserteq(12, pos - buf);
582 ut_assert(buf[5]);
583 ut_assert(!strncmp(buf, d4, SIZE_MAX));
584
585 return 0;
586}
Simon Glass38754db2024-11-02 13:37:00 -0600587LIB_TEST(unicode_test_utf16_utf8_strncpy, 0);
Heinrich Schuchardtb8b6c812018-08-31 21:31:28 +0200588
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100589static int unicode_test_utf_to_lower(struct unit_test_state *uts)
Heinrich Schuchardt15552f72018-09-04 19:34:57 +0200590{
591 ut_asserteq('@', utf_to_lower('@'));
592 ut_asserteq('a', utf_to_lower('A'));
593 ut_asserteq('z', utf_to_lower('Z'));
594 ut_asserteq('[', utf_to_lower('['));
595 ut_asserteq('m', utf_to_lower('m'));
596 /* Latin letter O with diaresis (umlaut) */
597 ut_asserteq(0x00f6, utf_to_lower(0x00d6));
598#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
599 /* Cyrillic letter I*/
600 ut_asserteq(0x0438, utf_to_lower(0x0418));
601#endif
602 return 0;
603}
Simon Glass38754db2024-11-02 13:37:00 -0600604LIB_TEST(unicode_test_utf_to_lower, 0);
Heinrich Schuchardt15552f72018-09-04 19:34:57 +0200605
Heinrich Schuchardt46da93c2019-02-15 23:12:50 +0100606static int unicode_test_utf_to_upper(struct unit_test_state *uts)
Heinrich Schuchardt15552f72018-09-04 19:34:57 +0200607{
608 ut_asserteq('`', utf_to_upper('`'));
609 ut_asserteq('A', utf_to_upper('a'));
610 ut_asserteq('Z', utf_to_upper('z'));
611 ut_asserteq('{', utf_to_upper('{'));
612 ut_asserteq('M', utf_to_upper('M'));
613 /* Latin letter O with diaresis (umlaut) */
614 ut_asserteq(0x00d6, utf_to_upper(0x00f6));
615#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
616 /* Cyrillic letter I */
617 ut_asserteq(0x0418, utf_to_upper(0x0438));
618#endif
619 return 0;
620}
Simon Glass38754db2024-11-02 13:37:00 -0600621LIB_TEST(unicode_test_utf_to_upper, 0);
Heinrich Schuchardt15552f72018-09-04 19:34:57 +0200622
Heinrich Schuchardt0b1d5212022-12-29 14:44:04 +0100623static int unicode_test_u16_strcasecmp(struct unit_test_state *uts)
624{
625 ut_assert(u16_strcasecmp(u"abcd", u"abcd") == 0);
626 ut_assert(u16_strcasecmp(u"aBcd", u"abcd") == 0);
627 ut_assert(u16_strcasecmp(u"abcd", u"abCd") == 0);
628 ut_assert(u16_strcasecmp(u"abcdE", u"abcd") > 0);
629 ut_assert(u16_strcasecmp(u"abcd", u"abcdE") < 0);
630 ut_assert(u16_strcasecmp(u"abcE", u"abcd") > 0);
631 ut_assert(u16_strcasecmp(u"abcd", u"abcE") < 0);
632 ut_assert(u16_strcasecmp(u"abcd", u"abcd") == 0);
633 ut_assert(u16_strcasecmp(u"abcd", u"abcd") == 0);
634 if (CONFIG_IS_ENABLED(EFI_UNICODE_CAPITALIZATION)) {
635 /* Cyrillic letters */
636 ut_assert(u16_strcasecmp(u"\x043a\x043d\x0438\x0433\x0430",
637 u"\x041a\x041d\x0418\x0413\x0410") == 0);
638 ut_assert(u16_strcasecmp(u"\x043a\x043d\x0438\x0433\x0430",
639 u"\x041a\x041d\x0418\x0413\x0411") < 0);
640 ut_assert(u16_strcasecmp(u"\x043a\x043d\x0438\x0433\x0431",
641 u"\x041a\x041d\x0418\x0413\x0410") > 0);
642 }
643
644 return 0;
645}
Simon Glass38754db2024-11-02 13:37:00 -0600646LIB_TEST(unicode_test_u16_strcasecmp, 0);
Heinrich Schuchardt0b1d5212022-12-29 14:44:04 +0100647
AKASHI Takahiro0d1705e2019-09-18 10:26:30 +0900648static int unicode_test_u16_strncmp(struct unit_test_state *uts)
649{
Simon Glass276af332022-01-23 12:55:14 -0700650 ut_assert(u16_strncmp(u"abc", u"abc", 3) == 0);
651 ut_assert(u16_strncmp(u"abcdef", u"abcghi", 3) == 0);
652 ut_assert(u16_strncmp(u"abcdef", u"abcghi", 6) < 0);
653 ut_assert(u16_strncmp(u"abcghi", u"abcdef", 6) > 0);
654 ut_assert(u16_strcmp(u"abc", u"abc") == 0);
655 ut_assert(u16_strcmp(u"abcdef", u"deghi") < 0);
656 ut_assert(u16_strcmp(u"deghi", u"abcdef") > 0);
AKASHI Takahiro0d1705e2019-09-18 10:26:30 +0900657 return 0;
658}
Simon Glass38754db2024-11-02 13:37:00 -0600659LIB_TEST(unicode_test_u16_strncmp, 0);
AKASHI Takahiro0d1705e2019-09-18 10:26:30 +0900660
Heinrich Schuchardt3bb3d862020-05-09 09:16:49 +0200661static int unicode_test_u16_strsize(struct unit_test_state *uts)
662{
663 ut_asserteq_64(u16_strsize(c1), 14);
664 ut_asserteq_64(u16_strsize(c2), 18);
665 ut_asserteq_64(u16_strsize(c3), 8);
666 ut_asserteq_64(u16_strsize(c4), 14);
667 return 0;
668}
Simon Glass38754db2024-11-02 13:37:00 -0600669LIB_TEST(unicode_test_u16_strsize, 0);
Heinrich Schuchardt3bb3d862020-05-09 09:16:49 +0200670
Heinrich Schuchardt91fb0892021-02-27 14:08:36 +0100671static int unicode_test_utf_to_cp(struct unit_test_state *uts)
672{
673 int ret;
674 s32 c;
675
676 c = '\n';
677 ret = utf_to_cp(&c, codepage_437);
678 ut_asserteq(0, ret);
679 ut_asserteq('\n', c);
680
681 c = 'a';
682 ret = utf_to_cp(&c, codepage_437);
683 ut_asserteq(0, ret);
684 ut_asserteq('a', c);
685
686 c = 0x03c4; /* Greek small letter tau */
687 ret = utf_to_cp(&c, codepage_437);
688 ut_asserteq(0, ret);
689 ut_asserteq(0xe7, c);
690
691 c = 0x03a4; /* Greek capital letter tau */
692 ret = utf_to_cp(&c, codepage_437);
693 ut_asserteq(-ENOENT, ret);
694 ut_asserteq('?', c);
695
696 return 0;
697}
Simon Glass38754db2024-11-02 13:37:00 -0600698LIB_TEST(unicode_test_utf_to_cp, 0);
Heinrich Schuchardt91fb0892021-02-27 14:08:36 +0100699
/**
 * utf8_to_cp437_stream_helper() - feed a string to utf8_to_cp437_stream()
 *
 * Passes @in byte by byte to utf8_to_cp437_stream() and stores every
 * non-zero return value in @out. The result is zero terminated.
 *
 * @in:		zero terminated input string
 * @out:	output buffer; the caller must provide enough space
 */
static void utf8_to_cp437_stream_helper(const char *in, char *out)
{
	/* Scratch buffer handed to every call; starts as an empty string */
	char state[5];
	int cp;

	state[0] = 0;
	while (*in) {
		cp = utf8_to_cp437_stream(*in, state);
		/* A return value of zero yields no output */
		if (cp) {
			*out = cp;
			++out;
		}
		++in;
	}
	*out = 0;
}
713
714static int unicode_test_utf8_to_cp437_stream(struct unit_test_state *uts)
715{
716 char buf[16];
717
718 utf8_to_cp437_stream_helper(d1, buf);
719 ut_asserteq_str("U-Boot", buf);
720 utf8_to_cp437_stream_helper(d2, buf);
721 ut_asserteq_str("kafb\xa0tur", buf);
722 utf8_to_cp437_stream_helper(d5, buf);
723 ut_asserteq_str("? is not B", buf);
724 utf8_to_cp437_stream_helper(j2, buf);
725 ut_asserteq_str("j2l", buf);
726
727 return 0;
728}
Simon Glass38754db2024-11-02 13:37:00 -0600729LIB_TEST(unicode_test_utf8_to_cp437_stream, 0);
Heinrich Schuchardt6aa77ac2021-02-27 14:08:38 +0100730
731static void utf8_to_utf32_stream_helper(const char *in, s32 *out)
732{
733 char buffer[5];
734 int ret;
735
736 *buffer = 0;
737 for (; *in; ++in) {
738 ret = utf8_to_utf32_stream(*in, buffer);
739 if (ret)
740 *out++ = ret;
741 }
742 *out = 0;
743}
744
745static int unicode_test_utf8_to_utf32_stream(struct unit_test_state *uts)
746{
747 s32 buf[16];
748
749 const u32 u1[] = {0x55, 0x2D, 0x42, 0x6F, 0x6F, 0x74, 0x0000};
750 const u32 u2[] = {0x6B, 0x61, 0x66, 0x62, 0xE1, 0x74, 0x75, 0x72, 0x00};
Heinrich Schuchardtee737c82024-01-18 18:57:12 +0100751 const u32 u3[] = {0x6f5c, 0x6c34, 0x8266};
Heinrich Schuchardt6aa77ac2021-02-27 14:08:38 +0100752 const u32 u4[] = {0x6A, 0x32, 0x6C, 0x00};
Heinrich Schuchardtee737c82024-01-18 18:57:12 +0100753 const u32 u5[] = {0x0392, 0x20, 0x69, 0x73, 0x20, 0x6E, 0x6F, 0x74,
754 0x20, 0x42, 0x00};
Heinrich Schuchardt6aa77ac2021-02-27 14:08:38 +0100755
756 memset(buf, 0, sizeof(buf));
757 utf8_to_utf32_stream_helper(d1, buf);
758 ut_asserteq_mem(u1, buf, sizeof(u1));
759
760 memset(buf, 0, sizeof(buf));
761 utf8_to_utf32_stream_helper(d2, buf);
762 ut_asserteq_mem(u2, buf, sizeof(u2));
763
764 memset(buf, 0, sizeof(buf));
Heinrich Schuchardtee737c82024-01-18 18:57:12 +0100765 utf8_to_utf32_stream_helper(d3, buf);
Heinrich Schuchardt6aa77ac2021-02-27 14:08:38 +0100766 ut_asserteq_mem(u3, buf, sizeof(u3));
767
768 memset(buf, 0, sizeof(buf));
Heinrich Schuchardtee737c82024-01-18 18:57:12 +0100769 utf8_to_utf32_stream_helper(d5, buf);
770 ut_asserteq_mem(u5, buf, sizeof(u5));
771
772 memset(buf, 0, sizeof(buf));
Heinrich Schuchardt6aa77ac2021-02-27 14:08:38 +0100773 utf8_to_utf32_stream_helper(j2, buf);
774 ut_asserteq_mem(u4, buf, sizeof(u4));
775
776 return 0;
777}
Simon Glass38754db2024-11-02 13:37:00 -0600778LIB_TEST(unicode_test_utf8_to_utf32_stream, 0);
Heinrich Schuchardt6aa77ac2021-02-27 14:08:38 +0100779
Heinrich Schuchardt31805542020-10-30 12:23:59 +0100780#ifdef CONFIG_EFI_LOADER
781static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts)
782{
783 u16 buf[16];
Simon Glass276af332022-01-23 12:55:14 -0700784 u16 const expected[] = u"Capsule0AF9";
Heinrich Schuchardt31805542020-10-30 12:23:59 +0100785 u16 *pos;
786
787 memset(buf, 0xeb, sizeof(buf));
Ilias Apalodimas21575292020-12-31 12:26:46 +0200788 pos = efi_create_indexed_name(buf, sizeof(buf), "Capsule", 0x0af9);
Heinrich Schuchardt31805542020-10-30 12:23:59 +0100789
790 ut_asserteq_mem(expected, buf, sizeof(expected));
791 ut_asserteq(pos - buf, u16_strnlen(buf, SIZE_MAX));
792
793 return 0;
794}
Simon Glass38754db2024-11-02 13:37:00 -0600795LIB_TEST(unicode_test_efi_create_indexed_name, 0);
Heinrich Schuchardt31805542020-10-30 12:23:59 +0100796#endif
797
Masahisa Kojimace5ccc42022-04-28 17:09:35 +0900798static int unicode_test_u16_strlcat(struct unit_test_state *uts)
799{
800 u16 buf[40];
801 u16 dest[] = {0x3053, 0x3093, 0x306b, 0x3061, 0x306f, 0};
802 u16 src[] = {0x03B1, 0x2172, 0x6F5C, 0x8247, 0};
803 u16 concat_str[] = {0x3053, 0x3093, 0x306b, 0x3061, 0x306f,
804 0x03B1, 0x2172, 0x6F5C, 0x8247, 0};
805 u16 null_src = u'\0';
806 size_t ret, expected;
807 int i;
808
809 /* dest and src are empty string */
810 memset(buf, 0, sizeof(buf));
Dan Carpenter5f5f89a2023-07-27 10:12:58 +0300811 ret = u16_strlcat(buf, &null_src, ARRAY_SIZE(buf));
Matthias Schiffer0b18c3d2023-07-14 13:24:51 +0200812 ut_asserteq(0, ret);
Masahisa Kojimace5ccc42022-04-28 17:09:35 +0900813
814 /* dest is empty string */
815 memset(buf, 0, sizeof(buf));
Dan Carpenter5f5f89a2023-07-27 10:12:58 +0300816 ret = u16_strlcat(buf, src, ARRAY_SIZE(buf));
Matthias Schiffer0b18c3d2023-07-14 13:24:51 +0200817 ut_asserteq(4, ret);
Masahisa Kojimace5ccc42022-04-28 17:09:35 +0900818 ut_assert(!unicode_test_u16_strcmp(buf, src, 40));
819
820 /* src is empty string */
821 memset(buf, 0xCD, (sizeof(buf) - sizeof(u16)));
822 buf[39] = 0;
823 memcpy(buf, dest, sizeof(dest));
Dan Carpenter5f5f89a2023-07-27 10:12:58 +0300824 ret = u16_strlcat(buf, &null_src, ARRAY_SIZE(buf));
Matthias Schiffer0b18c3d2023-07-14 13:24:51 +0200825 ut_asserteq(5, ret);
Masahisa Kojimace5ccc42022-04-28 17:09:35 +0900826 ut_assert(!unicode_test_u16_strcmp(buf, dest, 40));
827
828 for (i = 0; i <= 40; i++) {
829 memset(buf, 0xCD, (sizeof(buf) - sizeof(u16)));
830 buf[39] = 0;
831 memcpy(buf, dest, sizeof(dest));
Matthias Schiffer0b18c3d2023-07-14 13:24:51 +0200832 expected = min(5, i) + 4;
Masahisa Kojimace5ccc42022-04-28 17:09:35 +0900833 ret = u16_strlcat(buf, src, i);
834 ut_asserteq(expected, ret);
835 if (i <= 6) {
836 ut_assert(!unicode_test_u16_strcmp(buf, dest, 40));
837 } else if (i < 10) {
838 ut_assert(!unicode_test_u16_strcmp(buf, concat_str, i - 1));
839 } else {
840 ut_assert(!unicode_test_u16_strcmp(buf, concat_str, 40));
841 }
842 }
843
844 return 0;
845}
Simon Glass38754db2024-11-02 13:37:00 -0600846LIB_TEST(unicode_test_u16_strlcat, 0);