lib/charset: UTF-8 stream conversion
Provide functions to convert an UTF-8 stream to code page 437 or UTF-32.
Add unit tests.
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
diff --git a/lib/charset.c b/lib/charset.c
index 946d5ee..f44c58d 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -481,15 +481,6 @@
return dest;
}
-/**
- * utf_to_cp() - translate Unicode code point to 8bit codepage
- *
- * Codepoints that do not exist in the codepage are rendered as question mark.
- *
- * @c: pointer to Unicode code point to be translated
- * @codepage: Unicode to codepage translation table
- * Return: 0 on success, -ENOENT if codepoint cannot be translated
- */
int utf_to_cp(s32 *c, const u16 *codepage)
{
if (*c >= 0x80) {
@@ -507,3 +498,49 @@
}
return 0;
}
+
+int utf8_to_cp437_stream(u8 c, char *buffer)
+{
+ char *end;
+ const char *pos;
+ s32 s;
+ int ret;
+
+ for (;;) {
+ pos = buffer;
+ end = buffer + strlen(buffer);
+ *end++ = c;
+ *end = 0;
+ s = utf8_get(&pos);
+ if (s > 0) {
+ *buffer = 0;
+ ret = utf_to_cp(&s, codepage_437);
+ return s;
+ }
+ if (pos == end)
+ return 0;
+ *buffer = 0;
+ }
+}
+
+int utf8_to_utf32_stream(u8 c, char *buffer)
+{
+ char *end;
+ const char *pos;
+ s32 s;
+
+ for (;;) {
+ pos = buffer;
+ end = buffer + strlen(buffer);
+ *end++ = c;
+ *end = 0;
+ s = utf8_get(&pos);
+ if (s > 0) {
+ *buffer = 0;
+ return s;
+ }
+ if (pos == end)
+ return 0;
+ *buffer = 0;
+ }
+}