diff --git a/MAINTAINERS b/MAINTAINERS
index 5a039b8..31b49c0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -775,6 +775,16 @@
 T:	git https://source.denx.de/u-boot/custodians/u-boot-i2c.git
 F:	drivers/i2c/
 
+KWBIMAGE / KWBOOT TOOLS
+M:	Pali Rohár <pali@kernel.org>
+M:	Marek Behún <marek.behun@nic.cz>
+M:	Stefan Roese <sr@denx.de>
+S:	Maintained
+T:	git https://source.denx.de/u-boot/custodians/u-boot-marvell.git
+F:	doc/README.kwbimage
+F:	doc/kwboot.1
+F:	tools/kwb*
+
 LOGGING
 M:	Simon Glass <sjg@chromium.org>
 S:	Maintained
diff --git a/doc/kwboot.1 b/doc/kwboot.1
index 1e9ca26..acdea89 100644
--- a/doc/kwboot.1
+++ b/doc/kwboot.1
@@ -1,21 +1,22 @@
-.TH KWBOOT 1 "2012-05-19"
+.TH KWBOOT 1 "2021-08-25"
 
 .SH NAME
-kwboot \- Boot Marvell Kirkwood SoCs over a serial link.
+kwboot \- Boot Marvell Kirkwood (and other 32-bit) SoCs over a serial link.
 .SH SYNOPSIS
 .B kwboot
 .RB [ "-b \fIimage\fP" ]
-.RB [ "-p" ]
 .RB [ "-t" ]
 .RB [ "-B \fIbaudrate\fP" ]
 .RB \fITTY\fP
 .SH "DESCRIPTION"
 
-The \fBmkimage\fP program boots boards based on Marvell's Kirkwood
-platform over their integrated UART. Boot image files will typically
+The \fBkwboot\fP program boots boards based on Marvell's 32-bit
+platforms including Kirkwood, Dove, A370, AXP, A375, A38x
+and A39x over their integrated UART. Boot image files will typically
 contain a second stage boot loader, such as U-Boot. The image file
 must conform to Marvell's BootROM firmware image format
-(\fIkwbimage\fP), created using a tool such as \fBmkimage\fP.
+(\fIkwbimage v0\fP or \fIv1\fP), created using a tool such as
+\fBmkimage\fP.
 
 Following power-up or a system reset, system BootROM code polls the
 UART for a brief period of time, sensing a handshake message which
@@ -36,25 +37,23 @@
 Handshake; then upload file \fIimage\fP over \fITTY\fP.
 
 Note that for the encapsulated boot code to be executed, \fIimage\fP
-must be of type "UART boot" (0x69). Boot images of different types,
-such as backup images of vendor firmware downloaded from flash memory
-(type 0x8B), will not work (or not as expected). See \fB-p\fP for a
-workaround.
+must be of type "UART boot" (0x69). The \fBkwboot\fP program changes
+this type automatically, unless the \fIimage\fP is signed, in which
+case it cannot be changed.
 
 This mode writes handshake status and upload progress indication to
-stdout.
+stdout. It is possible that \fIimage\fP contains an optional binary
+code in its header which may also print some output via UART (for
+example U-Boot SPL does this). In such a case, this output is also
+written to stdout after the header is sent.
 
 .TP
 .BI "\-p"
-In combination with \fB-b\fP, patches the header in \fIimage\fP prior
-to upload, to "UART boot" type.
+Obsolete. Does nothing.
 
-This option attempts on-the-fly conversion of some none-UART image
-types, such as images which were originally formatted to be stored in
-flash memory.
-
-Conversion is performed in memory. The contents of \fIimage\fP will
-not be altered.
+In the past, when this option was used, the program patched the header
+in the image prior to upload, to "UART boot" type. This is now done by
+default.
 
 .TP
 .BI "\-t"
@@ -65,11 +64,26 @@
 immediately following a successful image upload.
 
 If standard I/O streams connect to a console, this mode will terminate
-after receiving 'ctrl-\\' followed by 'c' from console input.
+after receiving \fBctrl-\e\fP followed by \fBc\fP from console input.
 
 .TP
 .BI "\-B \fIbaudrate\fP"
-Adjust the baud rate on \fITTY\fP. Default rate is 115200.
+If used in combination with \fB-b\fP, inject into the image header
+code that changes baud rate to \fIbaudrate\fP after uploading image
+header, and code that changes the baud rate back to the default
+(115200 Bd) before executing payload, and also adjust the baud rate
+on \fITTY\fP correspondingly. This can make the upload significantly
+faster.
+
+If used in combination with \fB-t\fP, adjust the baud rate to
+\fIbaudrate\fP on \fITTY\fP before starting terminal.
+
+If both \fB-b\fP and \fB-t\fP are used, the baud rate is changed
+back to 115200 after the upload.
+
+Tested values for \fIbaudrate\fP for Armada 38x include: 115200,
+230400, 460800, 500000, 576000, 921600, 1000000, 1152000, 1500000,
+2000000, 2500000, 3125000, 4000000 and 5200000.
 
 .SH "SEE ALSO"
 .PP
@@ -82,3 +96,7 @@
 Luka Perkov <luka@openwrt.org>
 .br
 David Purdy <david.c.purdy@gmail.com>
+.br
+Pali Rohár <pali@kernel.org>
+.br
+Marek Behún <marek.behun@nic.cz>
diff --git a/tools/kwbimage.c b/tools/kwbimage.c
index d200ff2..77bf4dd 100644
--- a/tools/kwbimage.c
+++ b/tools/kwbimage.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
  * Image manipulator for Marvell SoCs
- *  supports Kirkwood, Dove, Armada 370, Armada XP, and Armada 38x
+ *  supports Kirkwood, Dove, Armada 370, Armada XP, Armada 375, Armada 38x and
+ *  Armada 39x
  *
  * (C) Copyright 2013 Thomas Petazzoni
  * <thomas.petazzoni@free-electrons.com>
@@ -280,14 +281,6 @@
 	return csum;
 }
 
-size_t kwbimage_header_size(unsigned char *ptr)
-{
-	if (image_version((void *)ptr) == 0)
-		return sizeof(struct main_hdr_v0);
-	else
-		return KWBHEADER_V1_SIZE((struct main_hdr_v1 *)ptr);
-}
-
 /*
  * Verify checksum over a complete header that includes the checksum field.
  * Return 1 when OK, otherwise 0.
@@ -298,7 +291,7 @@
 	struct main_hdr_v0 *main_hdr = (struct main_hdr_v0 *)hdr;
 	uint8_t checksum;
 
-	checksum = image_checksum8(hdr, kwbimage_header_size(hdr));
+	checksum = image_checksum8(hdr, kwbheader_size_for_csum(hdr));
 	/* Calculated checksum includes the header checksum field. Compensate
 	 * for that.
 	 */
@@ -542,7 +535,7 @@
 	}
 
 	if (4 + size_seq > sizeof(dst->key)) {
-		fprintf(stderr, "export pk failed: seq too large (%d, %lu)\n",
+		fprintf(stderr, "export pk failed: seq too large (%d, %zu)\n",
 			4 + size_seq, sizeof(dst->key));
 		fprintf(stderr, errmsg, keyname);
 		return -ENOBUFS;
@@ -1618,34 +1611,20 @@
 static void kwbimage_print_header(const void *ptr)
 {
 	struct main_hdr_v0 *mhdr = (struct main_hdr_v0 *)ptr;
+	struct opt_hdr_v1 *ohdr;
 
 	printf("Image Type:   MVEBU Boot from %s Image\n",
 	       image_boot_mode_name(mhdr->blockid));
-	printf("Image version:%d\n", image_version((void *)ptr));
-	if (image_version((void *)ptr) == 1) {
-		struct main_hdr_v1 *mhdr = (struct main_hdr_v1 *)ptr;
-
-		if (mhdr->ext & 0x1) {
-			struct opt_hdr_v1 *ohdr = (struct opt_hdr_v1 *)
-						  ((uint8_t *)ptr +
-						   sizeof(*mhdr));
+	printf("Image version:%d\n", kwbimage_version(ptr));
 
-			while (1) {
-				uint32_t ohdr_size;
-
-				ohdr_size = (ohdr->headersz_msb << 16) |
-					    le16_to_cpu(ohdr->headersz_lsb);
-				if (ohdr->headertype == OPT_HDR_V1_BINARY_TYPE) {
-					printf("BIN Hdr Size: ");
-					genimg_print_size(ohdr_size - 12 - 4 * ohdr->data[0]);
-				}
-				if (!(*((uint8_t *)ohdr + ohdr_size - 4) & 0x1))
-					break;
-				ohdr = (struct opt_hdr_v1 *)((uint8_t *)ohdr +
-							     ohdr_size);
-			}
+	for_each_opt_hdr_v1 (ohdr, mhdr) {
+		if (ohdr->headertype == OPT_HDR_V1_BINARY_TYPE) {
+			printf("BIN Hdr Size: ");
+			genimg_print_size(opt_hdr_v1_size(ohdr) - 12 -
+					  4 * ohdr->data[0]);
 		}
 	}
+
 	printf("Data Size:    ");
 	genimg_print_size(mhdr->blocksize - sizeof(uint32_t));
 	printf("Load Address: %08x\n", mhdr->destaddr);
@@ -1663,8 +1642,8 @@
 static int kwbimage_verify_header(unsigned char *ptr, int image_size,
 				  struct image_tool_params *params)
 {
-	uint8_t checksum;
-	size_t header_size = kwbimage_header_size(ptr);
+	size_t header_size = kwbheader_size(ptr);
+	uint8_t csum;
 
 	if (header_size > image_size)
 		return -FDT_ERR_BADSTRUCTURE;
@@ -1673,52 +1652,27 @@
 		return -FDT_ERR_BADSTRUCTURE;
 
 	/* Only version 0 extended header has checksum */
-	if (image_version((void *)ptr) == 0) {
+	if (kwbimage_version(ptr) == 0) {
 		struct main_hdr_v0 *mhdr = (struct main_hdr_v0 *)ptr;
 
 		if (mhdr->ext & 0x1) {
-			struct ext_hdr_v0 *ext_hdr;
+			struct ext_hdr_v0 *ext_hdr = (void *)(mhdr + 1);
 
-			if (header_size + sizeof(*ext_hdr) > image_size)
-				return -FDT_ERR_BADSTRUCTURE;
-
-			ext_hdr = (struct ext_hdr_v0 *)
-				(ptr + sizeof(struct main_hdr_v0));
-			checksum = image_checksum8(ext_hdr,
-						   sizeof(struct ext_hdr_v0)
-						   - sizeof(uint8_t));
-			if (checksum != ext_hdr->checksum)
+			csum = image_checksum8(ext_hdr, sizeof(*ext_hdr) - 1);
+			if (csum != ext_hdr->checksum)
 				return -FDT_ERR_BADSTRUCTURE;
 		}
-	} else if (image_version((void *)ptr) == 1) {
+	} else if (kwbimage_version(ptr) == 1) {
 		struct main_hdr_v1 *mhdr = (struct main_hdr_v1 *)ptr;
+		const uint8_t *mhdr_end;
+		struct opt_hdr_v1 *ohdr;
 		uint32_t offset;
 		uint32_t size;
 
-		if (mhdr->ext & 0x1) {
-			uint32_t ohdr_size;
-			struct opt_hdr_v1 *ohdr = (struct opt_hdr_v1 *)
-						  (ptr + sizeof(*mhdr));
-
-			while (1) {
-				if ((uint8_t *)ohdr + sizeof(*ohdr) >
-				    (uint8_t *)mhdr + header_size)
-					return -FDT_ERR_BADSTRUCTURE;
-
-				ohdr_size = (ohdr->headersz_msb << 16) |
-					    le16_to_cpu(ohdr->headersz_lsb);
-
-				if (ohdr_size < 8 ||
-				    (uint8_t *)ohdr + ohdr_size >
-				    (uint8_t *)mhdr + header_size)
-					return -FDT_ERR_BADSTRUCTURE;
-
-				if (!(*((uint8_t *)ohdr + ohdr_size - 4) & 0x1))
-					break;
-				ohdr = (struct opt_hdr_v1 *)((uint8_t *)ohdr +
-							     ohdr_size);
-			}
-		}
+		mhdr_end = (uint8_t *)mhdr + header_size;
+		for_each_opt_hdr_v1 (ohdr, ptr)
+			if (!opt_hdr_v1_valid_size(ohdr, mhdr_end))
+				return -FDT_ERR_BADSTRUCTURE;
 
 		offset = le32_to_cpu(mhdr->srcaddr);
 
@@ -1864,37 +1818,25 @@
 static int kwbimage_extract_subimage(void *ptr, struct image_tool_params *params)
 {
 	struct main_hdr_v1 *mhdr = (struct main_hdr_v1 *)ptr;
-	size_t header_size = kwbimage_header_size(ptr);
+	size_t header_size = kwbheader_size(ptr);
+	struct opt_hdr_v1 *ohdr;
 	int idx = params->pflag;
 	int cur_idx = 0;
 	uint32_t offset;
 	ulong image;
 	ulong size;
 
-	if (image_version((void *)ptr) == 1 && (mhdr->ext & 0x1)) {
-		struct opt_hdr_v1 *ohdr = (struct opt_hdr_v1 *)
-					  ((uint8_t *)ptr +
-					   sizeof(*mhdr));
-
-		while (1) {
-			uint32_t ohdr_size = (ohdr->headersz_msb << 16) |
-					     le16_to_cpu(ohdr->headersz_lsb);
+	for_each_opt_hdr_v1 (ohdr, ptr) {
+		if (ohdr->headertype != OPT_HDR_V1_BINARY_TYPE)
+			continue;
 
-			if (ohdr->headertype == OPT_HDR_V1_BINARY_TYPE) {
-				if (idx == cur_idx) {
-					image = (ulong)&ohdr->data[4 +
-					         4 * ohdr->data[0]];
-					size = ohdr_size - 12 -
-					       4 * ohdr->data[0];
-					goto extract;
-				}
-				++cur_idx;
-			}
-			if (!(*((uint8_t *)ohdr + ohdr_size - 4) & 0x1))
-				break;
-			ohdr = (struct opt_hdr_v1 *)((uint8_t *)ohdr +
-						     ohdr_size);
+		if (idx == cur_idx) {
+			image = (ulong)&ohdr->data[4 + 4 * ohdr->data[0]];
+			size = opt_hdr_v1_size(ohdr) - 12 - 4 * ohdr->data[0];
+			goto extract;
 		}
+
+		++cur_idx;
 	}
 
 	if (idx != cur_idx) {
diff --git a/tools/kwbimage.h b/tools/kwbimage.h
index 10e3254..126d482 100644
--- a/tools/kwbimage.h
+++ b/tools/kwbimage.h
@@ -69,12 +69,7 @@
 	uint8_t               checksum;
 } __packed;
 
-struct kwb_header {
-	struct main_hdr_v0	kwb_hdr;
-	struct ext_hdr_v0	kwb_exthdr;
-} __packed;
-
-/* Structure of the main header, version 1 (Armada 370/38x/XP) */
+/* Structure of the main header, version 1 (Armada 370/XP/375/38x/39x) */
 struct main_hdr_v1 {
 	uint8_t  blockid;               /* 0x0       */
 	uint8_t  flags;                 /* 0x1       */
@@ -108,7 +103,7 @@
 #define MAIN_HDR_V1_OPT_BAUD_115200	0x7
 
 /*
- * Header for the optional headers, version 1 (Armada 370, Armada XP)
+ * Header for the optional headers, version 1 (Armada 370/XP/375/38x/39x)
  */
 struct opt_hdr_v1 {
 	uint8_t  headertype;
@@ -132,7 +127,7 @@
 } __packed;
 
 /*
- * Structure of secure header (Armada 38x)
+ * Structure of secure header (Armada XP/375/38x/39x)
  */
 struct secure_hdr_v1 {
 	uint8_t  headertype;		/* 0x0 */
@@ -195,9 +190,6 @@
 #define OPT_HDR_V1_BINARY_TYPE   0x2
 #define OPT_HDR_V1_REGISTER_TYPE 0x3
 
-#define KWBHEADER_V1_SIZE(hdr) \
-	(((hdr)->headersz_msb << 16) | le16_to_cpu((hdr)->headersz_lsb))
-
 enum kwbimage_cmd {
 	CMD_INVALID,
 	CMD_BOOT_FROM,
@@ -225,10 +217,91 @@
  * header, byte 8 was reserved, and always set to 0. In the v1 header,
  * byte 8 has been changed to a proper field, set to 1.
  */
-static inline unsigned int image_version(void *header)
+static inline unsigned int kwbimage_version(const void *header)
 {
-	unsigned char *ptr = header;
+	const unsigned char *ptr = header;
 	return ptr[8];
 }
 
+/* Size of the whole header: v0 main (+ ext if flagged), v1 from headersz. */
+static inline size_t kwbheader_size(const void *header)
+{
+	const struct main_hdr_v0 *hdr0 = header;
+	const struct main_hdr_v1 *hdr1 = header;
+
+	if (kwbimage_version(header) != 0)
+		return (hdr1->headersz_msb << 16) |
+		       le16_to_cpu(hdr1->headersz_lsb);
+
+	/* ?: binds looser than +, so the conditional must be parenthesized */
+	return sizeof(*hdr0) +
+	       ((hdr0->ext & 0x1) ? sizeof(struct ext_hdr_v0) : 0);
+}
+
+static inline size_t kwbheader_size_for_csum(const void *header)
+{
+	if (kwbimage_version(header) == 0)
+		return sizeof(struct main_hdr_v0);
+	else
+		return kwbheader_size(header);
+}
+
+static inline uint32_t opt_hdr_v1_size(const struct opt_hdr_v1 *ohdr)
+{
+	return (ohdr->headersz_msb << 16) | le16_to_cpu(ohdr->headersz_lsb);
+}
+
+static inline int opt_hdr_v1_valid_size(const struct opt_hdr_v1 *ohdr,
+					const void *mhdr_end)
+{
+	const uint8_t *start = (const uint8_t *)ohdr;
+	const uint8_t *limit = mhdr_end;
+	uint32_t len;
+
+	/* the fixed part must fit before the size field can be trusted */
+	if (start + sizeof(*ohdr) > limit)
+		return 0;
+
+	len = opt_hdr_v1_size(ohdr);
+	return len >= 8 && start + len <= limit;
+}
+
+/* First optional header of a v1 image; NULL if not v1 or ext bit 0 clear. */
+static inline struct opt_hdr_v1 *opt_hdr_v1_first(void *img)
+{
+	struct main_hdr_v1 *mhdr = img;
+
+	if (kwbimage_version(img) != 1)
+		return NULL;
+
+	if (!(mhdr->ext & 0x1))
+		return NULL;
+	return (struct opt_hdr_v1 *)(mhdr + 1);
+}
+
+static inline uint8_t *opt_hdr_v1_ext(struct opt_hdr_v1 *cur)
+{
+	uint32_t size = opt_hdr_v1_size(cur);
+
+	return (uint8_t *)cur + size - 4;
+}
+
+static inline struct opt_hdr_v1 *_opt_hdr_v1_next(struct opt_hdr_v1 *cur)
+{
+	return (struct opt_hdr_v1 *)((uint8_t *)cur + opt_hdr_v1_size(cur));
+}
+
+static inline struct opt_hdr_v1 *opt_hdr_v1_next(struct opt_hdr_v1 *cur)
+{
+	if (*opt_hdr_v1_ext(cur) & 0x1)
+		return _opt_hdr_v1_next(cur);
+	else
+		return NULL;
+}
+
+#define for_each_opt_hdr_v1(ohdr, img)		\
+	for ((ohdr) = opt_hdr_v1_first((img));	\
+	     (ohdr) != NULL;			\
+	     (ohdr) = opt_hdr_v1_next((ohdr)))
+
 #endif /* _KWBIMAGE_H_ */
diff --git a/tools/kwboot.c b/tools/kwboot.c
index 7feeaa4..6a1a030 100644
--- a/tools/kwboot.c
+++ b/tools/kwboot.c
@@ -1,8 +1,11 @@
 /*
  * Boot a Marvell SoC, with Xmodem over UART0.
- *  supports Kirkwood, Dove, Armada 370, Armada XP
+ *  supports Kirkwood, Dove, Armada 370, Armada XP, Armada 375, Armada 38x and
+ *           Armada 39x
  *
  * (c) 2012 Daniel Stodden <daniel.stodden@gmail.com>
+ * (c) 2021 Pali Rohár <pali@kernel.org>
+ * (c) 2021 Marek Behún <marek.behun@nic.cz>
  *
  * References: marvell.com, "88F6180, 88F6190, 88F6192, and 88F6281
  *   Integrated Controller: Functional Specifications" December 2,
@@ -11,6 +14,7 @@
 
 #include "kwbimage.h"
 #include "mkimage.h"
+#include "version.h"
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -22,10 +26,15 @@
 #include <errno.h>
 #include <unistd.h>
 #include <stdint.h>
-#include <termios.h>
-#include <sys/mman.h>
+#include <time.h>
 #include <sys/stat.h>
 
+#ifdef __linux__
+#include "termios_linux.h"
+#else
+#include <termios.h>
+#endif
+
 /*
  * Marvell BootROM UART Sensing
  */
@@ -56,15 +65,199 @@
 #define NAK	21	/* target block negative ack */
 #define CAN	24	/* target/sender transfer cancellation */
 
+#define KWBOOT_XM_BLKSZ	128 /* xmodem block size */
+
 struct kwboot_block {
 	uint8_t soh;
 	uint8_t pnum;
 	uint8_t _pnum;
-	uint8_t data[128];
+	uint8_t data[KWBOOT_XM_BLKSZ];
 	uint8_t csum;
 } __packed;
 
 #define KWBOOT_BLK_RSP_TIMEO 1000 /* ms */
+#define KWBOOT_HDR_RSP_TIMEO 10000 /* ms */
+
+/* ARM code making baudrate changing function return to original exec address */
+static unsigned char kwboot_pre_baud_code[] = {
+				/* exec_addr:                                 */
+	0x00, 0x00, 0x00, 0x00, /* .word 0                                    */
+	0x0c, 0xe0, 0x1f, 0xe5, /* ldr lr, exec_addr                          */
+};
+
+/* ARM code for binary header injection to change baudrate */
+static unsigned char kwboot_baud_code[] = {
+				/* ; #define UART_BASE 0xd0012000             */
+				/* ; #define THR       0x00                   */
+				/* ; #define DLL       0x00                   */
+				/* ; #define DLH       0x04                   */
+				/* ; #define LCR       0x0c                   */
+				/* ; #define   DLAB    0x80                   */
+				/* ; #define LSR       0x14                   */
+				/* ; #define   THRE    0x20                   */
+				/* ; #define   TEMT    0x40                   */
+				/* ; #define DIV_ROUND(a, b) ((a + b/2) / b)  */
+				/* ;                                          */
+				/* ; u32 set_baudrate(u32 old_b, u32 new_b) { */
+				/* ;   const u8 *str = "$baudratechange";     */
+				/* ;   u8 c;                                  */
+				/* ;   do {                                   */
+				/* ;       c = *str++;                        */
+				/* ;       writel(UART_BASE + THR, c);        */
+				/* ;   } while (c);                           */
+				/* ;   while                                  */
+				/* ;      (!(readl(UART_BASE + LSR) & TEMT)); */
+				/* ;   u32 lcr = readl(UART_BASE + LCR);      */
+				/* ;   writel(UART_BASE + LCR, lcr | DLAB);   */
+				/* ;   u8 old_dll = readl(UART_BASE + DLL);   */
+				/* ;   u8 old_dlh = readl(UART_BASE + DLH);   */
+				/* ;   u16 old_dl = old_dll | (old_dlh << 8); */
+				/* ;   u32 clk = old_b * old_dl;              */
+				/* ;   u16 new_dl = DIV_ROUND(clk, new_b);    */
+				/* ;   u8 new_dll = new_dl & 0xff;            */
+				/* ;   u8 new_dlh = (new_dl >> 8) & 0xff;     */
+				/* ;   writel(UART_BASE + DLL, new_dll);      */
+				/* ;   writel(UART_BASE + DLH, new_dlh);      */
+				/* ;   writel(UART_BASE + LCR, lcr & ~DLAB);  */
+				/* ;   msleep(1);                             */
+				/* ;   return 0;                              */
+				/* ; }                                        */
+
+	0xfe, 0x5f, 0x2d, 0xe9, /* push  { r1 - r12, lr }                     */
+
+				/*  ; r0 = UART_BASE                          */
+	0x02, 0x0a, 0xa0, 0xe3, /* mov   r0, #0x2000                          */
+	0x01, 0x00, 0x4d, 0xe3, /* movt  r0, #0xd001                          */
+
+				/*  ; r2 = address of preamble string         */
+	0xd0, 0x20, 0x8f, 0xe2, /* adr   r2, preamble                         */
+
+				/*  ; Send preamble string over UART          */
+				/* .Lloop_preamble:                           */
+				/*                                            */
+				/*  ; Wait until Transmitter Holding is Empty */
+				/* .Lloop_thre:                               */
+				/*  ; r1 = UART_BASE[LSR] & THRE              */
+	0x14, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x14]                      */
+	0x20, 0x00, 0x11, 0xe3, /* tst   r1, #0x20                            */
+	0xfc, 0xff, 0xff, 0x0a, /* beq   .Lloop_thre                          */
+
+				/*  ; Put character into Transmitter FIFO     */
+				/*  ; r1 = *r2++                              */
+	0x01, 0x10, 0xd2, 0xe4, /* ldrb  r1, [r2], #1                         */
+				/*  ; UART_BASE[THR] = r1                     */
+	0x00, 0x10, 0x80, 0xe5, /* str   r1, [r0, #0x0]                       */
+
+				/*  ; Loop until end of preamble string       */
+	0x00, 0x00, 0x51, 0xe3, /* cmp   r1, #0                               */
+	0xf8, 0xff, 0xff, 0x1a, /* bne   .Lloop_preamble                      */
+
+				/*  ; Wait until Transmitter FIFO is Empty    */
+				/* .Lloop_txempty:                            */
+				/*  ; r1 = UART_BASE[LSR] & TEMT              */
+	0x14, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x14]                      */
+	0x40, 0x00, 0x11, 0xe3, /* tst   r1, #0x40                            */
+	0xfc, 0xff, 0xff, 0x0a, /* beq   .Lloop_txempty                       */
+
+				/*  ; Set Divisor Latch Access Bit            */
+				/*  ; UART_BASE[LCR] |= DLAB                  */
+	0x0c, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x0c]                      */
+	0x80, 0x10, 0x81, 0xe3, /* orr   r1, r1, #0x80                        */
+	0x0c, 0x10, 0x80, 0xe5, /* str   r1, [r0, #0x0c]                      */
+
+				/*  ; Read current Divisor Latch              */
+				/*  ; r1 = UART_BASE[DLH]<<8 | UART_BASE[DLL] */
+	0x00, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x00]                      */
+	0xff, 0x10, 0x01, 0xe2, /* and   r1, r1, #0xff                        */
+	0x01, 0x20, 0xa0, 0xe1, /* mov   r2, r1                               */
+	0x04, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x04]                      */
+	0xff, 0x10, 0x01, 0xe2, /* and   r1, r1, #0xff                        */
+	0x41, 0x14, 0xa0, 0xe1, /* asr   r1, r1, #8                           */
+	0x02, 0x10, 0x81, 0xe1, /* orr   r1, r1, r2                           */
+
+				/*  ; Read old baudrate value                 */
+				/*  ; r2 = old_baudrate                       */
+	0x8c, 0x20, 0x9f, 0xe5, /* ldr   r2, old_baudrate                     */
+
+				/*  ; Calculate base clock                    */
+				/*  ; r1 = r2 * r1                            */
+	0x92, 0x01, 0x01, 0xe0, /* mul   r1, r2, r1                           */
+
+				/*  ; Read new baudrate value                 */
+				/*  ; r2 = new_baudrate                       */
+	0x88, 0x20, 0x9f, 0xe5, /* ldr   r2, new_baudrate                     */
+
+				/*  ; Calculate new Divisor Latch             */
+				/*  ; r1 = DIV_ROUND(r1, r2) =                */
+				/*  ;    = (r1 + r2/2) / r2                   */
+	0xa2, 0x10, 0x81, 0xe0, /* add   r1, r1, r2, lsr #1                   */
+	0x02, 0x40, 0xa0, 0xe1, /* mov   r4, r2                               */
+	0xa1, 0x00, 0x54, 0xe1, /* cmp   r4, r1, lsr #1                       */
+				/* .Lloop_div1:                               */
+	0x84, 0x40, 0xa0, 0x91, /* movls r4, r4, lsl #1                       */
+	0xa1, 0x00, 0x54, 0xe1, /* cmp   r4, r1, lsr #1                       */
+	0xfc, 0xff, 0xff, 0x9a, /* bls   .Lloop_div1                          */
+	0x00, 0x30, 0xa0, 0xe3, /* mov   r3, #0                               */
+				/* .Lloop_div2:                               */
+	0x04, 0x00, 0x51, 0xe1, /* cmp   r1, r4                               */
+	0x04, 0x10, 0x41, 0x20, /* subhs r1, r1, r4                           */
+	0x03, 0x30, 0xa3, 0xe0, /* adc   r3, r3, r3                           */
+	0xa4, 0x40, 0xa0, 0xe1, /* mov   r4, r4, lsr #1                       */
+	0x02, 0x00, 0x54, 0xe1, /* cmp   r4, r2                               */
+	0xf9, 0xff, 0xff, 0x2a, /* bhs   .Lloop_div2                          */
+	0x03, 0x10, 0xa0, 0xe1, /* mov   r1, r3                               */
+
+				/*  ; Set new Divisor Latch Low               */
+				/*  ; UART_BASE[DLL] = r1 & 0xff              */
+	0x01, 0x20, 0xa0, 0xe1, /* mov   r2, r1                               */
+	0xff, 0x20, 0x02, 0xe2, /* and   r2, r2, #0xff                        */
+	0x00, 0x20, 0x80, 0xe5, /* str   r2, [r0, #0x00]                      */
+
+				/*  ; Set new Divisor Latch High              */
+				/*  ; UART_BASE[DLH] = r1>>8 & 0xff           */
+	0x41, 0x24, 0xa0, 0xe1, /* asr   r2, r1, #8                           */
+	0xff, 0x20, 0x02, 0xe2, /* and   r2, r2, #0xff                        */
+	0x04, 0x20, 0x80, 0xe5, /* str   r2, [r0, #0x04]                      */
+
+				/*  ; Clear Divisor Latch Access Bit          */
+				/*  ; UART_BASE[LCR] &= ~DLAB                 */
+	0x0c, 0x10, 0x90, 0xe5, /* ldr   r1, [r0, #0x0c]                      */
+	0x80, 0x10, 0xc1, 0xe3, /* bic   r1, r1, #0x80                        */
+	0x0c, 0x10, 0x80, 0xe5, /* str   r1, [r0, #0x0c]                      */
+
+				/*  ; Sleep 1ms ~~ 600000 cycles at 1200 MHz  */
+				/*  ; r1 = 600000                             */
+	0x9f, 0x1d, 0xa0, 0xe3, /* mov   r1, #0x27c0                          */
+	0x09, 0x10, 0x40, 0xe3, /* movt  r1, #0x0009                          */
+				/* .Lloop_sleep:                              */
+	0x01, 0x10, 0x41, 0xe2, /* sub   r1, r1, #1                           */
+	0x00, 0x00, 0x51, 0xe3, /* cmp   r1, #0                               */
+	0xfc, 0xff, 0xff, 0x1a, /* bne   .Lloop_sleep                         */
+
+				/*  ; Return 0 - no error                     */
+	0x00, 0x00, 0xa0, 0xe3, /* mov   r0, #0                               */
+	0xfe, 0x9f, 0xbd, 0xe8, /* pop   { r1 - r12, pc }                     */
+
+				/*  ; Preamble string                         */
+				/* preamble:                                  */
+	0x24, 0x62, 0x61, 0x75, /* .asciz "$baudratechange"                   */
+	0x64, 0x72, 0x61, 0x74,
+	0x65, 0x63, 0x68, 0x61,
+	0x6e, 0x67, 0x65, 0x00,
+
+				/*  ; Placeholder for old baudrate value      */
+				/* old_baudrate:                              */
+	0x00, 0x00, 0x00, 0x00, /* .word 0                                    */
+
+				/*  ; Placeholder for new baudrate value      */
+				/* new_baudrate:                              */
+	0x00, 0x00, 0x00, 0x00, /* .word 0                                    */
+};
+
+#define KWBOOT_BAUDRATE_BIN_HEADER_SZ (sizeof(kwboot_baud_code) + \
+				       sizeof(struct opt_hdr_v1) + 8)
+
+static const char kwb_baud_magic[16] = "$baudratechange";
 
 static int kwboot_verbose;
 
@@ -72,6 +265,23 @@
 static int msg_rsp_timeo = KWBOOT_MSG_RSP_TIMEO;
 static int blk_rsp_timeo = KWBOOT_BLK_RSP_TIMEO;
 
+/* Write all len bytes of buf to fd; returns len, or -1 if write() fails. */
+static ssize_t
+kwboot_write(int fd, const char *buf, size_t len)
+{
+	size_t done;
+	ssize_t n;
+
+	for (done = 0; done < len; done += n) {
+		/* write() may accept fewer bytes than asked; resume there */
+		n = write(fd, buf + done, len - done);
+		if (n < 0)
+			return -1;
+	}
+
+	return done;
+}
+
 static void
 kwboot_printv(const char *fmt, ...)
 {
@@ -122,12 +332,14 @@
 	fputc(c, stdout);
 
 	nl = "]\n";
-	pos++;
+	pos = (pos + 1) % width;
 
 	if (pct == 100) {
-		while (pos++ < width)
+		while (pos && pos++ < width)
 			fputc(' ', stdout);
 		fputs(nl, stdout);
+		nl = "";
+		pos = 0;
 	}
 
 	fflush(stdout);
@@ -144,6 +356,9 @@
 
 	if (kwboot_verbose)
 		__progress(pct, c);
+
+	if (pct == 100)
+		pct = 0;
 }
 
 static int
@@ -191,26 +406,13 @@
 static int
 kwboot_tty_send(int fd, const void *buf, size_t len)
 {
-	int rc;
-	ssize_t n;
-
 	if (!buf)
 		return 0;
 
-	rc = -1;
-
-	do {
-		n = write(fd, buf, len);
-		if (n < 0)
-			goto out;
-
-		buf = (char *)buf + n;
-		len -= n;
-	} while (len > 0);
+	if (kwboot_write(fd, buf, len) < 0)
+		return -1;
 
-	rc = tcdrain(fd);
-out:
-	return rc;
+	return tcdrain(fd);
 }
 
 static int
@@ -220,51 +422,260 @@
 }
 
 static speed_t
-kwboot_tty_speed(int baudrate)
+kwboot_tty_baudrate_to_speed(int baudrate)
 {
 	switch (baudrate) {
+#ifdef B4000000
+	case 4000000:
+		return B4000000;
+#endif
+#ifdef B3500000
+	case 3500000:
+		return B3500000;
+#endif
+#ifdef B3000000
+	case 3000000:
+		return B3000000;
+#endif
+#ifdef B2500000
+	case 2500000:
+		return B2500000;
+#endif
+#ifdef B2000000
+	case 2000000:
+		return B2000000;
+#endif
+#ifdef B1500000
+	case 1500000:
+		return B1500000;
+#endif
+#ifdef B1152000
+	case 1152000:
+		return B1152000;
+#endif
+#ifdef B1000000
+	case 1000000:
+		return B1000000;
+#endif
+#ifdef B921600
+	case 921600:
+		return B921600;
+#endif
+#ifdef B614400
+	case 614400:
+		return B614400;
+#endif
+#ifdef B576000
+	case 576000:
+		return B576000;
+#endif
+#ifdef B500000
+	case 500000:
+		return B500000;
+#endif
+#ifdef B460800
+	case 460800:
+		return B460800;
+#endif
+#ifdef B307200
+	case 307200:
+		return B307200;
+#endif
+#ifdef B230400
+	case 230400:
+		return B230400;
+#endif
+#ifdef B153600
+	case 153600:
+		return B153600;
+#endif
+#ifdef B115200
 	case 115200:
 		return B115200;
+#endif
+#ifdef B76800
+	case 76800:
+		return B76800;
+#endif
+#ifdef B57600
 	case 57600:
 		return B57600;
+#endif
+#ifdef B38400
 	case 38400:
 		return B38400;
+#endif
+#ifdef B19200
 	case 19200:
 		return B19200;
+#endif
+#ifdef B9600
 	case 9600:
 		return B9600;
+#endif
+#ifdef B4800
+	case 4800:
+		return B4800;
+#endif
+#ifdef B2400
+	case 2400:
+		return B2400;
+#endif
+#ifdef B1800
+	case 1800:
+		return B1800;
+#endif
+#ifdef B1200
+	case 1200:
+		return B1200;
+#endif
+#ifdef B600
+	case 600:
+		return B600;
+#endif
+#ifdef B300
+	case 300:
+		return B300;
+#endif
+#ifdef B200
+	case 200:
+		return B200;
+#endif
+#ifdef B150
+	case 150:
+		return B150;
+#endif
+#ifdef B134
+	case 134:
+		return B134;
+#endif
+#ifdef B110
+	case 110:
+		return B110;
+#endif
+#ifdef B75
+	case 75:
+		return B75;
+#endif
+#ifdef B50
+	case 50:
+		return B50;
+#endif
+	default:
+#ifdef BOTHER
+		return BOTHER;
+#else
+		return B0;
+#endif
+	}
+}
+
+static int
+_is_within_tolerance(int value, int reference, int tolerance)
+{
+	return 100 * value >= reference * (100 - tolerance) &&
+	       100 * value <= reference * (100 + tolerance);
+}
+
+static int
+kwboot_tty_change_baudrate(int fd, int baudrate)
+{
+	struct termios tio;
+	speed_t speed;
+	int rc;
+
+	rc = tcgetattr(fd, &tio);
+	if (rc)
+		return rc;
+
+	speed = kwboot_tty_baudrate_to_speed(baudrate);
+	if (speed == B0) {
+		errno = EINVAL;
+		return -1;
 	}
 
+#ifdef BOTHER
+	if (speed == BOTHER)
+		tio.c_ospeed = tio.c_ispeed = baudrate;
+#endif
+
+	rc = cfsetospeed(&tio, speed);
+	if (rc)
+		return rc;
+
+	rc = cfsetispeed(&tio, speed);
+	if (rc)
+		return rc;
+
+	rc = tcsetattr(fd, TCSANOW, &tio);
+	if (rc)
+		return rc;
+
+	rc = tcgetattr(fd, &tio);
+	if (rc)
+		return rc;
+
+	if (cfgetospeed(&tio) != speed || cfgetispeed(&tio) != speed)
+		goto baud_fail;
+
+#ifdef BOTHER
+	/*
+	 * Check whether set baudrate is within 3% tolerance.
+	 * If BOTHER is defined, Linux always fills out c_ospeed / c_ispeed
+	 * with real values.
+	 */
+	if (!_is_within_tolerance(tio.c_ospeed, baudrate, 3))
+		goto baud_fail;
+
+	if (!_is_within_tolerance(tio.c_ispeed, baudrate, 3))
+		goto baud_fail;
+#endif
+
+	return 0;
+
+baud_fail:
+	fprintf(stderr, "Could not set baudrate to requested value\n");
+	errno = EINVAL;
 	return -1;
 }
 
 static int
-kwboot_open_tty(const char *path, speed_t speed)
+kwboot_open_tty(const char *path, int baudrate)
 {
 	int rc, fd;
 	struct termios tio;
 
 	rc = -1;
 
-	fd = open(path, O_RDWR|O_NOCTTY|O_NDELAY);
+	fd = open(path, O_RDWR | O_NOCTTY | O_NDELAY);
 	if (fd < 0)
 		goto out;
 
-	memset(&tio, 0, sizeof(tio));
-
-	tio.c_iflag = 0;
-	tio.c_cflag = CREAD|CLOCAL|CS8;
+	rc = tcgetattr(fd, &tio);
+	if (rc)
+		goto out;
 
+	cfmakeraw(&tio);
+	tio.c_cflag |= CREAD | CLOCAL;
 	tio.c_cc[VMIN] = 1;
-	tio.c_cc[VTIME] = 10;
-
-	cfsetospeed(&tio, speed);
-	cfsetispeed(&tio, speed);
+	tio.c_cc[VTIME] = 0;
 
 	rc = tcsetattr(fd, TCSANOW, &tio);
 	if (rc)
 		goto out;
 
+	/* Clear O_NDELAY; flags go through rc so a failed F_GETFL is < 0 */
+	rc = fcntl(fd, F_GETFL);
+	if (rc < 0)
+		goto out;
+	rc = fcntl(fd, F_SETFL, rc & ~O_NDELAY);
+	if (rc)
+		goto out;
+
+	rc = kwboot_tty_change_baudrate(fd, baudrate);
+	if (rc)
+		goto out;
+
 	rc = fd;
 out:
 	if (rc < 0) {
@@ -342,21 +753,19 @@
 	return rc;
 }
 
-static int
+static size_t
 kwboot_xm_makeblock(struct kwboot_block *block, const void *data,
 		    size_t size, int pnum)
 {
-	const size_t blksz = sizeof(block->data);
-	size_t n;
-	int i;
+	size_t i, n;
 
 	block->soh = SOH;
 	block->pnum = pnum;
 	block->_pnum = ~block->pnum;
 
-	n = size < blksz ? size : blksz;
+	n = size < KWBOOT_XM_BLKSZ ? size : KWBOOT_XM_BLKSZ;
 	memcpy(&block->data[0], data, n);
-	memset(&block->data[n], 0, blksz - n);
+	memset(&block->data[n], 0, KWBOOT_XM_BLKSZ - n);
 
 	block->csum = 0;
 	for (i = 0; i < n; i++)
@@ -365,34 +774,36 @@
 	return n;
 }
 
-static int
-kwboot_xm_sendblock(int fd, struct kwboot_block *block)
+static uint64_t
+_now(void)
 {
-	int rc, retries;
-	char c;
+	struct timespec ts;
 
-	retries = 16;
-	do {
-		rc = kwboot_tty_send(fd, block, sizeof(*block));
-		if (rc)
-			break;
+	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
+		static int err_print;
 
-		do {
-			rc = kwboot_tty_recv(fd, &c, 1, blk_rsp_timeo);
-			if (rc)
-				break;
-
-			if (c != ACK && c != NAK && c != CAN)
-				printf("%c", c);
+		if (!err_print) {
+			perror("clock_gettime() does not work");
+			err_print = 1;
+		}
 
-		} while (c != ACK && c != NAK && c != CAN);
+		/* this will just make the timeout not work */
+		return -1ULL;
+	}
 
-		if (c != ACK)
-			kwboot_progress(-1, '+');
+	return ts.tv_sec * 1000ULL + (ts.tv_nsec + 500000) / 1000000;
+}
 
-	} while (c == NAK && retries-- > 0);
+static int
+_is_xm_reply(char c)
+{
+	return c == ACK || c == NAK || c == CAN;
+}
 
-	rc = -1;
+static int
+_xm_reply_to_error(int c)
+{
+	int rc = -1;
 
 	switch (c) {
 	case ACK:
@@ -413,56 +824,269 @@
 }
 
 static int
-kwboot_xmodem(int tty, const void *_data, size_t size)
+kwboot_baud_magic_handle(int fd, char c, int baudrate)
 {
-	const uint8_t *data = _data;
-	int rc, pnum, N, err;
+	static size_t rcv_len;
 
-	pnum = 1;
-	N = 0;
+	if (rcv_len < sizeof(kwb_baud_magic)) {
+		/* try to recognize whole magic word */
+		if (c == kwb_baud_magic[rcv_len]) {
+			rcv_len++;
+		} else {
+			printf("%.*s%c", (int)rcv_len, kwb_baud_magic, c);
+			fflush(stdout);
+			rcv_len = 0;
+		}
+	}
 
-	kwboot_printv("Sending boot image...\n");
+	if (rcv_len == sizeof(kwb_baud_magic)) {
+		/* magic word received */
+		kwboot_printv("\nChanging baudrate to %d Bd\n", baudrate);
 
-	sleep(2); /* flush isn't effective without it */
-	tcflush(tty, TCIOFLUSH);
+		return kwboot_tty_change_baudrate(fd, baudrate) ? : 1;
+	} else {
+		return 0;
+	}
+}
+
+static int
+kwboot_xm_recv_reply(int fd, char *c, int allow_non_xm, int *non_xm_print,
+		     int baudrate, int *baud_changed)
+{
+	int timeout = allow_non_xm ? KWBOOT_HDR_RSP_TIMEO : blk_rsp_timeo;
+	uint64_t recv_until = _now() + timeout;
+	int rc;
+
+	if (non_xm_print)
+		*non_xm_print = 0;
+	if (baud_changed)
+		*baud_changed = 0;
+
+	while (1) {
+		rc = kwboot_tty_recv(fd, c, 1, timeout);
+		if (rc) {
+			if (errno != ETIMEDOUT)
+				return rc;
+			else if (allow_non_xm && *non_xm_print)
+				return -1;
+			else
+				*c = NAK;
+		}
+
+		/* If received xmodem reply, end. */
+		if (_is_xm_reply(*c))
+			break;
+
+		/*
+		 * If receiving/printing non-xmodem text output is allowed and
+		 * such a byte was received, we want to increase receiving time
+		 * and either:
+		 * - print the byte, if it is not part of baudrate change magic
+		 *   sequence while baudrate change was requested (-B option)
+		 * - change baudrate
+		 * Otherwise decrease timeout by time elapsed.
+		 */
+		if (allow_non_xm) {
+			recv_until = _now() + timeout;
+
+			if (baudrate && !*baud_changed) {
+				rc = kwboot_baud_magic_handle(fd, *c, baudrate);
+				if (rc == 1)
+					*baud_changed = 1;
+				else if (!rc)
+					*non_xm_print = 1;
+				else
+					return rc;
+			} else if (!baudrate || !*baud_changed) {
+				putchar(*c);
+				fflush(stdout);
+				*non_xm_print = 1;
+			}
+		} else {
+			timeout = recv_until - _now();
+			if (timeout < 0) {
+				errno = ETIMEDOUT;
+				return -1;
+			}
+		}
+	}
 
+	return 0;
+}
+
+static int
+kwboot_xm_sendblock(int fd, struct kwboot_block *block, int allow_non_xm,
+		    int *done_print, int baudrate)
+{
+	int non_xm_print, baud_changed;
+	int rc, err, retries;
+	char c;
+
+	*done_print = 0;
+
+	retries = 16;
 	do {
-		struct kwboot_block block;
-		int n;
+		rc = kwboot_tty_send(fd, block, sizeof(*block));
+		if (rc)
+			return rc;
+
+		if (allow_non_xm && !*done_print) {
+			kwboot_progress(100, '.');
+			kwboot_printv("Done\n");
+			*done_print = 1;
+		}
 
-		n = kwboot_xm_makeblock(&block,
-					data + N, size - N,
-					pnum++);
-		if (n < 0)
+		rc = kwboot_xm_recv_reply(fd, &c, allow_non_xm, &non_xm_print,
+					  baudrate, &baud_changed);
+		if (rc)
 			goto can;
 
-		if (!n)
-			break;
+		if (!allow_non_xm && c != ACK)
+			kwboot_progress(-1, '+');
+	} while (c == NAK && retries-- > 0);
+
+	if (non_xm_print)
+		kwboot_printv("\n");
 
-		rc = kwboot_xm_sendblock(tty, &block);
+	if (allow_non_xm && baudrate && !baud_changed) {
+		fprintf(stderr, "Baudrate was not changed\n");
+		rc = -1;
+		errno = EPROTO;
+		goto can;
+	}
+
+	return _xm_reply_to_error(c);
+can:
+	err = errno;
+	kwboot_tty_send_char(fd, CAN);
+	kwboot_printv("\n");
+	errno = err;
+	return rc;
+}
+
+static int
+kwboot_xm_finish(int fd)
+{
+	int rc, retries;
+	char c;
+
+	kwboot_printv("Finishing transfer\n");
+
+	retries = 16;
+	do {
+		rc = kwboot_tty_send_char(fd, EOT);
+		if (rc)
+			return rc;
+
+		rc = kwboot_xm_recv_reply(fd, &c, 0, NULL, 0, NULL);
+		if (rc)
+			return rc;
+	} while (c == NAK && retries-- > 0);
+
+	return _xm_reply_to_error(c);
+}
+
+static int
+kwboot_xmodem_one(int tty, int *pnum, int header, const uint8_t *data,
+		  size_t size, int baudrate)
+{
+	int done_print = 0;
+	size_t sent, left;
+	int rc;
+
+	kwboot_printv("Sending boot image %s (%zu bytes)...\n",
+		      header ? "header" : "data", size);
+
+	left = size;
+	sent = 0;
+
+	while (sent < size) {
+		struct kwboot_block block;
+		int last_block;
+		size_t blksz;
+
+		blksz = kwboot_xm_makeblock(&block, data, left, (*pnum)++);
+		data += blksz;
+
+		last_block = (left <= blksz);
+
+		rc = kwboot_xm_sendblock(tty, &block, header && last_block,
+					 &done_print, baudrate);
 		if (rc)
 			goto out;
 
-		N += n;
-		kwboot_progress(N * 100 / size, '.');
-	} while (1);
+		sent += blksz;
+		left -= blksz;
+
+		if (!done_print)
+			kwboot_progress(sent * 100 / size, '.');
+	}
 
-	rc = kwboot_tty_send_char(tty, EOT);
+	if (!done_print)
+		kwboot_printv("Done\n");
 
+	return 0;
 out:
+	kwboot_printv("\n");
 	return rc;
+}
 
-can:
-	err = errno;
-	kwboot_tty_send_char(tty, CAN);
-	errno = err;
-	goto out;
+static int
+kwboot_xmodem(int tty, const void *_img, size_t size, int baudrate)
+{
+	const uint8_t *img = _img;
+	int rc, pnum;
+	size_t hdrsz;
+
+	hdrsz = kwbheader_size(img);
+
+	kwboot_printv("Waiting 2s and flushing tty\n");
+	sleep(2); /* flush isn't effective without it */
+	tcflush(tty, TCIOFLUSH);
+
+	pnum = 1;
+
+	rc = kwboot_xmodem_one(tty, &pnum, 1, img, hdrsz, baudrate);
+	if (rc)
+		return rc;
+
+	img += hdrsz;
+	size -= hdrsz;
+
+	rc = kwboot_xmodem_one(tty, &pnum, 0, img, size, 0);
+	if (rc)
+		return rc;
+
+	rc = kwboot_xm_finish(tty);
+	if (rc)
+		return rc;
+
+	if (baudrate) {
+		char buf[sizeof(kwb_baud_magic)];
+
+		/* Wait 1s for baudrate change magic */
+		rc = kwboot_tty_recv(tty, buf, sizeof(buf), 1000);
+		if (rc)
+			return rc;
+
+		if (memcmp(buf, kwb_baud_magic, sizeof(buf))) {
+			errno = EPROTO;
+			return -1;
+		}
+
+		kwboot_printv("\nChanging baudrate back to 115200 Bd\n\n");
+		rc = kwboot_tty_change_baudrate(tty, 115200);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
 }
 
 static int
-kwboot_term_pipe(int in, int out, char *quit, int *s)
+kwboot_term_pipe(int in, int out, const char *quit, int *s)
 {
-	ssize_t nin, nout;
+	ssize_t nin;
 	char _buf[128], *buf = _buf;
 
 	nin = read(in, buf, sizeof(_buf));
@@ -480,22 +1104,15 @@
 				buf++;
 				nin--;
 			} else {
-				while (*s > 0) {
-					nout = write(out, quit, *s);
-					if (nout <= 0)
-						return -1;
-					(*s) -= nout;
-				}
+				if (kwboot_write(out, quit, *s) < 0)
+					return -1;
+				*s = 0;
 			}
 		}
 	}
 
-	while (nin > 0) {
-		nout = write(out, buf, nin);
-		if (nout <= 0)
-			return -1;
-		nin -= nout;
-	}
+	if (kwboot_write(out, buf, nin) < 0)
+		return -1;
 
 	return 0;
 }
@@ -504,7 +1121,7 @@
 kwboot_terminal(int tty)
 {
 	int rc, in, s;
-	char *quit = "\34c";
+	const char *quit = "\34c";
 	struct termios otio, tio;
 
 	rc = -1;
@@ -523,7 +1140,7 @@
 		}
 
 		kwboot_printv("[Type Ctrl-%c + %c to quit]\r\n",
-			      quit[0]|0100, quit[1]);
+			      quit[0] | 0100, quit[1]);
 	} else
 		in = -1;
 
@@ -552,7 +1169,7 @@
 				break;
 		}
 
-		if (FD_ISSET(in, &rfds)) {
+		if (in >= 0 && FD_ISSET(in, &rfds)) {
 			rc = kwboot_term_pipe(in, tty, quit, &s);
 			if (rc)
 				break;
@@ -567,11 +1184,12 @@
 }
 
 static void *
-kwboot_mmap_image(const char *path, size_t *size, int prot)
+kwboot_read_image(const char *path, size_t *size, size_t reserve)
 {
-	int rc, fd, flags;
+	int rc, fd;
 	struct stat st;
 	void *img;
+	off_t tot;
 
 	rc = -1;
 	img = NULL;
@@ -584,19 +1202,30 @@
 	if (rc)
 		goto out;
 
-	flags = (prot & PROT_WRITE) ? MAP_PRIVATE : MAP_SHARED;
-
-	img = mmap(NULL, st.st_size, prot, flags, fd, 0);
-	if (img == MAP_FAILED) {
-		img = NULL;
+	img = malloc(st.st_size + reserve);
+	if (!img)
 		goto out;
+
+	tot = 0;
+	while (tot < st.st_size) {
+		ssize_t rd = read(fd, img + tot, st.st_size - tot);
+
+		if (rd < 0)
+			goto out;
+
+		tot += rd;
+
+		if (!rd && tot < st.st_size) {
+			errno = EIO;
+			goto out;
+		}
 	}
 
 	rc = 0;
 	*size = st.st_size;
 out:
 	if (rc && img) {
-		munmap(img, st.st_size);
+		free(img);
 		img = NULL;
 	}
 	if (fd >= 0)
@@ -606,9 +1235,13 @@
 }
 
 static uint8_t
-kwboot_img_csum8(void *_data, size_t size)
+kwboot_hdr_csum8(const void *hdr)
 {
-	uint8_t *data = _data, csum;
+	const uint8_t *data = hdr;
+	uint8_t csum;
+	size_t size;
+
+	size = kwbheader_size_for_csum(hdr);
 
 	for (csum = 0; size-- > 0; data++)
 		csum += *data;
@@ -617,80 +1250,299 @@
 }
 
 static int
-kwboot_img_patch_hdr(void *img, size_t size)
+kwboot_img_is_secure(void *img)
 {
-	int rc;
+	struct opt_hdr_v1 *ohdr;
+
+	for_each_opt_hdr_v1 (ohdr, img)
+		if (ohdr->headertype == OPT_HDR_V1_SECURE_TYPE)
+			return 1;
+
+	return 0;
+}
+
+static void *
+kwboot_img_grow_data_left(void *img, size_t *size, size_t grow)
+{
+	uint32_t hdrsz, datasz, srcaddr;
+	struct main_hdr_v1 *hdr = img;
+	uint8_t *data;
+
+	srcaddr = le32_to_cpu(hdr->srcaddr);
+
+	hdrsz = kwbheader_size(hdr);
+	data = (uint8_t *)img + srcaddr;
+	datasz = *size - srcaddr;
+
+	/* only move data if there is not enough space */
+	if (hdrsz + grow > srcaddr) {
+		size_t need = hdrsz + grow - srcaddr;
+
+		/* move data by enough bytes */
+		memmove(data + need, data, datasz);
+		*size += need;
+		srcaddr += need;
+	}
+
+	srcaddr -= grow;
+	hdr->srcaddr = cpu_to_le32(srcaddr);
+	hdr->destaddr = cpu_to_le32(le32_to_cpu(hdr->destaddr) - grow);
+	hdr->blocksize = cpu_to_le32(le32_to_cpu(hdr->blocksize) + grow);
+
+	return (uint8_t *)img + srcaddr;
+}
+
+static void
+kwboot_img_grow_hdr(void *img, size_t *size, size_t grow)
+{
+	uint32_t hdrsz, datasz, srcaddr;
+	struct main_hdr_v1 *hdr = img;
+	uint8_t *data;
+
+	srcaddr = le32_to_cpu(hdr->srcaddr);
+
+	hdrsz = kwbheader_size(img);
+	data = (uint8_t *)img + srcaddr;
+	datasz = *size - srcaddr;
+
+	/* only move data if there is not enough space */
+	if (hdrsz + grow > srcaddr) {
+		size_t need = hdrsz + grow - srcaddr;
+
+		/* move data by enough bytes */
+		memmove(data + need, data, datasz);
+
+		hdr->srcaddr = cpu_to_le32(srcaddr + need);
+		*size += need;
+	}
+
+	if (kwbimage_version(img) == 1) {
+		hdrsz += grow;
+		hdr->headersz_msb = hdrsz >> 16;
+		hdr->headersz_lsb = cpu_to_le16(hdrsz & 0xffff);
+	}
+}
+
+static void *
+kwboot_add_bin_ohdr_v1(void *img, size_t *size, uint32_t binsz)
+{
+	struct main_hdr_v1 *hdr = img;
+	struct opt_hdr_v1 *ohdr;
+	uint32_t ohdrsz;
+
+	ohdrsz = binsz + 8 + sizeof(*ohdr);
+	kwboot_img_grow_hdr(img, size, ohdrsz);
+
+	if (hdr->ext & 0x1) {
+		for_each_opt_hdr_v1 (ohdr, img)
+			if (opt_hdr_v1_next(ohdr) == NULL)
+				break;
+
+		*opt_hdr_v1_ext(ohdr) |= 1;
+		ohdr = opt_hdr_v1_next(ohdr);
+	} else {
+		hdr->ext |= 1;
+		ohdr = (void *)(hdr + 1);
+	}
+
+	ohdr->headertype = OPT_HDR_V1_BINARY_TYPE;
+	ohdr->headersz_msb = ohdrsz >> 16;
+	ohdr->headersz_lsb = cpu_to_le16(ohdrsz & 0xffff);
+
+	memset(&ohdr->data[0], 0, ohdrsz - sizeof(*ohdr));
+
+	return &ohdr->data[4];
+}
+
+static void
+_copy_baudrate_change_code(struct main_hdr_v1 *hdr, void *dst, int pre,
+			   int old_baud, int new_baud)
+{
+	size_t codesz = sizeof(kwboot_baud_code);
+	uint8_t *code = dst;
+
+	if (pre) {
+		size_t presz = sizeof(kwboot_pre_baud_code);
+
+		/*
+		 * We need to prepend code that loads lr register with original
+		 * value of hdr->execaddr. We do this by putting the original
+		 * exec address before the code that loads it relatively from
+		 * its beginning.
+		 * Afterwards we change the exec address to this code (which is
+		 * at offset 4, because the first 4 bytes contain the original
+		 * exec address).
+		 */
+		memcpy(code, kwboot_pre_baud_code, presz);
+		*(uint32_t *)code = hdr->execaddr;
+
+		hdr->execaddr = cpu_to_le32(le32_to_cpu(hdr->destaddr) + 4);
+
+		code += presz;
+	}
+
+	memcpy(code, kwboot_baud_code, codesz - 8);
+	*(uint32_t *)(code + codesz - 8) = cpu_to_le32(old_baud);
+	*(uint32_t *)(code + codesz - 4) = cpu_to_le32(new_baud);
+}
+
+static int
+kwboot_img_patch(void *img, size_t *size, int baudrate)
+{
 	struct main_hdr_v1 *hdr;
+	uint32_t srcaddr;
 	uint8_t csum;
-	size_t hdrsz = sizeof(*hdr);
+	size_t hdrsz;
 	int image_ver;
+	int is_secure;
 
-	rc = -1;
 	hdr = img;
 
-	if (size < hdrsz) {
-		errno = EINVAL;
-		goto out;
-	}
+	if (*size < sizeof(struct main_hdr_v1))
+		goto err;
 
-	image_ver = image_version(img);
+	image_ver = kwbimage_version(img);
 	if (image_ver != 0 && image_ver != 1) {
 		fprintf(stderr, "Invalid image header version\n");
-		errno = EINVAL;
-		goto out;
+		goto err;
 	}
 
-	if (image_ver == 0)
-		hdrsz = sizeof(*hdr);
-	else
-		hdrsz = KWBHEADER_V1_SIZE(hdr);
+	hdrsz = kwbheader_size(hdr);
 
-	if (size < hdrsz) {
-		errno = EINVAL;
-		goto out;
+	if (*size < hdrsz)
+		goto err;
+
+	csum = kwboot_hdr_csum8(hdr) - hdr->checksum;
+	if (csum != hdr->checksum)
+		goto err;
+
+	if (image_ver == 0) {
+		struct main_hdr_v0 *hdr_v0 = img;
+
+		hdr_v0->nandeccmode = IBR_HDR_ECC_DISABLED;
+		hdr_v0->nandpagesize = 0;
 	}
 
-	csum = kwboot_img_csum8(hdr, hdrsz) - hdr->checksum;
-	if (csum != hdr->checksum) {
-		errno = EINVAL;
-		goto out;
+	srcaddr = le32_to_cpu(hdr->srcaddr);
+
+	switch (hdr->blockid) {
+	case IBR_HDR_SATA_ID:
+		if (srcaddr < 1)
+			goto err;
+
+		hdr->srcaddr = cpu_to_le32((srcaddr - 1) * 512);
+		break;
+
+	case IBR_HDR_SDIO_ID:
+		hdr->srcaddr = cpu_to_le32(srcaddr * 512);
+		break;
+
+	case IBR_HDR_PEX_ID:
+		if (srcaddr == 0xFFFFFFFF)
+			hdr->srcaddr = cpu_to_le32(hdrsz);
+		break;
+
+	case IBR_HDR_SPI_ID:
+		if (hdr->destaddr == cpu_to_le32(0xFFFFFFFF)) {
+			kwboot_printv("Patching destination and execution addresses from SPI/NOR XIP area to DDR area 0x00800000\n");
+			hdr->destaddr = cpu_to_le32(0x00800000);
+			hdr->execaddr = cpu_to_le32(0x00800000);
+		}
+		break;
 	}
 
-	if (hdr->blockid == IBR_HDR_UART_ID) {
-		rc = 0;
-		goto out;
+	if (hdrsz > le32_to_cpu(hdr->srcaddr) ||
+	    *size < le32_to_cpu(hdr->srcaddr) + le32_to_cpu(hdr->blocksize))
+		goto err;
+
+	is_secure = kwboot_img_is_secure(img);
+
+	if (hdr->blockid != IBR_HDR_UART_ID) {
+		if (is_secure) {
+			fprintf(stderr,
+				"Image has secure header with signature for non-UART booting\n");
+			goto err;
+		}
+
+		kwboot_printv("Patching image boot signature to UART\n");
+		hdr->blockid = IBR_HDR_UART_ID;
 	}
 
-	hdr->blockid = IBR_HDR_UART_ID;
+	if (baudrate) {
+		uint32_t codesz = sizeof(kwboot_baud_code);
+		void *code;
 
-	if (image_ver == 0) {
-		struct main_hdr_v0 *hdr_v0 = img;
+		if (image_ver == 0) {
+			fprintf(stderr,
+				"Cannot inject code for changing baudrate into v0 image header\n");
+			goto err;
+		}
 
-		hdr_v0->nandeccmode = IBR_HDR_ECC_DISABLED;
-		hdr_v0->nandpagesize = 0;
+		if (is_secure) {
+			fprintf(stderr,
+				"Cannot inject code for changing baudrate into image with secure header\n");
+			goto err;
+		}
+
+		/*
+		 * First inject code that changes the baudrate from the default
+		 * value of 115200 Bd to requested value. This code is inserted
+		 * as a new opt hdr, so it is executed by BootROM after the
+		 * header part is received.
+		 */
+		kwboot_printv("Injecting binary header code for changing baudrate to %d Bd\n",
+			      baudrate);
+
+		code = kwboot_add_bin_ohdr_v1(img, size, codesz);
+		_copy_baudrate_change_code(hdr, code, 0, 115200, baudrate);
+
+		/*
+		 * Now inject code that changes the baudrate back to 115200 Bd.
+		 * This code is prepended to the data part of the image, so it
+		 * is executed before U-Boot proper.
+		 */
+		kwboot_printv("Injecting code for changing baudrate back\n");
+
+		codesz += sizeof(kwboot_pre_baud_code);
+		code = kwboot_img_grow_data_left(img, size, codesz);
+		_copy_baudrate_change_code(hdr, code, 1, baudrate, 115200);
 
-		hdr_v0->srcaddr = hdr_v0->ext
-			? sizeof(struct kwb_header)
-			: sizeof(*hdr_v0);
+		/* recompute header size */
+		hdrsz = kwbheader_size(hdr);
 	}
 
-	hdr->checksum = kwboot_img_csum8(hdr, hdrsz) - csum;
+	if (hdrsz % KWBOOT_XM_BLKSZ) {
+		size_t offset = (KWBOOT_XM_BLKSZ - hdrsz % KWBOOT_XM_BLKSZ) %
+				KWBOOT_XM_BLKSZ;
 
-	rc = 0;
-out:
-	return rc;
+		if (is_secure) {
+			fprintf(stderr, "Cannot align image with secure header\n");
+			goto err;
+		}
+
+		kwboot_printv("Aligning image header to Xmodem block size\n");
+		kwboot_img_grow_hdr(img, size, offset);
+	}
+
+	hdr->checksum = kwboot_hdr_csum8(hdr) - csum;
+
+	*size = le32_to_cpu(hdr->srcaddr) + le32_to_cpu(hdr->blocksize);
+	return 0;
+err:
+	errno = EINVAL;
+	return -1;
 }
 
 static void
 kwboot_usage(FILE *stream, char *progname)
 {
+	fprintf(stream, "kwboot version %s\n", PLAIN_VERSION);
 	fprintf(stream,
 		"Usage: %s [OPTIONS] [-b <image> | -D <image> ] [-B <baud> ] <TTY>\n",
 		progname);
 	fprintf(stream, "\n");
 	fprintf(stream,
 		"  -b <image>: boot <image> with preamble (Kirkwood, Armada 370/XP)\n");
-	fprintf(stream, "  -p: patch <image> to type 0x69 (uart boot)\n");
 	fprintf(stream,
 		"  -D <image>: boot <image> without preamble (Dove)\n");
 	fprintf(stream, "  -d: enter debug mode\n");
@@ -710,12 +1562,13 @@
 main(int argc, char **argv)
 {
 	const char *ttypath, *imgpath;
-	int rv, rc, tty, term, prot, patch;
+	int rv, rc, tty, term;
 	void *bootmsg;
 	void *debugmsg;
 	void *img;
 	size_t size;
-	speed_t speed;
+	size_t after_img_rsv;
+	int baudrate;
 
 	rv = 1;
 	tty = -1;
@@ -724,9 +1577,9 @@
 	imgpath = NULL;
 	img = NULL;
 	term = 0;
-	patch = 0;
 	size = 0;
-	speed = B115200;
+	after_img_rsv = KWBOOT_XM_BLKSZ;
+	baudrate = 115200;
 
 	kwboot_verbose = isatty(STDOUT_FILENO);
 
@@ -751,7 +1604,7 @@
 			break;
 
 		case 'p':
-			patch = 1;
+			/* nop, for backward compatibility */
 			break;
 
 		case 't':
@@ -776,9 +1629,7 @@
 			break;
 
 		case 'B':
-			speed = kwboot_tty_speed(atoi(optarg));
-			if (speed == -1)
-				goto usage;
+			baudrate = atoi(optarg);
 			break;
 
 		case 'h':
@@ -791,32 +1642,34 @@
 	if (!bootmsg && !term && !debugmsg)
 		goto usage;
 
-	if (patch && !imgpath)
-		goto usage;
-
 	if (argc - optind < 1)
 		goto usage;
 
 	ttypath = argv[optind++];
 
-	tty = kwboot_open_tty(ttypath, speed);
+	tty = kwboot_open_tty(ttypath, imgpath ? 115200 : baudrate);
 	if (tty < 0) {
 		perror(ttypath);
 		goto out;
 	}
 
-	if (imgpath) {
-		prot = PROT_READ | (patch ? PROT_WRITE : 0);
+	if (baudrate == 115200)
+		/* do not change baudrate during Xmodem to the same value */
+		baudrate = 0;
+	else
+		/* ensure we have enough space for baudrate change code */
+		after_img_rsv += KWBOOT_BAUDRATE_BIN_HEADER_SZ +
+				 sizeof(kwboot_pre_baud_code) +
+				 sizeof(kwboot_baud_code);
 
-		img = kwboot_mmap_image(imgpath, &size, prot);
+	if (imgpath) {
+		img = kwboot_read_image(imgpath, &size, after_img_rsv);
 		if (!img) {
 			perror(imgpath);
 			goto out;
 		}
-	}
 
-	if (patch) {
-		rc = kwboot_img_patch_hdr(img, size);
+		rc = kwboot_img_patch(img, &size, baudrate);
 		if (rc) {
 			fprintf(stderr, "%s: Invalid image.\n", imgpath);
 			goto out;
@@ -838,7 +1691,7 @@
 	}
 
 	if (img) {
-		rc = kwboot_xmodem(tty, img, size);
+		rc = kwboot_xmodem(tty, img, size, baudrate);
 		if (rc) {
 			perror("xmodem");
 			goto out;
@@ -859,7 +1712,7 @@
 		close(tty);
 
 	if (img)
-		munmap(img, size);
+		free(img);
 
 	return rv;
 
diff --git a/tools/termios_linux.h b/tools/termios_linux.h
new file mode 100644
index 0000000..d73989b
--- /dev/null
+++ b/tools/termios_linux.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * termios functions to support arbitrary baudrates (on Linux)
+ *
+ * Copyright (c) 2021 Pali Rohár <pali@kernel.org>
+ * Copyright (c) 2021 Marek Behún <marek.behun@nic.cz>
+ */
+
+#ifndef _TERMIOS_LINUX_H_
+#define _TERMIOS_LINUX_H_
+
+/*
+ * We need to use raw TCGETS2/TCSETS2 or TCGETS/TCSETS ioctls with the BOTHER
+ * flag in struct termios2/termios, defined in Linux headers <asm/ioctls.h>
+ * (included by <sys/ioctl.h>) and <asm/termbits.h>. Since these headers
+ * conflict with glibc's header file <termios.h>, it is not possible to use
+ * libc's termios functions and we need to reimplement them via ioctl() calls.
+ *
+ * An arbitrary baudrate is supported when the macro BOTHER is defined. The
+ * baudrate value itself is then stored into the c_ospeed and c_ispeed members.
+ * If ioctls TCGETS2/TCSETS2 are defined and supported then these fields are
+ * present in struct termios2, otherwise these fields are present in struct
+ * termios.
+ *
+ * Note that the Bnnn constants from <termios.h> need not be compatible with Bnnn
+ * constants from <asm/termbits.h>.
+ */
+
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <asm/termbits.h>
+
+#if defined(BOTHER) && defined(TCGETS2)
+#define termios termios2
+#endif
+
+static inline int tcgetattr(int fd, struct termios *t)
+{
+#if defined(BOTHER) && defined(TCGETS2)
+	return ioctl(fd, TCGETS2, t);
+#else
+	return ioctl(fd, TCGETS, t);
+#endif
+}
+
+static inline int tcsetattr(int fd, int a, const struct termios *t)
+{
+	int cmd;
+
+	switch (a) {
+#if defined(BOTHER) && defined(TCGETS2)
+	case TCSANOW:
+		cmd = TCSETS2;
+		break;
+	case TCSADRAIN:
+		cmd = TCSETSW2;
+		break;
+	case TCSAFLUSH:
+		cmd = TCSETSF2;
+		break;
+#else
+	case TCSANOW:
+		cmd = TCSETS;
+		break;
+	case TCSADRAIN:
+		cmd = TCSETSW;
+		break;
+	case TCSAFLUSH:
+		cmd = TCSETSF;
+		break;
+#endif
+	default:
+		errno = EINVAL;
+		return -1;
+	}
+
+	return ioctl(fd, cmd, t);
+}
+
+static inline int tcdrain(int fd)
+{
+	return ioctl(fd, TCSBRK, 1);
+}
+
+static inline int tcflush(int fd, int q)
+{
+	return ioctl(fd, TCFLSH, q);
+}
+
+static inline int tcsendbreak(int fd, int d)
+{
+	return ioctl(fd, TCSBRK, d);
+}
+
+static inline int tcflow(int fd, int a)
+{
+	return ioctl(fd, TCXONC, a);
+}
+
+static inline pid_t tcgetsid(int fd)
+{
+	pid_t sid;
+
+	if (ioctl(fd, TIOCGSID, &sid) < 0)
+		return (pid_t)-1;
+
+	return sid;
+}
+
+static inline speed_t cfgetospeed(const struct termios *t)
+{
+	return t->c_cflag & CBAUD;
+}
+
+static inline int cfsetospeed(struct termios *t, speed_t s)
+{
+	if (s & ~CBAUD) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	t->c_cflag &= ~CBAUD;
+	t->c_cflag |= s;
+
+	return 0;
+}
+
+#ifdef IBSHIFT
+static inline speed_t cfgetispeed(const struct termios *t)
+{
+	speed_t s = (t->c_cflag >> IBSHIFT) & CBAUD;
+
+	if (s == B0)
+		return cfgetospeed(t);
+	else
+		return s;
+}
+
+static inline int cfsetispeed(struct termios *t, speed_t s)
+{
+	if (s == 0)
+		s = B0;
+
+	if (s & ~CBAUD) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	t->c_cflag &= ~(CBAUD << IBSHIFT);
+	t->c_cflag |= s << IBSHIFT;
+
+	return 0;
+}
+#else /* !IBSHIFT */
+static inline speed_t cfgetispeed(const struct termios *t)
+{
+	return cfgetospeed(t);
+}
+
+static inline int cfsetispeed(struct termios *t, speed_t s)
+{
+	return cfsetospeed(t, s);
+}
+#endif /* !IBSHIFT */
+
+static inline int cfsetspeed(struct termios *t, speed_t s)
+{
+	if (cfsetospeed(t, s))
+		return -1;
+#ifdef IBSHIFT
+	if (cfsetispeed(t, s))
+		return -1;
+#endif
+
+	return 0;
+}
+
+static inline void cfmakeraw(struct termios *t)
+{
+	t->c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR |
+			ICRNL | IXON);
+	t->c_oflag &= ~OPOST;
+	t->c_lflag &= ~(ECHO | ECHONL | ICANON | ISIG | IEXTEN);
+	t->c_cflag &= ~(CSIZE | PARENB);
+	t->c_cflag |= CS8;	/* CSIZE was cleared above; select CS8 */
+}
+
+#endif /* _TERMIOS_LINUX_H_ */
