perf(imx): speed-up console/uart TX using FIFO

The current putc version test for TXEMPTY bit set (#6) instead
of waiting for TXFULL bit clear (#4), that slows the global
boot time as we are not taking benefit of the 32-byte FIFO.

We then need to implement the flush function to be sure the
transmit is complete (FIFO and shift register empty).

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Change-Id: I54873a5203e2afdc230e44ce73284e7a80985b4f
diff --git a/plat/imx/common/imx_uart_console.S b/plat/imx/common/imx_uart_console.S
index ceeb3a7..4d17288 100644
--- a/plat/imx/common/imx_uart_console.S
+++ b/plat/imx/common/imx_uart_console.S
@@ -12,6 +12,7 @@
 
 #define URXD  0x0  /* Receiver Register */
 #define UTXD  0x40 /* Transmitter Register */
+#define USR2  0x98 /* UART Status Register 2 */
 #define UTS   0xb4 /* UART Test Register (mx31) */
 #define  URXD_RX_DATA    (0xFF)
 
@@ -53,13 +54,13 @@
 1:
 	/* Check if the transmit FIFO is full */
 	ldr	w2, [x1, #UTS]
-	tbz	w2, #6, 1b
+	tbnz	w2, #4, 1b
 	mov	w2, #0xD
 	str	w2, [x1, #UTXD]
 2:
 	/* Check if the transmit FIFO is full */
 	ldr	w2, [x1, #UTS]
-	tbz	w2, #6, 2b
+	tbnz	w2, #4, 2b
 	str	w0, [x1, #UTXD]
 	ret
 putc_error:
@@ -84,5 +85,13 @@
 endfunc console_imx_uart_getc
 
 func console_imx_uart_flush
+	ldr	x0, [x0, #CONSOLE_T_BASE]
+	cbz	x0, flush_exit
+1:
+	/* Wait for the transmit complete bit */
+	ldr	w1, [x0, #USR2]
+	tbz	w1, #3, 1b
+
+flush_exit:
 	ret
 endfunc console_imx_uart_flush