drivers/crypto/fsl : Allocate output ring with size aligned to CACHELNE SIZE

The output ring needs to be invalidated before enqueuing the job to SEC.
While allocation of space to output ring, it should be taken care that the
size is cacheline size aligned inorder to prevent invalidating valid data.

The patch also correct the method of aligning end of structs while flushing caches

    Since start = align(start_of_struct), it is incorrect to assign
    end = align(start + struct_size). It should instead be,
    end = align(start_of_struct + struct_size).

Signed-off-by: Saksham Jain <saksham@nxp.com>
Signed-off-by: Ruchika Gupta <ruchika.gupta@nxp.com>
Reviewed-by: York Sun <york.sun@nxp.com>
diff --git a/drivers/crypto/fsl/jr.c b/drivers/crypto/fsl/jr.c
index b553e3c..4566ec3 100644
--- a/drivers/crypto/fsl/jr.c
+++ b/drivers/crypto/fsl/jr.c
@@ -95,14 +95,16 @@
 				JR_SIZE * sizeof(dma_addr_t));
 	if (!jr.input_ring)
 		return -1;
+
+	jr.op_size = roundup(JR_SIZE * sizeof(struct op_ring),
+			     ARCH_DMA_MINALIGN);
 	jr.output_ring =
-	    (struct op_ring *)memalign(ARCH_DMA_MINALIGN,
-				JR_SIZE * sizeof(struct op_ring));
+	    (struct op_ring *)memalign(ARCH_DMA_MINALIGN, jr.op_size);
 	if (!jr.output_ring)
 		return -1;
 
 	memset(jr.input_ring, 0, JR_SIZE * sizeof(dma_addr_t));
-	memset(jr.output_ring, 0, JR_SIZE * sizeof(struct op_ring));
+	memset(jr.output_ring, 0, jr.op_size);
 
 	start_jr0();
 
@@ -190,8 +192,8 @@
 
 	unsigned long start = (unsigned long)&jr.info[head] &
 					~(ARCH_DMA_MINALIGN - 1);
-	unsigned long end = ALIGN(start + sizeof(struct jr_info),
-					ARCH_DMA_MINALIGN);
+	unsigned long end = ALIGN((unsigned long)&jr.info[head] +
+				  sizeof(struct jr_info), ARCH_DMA_MINALIGN);
 	flush_dcache_range(start, end);
 
 #ifdef CONFIG_PHYS_64BIT
@@ -216,11 +218,19 @@
 #endif /* ifdef CONFIG_PHYS_64BIT */
 
 	start = (unsigned long)&jr.input_ring[head] & ~(ARCH_DMA_MINALIGN - 1);
-	end = ALIGN(start + sizeof(phys_addr_t), ARCH_DMA_MINALIGN);
+	end = ALIGN((unsigned long)&jr.input_ring[head] +
+		     sizeof(dma_addr_t), ARCH_DMA_MINALIGN);
 	flush_dcache_range(start, end);
 
 	jr.head = (head + 1) & (jr.size - 1);
 
+	/* Invalidate output ring */
+	start = (unsigned long)jr.output_ring &
+					~(ARCH_DMA_MINALIGN - 1);
+	end = ALIGN((unsigned long)jr.output_ring + jr.op_size,
+		     ARCH_DMA_MINALIGN);
+	invalidate_dcache_range(start, end);
+
 	sec_out32(&regs->irja, 1);
 
 	return 0;
@@ -241,12 +251,6 @@
 #endif
 
 	while (sec_in32(&regs->orsf) && CIRC_CNT(jr.head, jr.tail, jr.size)) {
-		unsigned long start = (unsigned long)jr.output_ring &
-					~(ARCH_DMA_MINALIGN - 1);
-		unsigned long end = ALIGN(start +
-					  sizeof(struct op_ring)*JR_SIZE,
-					  ARCH_DMA_MINALIGN);
-		invalidate_dcache_range(start, end);
 
 		found = 0;
 
diff --git a/drivers/crypto/fsl/jr.h b/drivers/crypto/fsl/jr.h
index 5899696..545d964 100644
--- a/drivers/crypto/fsl/jr.h
+++ b/drivers/crypto/fsl/jr.h
@@ -72,6 +72,8 @@
 	int write_idx;
 	/* Size of the rings. */
 	int size;
+	/* Op ring size aligned to cache line size */
+	int op_size;
 	/* The ip and output rings have to be accessed by SEC. So the
 	 * pointers will ahve to point to the housekeeping region provided
 	 * by SEC