blob: d298f2b6674933ac64bd62625a3b8df86b0e7283 [file] [log] [blame]
Achin Gupta4f6ad662013-10-25 09:08:21 +01001/*
Varun Wadekar17425d32019-01-10 15:46:34 -08002 * Copyright (c) 2013-2019, ARM Limited and Contributors. All rights reserved.
Achin Gupta4f6ad662013-10-25 09:08:21 +01003 *
dp-armfa3cf0b2017-05-03 09:38:09 +01004 * SPDX-License-Identifier: BSD-3-Clause
Achin Gupta4f6ad662013-10-25 09:08:21 +01005 */
6
Dan Handley2bd4ef22014-04-09 13:14:54 +01007#include <arch.h>
Andrew Thoelke38bde412014-03-18 13:46:55 +00008#include <asm_macros.S>
Soby Mathew041f62a2014-07-14 16:58:03 +01009#include <assert_macros.S>
Varun Wadekar17425d32019-01-10 15:46:34 -080010#include <common/bl_common.h>
Antonio Nino Diaze0f90632018-12-14 00:18:21 +000011#include <lib/xlat_tables/xlat_tables_defs.h>
Achin Gupta4a826dd2013-11-25 14:00:56 +000012
/* Symbols exported by this file, listed in the order they are defined. */

	/* Secure monitor call */
	.globl	smc

	/* Memory zeroing and copying */
	.globl	zero_normalmem
	.globl	zeromem
	.globl	memcpy16

	/* MMU / cache disabling, EL3 then EL1 */
	.globl	disable_mmu_el3
	.globl	disable_mmu_icache_el3
	.globl	disable_mmu_el1
	.globl	disable_mmu_icache_el1

	/* Only built when VFP support is configured in */
#if SUPPORT_VFP
	.globl	enable_vfp
#endif

	/* Runtime relocation fixup */
	.globl	fixup_gdt_reloc
27
/* -----------------------------------------------------------------------
 * smc
 *
 * Issue an SMC instruction with immediate 0. No register is set up here,
 * so the call is made with whatever values the caller left in the
 * registers.
 *
 * NOTE(review): there is no "ret" after the SMC, so execution continues
 * with whatever follows this function in memory if the SMC returns.
 * Presumably callers treat this as a non-returning call - confirm against
 * the C prototype.
 * -----------------------------------------------------------------------
 */
func smc
	smc	#0
endfunc smc
Sandrine Bailleux65f546a2013-11-28 09:43:06 +000031
/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Set a region of normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * zero_normalmem is not a separate routine: the symbol is defined below as
 * an alias of zeromem_dczva, so every requirement of zeromem_dczva applies.
 *
 * NOTE: The MMU must be enabled when using this function, as it can only
 *       operate on normal memory. It is mainly intended to be used from C
 *       code, where the MMU is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva
44
/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Set a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * In:  x0 = mem, x1 = length in bytes.
 *
 * The function only computes the end address and then continues in the
 * fallback path of zeromem_dczva, which expects:
 *   x0 = current address, x2 = address just past the region.
 *
 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
 *       used instead for faster zeroing.
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 = mem + length, i.e. the first address that must NOT be zeroed */
	add	x2, x0, x1
	/*
	 * Enter the path that never issues DC ZVA, so that the MMU does not
	 * have to be enabled.
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem
65
/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement on the MMU being enabled.
 *
 * In:       x0 = mem, x1 = length in bytes.
 * Modifies: x0-x5 and the condition flags.
 *
 * NOTE: The code assumes that the block size as defined in DCZID_EL0
 *       register is at least 16 bytes.
 *
 * -----------------------------------------------------------------------
 */
func zeromem_dczva

	/*
	 * The function consists of a series of loops that zero memory one byte
	 * at a time, 16 bytes at a time or using the DC ZVA instruction to
	 * zero aligned block of bytes, which is assumed to be more than 16.
	 * In the case where the DC ZVA instruction cannot be used or if the
	 * first 16 bytes loop would overflow, there is a fallback path that
	 * does not use DC ZVA.
	 * Note: The fallback path is also used by the zeromem function that
	 *       branches to it directly.
	 *
	 *     +---------------+   zeromem_dczva
	 *     |     entry     |
	 *     +-------+-------+
	 *             |
	 *             v
	 *     +-------+-------+
	 *     |    checks     |>o-------+ (If any check fails, fallback)
	 *     +-------+-------+         |
	 *             |                 |
	 *             v                 |
	 *     +-------+-------+         |
	 *     |  1 byte loop  |         |
	 *     +-------+-------+ .Lzeromem_dczva_initial_1byte_aligned_end
	 *             |                 |
	 *             v                 |
	 *     +-------+-------+         |
	 *     | 16 bytes loop |         |
	 *     +-------+-------+         |
	 *             |                 |
	 *             v                 |
	 *     +-------+-------+ .Lzeromem_dczva_blocksize_aligned
	 *     |  DC ZVA loop  |         |
	 *     +-------+-------+         |
	 *             |                 |
	 * +---------->|                 |
	 * |           v                 |
	 * |   +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
	 * |   | 16 bytes loop |         |
	 * |   +-------+-------+         |
	 * |           |                 |
	 * |           v                 |
	 * |   +-------+-------+ .Lzeromem_dczva_final_1byte_aligned
	 * |   |  1 byte loop  |         |
	 * |   +-------+-------+         |
	 * |           |                 |
	 * |           v                 |
	 * |       +---+--+              |
	 * |       | exit |              |
	 * |       +------+              |
	 * |                             |
	 * |                             |   +------------------+ zeromem
	 * |             +---------------+---| zeromem function |
	 * |             |                   +------------------+
	 * |             v
	 * |   +-------+-------+ .Lzeromem_dczva_fallback_entry
	 * |   |  1 byte loop  |
	 * |   +-------+-------+
	 * |           |
	 * +-----------+
	 *
	 * Shortcuts not drawn above:
	 *  - a start address that is already block-size aligned goes from the
	 *    checks straight to the DC ZVA loop;
	 *  - a start address that is already 16-byte aligned skips the first
	 *    1 byte loop (main path) or the fallback 1 byte loop;
	 *  - in the fallback path, if the next 16-byte aligned address
	 *    overflows or is not below the stop address, control goes directly
	 *    to the final 1 byte loop.
	 */

	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem which
	 * branches into the fallback path directly, so cursor, length and
	 * stop_address should not be retargeted to other registers.
	 */
	cursor		.req x0 /* Start address and then current address */
	length		.req x1 /* Length in bytes of the region to zero out */
	/* Reusing x1 as length is never used after block_mask is set */
	block_mask	.req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address	.req x2 /* Address past the last zeroed byte */
	block_size	.req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1		.req x4
	tmp2		.req x5

#if ENABLE_ASSERTIONS
	/*
	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
	 * register value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif

	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ENABLE_ASSERTIONS */

	/* stop_address is the address past the last to zero */
	add	stop_address, cursor, length

	/*
	 * Read DCZID_EL0. Its 4 lowest bits hold log2(<block size in words>)
	 * (see encoding of dczid_el0 reg).
	 */
	mrs	block_size, dczid_el0

	/*
	 * Select the 4 lowest bits and convert the extracted log2(<block size
	 * in words>) to <block size in bytes>:
	 * block_size = 4 << log2(<block size in words>)
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size

#if ENABLE_ASSERTIONS
	/*
	 * Assumes block size is at least 16 bytes to avoid manual realignment
	 * of the cursor at the end of the DCZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
	/*
	 * Not worth doing all the setup for a region less than a block and
	 * protects against zeroing a whole block when the area to zero is
	 * smaller than that. Also, as it is assumed that the block size is at
	 * least 16 bytes, this also protects the initial aligning loops from
	 * trying to zero 16 bytes when length is less than 16.
	 */
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry

	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1

	/*
	 * length alias should not be used after this point unless it is
	 * defined as a register other than block_mask's.
	 */
	.unreq	length

	/*
	 * If the start address is already aligned to zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than a block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned

	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1

	/*
	 * If the addition overflows (wraps around to 0), skip the cache
	 * zeroing loops. This is quite unlikely however.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry

	/*
	 * If the first block-size-aligned address is past the last address,
	 * fallback to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry

	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16 bytes loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fallback to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * Next aligned address cannot be after the stop address because the
	 * length cannot be smaller than 16 at this point.
	 */

	/* First loop: zero byte per byte up to the 16-byte boundary (tmp2) */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:

	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Third loop: zero a block at a time using DC ZVA cache block zeroing
	 * instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If the result equals
	 * to the start address, the loop will exit immediately.
	 */
	bic	tmp1, stop_address, block_mask

	cmp	cursor, tmp1
	b.hs	2f
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
	 * remaining area
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16 bytes aligned address. It is assumed that the
	 * block size will never be smaller than 16 bytes so that the current
	 * cursor is aligned to at least 16 bytes boundary.
	 */
	bic	tmp1, stop_address, #15

	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/* Fifth and final loop: zero byte per byte up to stop_address */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret

	/*
	 * Fallback path that never uses DC ZVA. It is reached when one of the
	 * checks above fails, and it is also the entry point used by zeromem.
	 * It only relies on cursor (x0) and stop_address (x2) being set, and
	 * uses tmp1 as scratch.
	 */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fallback to byte per byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/*
	 * If the next aligned address is at or after the stop address, the
	 * whole region is handled byte per byte.
	 */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned

	/* Fallback entry loop: zero byte per byte up to the 16-byte boundary */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b

	b	.Lzeromem_dczva_final_16bytes_aligned

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva
Sandrine Bailleux65f546a2013-11-28 09:43:06 +0000379
/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned; this is only
 * checked when ENABLE_ASSERTIONS is set.
 *
 * In:       x0 = dest, x1 = src, x2 = length in bytes.
 * Modifies: x0-x4 and the condition flags.
 * --------------------------------------------------------------------------
 */
func memcpy16
#if ENABLE_ASSERTIONS
	/* Both pointers must have their 4 low bits clear */
	orr	x3, x0, x1
	tst	x3, #0xf
	ASM_ASSERT(eq)
#endif
	/* Bulk phase: move 16 bytes per iteration while x2 >= 16 */
	b	2f
1:
	ldp	x3, x4, [x1], #16
	stp	x3, x4, [x0], #16
	sub	x2, x2, #16
2:
	cmp	x2, #16
	b.hs	1b

	/* Tail phase: move the remaining (fewer than 16) bytes one by one */
	cbz	x2, 4f
3:
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	3b
4:
	ret
endfunc memcpy16
Andrew Thoelke438c63a2014-04-28 12:06:18 +0100412
/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 *
 * disable_mmu_el3        clears SCTLR_M_BIT and SCTLR_C_BIT in SCTLR_EL3.
 * disable_mmu_icache_el3 additionally clears SCTLR_I_BIT.
 *
 * Both entry points load the mask of bits to clear into x1 and then share
 * the read-modify-write sequence at do_disable_mmu_el3.
 *
 * In:       nothing (x0 and x1 are overwritten before being read).
 * Modifies: x0, x1.
 * ---------------------------------------------------------------------------
 */

func disable_mmu_icache_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el3
endfunc disable_mmu_icache_el3


func disable_mmu_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el3:
	/* x1 = bits to clear in SCTLR_EL3 */
	mrs	x0, sctlr_el3
	bic	x0, x0, x1
	msr	sctlr_el3, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el3
Andrew Thoelke438c63a2014-04-28 12:06:18 +0100434
/* ---------------------------------------------------------------------------
 * Disable the MMU at EL1
 *
 * disable_mmu_el1        clears SCTLR_M_BIT and SCTLR_C_BIT in SCTLR_EL1.
 * disable_mmu_icache_el1 additionally clears SCTLR_I_BIT.
 *
 * Both entry points load the mask of bits to clear into x1 and then share
 * the read-modify-write sequence at do_disable_mmu_el1.
 *
 * In:       nothing (x0 and x1 are overwritten before being read).
 * Modifies: x0, x1.
 * ---------------------------------------------------------------------------
 */

func disable_mmu_icache_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el1
endfunc disable_mmu_icache_el1


func disable_mmu_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el1:
	/* x1 = bits to clear in SCTLR_EL1 */
	mrs	x0, sctlr_el1
	bic	x0, x0, x1
	msr	sctlr_el1, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el1
456
/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 *
 * Two system registers are updated:
 *  - CPACR_EL1: the CPACR_VFP_BITS are set;
 *  - CPTR_EL3:  the AARCH64_CPTR_TFP bit(s) are cleared.
 * An ISB is issued before returning.
 *
 * Only built when SUPPORT_VFP is set.
 *
 * In:       nothing.
 * Modifies: x0, x1.
 * ---------------------------------------------------------------------------
 */
#if SUPPORT_VFP
func enable_vfp
	/* Mask used further down to clear the trap bit(s) in CPTR_EL3 */
	mov	x1, #AARCH64_CPTR_TFP

	/* CPACR_EL1 |= CPACR_VFP_BITS */
	mrs	x0, cpacr_el1
	orr	x0, x0, #CPACR_VFP_BITS
	msr	cpacr_el1, x0

	/* CPTR_EL3 &= ~AARCH64_CPTR_TFP */
	mrs	x0, cptr_el3
	bic	x0, x0, x1
	msr	cptr_el3, x0

	isb
	ret
endfunc enable_vfp
#endif
Soby Mathew4e28c202018-10-14 08:09:22 +0100474
/* ---------------------------------------------------------------------------
 * Helper to fixup the Global Offset Table (GOT) and dynamic relocations
 * (.rela.dyn) at runtime. (The "gdt" in the function name is kept for
 * compatibility with existing callers; the table processed is the GOT.)
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for start and end of the section. For GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
 * __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which is usually the limits of the relocatable BL image).
 *   x0 -  the start of the fixup region
 *   x1 -  the limit of the fixup region
 * These addresses have to be page (4KB) aligned.
 *
 * Only values in the range [x0, x1) are adjusted; all range checks are
 * unsigned, as the values compared are addresses.
 *
 * Modifies: x0-x4, x6, x7 and the condition flags.
 * ---------------------------------------------------------------------------
 */
func fixup_gdt_reloc
	mov	x6, x0		/* x6 = lower limit (inclusive) */
	mov	x7, x1		/* x7 = upper limit (exclusive) */

	/* Test if the limits are 4K aligned */
#if ENABLE_ASSERTIONS
	orr	x0, x0, x1
	tst	x0, #(PAGE_SIZE - 1)
	ASM_ASSERT(eq)
#endif
	/*
	 * Calculate the offset based on return address in x30.
	 * Assume that this function is called within a page at the start of
	 * fixup region.
	 */
	and	x2, x30, #~(PAGE_SIZE - 1)
	sub	x0, x2, x6	/* Diff(S) = Current Address - Compiled Address */

	adrp	x1, __GOT_START__
	add	x1, x1, :lo12:__GOT_START__
	adrp	x2, __GOT_END__
	add	x2, x2, :lo12:__GOT_END__

	/* Nothing to do if the GOT is empty */
	cmp	x1, x2
	b.hs	3f

	/*
	 * GOT is an array of 64_bit addresses which must be fixed up as
	 * new_addr = old_addr + Diff(S).
	 * The new_addr is the address currently the binary is executing from
	 * and old_addr is the address at compile time.
	 */
1:
	ldr	x3, [x1]
	/* Skip adding offset if address is < lower limit */
	cmp	x3, x6
	b.lo	2f
	/*
	 * Skip adding offset if address is >= upper limit. Addresses are
	 * unsigned, so the unsigned condition (hs) must be used here.
	 */
	cmp	x3, x7
	b.hs	2f
	add	x3, x3, x0
	str	x3, [x1]
2:
	add	x1, x1, #8
	cmp	x1, x2
	b.lo	1b
3:

	/* Starting dynamic relocations. Use adrp/adr to get RELA_START and END */
	adrp	x1, __RELA_START__
	add	x1, x1, :lo12:__RELA_START__
	adrp	x2, __RELA_END__
	add	x2, x2, :lo12:__RELA_END__

	/* Nothing to do if there are no dynamic relocations */
	cmp	x1, x2
	b.hs	3f

	/*
	 * According to ELF-64 specification, the RELA data structure is as
	 * follows:
	 *	typedef struct
	 *	{
	 *		Elf64_Addr r_offset;
	 *		Elf64_Xword r_info;
	 *		Elf64_Sxword r_addend;
	 *	} Elf64_Rela;
	 *
	 * r_offset is address of reference
	 * r_info is symbol index and type of relocation (in this case
	 * 0x403 which corresponds to R_AARCH64_RELATIVE).
	 * r_addend is constant part of expression.
	 *
	 * Size of Elf64_Rela structure is 24 bytes.
	 */
1:
	/* Assert that the relocation type is R_AARCH64_RELATIVE */
#if ENABLE_ASSERTIONS
	ldr	x3, [x1, #8]
	cmp	x3, #0x403
	ASM_ASSERT(eq)
#endif
	ldr	x3, [x1]	/* r_offset */
	add	x3, x0, x3	/* x3 = run-time address of the reference */
	ldr	x4, [x1, #16]	/* r_addend */

	/* Skip adding offset if r_addend is < lower limit */
	cmp	x4, x6
	b.lo	2f
	/*
	 * Skip adding offset if r_addend entry is >= upper limit (unsigned
	 * comparison, as above).
	 */
	cmp	x4, x7
	b.hs	2f

	add	x4, x0, x4	/* Diff(S) + r_addend */
	str	x4, [x3]

2:	add	x1, x1, #24
	cmp	x1, x2
	b.lo	1b
3:
	ret
endfunc fixup_gdt_reloc