arm64: Reduce PT size estimation complexity
count_required_pts()'s complexity is high if mappings are not using the
largest possible block size (due to some other requirement such as tracking
dirty pages, for example).
Let's switch to a method that follows the pattern established with
the add_map() helper, and make it almost instantaneous instead of
taking a large amount of time if 2MB mappings are in use instead of
1GB.
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Pierre-Clément Tosi <ptosi@google.com>
[ Paul: pick from the Android tree. Fixup Pierre's commit. Rebase to the
upstream ]
Signed-off-by: Ying-Chun Liu (PaulLiu) <paul.liu@linaro.org>
Cc: Tom Rini <trini@konsulko.com>
Link: https://android.googlesource.com/platform/external/u-boot/+/5d756d147e31a1cdaaa261a50e526404ca5968f5
Link: https://android.googlesource.com/platform/external/u-boot/+/6be9330601d81545c7c941e3609f35bf68a09059
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index 876344e..6973340 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -352,98 +352,57 @@
(u64 *)gd->arch.tlb_addr, attrs);
}
-enum pte_type {
- PTE_INVAL,
- PTE_BLOCK,
- PTE_LEVEL,
-};
-
-/*
- * This is a recursively called function to count the number of
- * page tables we need to cover a particular PTE range. If you
- * call this with level = -1 you basically get the full 48 bit
- * coverage.
- */
-static int count_required_pts(u64 addr, int level, u64 maxaddr)
+static void count_range(u64 virt, u64 size, int level, int *cntp)
{
- int levelshift = level2shift(level);
- u64 levelsize = 1ULL << levelshift;
- u64 levelmask = levelsize - 1;
- u64 levelend = addr + levelsize;
- int r = 0;
- int i;
- enum pte_type pte_type = PTE_INVAL;
+ u64 map_size = BIT_ULL(level2shift(level));
+ int i, idx;
- for (i = 0; mem_map[i].size || mem_map[i].attrs; i++) {
- struct mm_region *map = &mem_map[i];
- u64 start = map->virt;
- u64 end = start + map->size;
+ idx = (virt >> level2shift(level)) & (MAX_PTE_ENTRIES - 1);
+ for (i = idx; size; i++) {
+ u64 next_size;
- /* Check if the PTE would overlap with the map */
- if (max(addr, start) <= min(levelend, end)) {
- start = max(addr, start);
- end = min(levelend, end);
+ if (level >= 1 &&
+ size >= map_size && !(virt & (map_size - 1))) {
+ virt += map_size;
+ size -= map_size;
- /* We need a sub-pt for this level */
- if ((start & levelmask) || (end & levelmask)) {
- pte_type = PTE_LEVEL;
- break;
- }
+ continue;
+ }
- /* Lv0 can not do block PTEs, so do levels here too */
- if (level <= 0) {
- pte_type = PTE_LEVEL;
- break;
- }
+ /* Going one level down */
+ (*cntp)++;
+ next_size = min(map_size - (virt & (map_size - 1)), size);
- /* PTE is active, but fits into a block */
- pte_type = PTE_BLOCK;
- }
- }
+ count_range(virt, next_size, level + 1, cntp);
- /*
- * Block PTEs at this level are already covered by the parent page
- * table, so we only need to count sub page tables.
- */
- if (pte_type == PTE_LEVEL) {
- int sublevel = level + 1;
- u64 sublevelsize = 1ULL << level2shift(sublevel);
+ virt += next_size;
+ size -= next_size;
+ }
+}
- /* Account for the new sub page table ... */
- r = 1;
+static int count_ranges(void)
+{
+ int i, count = 0, level = 0;
+ u64 va_bits;
- /* ... and for all child page tables that one might have */
- for (i = 0; i < MAX_PTE_ENTRIES; i++) {
- r += count_required_pts(addr, sublevel, maxaddr);
- addr += sublevelsize;
+ get_tcr(NULL, &va_bits);
+ if (va_bits < 39)
+ level = 1;
- if (addr >= maxaddr) {
- /*
- * We reached the end of address space, no need
- * to look any further.
- */
- break;
- }
- }
- }
+ for (i = 0; mem_map[i].size || mem_map[i].attrs; i++)
+ count_range(mem_map[i].virt, mem_map[i].size, level, &count);
- return r;
+ return count;
}
/* Returns the estimated required size of all page tables */
__weak u64 get_page_table_size(void)
{
u64 one_pt = MAX_PTE_ENTRIES * sizeof(u64);
- u64 size = 0;
- u64 va_bits;
- int start_level = 0;
-
- get_tcr(NULL, &va_bits);
- if (va_bits < 39)
- start_level = 1;
+ u64 size;
/* Account for all page tables we would need to cover our memory map */
- size = one_pt * count_required_pts(0, start_level - 1, 1ULL << va_bits);
+ size = one_pt * count_ranges();
/*
* We need to duplicate our page table once to have an emergency pt to