/*
 * (C) Copyright 2008-2011
 * Graeme Russ, <graeme.russ@gmail.com>
 *
 * (C) Copyright 2002
 * Daniel Engström, Omicron Ceti AB, <daniel@omicron.se>
 *
 * (C) Copyright 2002
 * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
 * Marius Groeger <mgroeger@sysgo.de>
 *
 * (C) Copyright 2002
 * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
 * Alex Zuepke <azu@sysgo.de>
 *
 * Part of this file is adapted from coreboot
 * src/arch/x86/lib/cpu.c
 *
 * SPDX-License-Identifier: GPL-2.0+
 */
21
22#include <common.h>
23#include <malloc.h>
24#include <asm/control_regs.h>
25#include <asm/cpu.h>
26#include <asm/mp.h>
27#include <asm/msr.h>
28#include <asm/mtrr.h>
29#include <asm/processor-flags.h>
30
31DECLARE_GLOBAL_DATA_PTR;
32
/*
 * Build the 64-bit value of a conventional segment descriptor (GDT or LDT).
 *
 * Kept as a macro (not a function) so that it can appear in initialisers.
 * The pieces are placed as follows:
 *   limit bits  0..15 -> descriptor bits  0..15
 *   base  bits  0..23 -> descriptor bits 16..39
 *   flags bits  0..7 and 12..15 -> descriptor bits 40..47 and 52..55
 *   limit bits 16..19 -> descriptor bits 48..51
 *   base  bits 24..31 -> descriptor bits 56..63
 *
 * Note that each argument is evaluated more than once.
 */
#define GDT_ENTRY(flags, base, limit)			\
	(((limit) & 0x0000ffffULL) |			\
	 (((base) & 0x00ffffffULL) << 16) |		\
	 (((flags) & 0x0000f0ffULL) << 40) |		\
	 (((limit) & 0x000f0000ULL) << 32) |		\
	 (((base) & 0xff000000ULL) << 32))
43
/*
 * Memory operand handed to the lgdtl instruction by load_gdt(): a 16-bit
 * length immediately followed by a 32-bit address, hence the packed layout.
 */
struct gdt_ptr {
	u16 len;	/* size of the table in bytes, minus one */
	u32 ptr;	/* address of the first table entry */
} __packed;
48
/* Raw CPU identification as produced by identify_cpu() */
struct cpu_device_id {
	unsigned int vendor;	/* one of the X86_VENDOR_xxx values */
	unsigned int device;	/* CPU signature (CPUID leaf 1 EAX, or a 386/486 stand-in) */
};
53
/*
 * Decoded CPU signature. get_fms() fills in the family, model and mask
 * fields from a raw signature word; it does not touch x86_vendor.
 */
struct cpuinfo_x86 {
	uint8_t x86;		/* CPU family */
	uint8_t x86_vendor;	/* CPU vendor */
	uint8_t x86_model;	/* CPU model (extended model folded in by get_fms()) */
	uint8_t x86_mask;	/* low four bits of the signature (stepping) */
};
60
61/*
62 * List of cpu vendor strings along with their normalized
63 * id values.
64 */
65static const struct {
66 int vendor;
67 const char *name;
68} x86_vendors[] = {
69 { X86_VENDOR_INTEL, "GenuineIntel", },
70 { X86_VENDOR_CYRIX, "CyrixInstead", },
71 { X86_VENDOR_AMD, "AuthenticAMD", },
72 { X86_VENDOR_UMC, "UMC UMC UMC ", },
73 { X86_VENDOR_NEXGEN, "NexGenDriven", },
74 { X86_VENDOR_CENTAUR, "CentaurHauls", },
75 { X86_VENDOR_RISE, "RiseRiseRise", },
76 { X86_VENDOR_TRANSMETA, "GenuineTMx86", },
77 { X86_VENDOR_TRANSMETA, "TransmetaCPU", },
78 { X86_VENDOR_NSC, "Geode by NSC", },
79 { X86_VENDOR_SIS, "SiS SiS SiS ", },
80};
81
82static void load_ds(u32 segment)
83{
84 asm volatile("movl %0, %%ds" : : "r" (segment * X86_GDT_ENTRY_SIZE));
85}
86
87static void load_es(u32 segment)
88{
89 asm volatile("movl %0, %%es" : : "r" (segment * X86_GDT_ENTRY_SIZE));
90}
91
92static void load_fs(u32 segment)
93{
94 asm volatile("movl %0, %%fs" : : "r" (segment * X86_GDT_ENTRY_SIZE));
95}
96
97static void load_gs(u32 segment)
98{
99 asm volatile("movl %0, %%gs" : : "r" (segment * X86_GDT_ENTRY_SIZE));
100}
101
102static void load_ss(u32 segment)
103{
104 asm volatile("movl %0, %%ss" : : "r" (segment * X86_GDT_ENTRY_SIZE));
105}
106
107static void load_gdt(const u64 *boot_gdt, u16 num_entries)
108{
109 struct gdt_ptr gdt;
110
111 gdt.len = (num_entries * X86_GDT_ENTRY_SIZE) - 1;
112 gdt.ptr = (ulong)boot_gdt;
113
114 asm volatile("lgdtl %0\n" : : "m" (gdt));
115}
116
117void arch_setup_gd(gd_t *new_gd)
118{
119 u64 *gdt_addr;
120
121 gdt_addr = new_gd->arch.gdt;
122
123 /*
124 * CS: code, read/execute, 4 GB, base 0
125 *
126 * Some OS (like VxWorks) requires GDT entry 1 to be the 32-bit CS
127 */
128 gdt_addr[X86_GDT_ENTRY_UNUSED] = GDT_ENTRY(0xc09b, 0, 0xfffff);
129 gdt_addr[X86_GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff);
130
131 /* DS: data, read/write, 4 GB, base 0 */
132 gdt_addr[X86_GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff);
133
134 /* FS: data, read/write, 4 GB, base (Global Data Pointer) */
135 new_gd->arch.gd_addr = new_gd;
136 gdt_addr[X86_GDT_ENTRY_32BIT_FS] = GDT_ENTRY(0xc093,
137 (ulong)&new_gd->arch.gd_addr, 0xfffff);
138
139 /* 16-bit CS: code, read/execute, 64 kB, base 0 */
140 gdt_addr[X86_GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x009b, 0, 0x0ffff);
141
142 /* 16-bit DS: data, read/write, 64 kB, base 0 */
143 gdt_addr[X86_GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x0093, 0, 0x0ffff);
144
145 gdt_addr[X86_GDT_ENTRY_16BIT_FLAT_CS] = GDT_ENTRY(0x809b, 0, 0xfffff);
146 gdt_addr[X86_GDT_ENTRY_16BIT_FLAT_DS] = GDT_ENTRY(0x8093, 0, 0xfffff);
147
148 load_gdt(gdt_addr, X86_GDT_NUM_ENTRIES);
149 load_ds(X86_GDT_ENTRY_32BIT_DS);
150 load_es(X86_GDT_ENTRY_32BIT_DS);
151 load_gs(X86_GDT_ENTRY_32BIT_DS);
152 load_ss(X86_GDT_ENTRY_32BIT_DS);
153 load_fs(X86_GDT_ENTRY_32BIT_FS);
154}
155
156#ifdef CONFIG_HAVE_FSP
157/*
158 * Setup FSP execution environment GDT
159 *
160 * Per Intel FSP external architecture specification, before calling any FSP
161 * APIs, we need make sure the system is in flat 32-bit mode and both the code
162 * and data selectors should have full 4GB access range. Here we reuse the one
163 * we used in arch/x86/cpu/start16.S, and reload the segement registers.
164 */
165void setup_fsp_gdt(void)
166{
167 load_gdt((const u64 *)(gdt_rom + CONFIG_RESET_SEG_START), 4);
168 load_ds(X86_GDT_ENTRY_32BIT_DS);
169 load_ss(X86_GDT_ENTRY_32BIT_DS);
170 load_es(X86_GDT_ENTRY_32BIT_DS);
171 load_fs(X86_GDT_ENTRY_32BIT_DS);
172 load_gs(X86_GDT_ENTRY_32BIT_DS);
173}
174#endif
175
/*
 * Cyrix 5/2 test.
 *
 * Cyrix CPUs without cpuid, or with cpuid not yet enabled, can be recognised
 * because they leave the flags untouched across the division 5/2, whereas
 * other 486 chips change them. PII and PPro behave like Cyrix here too, but
 * those have cpuid available.
 *
 * Return: 1 if the flags survived the divide (Cyrix behaviour), else 0
 */
static inline int test_cyrix_52div(void)
{
	unsigned int ax_after;
	unsigned char ah;

	__asm__ __volatile__(
	     "sahf\n\t"		/* clear flags (%eax = 0x0005) */
	     "div %b2\n\t"	/* divide 5 by 2 */
	     "lahf"		/* store flags into %ah */
	     : "=a" (ax_after)
	     : "0" (5), "q" (2)
	     : "cc");

	/* AH is 0x02 on Cyrix after the divide.. */
	ah = (unsigned char)(ax_after >> 8);

	return ah == 0x02;
}
201
/*
 * Probe for a NexGen CPU whose BIOS hypercode is too old to provide CPUID.
 * (Thanks to Herbert Oppmann)
 *
 * The test divides 0x5555 by 2 and then looks at the zero flag left behind
 * by the divide: the result is 1 if the flag is set and 0 otherwise.
 *
 * Return: 1 if the probe indicates a NexGen CPU, else 0
 */
static int deep_magic_nexgen_probe(void)
{
	int is_nexgen;

	__asm__ __volatile__ (
		"	movw	$0x5555, %%ax\n"
		"	xorw	%%dx,%%dx\n"
		"	movw	$2, %%cx\n"
		"	divw	%%cx\n"
		"	movl	$0, %%eax\n"
		"	jnz	1f\n"
		"	movl	$1, %%eax\n"
		"1:\n"
		: "=a" (is_nexgen) : : "cx", "dx");

	return is_nexgen;
}
222
223static bool has_cpuid(void)
224{
225 return flag_is_changeable_p(X86_EFLAGS_ID);
226}
227
228static bool has_mtrr(void)
229{
230 return cpuid_edx(0x00000001) & (1 << 12) ? true : false;
231}
232
233static int build_vendor_name(char *vendor_name)
234{
235 struct cpuid_result result;
236 result = cpuid(0x00000000);
237 unsigned int *name_as_ints = (unsigned int *)vendor_name;
238
239 name_as_ints[0] = result.ebx;
240 name_as_ints[1] = result.edx;
241 name_as_ints[2] = result.ecx;
242
243 return result.eax;
244}
245
246static void identify_cpu(struct cpu_device_id *cpu)
247{
248 char vendor_name[16];
249 int i;
250
251 vendor_name[0] = '\0'; /* Unset */
252 cpu->device = 0; /* fix gcc 4.4.4 warning */
253
254 /* Find the id and vendor_name */
255 if (!has_cpuid()) {
256 /* Its a 486 if we can modify the AC flag */
257 if (flag_is_changeable_p(X86_EFLAGS_AC))
258 cpu->device = 0x00000400; /* 486 */
259 else
260 cpu->device = 0x00000300; /* 386 */
261 if ((cpu->device == 0x00000400) && test_cyrix_52div()) {
262 memcpy(vendor_name, "CyrixInstead", 13);
263 /* If we ever care we can enable cpuid here */
264 }
265 /* Detect NexGen with old hypercode */
266 else if (deep_magic_nexgen_probe())
267 memcpy(vendor_name, "NexGenDriven", 13);
268 }
269 if (has_cpuid()) {
270 int cpuid_level;
271
272 cpuid_level = build_vendor_name(vendor_name);
273 vendor_name[12] = '\0';
274
275 /* Intel-defined flags: level 0x00000001 */
276 if (cpuid_level >= 0x00000001) {
277 cpu->device = cpuid_eax(0x00000001);
278 } else {
279 /* Have CPUID level 0 only unheard of */
280 cpu->device = 0x00000400;
281 }
282 }
283 cpu->vendor = X86_VENDOR_UNKNOWN;
284 for (i = 0; i < ARRAY_SIZE(x86_vendors); i++) {
285 if (memcmp(vendor_name, x86_vendors[i].name, 12) == 0) {
286 cpu->vendor = x86_vendors[i].vendor;
287 break;
288 }
289 }
290}
291
292static inline void get_fms(struct cpuinfo_x86 *c, uint32_t tfms)
293{
294 c->x86 = (tfms >> 8) & 0xf;
295 c->x86_model = (tfms >> 4) & 0xf;
296 c->x86_mask = tfms & 0xf;
297 if (c->x86 == 0xf)
298 c->x86 += (tfms >> 20) & 0xff;
299 if (c->x86 >= 0x6)
300 c->x86_model += ((tfms >> 16) & 0xF) << 4;
301}
302
303u32 cpu_get_family_model(void)
304{
305 return gd->arch.x86_device & 0x0fff0ff0;
306}
307
308u32 cpu_get_stepping(void)
309{
310 return gd->arch.x86_mask;
311}
312
313int x86_cpu_init_f(void)
314{
315 const u32 em_rst = ~X86_CR0_EM;
316 const u32 mp_ne_set = X86_CR0_MP | X86_CR0_NE;
317
318 if (ll_boot_init()) {
319 /* initialize FPU, reset EM, set MP and NE */
320 asm ("fninit\n" \
321 "movl %%cr0, %%eax\n" \
322 "andl %0, %%eax\n" \
323 "orl %1, %%eax\n" \
324 "movl %%eax, %%cr0\n" \
325 : : "i" (em_rst), "i" (mp_ne_set) : "eax");
326 }
327
328 /* identify CPU via cpuid and store the decoded info into gd->arch */
329 if (has_cpuid()) {
330 struct cpu_device_id cpu;
331 struct cpuinfo_x86 c;
332
333 identify_cpu(&cpu);
334 get_fms(&c, cpu.device);
335 gd->arch.x86 = c.x86;
336 gd->arch.x86_vendor = cpu.vendor;
337 gd->arch.x86_model = c.x86_model;
338 gd->arch.x86_mask = c.x86_mask;
339 gd->arch.x86_device = cpu.device;
340
341 gd->arch.has_mtrr = has_mtrr();
342 }
343 /* Don't allow PCI region 3 to use memory in the 2-4GB memory hole */
344 gd->pci_ram_top = 0x80000000U;
345
346 /* Configure fixed range MTRRs for some legacy regions */
347 if (gd->arch.has_mtrr) {
348 u64 mtrr_cap;
349
350 mtrr_cap = native_read_msr(MTRR_CAP_MSR);
351 if (mtrr_cap & MTRR_CAP_FIX) {
352 /* Mark the VGA RAM area as uncacheable */
353 native_write_msr(MTRR_FIX_16K_A0000_MSR,
354 MTRR_FIX_TYPE(MTRR_TYPE_UNCACHEABLE),
355 MTRR_FIX_TYPE(MTRR_TYPE_UNCACHEABLE));
356
357 /*
358 * Mark the PCI ROM area as cacheable to improve ROM
359 * execution performance.
360 */
361 native_write_msr(MTRR_FIX_4K_C0000_MSR,
362 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK),
363 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK));
364 native_write_msr(MTRR_FIX_4K_C8000_MSR,
365 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK),
366 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK));
367 native_write_msr(MTRR_FIX_4K_D0000_MSR,
368 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK),
369 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK));
370 native_write_msr(MTRR_FIX_4K_D8000_MSR,
371 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK),
372 MTRR_FIX_TYPE(MTRR_TYPE_WRBACK));
373
374 /* Enable the fixed range MTRRs */
375 msr_setbits_64(MTRR_DEF_TYPE_MSR, MTRR_DEF_TYPE_FIX_EN);
376 }
377 }
378
379#ifdef CONFIG_I8254_TIMER
380 /* Set up the i8254 timer if required */
381 i8254_init();
382#endif
383
384 return 0;
385}
386
387void x86_enable_caches(void)
388{
389 unsigned long cr0;
390
391 cr0 = read_cr0();
392 cr0 &= ~(X86_CR0_NW | X86_CR0_CD);
393 write_cr0(cr0);
394 wbinvd();
395}
396void enable_caches(void) __attribute__((weak, alias("x86_enable_caches")));
397
398void x86_disable_caches(void)
399{
400 unsigned long cr0;
401
402 cr0 = read_cr0();
403 cr0 |= X86_CR0_NW | X86_CR0_CD;
404 wbinvd();
405 write_cr0(cr0);
406 wbinvd();
407}
408void disable_caches(void) __attribute__((weak, alias("x86_disable_caches")));
409
410int dcache_status(void)
411{
412 return !(read_cr0() & X86_CR0_CD);
413}
414
415void cpu_enable_paging_pae(ulong cr3)
416{
417 __asm__ __volatile__(
418 /* Load the page table address */
419 "movl %0, %%cr3\n"
420 /* Enable pae */
421 "movl %%cr4, %%eax\n"
422 "orl $0x00000020, %%eax\n"
423 "movl %%eax, %%cr4\n"
424 /* Enable paging */
425 "movl %%cr0, %%eax\n"
426 "orl $0x80000000, %%eax\n"
427 "movl %%eax, %%cr0\n"
428 :
429 : "r" (cr3)
430 : "eax");
431}
432
/*
 * Switch off PAE paging: bit 31 of CR0 (paging) is cleared first, then
 * bit 5 of CR4 (PAE). EAX is used as scratch and declared as clobbered.
 */
void cpu_disable_paging_pae(void)
{
	/* Turn off paging */
	__asm__ __volatile__ (
		/* Disable paging */
		"movl	%%cr0, %%eax\n"
		"andl	$0x7fffffff, %%eax\n"
		"movl	%%eax, %%cr0\n"
		/* Disable pae */
		"movl	%%cr4, %%eax\n"
		"andl	$0xffffffdf, %%eax\n"
		"movl	%%eax, %%cr4\n"
		:
		:
		: "eax");
}
449
450static bool can_detect_long_mode(void)
451{
452 return cpuid_eax(0x80000000) > 0x80000000UL;
453}
454
455static bool has_long_mode(void)
456{
457 return cpuid_edx(0x80000001) & (1 << 29) ? true : false;
458}
459
/*
 * Check whether the CPU supports 64-bit (long) mode.
 *
 * Each check is only made if the previous one passed: CPUID present, the
 * extended leaf available, and the long-mode bit set.
 *
 * Return: 1 if long mode is supported, 0 otherwise
 */
int cpu_has_64bit(void)
{
	if (!has_cpuid())
		return 0;
	if (!can_detect_long_mode())
		return 0;

	return has_long_mode();
}
465
466#define PAGETABLE_SIZE (6 * 4096)
467
468/**
469 * build_pagetable() - build a flat 4GiB page table structure for 64-bti mode
470 *
471 * @pgtable: Pointer to a 24iKB block of memory
472 */
473static void build_pagetable(uint32_t *pgtable)
474{
475 uint i;
476
477 memset(pgtable, '\0', PAGETABLE_SIZE);
478
479 /* Level 4 needs a single entry */
480 pgtable[0] = (ulong)&pgtable[1024] + 7;
481
482 /* Level 3 has one 64-bit entry for each GiB of memory */
483 for (i = 0; i < 4; i++)
484 pgtable[1024 + i * 2] = (ulong)&pgtable[2048] + 0x1000 * i + 7;
485
486 /* Level 2 has 2048 64-bit entries, each repesenting 2MiB */
487 for (i = 0; i < 2048; i++)
488 pgtable[2048 + i * 2] = 0x183 + (i << 21UL);
489}
490
491int cpu_jump_to_64bit(ulong setup_base, ulong target)
492{
493 uint32_t *pgtable;
494
495 pgtable = memalign(4096, PAGETABLE_SIZE);
496 if (!pgtable)
497 return -ENOMEM;
498
499 build_pagetable(pgtable);
500 cpu_call64((ulong)pgtable, setup_base, target);
501 free(pgtable);
502
503 return -EFAULT;
504}
505
Simon Glass1e32ede2017-01-16 07:04:15 -0700506/*
507 * Jump from SPL to U-Boot
508 *
509 * This function is work-in-progress with many issues to resolve.
510 *
511 * It works by setting up several regions:
512 * ptr - a place to put the code that jumps into 64-bit mode
513 * gdt - a place to put the global descriptor table
514 * pgtable - a place to put the page tables
515 *
516 * The cpu_call64() code is copied from ROM and then manually patched so that
517 * it has the correct GDT address in RAM. U-Boot is copied from ROM into
518 * its pre-relocation address. Then we jump to the cpu_call64() code in RAM,
519 * which changes to 64-bit mode and starts U-Boot.
520 */
521int cpu_jump_to_64bit_uboot(ulong target)
522{
523 typedef void (*func_t)(ulong pgtable, ulong setup_base, ulong target);
524 uint32_t *pgtable;
525 func_t func;
526
527 /* TODO(sjg@chromium.org): Find a better place for this */
528 pgtable = (uint32_t *)0x1000000;
529 if (!pgtable)
530 return -ENOMEM;
531
532 build_pagetable(pgtable);
533
534 /* TODO(sjg@chromium.org): Find a better place for this */
535 char *ptr = (char *)0x3000000;
536 char *gdt = (char *)0x3100000;
537
538 extern char gdt64[];
539
540 memcpy(ptr, cpu_call64, 0x1000);
541 memcpy(gdt, gdt64, 0x100);
542
543 /*
544 * TODO(sjg@chromium.org): This manually inserts the pointers into
545 * the code. Tidy this up to avoid this.
546 */
547 func = (func_t)ptr;
548 ulong ofs = (ulong)cpu_call64 - (ulong)ptr;
549 *(ulong *)(ptr + 7) = (ulong)gdt;
550 *(ulong *)(ptr + 0xc) = (ulong)gdt + 2;
551 *(ulong *)(ptr + 0x13) = (ulong)gdt;
552 *(ulong *)(ptr + 0x117 - 0xd4) -= ofs;
553
554 /*
555 * Copy U-Boot from ROM
556 * TODO(sjg@chromium.org): Figure out a way to get the text base
557 * correctly here, and in the device-tree binman definition.
558 *
559 * Also consider using FIT so we get the correct image length and
560 * parameters.
561 */
562 memcpy((char *)target, (char *)0xfff00000, 0x100000);
563
564 /* Jump to U-Boot */
565 func((ulong)pgtable, 0, (ulong)target);
566
567 return -EFAULT;
568}
569
Simon Glass16a624b2017-01-16 07:03:57 -0700570#ifdef CONFIG_SMP
/*
 * Callback referenced by the second mp_steps[] record. It currently does
 * nothing: both arguments are ignored and 0 is always returned.
 */
static int enable_smis(struct udevice *cpu, void *unused)
{
	return 0;
}
575
/*
 * Flight plan handed to mp_init() by x86_mp_init().
 *
 * NOTE(review): the meaning of the four MP_FR_BLOCK_APS() arguments is
 * defined outside this file (presumably in <asm/mp.h>) -- confirm there
 * before changing the records.
 */
static struct mp_flight_record mp_steps[] = {
	/* First record: mp_init_cpu is supplied in both callback positions */
	MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL),
	/* Wait for APs to finish initialization before proceeding */
	MP_FR_BLOCK_APS(NULL, NULL, enable_smis, NULL),
};
581
582int x86_mp_init(void)
583{
584 struct mp_params mp_params;
585
586 mp_params.parallel_microcode_load = 0,
587 mp_params.flight_plan = &mp_steps[0];
588 mp_params.num_records = ARRAY_SIZE(mp_steps);
589 mp_params.microcode_pointer = 0;
590
591 if (mp_init(&mp_params)) {
592 printf("Warning: MP init failure\n");
593 return -EIO;
594 }
595
596 return 0;
597}
598#endif