Hey $potential_committer, here's the patch. As you'll notice in pmap_init_pat() +#ifdef notyet + if (cpu_vendor_id != CPU_VENDOR_INTEL || + (I386_CPU_FAMILY(cpu_id) == 6 && I386_CPU_MODEL(cpu_id) >= 0xe)) { +#else + if (!0) { +#endif so we need at least I386_CPU_{FAMILY,MODEL} (to be found in i386/include/specialreg.h) and having the companion code would be nice too (ISTR it was straightforward but not trivial, not sure if I remember correctly now. Looks trivial enough :/) I *have* tried using the PMAP in mxge(4) so I don't think you'll get any panics ;) HTH, Aggelos diff --git a/sys/cpu/i386/include/cpufunc.h b/sys/cpu/i386/include/cpufunc.h index 9b7e80e..9c57446 100644 --- a/sys/cpu/i386/include/cpufunc.h +++ b/sys/cpu/i386/include/cpufunc.h @@ -396,6 +396,14 @@ cpu_invltlb(void) #endif +#ifndef _CPU_WBINVL_DEFINED +static __inline void +cpu_wbinvl(void) +{ + __asm __volatile("wbinvd"); +} +#endif + static __inline void cpu_nop(void) { diff --git a/sys/cpu/i386/include/pmap.h b/sys/cpu/i386/include/pmap.h index f7f0def..ccaef4e 100644 --- a/sys/cpu/i386/include/pmap.h +++ b/sys/cpu/i386/include/pmap.h @@ -62,10 +62,12 @@ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ +#define PG_PTE_PAT 0x080 /* PAT PAT index */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ +#define PG_PDE_PAT 0x1000 /* PAT PAT index */ /* Our various interpretations of the above */ diff --git a/sys/cpu/i386/include/specialreg.h b/sys/cpu/i386/include/specialreg.h index e0207ec..36a9811 100644 --- a/sys/cpu/i386/include/specialreg.h +++ b/sys/cpu/i386/include/specialreg.h @@ -159,6 +159,7 @@ #define MSR_MTRR64kBase 0x250 #define MSR_MTRR16kBase 0x258 #define MSR_MTRR4kBase 0x268 +#define MSR_PAT 0x277 #define MSR_MTRRdefType 0x2ff #define MSR_MC0_CTL 0x400 #define MSR_MC0_STATUS 0x401 @@ -184,6 +185,17 @@ 
#define MSR_THERM_INTERRUPT 0x19b #define MSR_THERM_STATUS 0x19c +/* + * PAT modes. + */ +#define PAT_UNCACHEABLE 0x00 +#define PAT_WRITE_COMBINING 0x01 +#define PAT_WRITE_THROUGH 0x04 +#define PAT_WRITE_PROTECTED 0x05 +#define PAT_WRITE_BACK 0x06 +#define PAT_UNCACHED 0x07 +#define PAT_VALUE(i, m) ((long long)(m) << (8 * (i))) +#define PAT_MASK(i) PAT_VALUE(i, 0xff) /* * Constants related to MTRRs diff --git a/sys/platform/pc32/i386/mp_machdep.c b/sys/platform/pc32/i386/mp_machdep.c index eba146f..5e82c3c 100644 --- a/sys/platform/pc32/i386/mp_machdep.c +++ b/sys/platform/pc32/i386/mp_machdep.c @@ -624,6 +624,8 @@ init_secondary(void) load_cr0(cr0); pmap_set_opt(); /* PSE/4MB pages, etc */ + pmap_init_pat(); /* Page Attribute Table */ + /* set up CPU registers and state */ cpu_setregs(); diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c index 0504439..bf04044 100644 --- a/sys/platform/pc32/i386/pmap.c +++ b/sys/platform/pc32/i386/pmap.c @@ -158,6 +158,7 @@ vm_offset_t KvaSize; /* max size of kernel virtual address space */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ static int pgeflag; /* PG_G or-in */ static int pseflag; /* PG_PS or-in */ +static int pat_works; /* Is page attribute table sane? */ static vm_object_t kptobj; @@ -216,6 +217,7 @@ static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va); static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); +static int pmap_cache_bits(int, boolean_t); static unsigned pdir4mb; @@ -295,6 +297,55 @@ pmap_pte_quick(pmap_t pmap, vm_offset_t va) return (0); } +/* + * Setup the PAT MSR. + */ +void +pmap_init_pat(void) +{ + uint64_t pat_msr; + + /* Bail if this CPU doesn't implement PAT. 
*/ + if (!(cpu_feature & CPUID_PAT)) + return; + +#ifdef notyet + if (cpu_vendor_id != CPU_VENDOR_INTEL || + (I386_CPU_FAMILY(cpu_id) == 6 && I386_CPU_MODEL(cpu_id) >= 0xe)) { +#else + if (!0) { +#endif + /* + * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. + * Program 4 and 5 as WP and WC. + * Leave 6 and 7 as UC- and UC. + */ + pat_msr = rdmsr(MSR_PAT); + pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5)); + pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) | + PAT_VALUE(5, PAT_WRITE_COMBINING); + pat_works = 1; + } else { + /* + * Due to some Intel errata, we can only safely use the lower 4 + * PAT entries. Thus, just replace PAT Index 2 with WC instead + * of UC-. + * + * Intel Pentium III Processor Specification Update + * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B + * or Mode C Paging) + * + * Intel Pentium IV Processor Specification Update + * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) + */ + pat_msr = rdmsr(MSR_PAT); + pat_msr &= ~PAT_MASK(2); + pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); + pat_works = 0; + } + wrmsr(MSR_PAT, pat_msr); +} + /* * Bootstrap the system enough to run with virtual memory. @@ -446,6 +497,7 @@ pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr) } #endif + pmap_init_pat(); /* * We need to finish setting up the globaldata page for the BSP. * locore has already populated the page table for the mdglobaldata @@ -554,6 +606,89 @@ pmap_init2(void) * Low level helper routines..... ***************************************************/ +/* + * Determine the appropriate bits to set in a PTE or PDE for a specified + * caching mode. + */ +static int +pmap_cache_bits(int mode, boolean_t is_pde) +{ + int pat_flag, pat_index, cache_bits; + + /* The PAT bit is different for PTE's and PDE's. */ + pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; + + /* If we don't support PAT, map extended modes to older ones.
*/ + if (!(cpu_feature & CPUID_PAT)) { + switch (mode) { + case PAT_UNCACHEABLE: + case PAT_WRITE_THROUGH: + case PAT_WRITE_BACK: + break; + case PAT_UNCACHED: + case PAT_WRITE_COMBINING: + case PAT_WRITE_PROTECTED: + mode = PAT_UNCACHEABLE; + break; + } + } + + /* Map the caching mode to a PAT index. */ + if (pat_works) { + switch (mode) { + case PAT_UNCACHEABLE: + pat_index = 3; + break; + case PAT_WRITE_THROUGH: + pat_index = 1; + break; + case PAT_WRITE_BACK: + pat_index = 0; + break; + case PAT_UNCACHED: + pat_index = 2; + break; + case PAT_WRITE_COMBINING: + pat_index = 5; + break; + case PAT_WRITE_PROTECTED: + pat_index = 4; + break; + default: + panic("Unknown caching mode %d\n", mode); + } + } else { + switch (mode) { + case PAT_UNCACHED: + case PAT_UNCACHEABLE: + case PAT_WRITE_PROTECTED: + pat_index = 3; + break; + case PAT_WRITE_THROUGH: + pat_index = 1; + break; + case PAT_WRITE_BACK: + pat_index = 0; + break; + case PAT_WRITE_COMBINING: + pat_index = 2; + break; + default: + panic("Unknown caching mode %d\n", mode); + } + } + + /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ + cache_bits = 0; + if (pat_index & 0x4) + cache_bits |= pat_flag; + if (pat_index & 0x2) + cache_bits |= PG_NC_PCD; + if (pat_index & 0x1) + cache_bits |= PG_NC_PWT; + return (cache_bits); +} + #if defined(PMAP_DIAGNOSTIC) /* @@ -3210,6 +3345,70 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size) kmem_free(&kernel_map, base, size); } +int +pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) +{ + vm_offset_t base, offset, tmpva; + pt_entry_t *pte; + u_int opte, npte; + pd_entry_t *pde; + pmap_inval_info info; + + base = trunc_page(va); + offset = va & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + + /* + * Only supported on kernel virtual addresses + */ + if (base < KvaStart) + return (EINVAL); + + /* 4MB pages and pages that aren't mapped aren't supported. 
*/ + for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { + pde = pmap_pde(&kernel_pmap, tmpva); + if (*pde & PG_PS) + return (EINVAL); + if (*pde == 0) + return (EINVAL); + pte = vtopte(tmpva); + if (*pte == 0) + return (EINVAL); + } + + pmap_inval_init(&info); + /* + * Ok, all the pages exist and are 4k, so run through them updating + * their cache mode. + */ + for (tmpva = base; size > 0; ) { + pte = vtopte(tmpva); + + /* + * The cache mode bits are all in the low 32-bits of the + * PTE, so we can just spin on updating the low 32-bits. + */ + do { + opte = *(u_int *)pte; + npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT); + npte |= pmap_cache_bits(mode, 0); + } while (npte != opte && + !atomic_cmpset_int((u_int *)pte, opte, npte)); + pmap_inval_add(&info, &kernel_pmap, tmpva); + tmpva += PAGE_SIZE; + size -= PAGE_SIZE; + } + + /* + * Flush CPU caches to make sure any data isn't cached that shouldn't + * be, etc. + */ + pmap_inval_cache_add(&info, &kernel_pmap, -1); + pmap_inval_flush(&info); + return (0); +} + + /* * perform the pmap work for mincore */ diff --git a/sys/platform/pc32/i386/pmap_inval.c b/sys/platform/pc32/i386/pmap_inval.c index b6b68f3..35999ba 100644 --- a/sys/platform/pc32/i386/pmap_inval.c +++ b/sys/platform/pc32/i386/pmap_inval.c @@ -67,7 +67,7 @@ #ifdef SMP static void -_cpu_invltlb(void *dummy) +_cpu_invltlb(void *dummy __unused) { cpu_invltlb(); } @@ -78,6 +78,12 @@ _cpu_invl1pg(void *data) cpu_invlpg(data); } +static void +_cpu_wbinvl(void *dummy __unused) +{ + cpu_wbinvl(); +} + #endif /* @@ -89,6 +95,7 @@ pmap_inval_init(pmap_inval_info_t info) info->pir_flags = 0; } +#ifdef SMP /* * Add a (pmap, va) pair to the invalidation list and protect access * as appropriate. 
@@ -96,7 +103,6 @@ pmap_inval_init(pmap_inval_info_t info) void pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) { -#ifdef SMP if ((info->pir_flags & PIRF_CPUSYNC) == 0) { info->pir_flags |= PIRF_CPUSYNC; info->pir_cpusync.cs_run_func = NULL; @@ -106,46 +112,86 @@ pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) } else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) { lwkt_cpusync_add(pmap->pm_active, &info->pir_cpusync); } -#else - if (pmap->pm_active == 0) - return; -#endif if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { if (va == (vm_offset_t)-1) { info->pir_flags |= PIRF_INVLTLB; -#ifdef SMP info->pir_cpusync.cs_fin2_func = _cpu_invltlb; -#endif } else { info->pir_flags |= PIRF_INVL1PG; info->pir_cpusync.cs_data = (void *)va; -#ifdef SMP info->pir_cpusync.cs_fin2_func = _cpu_invl1pg; -#endif } } else { info->pir_flags |= PIRF_INVLTLB; -#ifdef SMP info->pir_cpusync.cs_fin2_func = _cpu_invltlb; -#endif } } +void +pmap_inval_cache_add(pmap_inval_info_t info, pmap_t pmap, + vm_offset_t va __unused) +{ + if ((info->pir_flags & PIRF_CPUSYNC) == 0) { + info->pir_flags |= PIRF_CPUSYNC; + info->pir_cpusync.cs_run_func = NULL; + info->pir_cpusync.cs_fin1_func = NULL; + info->pir_cpusync.cs_fin2_func = NULL; + lwkt_cpusync_start(pmap->pm_active, &info->pir_cpusync); + } else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) { + lwkt_cpusync_add(pmap->pm_active, &info->pir_cpusync); + } + info->pir_flags |= PIRF_WBINVL; + info->pir_cpusync.cs_fin2_func = _cpu_wbinvl; +} + /* * Synchronize changes with target cpus. 
*/ void pmap_inval_flush(pmap_inval_info_t info) { -#ifdef SMP if (info->pir_flags & PIRF_CPUSYNC) lwkt_cpusync_finish(&info->pir_cpusync); -#else + info->pir_flags = 0; +} + +#else /* !SMP */ + +void +pmap_inval_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ + if (pmap->pm_active == 0) + return; + if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { + if (va == (vm_offset_t)-1) { + info->pir_flags |= PIRF_INVLTLB; + } else { + info->pir_flags |= PIRF_INVL1PG; + info->pir_cpusync.cs_data = (void *)va; + } + } else { + info->pir_flags |= PIRF_INVLTLB; + } +} + +void +pmap_inval_cache_add(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ + if (pmap->pm_active == 0) + return; + info->pir_flags |= PIRF_WBINVL; +} + +void +pmap_inval_flush(pmap_inval_info_t info) +{ if (info->pir_flags & PIRF_INVLTLB) cpu_invltlb(); else if (info->pir_flags & PIRF_INVL1PG) cpu_invlpg(info->pir_cpusync.cs_data); -#endif + if (info->pir_flags & PIRF_WBINVL) + cpu_wbinvl(); info->pir_flags = 0; } +#endif /* SMP */ diff --git a/sys/platform/pc32/include/pmap.h b/sys/platform/pc32/include/pmap.h index 9d7dda8..8efc52e 100644 --- a/sys/platform/pc32/include/pmap.h +++ b/sys/platform/pc32/include/pmap.h @@ -249,6 +249,8 @@ int pmap_get_pgeflag(void); #ifdef SMP void pmap_set_opt (void); #endif +void pmap_init_pat(void); +int pmap_change_attr(vm_offset_t, vm_size_t, int); #endif /* _KERNEL */ diff --git a/sys/platform/pc32/include/pmap_inval.h b/sys/platform/pc32/include/pmap_inval.h index e8cd668..23e149f 100644 --- a/sys/platform/pc32/include/pmap_inval.h +++ b/sys/platform/pc32/include/pmap_inval.h @@ -51,6 +51,7 @@ typedef pmap_inval_info *pmap_inval_info_t; #define PIRF_INVLTLB 0x0001 /* request invalidation of whole table */ #define PIRF_INVL1PG 0x0002 /* else request invalidation of one page */ #define PIRF_CPUSYNC 0x0004 /* cpusync is currently active */ +#define PIRF_WBINVL 0x0008 /* request cache invalidation */ #ifdef _KERNEL @@ -60,6 +61,7 @@ typedef 
pmap_inval_info *pmap_inval_info_t; void pmap_inval_init(pmap_inval_info_t); void pmap_inval_add(pmap_inval_info_t, pmap_t, vm_offset_t); +void pmap_inval_cache_add(pmap_inval_info_t, pmap_t, vm_offset_t); void pmap_inval_flush(pmap_inval_info_t); #endif