From c19e9188a7aa86aab13ee8e57cd4e6865cf98a8e Mon Sep 17 00:00:00 2001 From: Charlie Date: Thu, 29 Apr 2010 03:50:55 +0300 Subject: [PATCH] WIP PAT --- sys/cpu/i386/include/cpufunc.h | 8 + sys/cpu/i386/include/pmap.h | 2 + sys/platform/pc32/i386/mp_machdep.c | 3 + sys/platform/pc32/i386/pmap.c | 200 ++++++++++++++++++++++++ sys/platform/pc32/i386/pmap_inval.c | 135 ++++++++++++----- sys/platform/pc32/include/pmap.h | 2 + sys/platform/pc32/include/pmap_inval.h | 2 + sys/platform/pc64/x86_64/mp_machdep.c | 6 +- sys/platform/pc64/x86_64/pmap.c | 259 ++++++++++++++++++++++++++++++++ sys/platform/pc64/x86_64/pmap_inval.c | 129 ++++++++++++----- 10 files changed, 666 insertions(+), 80 deletions(-) diff --git a/sys/cpu/i386/include/cpufunc.h b/sys/cpu/i386/include/cpufunc.h index 3f68235..4b3ba87 100644 --- a/sys/cpu/i386/include/cpufunc.h +++ b/sys/cpu/i386/include/cpufunc.h @@ -404,6 +404,14 @@ cpu_invltlb(void) #endif +#ifndef _CPU_WBINVL_DEFINED +static __inline void +cpu_wbinvl(void) +{ + __asm __volatile("wbinvd"); +} +#endif + static __inline void cpu_nop(void) { diff --git a/sys/cpu/i386/include/pmap.h b/sys/cpu/i386/include/pmap.h index f7f0def..ccaef4e 100644 --- a/sys/cpu/i386/include/pmap.h +++ b/sys/cpu/i386/include/pmap.h @@ -62,10 +62,12 @@ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ +#define PG_PTE_PAT 0x080 /* PAT PAT index */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ +#define PG_PDE_PAT 0x1000 /* PAT PAT index */ /* Our various interpretations of the above */ diff --git a/sys/platform/pc32/i386/mp_machdep.c b/sys/platform/pc32/i386/mp_machdep.c index 9078ecd..e9cba97 100644 --- a/sys/platform/pc32/i386/mp_machdep.c +++ b/sys/platform/pc32/i386/mp_machdep.c @@ -628,8 +628,11 @@ init_secondary(void) cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 
load_cr0(cr0); + pmap_set_opt(); /* PSE/4MB pages, etc */ + pmap_init_pat(); /* Page Attribute Table */ + /* set up CPU registers and state */ cpu_setregs(); diff --git a/sys/platform/pc32/i386/pmap.c b/sys/platform/pc32/i386/pmap.c index bbac355..15d1b1d 100644 --- a/sys/platform/pc32/i386/pmap.c +++ b/sys/platform/pc32/i386/pmap.c @@ -163,6 +163,7 @@ vm_offset_t KvaSize; /* max size of kernel virtual address space */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ static int pgeflag; /* PG_G or-in */ static int pseflag; /* PG_PS or-in */ +static int pat_works; /* Is page attribute table sane? */ static vm_object_t kptobj; @@ -230,6 +231,7 @@ static unsigned * pmap_pte_quick (pmap_t pmap, vm_offset_t va); static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t, pmap_inval_info_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); +static int pmap_cache_bits(int, boolean_t); static unsigned pdir4mb; @@ -312,6 +314,55 @@ pmap_pte_quick(pmap_t pmap, vm_offset_t va) return (0); } +/* + * Setup the PAT MSR. + */ +void +pmap_init_pat(void) +{ + uint64_t pat_msr; + + /* Bail if this CPU doesn't implement PAT. */ + if (!(cpu_feature & CPUID_PAT)) + return; + +#ifdef notyet + if (cpu_vendor_id != CPU_VENDOR_INTEL || + (I386_CPU_FAMILY(cpu_id) == 6 && I386_CPU_MODEL(cpu_id) >= 0xe)) { +#else + if (!0) { +#endif + /* + * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. + * Program 4 and 5 as WP and WC. + * Leave 6 and 7 as UC- and UC. + */ + pat_msr = rdmsr(MSR_PAT); + pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5)); + pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) | + PAT_VALUE(5, PAT_WRITE_COMBINING); + pat_works = 1; + } else { + /* + * Due to some Intel errata, we can only safely use the lower 4 + * PAT entries. Thus, just replace PAT Index 2 with WC instead + * of UC-. 
+ * + * Intel Pentium III Processor Specification Update + * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B + * or Mode C Paging) + * + * Intel Pentium IV Processor Specification Update + * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) + */ + pat_msr = rdmsr(MSR_PAT); + pat_msr &= ~PAT_MASK(2); + pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); + pat_works = 0; + } + wrmsr(MSR_PAT, pat_msr); +} + /* * Bootstrap the system enough to run with virtual memory. @@ -463,6 +514,7 @@ pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr) } #endif + pmap_init_pat(); /* * We need to finish setting up the globaldata page for the BSP. * locore has already populated the page table for the mdglobaldata @@ -613,6 +665,89 @@ ptbase_assert(struct pmap *pmap) #endif +/* + * Determine the appropriate bits to set in a PTE or PDE for a specified + * caching mode. + */ +static int +pmap_cache_bits(int mode, boolean_t is_pde) +{ + int pat_flag, pat_index, cache_bits; + + /* The PAT bit is different for PTE's and PDE's. */ + pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; + + /* If we don't support PAT, map extended modes to older ones. */ + if (!(cpu_feature & CPUID_PAT)) { + switch (mode) { + case PAT_UNCACHEABLE: + case PAT_WRITE_THROUGH: + case PAT_WRITE_BACK: + break; + case PAT_UNCACHED: + case PAT_WRITE_COMBINING: + case PAT_WRITE_PROTECTED: + mode = PAT_UNCACHEABLE; + break; + } + } + + /* Map the caching mode to a PAT index. 
*/ + if (pat_works) { + switch (mode) { + case PAT_UNCACHEABLE: + pat_index = 3; + break; + case PAT_WRITE_THROUGH: + pat_index = 1; + break; + case PAT_WRITE_BACK: + pat_index = 0; + break; + case PAT_UNCACHED: + pat_index = 2; + break; + case PAT_WRITE_COMBINING: + pat_index = 5; + break; + case PAT_WRITE_PROTECTED: + pat_index = 4; + break; + default: + panic("Unknown caching mode %d\n", mode); + } + } else { + switch (mode) { + case PAT_UNCACHED: + case PAT_UNCACHEABLE: + case PAT_WRITE_PROTECTED: + pat_index = 3; + break; + case PAT_WRITE_THROUGH: + pat_index = 1; + break; + case PAT_WRITE_BACK: + pat_index = 0; + break; + case PAT_WRITE_COMBINING: + pat_index = 2; + break; + default: + panic("Unknown caching mode %d\n", mode); + } + } + + /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ + cache_bits = 0; + if (pat_index & 0x4) + cache_bits |= pat_flag; + if (pat_index & 0x2) + cache_bits |= PG_NC_PCD; + if (pat_index & 0x1) + cache_bits |= PG_NC_PWT; + return (cache_bits); +} + #if defined(PMAP_DIAGNOSTIC) /* @@ -3321,6 +3456,71 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size) kmem_free(&kernel_map, base, size); } +int +pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) +{ + vm_offset_t base, offset, tmpva; + pt_entry_t *pte; + u_int opte, npte; + pd_entry_t *pde; + pmap_inval_info info; + + base = trunc_page(va); + offset = va & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + + /* + * Only supported on kernel virtual addresses + */ + if (base < KvaStart) + return (EINVAL); + + /* 4MB pages and pages that aren't mapped aren't supported. */ + for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { + pde = pmap_pde(&kernel_pmap, tmpva); + if (*pde & PG_PS) + return (EINVAL); + if (*pde == 0) + return (EINVAL); + pte = vtopte(tmpva); + if (*pte == 0) + return (EINVAL); + } + + pmap_inval_init(&info); + /* + * Ok, all the pages exist and are 4k, so run through them updating + * their cache mode. 
+ */ + for (tmpva = base; size > 0; ) { + pte = vtopte(tmpva); + + /* + * The cache mode bits are all in the low 32-bits of the + * PTE, so we can just spin on updating the low 32-bits. + */ + do { + opte = *(u_int *)pte; + npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT); + npte |= pmap_cache_bits(mode, 0); + } while (npte != opte && + !atomic_cmpset_int((u_int *)pte, opte, npte)); + pmap_inval_interlock(&info, &kernel_pmap, tmpva); + tmpva += PAGE_SIZE; + size -= PAGE_SIZE; + } + + /* + * Flush CPU caches to make sure any data isn't cached that shouldn't + * be, etc. + */ + pmap_inval_cache_interlock(&info, &kernel_pmap, -1); + pmap_inval_deinterlock(&info, &kernel_pmap); + pmap_inval_done(&info); + return (0); +} + + /* * perform the pmap work for mincore */ diff --git a/sys/platform/pc32/i386/pmap_inval.c b/sys/platform/pc32/i386/pmap_inval.c index 9b26855..a96cbbc 100644 --- a/sys/platform/pc32/i386/pmap_inval.c +++ b/sys/platform/pc32/i386/pmap_inval.c @@ -30,8 +30,6 @@ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $DragonFly: src/sys/platform/pc32/i386/pmap_inval.c,v 1.5 2005/11/04 08:57:27 dillon Exp $ */ /* @@ -78,6 +76,12 @@ _cpu_invl1pg(void *data) cpu_invlpg(data); } +static void +_cpu_wbinvl(void *dummy __unused) +{ + cpu_wbinvl(); +} + #endif /* @@ -90,16 +94,38 @@ pmap_inval_init(pmap_inval_info_t info) crit_enter_id("inval"); } +static void +pmap_do_invalidation(pmap_inval_info_t info, vm_offset_t va) +{ + if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { + if (va == (vm_offset_t)-1) { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } else { + info->pir_flags |= PIRF_INVL1PG; + info->pir_cpusync.cs_data = (void *)va; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invl1pg; +#endif + } + } else { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } +} + +#ifdef SMP + /* - * Add a (pmap, va) pair to the invalidation list and protect access - * as appropriate. 
- * * CPUMASK_LOCK is used to interlock thread switchins */ -void -pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +static void +pmap_inval_prepare(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) { -#ifdef SMP cpumask_t oactive; cpumask_t nactive; @@ -122,37 +148,25 @@ pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) } else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) { lwkt_cpusync_add(oactive, &info->pir_cpusync); } -#else - if (pmap->pm_active == 0) - return; -#endif - if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { - if (va == (vm_offset_t)-1) { - info->pir_flags |= PIRF_INVLTLB; -#ifdef SMP - info->pir_cpusync.cs_fin2_func = _cpu_invltlb; -#endif - } else { - info->pir_flags |= PIRF_INVL1PG; - info->pir_cpusync.cs_data = (void *)va; -#ifdef SMP - info->pir_cpusync.cs_fin2_func = _cpu_invl1pg; -#endif - } - } else { - info->pir_flags |= PIRF_INVLTLB; -#ifdef SMP - info->pir_cpusync.cs_fin2_func = _cpu_invltlb; -#endif - } } +/* + * Add a (pmap, va) pair to the invalidation list and protect access + * as appropriate. + */ void -pmap_inval_deinterlock(pmap_inval_info_t info, pmap_t pmap) +pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) { -#ifdef SMP - atomic_clear_int(&pmap->pm_active, CPUMASK_LOCK); -#endif + pmap_inval_prepare(info, pmap, va); + pmap_do_invalidation(info, va); +} + +void +pmap_inval_cache_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ + pmap_inval_prepare(info, pmap, va); + info->pir_flags |= PIRF_WBINVL; + info->pir_cpusync.cs_fin2_func = _cpu_wbinvl; } /* @@ -161,22 +175,63 @@ pmap_inval_deinterlock(pmap_inval_info_t info, pmap_t pmap) void pmap_inval_flush(pmap_inval_info_t info) { -#ifdef SMP if (info->pir_flags & PIRF_CPUSYNC) lwkt_cpusync_finish(&info->pir_cpusync); -#else + info->pir_flags = 0; +} + +#else /* !SMP */ + +/* + * Add a (pmap, va) pair to the invalidation list and protect access + * as appropriate. 
+ * + * This !SMP variant takes no CPUMASK_LOCK interlock + */ +void +pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ + if (pmap->pm_active == 0) + return; + pmap_do_invalidation(info, va); +} + +void +pmap_inval_cache_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ + if (pmap->pm_active == 0) + return; + info->pir_flags |= PIRF_WBINVL; +} + +/* + * Apply the queued TLB and cache invalidations on the local cpu. + */ +void +pmap_inval_flush(pmap_inval_info_t info) +{ if (info->pir_flags & PIRF_INVLTLB) cpu_invltlb(); else if (info->pir_flags & PIRF_INVL1PG) cpu_invlpg(info->pir_cpusync.cs_data); -#endif + if (info->pir_flags & PIRF_WBINVL) + cpu_wbinvl(); info->pir_flags = 0; } +#endif /* SMP */ + +void +pmap_inval_deinterlock(pmap_inval_info_t info, pmap_t pmap) +{ +#ifdef SMP + atomic_clear_int(&pmap->pm_active, CPUMASK_LOCK); +#endif +} + void pmap_inval_done(pmap_inval_info_t info) { pmap_inval_flush(info); - crit_exit_id("flush"); + crit_exit_id("inval"); } - diff --git a/sys/platform/pc32/include/pmap.h b/sys/platform/pc32/include/pmap.h index fdf6385..c4f4ebe 100644 --- a/sys/platform/pc32/include/pmap.h +++ b/sys/platform/pc32/include/pmap.h @@ -292,6 +292,8 @@ int pmap_get_pgeflag(void); #ifdef SMP void pmap_set_opt (void); #endif +void pmap_init_pat(void); +int pmap_change_attr(vm_offset_t, vm_size_t, int); #endif /* _KERNEL */ diff --git a/sys/platform/pc32/include/pmap_inval.h b/sys/platform/pc32/include/pmap_inval.h index c4a18d4..7d3d15e 100644 --- a/sys/platform/pc32/include/pmap_inval.h +++ b/sys/platform/pc32/include/pmap_inval.h @@ -51,6 +51,7 @@ typedef pmap_inval_info *pmap_inval_info_t; #define PIRF_INVLTLB 0x0001 /* request invalidation of whole table */ #define PIRF_INVL1PG 0x0002 /* else request invalidation of one page */ #define PIRF_CPUSYNC 0x0004 /* cpusync is currently active */ +#define PIRF_WBINVL 0x0008 /* request cache write-back and invalidate (wbinvd) */ #ifdef _KERNEL @@ -61,6 +62,7 @@ typedef 
pmap_inval_info *pmap_inval_info_t; 
void pmap_inval_init(pmap_inval_info_t); void pmap_inval_interlock(pmap_inval_info_t, pmap_t, vm_offset_t); void pmap_inval_deinterlock(pmap_inval_info_t, pmap_t); +void pmap_inval_cache_interlock(pmap_inval_info_t, pmap_t, vm_offset_t); void pmap_inval_flush(pmap_inval_info_t); void pmap_inval_done(pmap_inval_info_t); diff --git a/sys/platform/pc64/x86_64/mp_machdep.c b/sys/platform/pc64/x86_64/mp_machdep.c index 17ff29b..e79cb12 100644 --- a/sys/platform/pc64/x86_64/mp_machdep.c +++ b/sys/platform/pc64/x86_64/mp_machdep.c @@ -511,10 +511,8 @@ init_secondary(void) wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); pmap_set_opt(); /* PSE/4MB pages, etc */ -#if JGXXX - /* Initialize the PAT MSR. */ - pmap_init_pat(); -#endif + + pmap_init_pat(); /* Page Attribute Table */ /* set up CPU registers and state */ cpu_setregs(); diff --git a/sys/platform/pc64/x86_64/pmap.c b/sys/platform/pc64/x86_64/pmap.c index 7c45c0d..ee14076 100644 --- a/sys/platform/pc64/x86_64/pmap.c +++ b/sys/platform/pc64/x86_64/pmap.c @@ -719,6 +719,68 @@ pmap_bootstrap(vm_paddr_t *firstaddr) cpu_invltlb(); } +/* + * Setup the PAT MSR. + */ +void +pmap_init_pat(void) +{ + uint64_t pat_msr; + char *sysenv; + static int pat_tested = 0; + + /* Bail if this CPU doesn't implement PAT. */ + if (!(cpu_feature & CPUID_PAT)) + panic("no PAT??"); + + /* + * Some Apple Macs based on nVidia chipsets cannot enter ACPI mode + * via SMI# when we use upper 4 PAT entries for unknown reason. + */ + if (!pat_tested) { + pat_works = 1; +#if 0 + sysenv = getenv("smbios.system.product"); + if (sysenv != NULL) { + if (strncmp(sysenv, "MacBook5,1", 10) == 0 || + strncmp(sysenv, "MacBookPro5,5", 13) == 0 || + strncmp(sysenv, "Macmini3,1", 10) == 0) + pat_works = 0; + freeenv(sysenv); + } +#endif + pat_tested = 1; + } + + /* Initialize default PAT entries. 
*/ + pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | + PAT_VALUE(1, PAT_WRITE_THROUGH) | + PAT_VALUE(2, PAT_UNCACHED) | + PAT_VALUE(3, PAT_UNCACHEABLE) | + PAT_VALUE(4, PAT_WRITE_BACK) | + PAT_VALUE(5, PAT_WRITE_THROUGH) | + PAT_VALUE(6, PAT_UNCACHED) | + PAT_VALUE(7, PAT_UNCACHEABLE); + + if (pat_works) { + /* + * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. + * Program 4 and 5 as WP and WC. + * Leave 6 and 7 as UC- and UC. + */ + pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5)); + pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) | + PAT_VALUE(5, PAT_WRITE_COMBINING); + } else { + /* + * Just replace PAT Index 2 with WC instead of UC-. + */ + pat_msr &= ~PAT_MASK(2); + pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); + } + wrmsr(MSR_PAT, pat_msr); +} + #ifdef SMP /* * Set 4mb pdir for mp startup @@ -3654,6 +3716,206 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size) kmem_free(&kernel_map, base, size); } +static int +pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) +{ + vm_offset_t base, offset, tmpva; + vm_paddr_t pa_start, pa_end; + pdp_entry_t *pdpe; + pd_entry_t *pde; + pt_entry_t *pte; + int cache_bits_pte, cache_bits_pde, error; + boolean_t changed; + pmap_inval_info info; + + base = trunc_page(va); + offset = va & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + + /* + * Only supported on kernel virtual addresses, including the direct + * map but excluding the recursive map. + */ + if (base < KvaStart) + return (EINVAL); + + cache_bits_pde = pmap_cache_bits(mode, 1); + cache_bits_pte = pmap_cache_bits(mode, 0); + changed = FALSE; + + /* + * Pages that aren't mapped aren't supported. Also break down 2MB pages + * into 4KB pages if required. + */ + for (tmpva = base; tmpva < base + size; ) { + pdpe = pmap_pdpe(kernel_pmap, tmpva); + if (*pdpe == 0) + return (EINVAL); + if (*pdpe & PG_PS) { + /* + * If the current 1GB page already has the required + * memory type, then we need not demote this page. Just + * increment tmpva to the next 1GB page frame. 
+ */ + if ((*pdpe & PG_PDE_CACHE) == cache_bits_pde) { + tmpva = trunc_1gpage(tmpva) + NBPDP; + continue; + } + + /* + * If the current offset aligns with a 1GB page frame + * and there is at least 1GB left within the range, then + * we need not break down this page into 2MB pages. + */ + if ((tmpva & PDPMASK) == 0 && + tmpva + PDPMASK < base + size) { + tmpva += NBPDP; + continue; + } + if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva)) + return (ENOMEM); + } + pde = pmap_pdpe_to_pde(pdpe, tmpva); + if (*pde == 0) + return (EINVAL); + if (*pde & PG_PS) { + /* + * If the current 2MB page already has the required + * memory type, then we need not demote this page. Just + * increment tmpva to the next 2MB page frame. + */ + if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { + tmpva = trunc_2mpage(tmpva) + NBPDR; + continue; + } + + /* + * If the current offset aligns with a 2MB page frame + * and there is at least 2MB left within the range, then + * we need not break down this page into 4KB pages. + */ + if ((tmpva & PDRMASK) == 0 && + tmpva + PDRMASK < base + size) { + tmpva += NBPDR; + continue; + } + if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) + return (ENOMEM); + } + pte = pmap_pde_to_pte(pde, tmpva); + if (*pte == 0) + return (EINVAL); + tmpva += PAGE_SIZE; + } + error = 0; + + /* + * Ok, all the pages exist, so run through them updating their + * cache mode if required. + */ + pa_start = pa_end = 0; + for (tmpva = base; tmpva < base + size; ) { + pdpe = pmap_pdpe(kernel_pmap, tmpva); + if (*pdpe & PG_PS) { + if ((*pdpe & PG_PDE_CACHE) != cache_bits_pde) { + pmap_pde_attr(pdpe, cache_bits_pde); + changed = TRUE; + } + if (tmpva >= VM_MIN_KERNEL_ADDRESS) { + if (pa_start == pa_end) { + /* Start physical address run. */ + pa_start = *pdpe & PG_PS_FRAME; + pa_end = pa_start + NBPDP; + } else if (pa_end == (*pdpe & PG_PS_FRAME)) + pa_end += NBPDP; + else { + /* Run ended, update direct map. 
*/ + error = pmap_change_attr( + PHYS_TO_DMAP(pa_start), + pa_end - pa_start, mode); + if (error != 0) + break; + /* Start physical address run. */ + pa_start = *pdpe & PG_PS_FRAME; + pa_end = pa_start + NBPDP; + } + } + tmpva = trunc_1gpage(tmpva) + NBPDP; + continue; + } + pde = pmap_pdpe_to_pde(pdpe, tmpva); + if (*pde & PG_PS) { + if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { + pmap_pde_attr(pde, cache_bits_pde); + changed = TRUE; + } + if (tmpva >= VM_MIN_KERNEL_ADDRESS) { + if (pa_start == pa_end) { + /* Start physical address run. */ + pa_start = *pde & PG_PS_FRAME; + pa_end = pa_start + NBPDR; + } else if (pa_end == (*pde & PG_PS_FRAME)) + pa_end += NBPDR; + else { + /* Run ended, update direct map. */ + error = pmap_change_attr( + PHYS_TO_DMAP(pa_start), + pa_end - pa_start, mode); + if (error != 0) + break; + /* Start physical address run. */ + pa_start = *pde & PG_PS_FRAME; + pa_end = pa_start + NBPDR; + } + } + tmpva = trunc_2mpage(tmpva) + NBPDR; + } else { + pte = pmap_pde_to_pte(pde, tmpva); + if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { + pmap_pte_attr(pte, cache_bits_pte); + changed = TRUE; + } + if (tmpva >= VM_MIN_KERNEL_ADDRESS) { + if (pa_start == pa_end) { + /* Start physical address run. */ + pa_start = *pte & PG_FRAME; + pa_end = pa_start + PAGE_SIZE; + } else if (pa_end == (*pte & PG_FRAME)) + pa_end += PAGE_SIZE; + else { + /* Run ended, update direct map. */ + error = pmap_change_attr( + PHYS_TO_DMAP(pa_start), + pa_end - pa_start, mode); + if (error != 0) + break; + /* Start physical address run. */ + pa_start = *pte & PG_FRAME; + pa_end = pa_start + PAGE_SIZE; + } + } + tmpva += PAGE_SIZE; + } + } + if (error == 0 && pa_start != pa_end) + error = pmap_change_attr(PHYS_TO_DMAP(pa_start), + pa_end - pa_start, mode); + + /* + * Flush CPU caches if required to make sure any data isn't cached that + * shouldn't be, etc. 
+ */ + if (changed) { + pmap_inval_init(&info); + pmap_inval_cache_interlock(&info, &kernel_pmap, tmpva); + /* Pair with pmap_inval_init(): drop the interlock, run the queued flush, leave the critical section. */ + pmap_inval_deinterlock(&info, &kernel_pmap); + pmap_inval_done(&info); + pmap_invalidate_cache_range(base, tmpva); + } + return (error); +} + /* * perform the pmap work for mincore */ diff --git a/sys/platform/pc64/x86_64/pmap_inval.c b/sys/platform/pc64/x86_64/pmap_inval.c index 2aafd0b..5558d16 100644 --- a/sys/platform/pc64/x86_64/pmap_inval.c +++ b/sys/platform/pc64/x86_64/pmap_inval.c @@ -76,6 +76,12 @@ _cpu_invl1pg(void *data) cpu_invlpg(data); } +static void +_cpu_wbinvl(void *dummy __unused) +{ + cpu_wbinvl(); +} + #endif /* @@ -88,16 +94,38 @@ pmap_inval_init(pmap_inval_info_t info) crit_enter_id("inval"); } +static void +pmap_do_invalidation(pmap_inval_info_t info, vm_offset_t va) +{ + if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { + if (va == (vm_offset_t)-1) { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } else { + info->pir_flags |= PIRF_INVL1PG; + info->pir_cpusync.cs_data = (void *)va; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invl1pg; +#endif + } + } else { + info->pir_flags |= PIRF_INVLTLB; +#ifdef SMP + info->pir_cpusync.cs_fin2_func = _cpu_invltlb; +#endif + } +} + +#ifdef SMP + /* - * Add a (pmap, va) pair to the invalidation list and protect access - * as appropriate. 
- * - * CPUMASK_LOCK is used to interlock thread switchins */ -void -pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +static void +pmap_inval_prepare(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) { -#ifdef SMP cpumask_t oactive; cpumask_t nactive; @@ -120,37 +148,25 @@ pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) } else if (pmap->pm_active & ~info->pir_cpusync.cs_mask) { lwkt_cpusync_add(oactive, &info->pir_cpusync); } -#else - if (pmap->pm_active == 0) - return; -#endif - if ((info->pir_flags & (PIRF_INVLTLB|PIRF_INVL1PG)) == 0) { - if (va == (vm_offset_t)-1) { - info->pir_flags |= PIRF_INVLTLB; -#ifdef SMP - info->pir_cpusync.cs_fin2_func = _cpu_invltlb; -#endif - } else { - info->pir_flags |= PIRF_INVL1PG; - info->pir_cpusync.cs_data = (void *)va; -#ifdef SMP - info->pir_cpusync.cs_fin2_func = _cpu_invl1pg; -#endif - } - } else { - info->pir_flags |= PIRF_INVLTLB; -#ifdef SMP - info->pir_cpusync.cs_fin2_func = _cpu_invltlb; -#endif - } } +/* + * Add a (pmap, va) pair to the invalidation list and protect access + * as appropriate. + */ void -pmap_inval_deinterlock(pmap_inval_info_t info, pmap_t pmap) +pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) { -#ifdef SMP - atomic_clear_int(&pmap->pm_active, CPUMASK_LOCK); -#endif + pmap_inval_prepare(info, pmap, va); + pmap_do_invalidation(info, va); +} + +void +pmap_inval_cache_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ + pmap_inval_prepare(info, pmap, va); + info->pir_flags |= PIRF_WBINVL; + info->pir_cpusync.cs_fin2_func = _cpu_wbinvl; } /* @@ -159,18 +175,59 @@ pmap_inval_deinterlock(pmap_inval_info_t info, pmap_t pmap) void pmap_inval_flush(pmap_inval_info_t info) { -#ifdef SMP if (info->pir_flags & PIRF_CPUSYNC) lwkt_cpusync_finish(&info->pir_cpusync); -#else + info->pir_flags = 0; +} + +#else /* !SMP */ + +/* + * Add a (pmap, va) pair to the invalidation list and protect access + * as appropriate. 
+ * + * This !SMP variant takes no CPUMASK_LOCK interlock + */ +void +pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ + if (pmap->pm_active == 0) + return; + pmap_do_invalidation(info, va); +} + +void +pmap_inval_cache_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va) +{ + if (pmap->pm_active == 0) + return; + info->pir_flags |= PIRF_WBINVL; +} + +/* + * Apply the queued TLB and cache invalidations on the local cpu. + */ +void +pmap_inval_flush(pmap_inval_info_t info) +{ if (info->pir_flags & PIRF_INVLTLB) cpu_invltlb(); else if (info->pir_flags & PIRF_INVL1PG) cpu_invlpg(info->pir_cpusync.cs_data); -#endif + if (info->pir_flags & PIRF_WBINVL) + cpu_wbinvl(); info->pir_flags = 0; } +#endif /* SMP */ + +void +pmap_inval_deinterlock(pmap_inval_info_t info, pmap_t pmap) +{ +#ifdef SMP + atomic_clear_int(&pmap->pm_active, CPUMASK_LOCK); +#endif +} void pmap_inval_done(pmap_inval_info_t info) { -- 1.6.6.2