DragonFly kernel List (threaded) for 2003-09
[
Date Prev][
Date Next]
[
Thread Prev][
Thread Next]
[
Date Index][
Thread Index]
namecache stage-2 patch for testing
This is the stage-2 namecache patch. It should work pretty much the
same as the namecache works now except I have reorgranized the namecache
structure's topology the way it will need to be to move forward.
Originally vnodes contained two lists: a list of namecache entries
for which the vnode is the 'parent directory', and a list of namecache
entries for which the vnode is the target (child in a directory). The
namecache structure would contain a reference to the two vnodes but
was otherwise disconnected from other namecache structures. The problem
with the old methodology is that it is impossible to reverse-engineer
multiple distinct paths flowing through the same vnode.
The new topology is based around the namecache structure itself. The
structure contains a parent pointer, a list of children, and a reference
to a single vnode. A vnode then has a list of all the namecache
structures flowing through it.
--
With the new topology we will be able to keep paths distinct by recording
a pointer to the namecache entry instead of the vnode as the 'directory'
being searched, and as the 'current directory', 'root directory' and
'jail directory'. This is what stage-3 is going to do. Amoung other
things it will allow you to 'mount' the same filesystem or a subdirectory
at multiple points in the directory tree without having to use a null
filesystem, and without causing the system to get confused vis-a-vie the
use of "..".
There are many other uses once stage-3 is in... for example, you could
literally cut-and-paste pieces of a filesystem together to form a new
topology that shares the related vnodes but otherwise appears as a
distinct directory tree in the system. It will be possible to
implement the concept of VFS 'environments' that I have been positing
for the last few months simply by writing a VFS that does nothing more
then resolve namecache issues, allowing actual operation within the
'environment' to occur without any significant interaction with the
environment once things are cached in the namecache. *VERY* important.
I would like to commit this stage-2 patch tomorrow evening so I can move
on to stage-3.
-Matt
Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.24
diff -u -r1.24 init_main.c
--- kern/init_main.c 24 Sep 2003 18:38:51 -0000 1.24
+++ kern/init_main.c 24 Sep 2003 18:38:59 -0000
@@ -459,6 +459,7 @@
VREF(p->p_fd->fd_cdir);
p->p_fd->fd_rdir = rootvnode;
VREF(p->p_fd->fd_rdir);
+ vfs_cache_setroot(rootvnode);
VOP_UNLOCK(rootvnode, 0, curthread);
/*
Index: kern/vfs_cache.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_cache.c,v
retrieving revision 1.8
diff -u -r1.8 vfs_cache.c
--- kern/vfs_cache.c 23 Sep 2003 05:03:51 -0000 1.8
+++ kern/vfs_cache.c 25 Sep 2003 01:45:26 -0000
@@ -1,6 +1,8 @@
/*
* Copyright (c) 1989, 1993, 1995
* The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2003 Matthew Dillon <dillon@xxxxxxxxxxxxx>,
+ * All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Poul-Henning Kamp of the FreeBSD Project.
@@ -35,7 +37,7 @@
*
* @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
* $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $
- * $DragonFly: src/sys/kern/vfs_cache.c,v 1.8 2003/09/23 05:03:51 dillon Exp $
+ * $DragonFly$
*/
#include <sys/param.h>
@@ -52,16 +54,10 @@
#include <sys/fnv_hash.h>
/*
- * The namecache structure describes the elements in the cache of recent
- * names looked up by namei. The cache is nominally LRU, but namecache
- * path element sequences to active vnodes and in-progress lookups are
- * always retained. Non-inclusive of retained entries, the cache is
- * mananged in an LRU fashion.
- *
- * random lookups in the cache are accomplished with a hash atble using
+ * Random lookups in the cache are accomplished with a hash table using
* a hash key of (nc_src_vp, name).
*
- * Negative entries may exist and correspond to structures where nc_dst_vp
+ * Negative entries may exist and correspond to structures where nc_vp
* is NULL. In a negative entry, NCF_WHITEOUT will be set if the entry
* corresponds to a whited-out directory entry (verses simply not finding the
* entry at all).
@@ -70,16 +66,6 @@
* or NOCACHE is set (rewrite), and the name is located in the cache, it
* will be dropped.
*/
-struct namecache {
- LIST_ENTRY(namecache) nc_hash; /* hash chain (nc_src_vp,name) */
- LIST_ENTRY(namecache) nc_src; /* scan via nc_src_vp based list */
- TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
- struct vnode *nc_src_vp; /* directory containing name */
- struct vnode *nc_dst_vp; /* vnode representing name or NULL */
- u_char nc_flag;
- u_char nc_nlen; /* The length of the name, 255 max */
- char nc_name[0]; /* The segment name (embedded) */
-};
/*
* Structures associated with name cacheing.
@@ -87,7 +73,11 @@
#define NCHHASH(hash) (&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
-static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */
+static struct namecache_list ncneglist; /* instead of vnode */
+static struct namecache rootnamecache; /* Dummy node */
+
+static int nczapcheck; /* panic on bad release */
+SYSCTL_INT(_debug, OID_AUTO, nczapcheck, CTLFLAG_RW, &nczapcheck, 0, "");
static u_long nchash; /* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
@@ -126,51 +116,140 @@
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
-static void cache_zap (struct namecache *ncp);
+static void cache_zap(struct namecache *ncp);
MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
/*
- * Flags in namecache.nc_flag
- */
-#define NCF_WHITE 0x01 /* negative entry corresponds to whiteout */
+ * cache_hold() and cache_drop() prevent the premature deletion of a
+ * namecache entry but do not prevent operations (such as zapping) on
+ * that namecache entry.
+ */
+static __inline
+struct namecache *
+cache_hold(struct namecache *ncp)
+{
+ ++ncp->nc_refs;
+ return(ncp);
+}
+
+static __inline
+void
+cache_drop(struct namecache *ncp)
+{
+ KKASSERT(ncp->nc_refs > 0);
+ if (ncp->nc_refs == 1 && (ncp->nc_flag & NCF_HASHED) == 0)
+ cache_zap(ncp);
+ else
+ --ncp->nc_refs;
+}
/*
- * Delete an entry from its hash list and move it to the front
- * of the LRU list for immediate reuse.
+ * Try to destroy a namecache entry. The entry is disassociated from its
+ * vnode or ncneglist and reverted to an UNRESOLVED state.
+ *
+ * Then, if there are no additional references to the ncp and we can
+ * successfully delete the children, the entry is also removed from the
+ * namecache hashlist / topology.
+ *
+ * References or undeletable children will prevent the entry from being
+ * removed from the topology. The entry may be revalidated (typically
+ * by cache_enter()) at a later time. Children remain because:
+ *
+ * + we have tried to delete a node rather then a leaf in the topology.
+ * + the presence of negative entries (we try to scrap these).
+ * + an entry or child has a non-zero ref count and cannot be scrapped.
+ *
+ * This function must be called with the ncp held and will drop the ref
+ * count during zapping.
*/
static void
cache_zap(struct namecache *ncp)
{
- LIST_REMOVE(ncp, nc_hash);
- LIST_REMOVE(ncp, nc_src);
- if (LIST_EMPTY(&ncp->nc_src_vp->v_cache_src))
- vdrop(ncp->nc_src_vp);
- if (ncp->nc_dst_vp) {
- TAILQ_REMOVE(&ncp->nc_dst_vp->v_cache_dst, ncp, nc_dst);
- } else {
- TAILQ_REMOVE(&ncneg, ncp, nc_dst);
- numneg--;
+ struct namecache *par;
+ struct vnode *vp;
+
+ /*
+ * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED.
+ */
+ if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
+ ncp->nc_flag |= NCF_UNRESOLVED;
+ --numcache;
+ if ((vp = ncp->nc_vp) != NULL) {
+ ncp->nc_vp = NULL; /* safety */
+ TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode);
+ if (vp && !TAILQ_EMPTY(&ncp->nc_list))
+ vdrop(vp);
+ } else {
+ TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
+ --numneg;
+ }
+ }
+
+ /*
+ * Try to scrap the entry and possibly tail-recurse on its parent.
+ * We only scrap unref'd (other then our ref) unresolved entries,
+ * we do not scrap 'live' entries.
+ */
+ while (ncp->nc_flag & NCF_UNRESOLVED) {
+ /*
+ * Someone other then us has a ref, stop.
+ */
+ if (ncp->nc_refs > 1)
+ goto done;
+
+ /*
+ * We have children, stop.
+ */
+ if (!TAILQ_EMPTY(&ncp->nc_list))
+ goto done;
+
+ /*
+ * Ok, we can completely destroy and free this entry. Sanity
+ * check it against our static rootnamecache structure,
+ * then remove it from the hash.
+ */
+ KKASSERT(ncp != &rootnamecache);
+
+ if (ncp->nc_flag & NCF_HASHED) {
+ ncp->nc_flag &= ~NCF_HASHED;
+ LIST_REMOVE(ncp, nc_hash);
+ }
+
+ /*
+ * Unlink from its parent and free, then loop on the
+ * parent.
+ */
+ par = cache_hold(ncp->nc_parent);
+ TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
+ if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
+ vdrop(par->nc_vp);
+ ncp->nc_refs = -1; /* safety */
+ ncp->nc_parent = NULL; /* safety */
+ free(ncp, M_VFSCACHE);
+ ncp = par;
}
- numcache--;
- free(ncp, M_VFSCACHE);
+done:
+ --ncp->nc_refs;
}
/*
* Lookup an entry in the cache
*
- * We don't do this if the segment name is long, simply so the cache
- * can avoid holding long names (which would either waste space, or
- * add greatly to the complexity).
- *
* Lookup is called with dvp pointing to the directory to search,
- * cnp pointing to the name of the entry being sought. If the lookup
- * succeeds, the vnode is returned in *vpp, and a status of -1 is
- * returned. If the lookup determines that the name does not exist
- * (negative cacheing), a status of ENOENT is returned. If the lookup
- * fails, a status of zero is returned.
+ * cnp pointing to the name of the entry being sought.
+ *
+ * If the lookup succeeds, the vnode is returned in *vpp, and a
+ * status of -1 is returned.
+ *
+ * If the lookup determines that the name does not exist (negative cacheing),
+ * a status of ENOENT is returned.
+ *
+ * If the lookup fails, a status of zero is returned.
+ *
+ * Note that UNRESOLVED entries are ignored. They are not negative cache
+ * entries.
*/
-
int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
@@ -183,10 +262,12 @@
if (cnp->cn_namelen == 1) {
*vpp = dvp;
dothits++;
+ numposhits++; /* include in total statistics */
return (-1);
}
if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
dotdothits++;
+ numposhits++; /* include in total statistics */
if (dvp->v_dd->v_id != dvp->v_ddid ||
(cnp->cn_flags & CNP_MAKEENTRY) == 0) {
dvp->v_ddid = 0;
@@ -201,13 +282,18 @@
hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
numchecks++;
- if (ncp->nc_src_vp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
- !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
+ if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 &&
+ ncp->nc_parent->nc_vp == dvp &&
+ ncp->nc_nlen == cnp->cn_namelen &&
+ bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen) == 0
+ ) {
+ cache_hold(ncp);
break;
+ }
}
/* We failed to find an entry */
- if (ncp == 0) {
+ if (ncp == NULL) {
if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) {
nummisszap++;
} else {
@@ -226,10 +312,11 @@
}
/* We found a "positive" match, return the vnode */
- if (ncp->nc_dst_vp) {
+ if (ncp->nc_vp) {
numposhits++;
nchstats.ncs_goodhits++;
- *vpp = ncp->nc_dst_vp;
+ *vpp = ncp->nc_vp;
+ cache_drop(ncp);
return (-1);
}
@@ -242,29 +329,111 @@
}
numneghits++;
+
/*
* We found a "negative" match, ENOENT notifies client of this match.
- * The nc_flag field records whether this is a whiteout.
+ * The nc_flag field records whether this is a whiteout. Since there
+ * is no vnode we can use the vnode tailq link field with ncneglist.
*/
- TAILQ_REMOVE(&ncneg, ncp, nc_dst);
- TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
+ TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
+ TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
nchstats.ncs_neghits++;
- if (ncp->nc_flag & NCF_WHITE)
+ if (ncp->nc_flag & NCF_WHITEOUT)
cnp->cn_flags |= CNP_ISWHITEOUT;
+ cache_drop(ncp);
return (ENOENT);
}
/*
+ * Generate a special linkage between the mount point and the root of the
+ * mounted filesystem in order to maintain the namecache topology across
+ * a mount point. The special linkage has a 0-length name component
+ * and sets NCF_MOUNTPT.
+ */
+void
+cache_mount(struct vnode *dvp, struct vnode *tvp)
+{
+ struct namecache *ncp;
+ struct namecache *par;
+ struct nchashhead *ncpp;
+ u_int32_t hash;
+
+ /*
+ * If a linkage already exists we do not have to do anything.
+ */
+ hash = fnv_32_buf("", 0, FNV1_32_INIT);
+ hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
+ LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+ numchecks++;
+ if (ncp->nc_vp == tvp &&
+ ncp->nc_parent->nc_vp == dvp && ncp->nc_nlen == 0) {
+ return;
+ }
+ }
+
+ /*
+ * YYY temporary hack, the 'directory' dvp in the next stage
+ * will simply be a namecache pointer to the parent.
+ */
+ par = TAILQ_FIRST(&dvp->v_namecache);
+ if (par == NULL)
+ par = &rootnamecache;
+
+ /*
+ * Otherwise create a new linkage.
+ */
+ ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK);
+ bzero(ncp, sizeof(*ncp));
+ numcache++;
+
+ TAILQ_INIT(&ncp->nc_list);
+ ncp->nc_flag = NCF_MOUNTPT;
+ ncp->nc_parent = par;
+ if (TAILQ_EMPTY(&par->nc_list)) {
+ if (par->nc_vp)
+ vhold(par->nc_vp);
+ }
+ TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
+
+ /*
+ * Linkup the target vnode. The target vnode is NULL if this is
+ * to be a negative cache entry.
+ */
+ ncp->nc_vp = tvp;
+ TAILQ_INSERT_HEAD(&tvp->v_namecache, ncp, nc_vnode);
+#if 0
+ if (tvp->v_type == VDIR) {
+ vp->v_dd = dvp;
+ vp->v_ddid = dvp->v_id;
+ }
+#endif
+
+ /*
+ * Hash table
+ */
+ hash = fnv_32_buf("", 0, FNV1_32_INIT);
+ hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
+ ncpp = NCHHASH(hash);
+ LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
+
+ ncp->nc_flag |= NCF_HASHED;
+}
+
+/*
* Add an entry to the cache.
*/
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
struct namecache *ncp;
+ struct namecache *par;
struct nchashhead *ncpp;
u_int32_t hash;
- int len;
+ /*
+ * "." and ".." are degenerate cases, they are not added to the
+ * cache.
+ */
if (cnp->cn_nameptr[0] == '.') {
if (cnp->cn_namelen == 1) {
return;
@@ -280,63 +449,152 @@
return;
}
}
-
- ncp = (struct namecache *)
- malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
- bzero((char *)ncp, sizeof *ncp);
- numcache++;
- if (!vp) {
- numneg++;
- ncp->nc_flag = cnp->cn_flags & CNP_ISWHITEOUT ? NCF_WHITE : 0;
- } else if (vp->v_type == VDIR) {
- vp->v_dd = dvp;
- vp->v_ddid = dvp->v_id;
- }
/*
- * Fill in cache info, if vp is NULL this is a "negative" cache entry.
- * For negative entries, we have to record whether it is a whiteout.
- * the whiteout flag is stored in the nc_flag field.
- */
- ncp->nc_dst_vp = vp;
- ncp->nc_src_vp = dvp;
- len = ncp->nc_nlen = cnp->cn_namelen;
- hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
- bcopy(cnp->cn_nameptr, ncp->nc_name, len);
+ * Attempt to locate an unresolved entry in the cache and
+ * reassociate it, otherwise allocate a new entry. Unresolved
+ * entries can exist due to normal vnode reuse and/or cache
+ * purges issued by filesystems for various reasons.
+ */
+
+ hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
- ncpp = NCHHASH(hash);
- LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
- if (LIST_EMPTY(&dvp->v_cache_src))
- vhold(dvp);
- LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
- if (vp) {
- TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
+ LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+ numchecks++;
+ if ((ncp->nc_flag & NCF_UNRESOLVED) &&
+ ncp->nc_parent->nc_vp == dvp &&
+ ncp->nc_nlen == cnp->cn_namelen &&
+ bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen) == 0
+ ) {
+ printf("Reresolve %*.*s\n", (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr);
+ break;
+ }
+ }
+
+ if (ncp == NULL) {
+ ncp = malloc(sizeof(*ncp) + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
+ bzero(ncp, sizeof(*ncp));
+ TAILQ_INIT(&ncp->nc_list);
+
+ /*
+ * Linkup the parent pointer, bump the parent vnode's hold
+ * count when we go from 0->1 children. The calculation
+ * of 'par' is YYY a hack temporarily. par will eventually
+ * be passed as an argument.
+ */
+ par = TAILQ_FIRST(&dvp->v_namecache);
+ if (par == NULL) {
+ par = &rootnamecache;
+ if (dvp != rootvnode)
+ printf("cache_enter: parent has no namecache entry entering '%*.*s'\n", (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr);
+ }
+ ncp->nc_parent = par;
+ if (TAILQ_EMPTY(&par->nc_list)) {
+ if (par->nc_vp)
+ vhold(par->nc_vp);
+ }
+ TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
+
+ /*
+ * Add to the hash table
+ */
+ ncp->nc_nlen = cnp->cn_namelen;
+ bcopy(cnp->cn_nameptr, ncp->nc_name, cnp->cn_namelen);
+ ncpp = NCHHASH(hash);
+ LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
+
+ ncp->nc_flag |= NCF_HASHED;
+ }
+
+ /*
+ * Linkup the target vnode. The target vnode is NULL if this is
+ * to be a negative cache entry.
+ */
+ numcache++;
+ ncp->nc_vp = vp;
+ ncp->nc_flag &= ~NCF_UNRESOLVED;
+ if (vp == NULL) {
+ ++numneg;
+ TAILQ_INSERT_HEAD(&ncneglist, ncp, nc_vnode);
+ ncp->nc_flag &= ~NCF_WHITEOUT;
+ if (cnp->cn_flags & CNP_ISWHITEOUT)
+ ncp->nc_flag |= NCF_WHITEOUT;
} else {
- TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
+ if (!TAILQ_EMPTY(&ncp->nc_list))
+ vhold(vp);
+ TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
+ if (vp->v_type == VDIR) {
+ vp->v_dd = dvp;
+ vp->v_ddid = dvp->v_id;
+ }
}
+
+ /*
+ * Don't cache too many negative hits
+ */
if (numneg * ncnegfactor > numcache) {
- ncp = TAILQ_FIRST(&ncneg);
+ ncp = TAILQ_FIRST(&ncneglist);
+ KKASSERT(ncp != NULL);
cache_zap(ncp);
}
}
/*
* Name cache initialization, from vfs_init() when we are booting
+ *
+ * rootnamecache is initialized such that it cannot be recursively deleted.
*/
void
nchinit(void)
{
- TAILQ_INIT(&ncneg);
+ TAILQ_INIT(&ncneglist);
nchashtbl = hashinit(desiredvnodes*2, M_VFSCACHE, &nchash);
+ TAILQ_INIT(&rootnamecache.nc_list);
+ rootnamecache.nc_flag |= NCF_HASHED | NCF_ROOT | NCF_UNRESOLVED;
+ rootnamecache.nc_refs = 1;
}
/*
- * Invalidate all entries to a particular vnode.
+ * vfs_cache_setroot()
*
- * Remove all entries in the namecache relating to this vnode and
- * change the v_id. We take the v_id from a global counter, since
- * it becomes a handy sequence number in crash-dumps that way.
- * No valid vnode will ever have (v_id == 0).
+ * Create an association between the root of our namecache and
+ * the root vnode. This routine may be called several times during
+ * booting.
+ */
+void
+vfs_cache_setroot(struct vnode *nvp)
+{
+ KKASSERT(rootnamecache.nc_refs > 0); /* don't accidently free */
+ cache_zap(cache_hold(&rootnamecache));
+
+ rootnamecache.nc_vp = nvp;
+ rootnamecache.nc_flag &= ~NCF_UNRESOLVED;
+ ++numcache;
+ if (nvp) {
+ if (!TAILQ_EMPTY(&rootnamecache.nc_list))
+ vhold(nvp);
+ TAILQ_INSERT_HEAD(&nvp->v_namecache, &rootnamecache, nc_vnode);
+ } else {
+ ++numneg;
+ TAILQ_INSERT_HEAD(&ncneglist, &rootnamecache, nc_vnode);
+ rootnamecache.nc_flag &= ~NCF_WHITEOUT;
+ }
+}
+
+/*
+ * Invalidate all namecache entries to a particular vnode as well as
+ * any direct children of that vnode in the namecache. This is a
+ * 'catch all' purge used by filesystems that do not know any better.
+ *
+ * A new vnode v_id is generated. Note that no vnode will ever have a
+ * v_id of 0.
+ *
+ * Note that the linkage between the vnode and its namecache entries will
+ * be removed, but the namecache entries themselves might stay put due to
+ * active references from elsewhere in the system or due to the existance of
+ * the children. The namecache topology is left intact even if we do not
+ * know what the vnode association is. Such entries will be marked
+ * NCF_UNRESOLVED.
*
* XXX: Only time and the size of v_id prevents this from failing:
* XXX: In theory we should hunt down all (struct vnode*, v_id)
@@ -345,20 +603,36 @@
* XXX: by incrementing each vnodes v_id individually instead of
* XXX: using the global v_id.
*/
-
void
cache_purge(struct vnode *vp)
{
static u_long nextid;
+ struct namecache *ncp;
+ struct namecache *scan;
- while (!LIST_EMPTY(&vp->v_cache_src))
- cache_zap(LIST_FIRST(&vp->v_cache_src));
- while (!TAILQ_EMPTY(&vp->v_cache_dst))
- cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
+ /*
+ * Disassociate the vnode from its namecache entries along with
+ * (for historical reasons) any direct children.
+ */
+ while ((ncp = TAILQ_FIRST(&vp->v_namecache)) != NULL) {
+ cache_hold(ncp);
+restart: /* YYY hack, fix me */
+ TAILQ_FOREACH(scan, &ncp->nc_list, nc_entry) {
+ if ((scan->nc_flag & NCF_UNRESOLVED) == 0) {
+ cache_zap(cache_hold(scan));
+ goto restart;
+ }
+ }
+ cache_zap(ncp);
+ }
+
+ /*
+ * Calculate a new unique id for ".." handling
+ */
do {
nextid++;
- } while (nextid == vp->v_id || !nextid);
+ } while (nextid == vp->v_id || nextid == 0);
vp->v_id = nextid;
vp->v_dd = vp;
vp->v_ddid = 0;
@@ -376,13 +650,22 @@
struct nchashhead *ncpp;
struct namecache *ncp, *nnp;
- /* Scan hash tables for applicable entries */
+ /*
+ * Scan hash tables for applicable entries.
+ */
for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
- for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
+ ncp = LIST_FIRST(ncpp);
+ if (ncp)
+ cache_hold(ncp);
+ while (ncp) {
nnp = LIST_NEXT(ncp, nc_hash);
- if (ncp->nc_src_vp->v_mount == mp) {
+ if (nnp)
+ cache_hold(nnp);
+ if (ncp->nc_vp && ncp->nc_vp->v_mount == mp)
cache_zap(ncp);
- }
+ else
+ cache_drop(ncp);
+ ncp = nnp;
}
}
}
@@ -390,24 +673,22 @@
/*
* cache_leaf_test()
*
- * Test whether this (directory) vnode's namei cache entry contains
- * subdirectories or not. Used to determine whether the directory is
- * a leaf in the namei cache or not. Note: the directory may still
- * contain files in the namei cache.
+ * Test whether the vnode is at a leaf in the nameicache tree.
*
- * Returns 0 if the directory is a leaf, -1 if it isn't.
+ * Returns 0 if it is a leaf, -1 if it isn't.
*/
int
cache_leaf_test(struct vnode *vp)
{
- struct namecache *ncpc;
+ struct namecache *scan;
+ struct namecache *ncp;
- for (ncpc = LIST_FIRST(&vp->v_cache_src);
- ncpc != NULL;
- ncpc = LIST_NEXT(ncpc, nc_src)
- ) {
- if (ncpc->nc_dst_vp != NULL && ncpc->nc_dst_vp->v_type == VDIR)
- return(-1);
+ TAILQ_FOREACH(scan, &vp->v_namecache, nc_vnode) {
+ TAILQ_FOREACH(ncp, &scan->nc_list, nc_entry) {
+ /* YYY && ncp->nc_vp->v_type == VDIR ? */
+ if (ncp->nc_vp != NULL)
+ return(-1);
+ }
}
return(0);
}
@@ -443,8 +724,9 @@
return (ENOTDIR);
if ((flags & CNP_ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
- (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME))
+ (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) {
return (EROFS);
+ }
error = VOP_ACCESS(dvp, VEXEC, cred, td);
@@ -548,17 +830,15 @@
free(buf, M_TEMP);
return (ENOTDIR);
}
- ncp = TAILQ_FIRST(&vp->v_cache_dst);
- if (!ncp) {
+ TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
+ if (ncp->nc_parent->nc_vp == vp->v_dd)
+ break;
+ }
+ if (ncp == NULL) {
numcwdfail2++;
free(buf, M_TEMP);
return (ENOENT);
}
- if (ncp->nc_src_vp != vp->v_dd) {
- numcwdfail3++;
- free(buf, M_TEMP);
- return (EBADF);
- }
for (i = ncp->nc_nlen - 1; i >= 0; i--) {
if (bp == buf) {
numcwdfail4++;
@@ -644,17 +924,17 @@
free(buf, M_TEMP);
return (ENOTDIR);
}
- ncp = TAILQ_FIRST(&vp->v_cache_dst);
- if (!ncp) {
+ TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
+ if (vp == textvp)
+ break;
+ if (ncp->nc_parent->nc_vp == vp->v_dd)
+ break;
+ }
+ if (ncp == NULL) {
numfullpathfail2++;
free(buf, M_TEMP);
return (ENOENT);
}
- if (vp != textvp && ncp->nc_src_vp != vp->v_dd) {
- numfullpathfail3++;
- free(buf, M_TEMP);
- return (EBADF);
- }
for (i = ncp->nc_nlen - 1; i >= 0; i--) {
if (bp == buf) {
numfullpathfail4++;
@@ -670,7 +950,7 @@
}
*--bp = '/';
slash_prefixed = 1;
- vp = ncp->nc_src_vp;
+ vp = vp->v_dd;
}
if (!slash_prefixed) {
if (bp == buf) {
Index: kern/vfs_lookup.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_lookup.c,v
retrieving revision 1.6
diff -u -r1.6 vfs_lookup.c
--- kern/vfs_lookup.c 23 Sep 2003 05:03:51 -0000 1.6
+++ kern/vfs_lookup.c 24 Sep 2003 05:07:18 -0000
@@ -510,6 +510,7 @@
dpunlocked = 1;
goto bad2;
}
+ cache_mount(dp, tdp);
vrele(dp);
ndp->ni_vp = dp = tdp;
}
Index: kern/vfs_subr.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.20
diff -u -r1.20 vfs_subr.c
--- kern/vfs_subr.c 23 Sep 2003 05:03:51 -0000 1.20
+++ kern/vfs_subr.c 24 Sep 2003 05:07:18 -0000
@@ -37,7 +37,7 @@
*
* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
* $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $
- * $DragonFly: src/sys/kern/vfs_subr.c,v 1.20 2003/09/23 05:03:51 dillon Exp $
+ * $DragonFly$
*/
/*
@@ -117,8 +117,6 @@
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
static int reassignbufmethod = 1;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
-static int nameileafonly = 0;
-SYSCTL_INT(_vfs, OID_AUTO, nameileafonly, CTLFLAG_RW, &nameileafonly, 0, "");
#ifdef ENABLE_VFS_IOOPT
int vfs_ioopt = 0;
@@ -673,33 +671,13 @@
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
KKASSERT(vp->v_flag & VFREE);
- if (LIST_FIRST(&vp->v_cache_src)) {
- /*
- * note: nameileafonly sysctl is temporary,
- * for debugging only, and will eventually be
- * removed.
- */
- if (nameileafonly > 0) {
- /*
- * Do not reuse namei-cached directory
- * vnodes that have cached
- * subdirectories.
- */
- if (cache_leaf_test(vp) < 0) {
- lwkt_reltoken(&vp->v_interlock);
- TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
- vp = NULL;
- continue;
- }
- } else if (nameileafonly < 0 ||
- vmiodirenable == 0) {
- /*
- * Do not reuse namei-cached directory
- * vnodes if nameileafonly is -1 or
- * if VMIO backing for directories is
- * turned off (otherwise we reuse them
- * too quickly).
- */
+ /*
+ * If we have children in the namecache we cannot
+ * reuse the vnode yet because it will break the
+ * namecache chain (YYY use nc_refs for the check?)
+ */
+ if (TAILQ_FIRST(&vp->v_namecache)) {
+ if (cache_leaf_test(vp) < 0) {
lwkt_reltoken(&vp->v_interlock);
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
vp = NULL;
@@ -749,8 +727,7 @@
lwkt_inittoken(&vp->v_interlock);
vp->v_dd = vp;
cache_purge(vp);
- LIST_INIT(&vp->v_cache_src);
- TAILQ_INIT(&vp->v_cache_dst);
+ TAILQ_INIT(&vp->v_namecache);
numvnodes++;
}
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.17
diff -u -r1.17 vfs_syscalls.c
--- kern/vfs_syscalls.c 23 Sep 2003 05:03:51 -0000 1.17
+++ kern/vfs_syscalls.c 24 Sep 2003 05:07:18 -0000
@@ -394,6 +394,7 @@
vrele(rootvnode);
VREF(newdp);
rootvnode = newdp;
+ vfs_cache_setroot(rootvnode);
}
vput(newdp);
}
@@ -511,7 +512,7 @@
}
TAILQ_REMOVE(&mountlist, mp, mnt_list);
if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
- coveredvp->v_mountedhere = (struct mount *)0;
+ coveredvp->v_mountedhere = NULL;
vrele(coveredvp);
}
mp->mnt_vfc->vfc_refcount--;
Index: sys/namecache.h
===================================================================
RCS file: sys/namecache.h
diff -N sys/namecache.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ sys/namecache.h 24 Sep 2003 05:13:52 -0000
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2003 Matthew Dillon <dillon@xxxxxxxxxxxxx>,
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly$
+ */
+
+#ifndef _SYS_NAMECACHE_H_
+#define _SYS_NAMECACHE_H_
+
+struct vnode;
+
+TAILQ_HEAD(namecache_list, namecache);
+
+/*
+ * The namecache structure is used to manage the filesystem namespace. Most
+ * vnodes cached by the system will reference one or more associated namecache
+ * structures.
+ */
+struct namecache {
+ LIST_ENTRY(namecache) nc_hash; /* hash chain (nc_parent,name) */
+ TAILQ_ENTRY(namecache) nc_entry; /* scan via nc_parent->nc_list */
+ TAILQ_ENTRY(namecache) nc_vnode; /* scan via vnode->v_namecache */
+ struct namecache_list nc_list; /* list of children */
+ struct namecache *nc_parent; /* namecache entry for parent */
+ struct vnode *nc_vp; /* vnode representing name or NULL */
+ int nc_refs; /* ref count prevents deletion */
+ u_char nc_flag;
+ u_char nc_nlen; /* The length of the name, 255 max */
+ char nc_name[0]; /* Separate allocated seg name */
+};
+
+typedef struct namecache *namecache_t;
+
+/*
+ * Flags in namecache.nc_flag (u_char)
+ */
+#define NCF_NEGATIVE 0x01 /* negative entry */
+#define NCF_WHITEOUT 0x02 /* negative entry corresponds to whiteout */
+#define NCF_UNRESOLVED 0x04 /* invalid or unresolved entry */
+#define NCF_MOUNTPT 0x08 /* mount point */
+#define NCF_ROOT 0x10 /* namecache root (static) */
+#define NCF_HASHED 0x20 /* namecache entry in hash table */
+
+#define CINV_SELF 0x0001 /* invalidate a specific (dvp,vp) entry */
+#define CINV_CHILDREN 0x0002 /* invalidate all children of vp */
+
+#ifdef _KERNEL
+
+struct vop_lookup_args;
+struct componentname;
+struct mount;
+
+int cache_lookup(struct vnode *dvp, struct vnode **vpp,
+ struct componentname *cnp);
+void cache_mount(struct vnode *dvp, struct vnode *tvp);
+void cache_enter(struct vnode *dvp, struct vnode *vp,
+ struct componentname *cnp);
+void vfs_cache_setroot(struct vnode *vp);
+void cache_purge(struct vnode *vp);
+void cache_purgevfs (struct mount *mp);
+int cache_leaf_test (struct vnode *vp);
+int vfs_cache_lookup(struct vop_lookup_args *ap);
+int textvp_fullpath (struct proc *p, char **retbuf, char **retfreebuf);
+
+#endif
+
+#endif
Index: sys/vnode.h
===================================================================
RCS file: /cvs/src/sys/sys/vnode.h,v
retrieving revision 1.7
diff -u -r1.7 vnode.h
--- sys/vnode.h 20 Aug 2003 07:31:22 -0000 1.7
+++ sys/vnode.h 24 Sep 2003 05:10:42 -0000
@@ -32,7 +32,7 @@
*
* @(#)vnode.h 8.7 (Berkeley) 2/4/94
* $FreeBSD: src/sys/sys/vnode.h,v 1.111.2.19 2002/12/29 18:19:53 dillon Exp $
- * $DragonFly: src/sys/sys/vnode.h,v 1.7 2003/08/20 07:31:22 rob Exp $
+ * $DragonFly$
*/
#ifndef _SYS_VNODE_H_
@@ -43,6 +43,7 @@
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/acl.h>
+#include <sys/namecache.h>
#include <machine/lock.h>
@@ -76,7 +77,6 @@
TAILQ_HEAD(buflists, buf);
typedef int vop_t (void *);
-struct namecache;
/*
* Reading or writing any of these items requires holding the appropriate lock.
@@ -120,8 +120,12 @@
struct lock *v_vnlock; /* used for non-locking fs's */
enum vtagtype v_tag; /* type of underlying data */
void *v_data; /* private data for fs */
- LIST_HEAD(, namecache) v_cache_src; /* Cache entries from us */
- TAILQ_HEAD(, namecache) v_cache_dst; /* Cache entries to us */
+
+ /*
+ * YYY note: v_dd, v_ddid will become obsolete once the namecache
+ * code is finished.
+ */
+ struct namecache_list v_namecache; /* associated nc entries */
struct vnode *v_dd; /* .. vnode */
u_long v_ddid; /* .. capability identifier */
struct {
@@ -313,8 +317,8 @@
!((vp)->v_object->ref_count || (vp)->v_object->resident_page_count)))
#define VMIGHTFREE(vp) \
- (!((vp)->v_flag & (VFREE|VDOOMED|VXLOCK)) && \
- LIST_EMPTY(&(vp)->v_cache_src) && !(vp)->v_usecount)
+ (((vp)->v_flag & (VFREE|VDOOMED|VXLOCK)) == 0 && \
+ cache_leaf_test(vp) == 0 && (vp)->v_usecount == 0)
#define VSHOULDBUSY(vp) \
(((vp)->v_flag & VFREE) && \
@@ -538,7 +542,6 @@
/*
* Public vnode manipulation functions.
*/
-struct componentname;
struct file;
struct mount;
struct nameidata;
@@ -558,14 +561,6 @@
void addalias (struct vnode *vp, dev_t nvp_rdev);
void addaliasu (struct vnode *vp, udev_t nvp_rdev);
int bdevvp (dev_t dev, struct vnode **vpp);
-/* cache_* may belong in namei.h. */
-void cache_enter (struct vnode *dvp, struct vnode *vp,
- struct componentname *cnp);
-int cache_lookup (struct vnode *dvp, struct vnode **vpp,
- struct componentname *cnp);
-void cache_purge (struct vnode *vp);
-void cache_purgevfs (struct mount *mp);
-int cache_leaf_test (struct vnode *vp);
void cvtstat (struct stat *st, struct ostat *ost);
void cvtnstat (struct stat *sb, struct nstat *nsb);
int getnewvnode (enum vtagtype tag,
@@ -573,7 +568,6 @@
int lease_check (struct vop_lease_args *ap);
int spec_vnoperate (struct vop_generic_args *);
int speedup_syncer (void);
-int textvp_fullpath (struct proc *p, char **retbuf, char **retfreebuf);
void vattr_null (struct vattr *vap);
int vcount (struct vnode *vp);
void vdrop (struct vnode *);
@@ -612,7 +606,6 @@
struct ucred *cred, int *aresid, struct thread *td);
int vn_stat (struct vnode *vp, struct stat *sb, struct thread *td);
dev_t vn_todev (struct vnode *vp);
-int vfs_cache_lookup (struct vop_lookup_args *ap);
int vfs_object_create (struct vnode *vp, struct thread *td);
void vfs_timestamp (struct timespec *);
int vn_writechk (struct vnode *vp);
[
Date Prev][
Date Next]
[
Thread Prev][
Thread Next]
[
Date Index][
Thread Index]