Message ID | 4F998F9F.6020802@redhat.com |
---|---|
State | Not Applicable, archived |
Headers | show |
How are you testing this? --b. On Thu, Apr 26, 2012 at 01:10:39PM -0500, Eric Sandeen wrote: > This is based on commit d1f5273e9adb40724a85272f248f210dc4ce919a > ext4: return 32/64-bit dir name hash according to usage type > by Fan Yong <yong.fan@whamcloud.com> > > Traditionally ext2/3/4 has returned a 32-bit hash value from llseek() > to appease NFSv2, which can only handle a 32-bit cookie for seekdir() > and telldir(). However, this causes problems if there are 32-bit hash > collisions, since the NFSv2 server can get stuck resending the same > entries from the directory repeatedly. > > Allow ext3 to return a full 64-bit hash (both major and minor) for > telldir to decrease the chance of hash collisions. > > This patch does implement a new ext3_dir_llseek op, because with 64-bit > hashes, nfs will attempt to seek to a hash "offset" which is much > larger than ext3's s_maxbytes. So for dx dirs, we call > generic_file_llseek_size() with the appropriate max hash value as the > maximum seekable size. Otherwise we just pass through to > generic_file_llseek(). 
> > Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de> > Patch-updated-by: Eric Sandeen <sandeen@redhat.com> > (blame us if something is not correct) > > Signed-off-by: Eric Sandeen <sandeen@redhat.com> > --- > > diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c > index cc761ad..92490e9 100644 > --- a/fs/ext3/dir.c > +++ b/fs/ext3/dir.c > @@ -21,30 +21,15 @@ > * > */ > > +#include <linux/compat.h> > #include "ext3.h" > > static unsigned char ext3_filetype_table[] = { > DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK > }; > > -static int ext3_readdir(struct file *, void *, filldir_t); > static int ext3_dx_readdir(struct file * filp, > void * dirent, filldir_t filldir); > -static int ext3_release_dir (struct inode * inode, > - struct file * filp); > - > -const struct file_operations ext3_dir_operations = { > - .llseek = generic_file_llseek, > - .read = generic_read_dir, > - .readdir = ext3_readdir, /* we take BKL. needed?*/ > - .unlocked_ioctl = ext3_ioctl, > -#ifdef CONFIG_COMPAT > - .compat_ioctl = ext3_compat_ioctl, > -#endif > - .fsync = ext3_sync_file, /* BKL held */ > - .release = ext3_release_dir, > -}; > - > > static unsigned char get_dtype(struct super_block *sb, int filetype) > { > @@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) > return (ext3_filetype_table[filetype]); > } > > +/** > + * Check if the given dir-inode refers to an htree-indexed directory > + * (or a directory which chould potentially get coverted to use htree > + * indexing). 
> + * > + * Return 1 if it is a dx dir, 0 if not > + */ > +static int is_dx_dir(struct inode *inode) > +{ > + struct super_block *sb = inode->i_sb; > + > + if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, > + EXT3_FEATURE_COMPAT_DIR_INDEX) && > + ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || > + ((inode->i_size >> sb->s_blocksize_bits) == 1))) > + return 1; > + > + return 0; > +} > > int ext3_check_dir_entry (const char * function, struct inode * dir, > struct ext3_dir_entry_2 * de, > @@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp, > unsigned long offset; > int i, stored; > struct ext3_dir_entry_2 *de; > - struct super_block *sb; > int err; > struct inode *inode = filp->f_path.dentry->d_inode; > + struct super_block *sb = inode->i_sb; > int ret = 0; > int dir_has_error = 0; > > - sb = inode->i_sb; > - > - if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, > - EXT3_FEATURE_COMPAT_DIR_INDEX) && > - ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || > - ((inode->i_size >> sb->s_blocksize_bits) == 1))) { > + if (is_dx_dir(inode)) { > err = ext3_dx_readdir(filp, dirent, filldir); > if (err != ERR_BAD_DX_DIR) { > ret = err; > @@ -227,22 +226,87 @@ out: > return ret; > } > > +static inline int is_32bit_api(void) > +{ > +#ifdef CONFIG_COMPAT > + return is_compat_task(); > +#else > + return (BITS_PER_LONG == 32); > +#endif > +} > + > /* > * These functions convert from the major/minor hash to an f_pos > - * value. > + * value for dx directories > * > - * Currently we only use major hash numer. This is unfortunate, but > - * on 32-bit machines, the same VFS interface is used for lseek and > - * llseek, so if we use the 64 bit offset, then the 32-bit versions of > - * lseek/telldir/seekdir will blow out spectacularly, and from within > - * the ext2 low-level routine, we don't know if we're being called by > - * a 64-bit version of the system call or the 32-bit version of the > - * system call. Worse yet, NFSv2 only allows for a 32-bit readdir > - * cookie. Sigh. 
> + * Upper layer (for example NFS) should specify FMODE_32BITHASH or > + * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted > + * directly on both 32-bit and 64-bit nodes, under such case, neither > + * FMODE_32BITHASH nor FMODE_64BITHASH is specified. > */ > -#define hash2pos(major, minor) (major >> 1) > -#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) > -#define pos2min_hash(pos) (0) > +static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) > +{ > + if ((filp->f_mode & FMODE_32BITHASH) || > + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) > + return major >> 1; > + else > + return ((__u64)(major >> 1) << 32) | (__u64)minor; > +} > + > +static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) > +{ > + if ((filp->f_mode & FMODE_32BITHASH) || > + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) > + return (pos << 1) & 0xffffffff; > + else > + return ((pos >> 32) << 1) & 0xffffffff; > +} > + > +static inline __u32 pos2min_hash(struct file *filp, loff_t pos) > +{ > + if ((filp->f_mode & FMODE_32BITHASH) || > + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) > + return 0; > + else > + return pos & 0xffffffff; > +} > + > +/* > + * Return 32- or 64-bit end-of-file for dx directories > + */ > +static inline loff_t ext3_get_htree_eof(struct file *filp) > +{ > + if ((filp->f_mode & FMODE_32BITHASH) || > + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) > + return EXT3_HTREE_EOF_32BIT; > + else > + return EXT3_HTREE_EOF_64BIT; > +} > + > + > +/* > + * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both > + * non-htree and htree directories, where the "offset" is in terms > + * of the filename hash value instead of the byte offset. > + * > + * Because we may return a 64-bit hash that is well beyond s_maxbytes, > + * we need to pass the max hash as the maximum allowable offset in > + * the htree directory case. 
> + * > + * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) > + * will be invalid once the directory was converted into a dx directory > + */ > +loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) > +{ > + struct inode *inode = file->f_mapping->host; > + int dx_dir = is_dx_dir(inode); > + > + if (likely(dx_dir)) > + return generic_file_llseek_size(file, offset, origin, > + ext3_get_htree_eof(file)); > + else > + return generic_file_llseek(file, offset, origin); > +} > > /* > * This structure holds the nodes of the red-black tree used to store > @@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root) > } > > > -static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) > +static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, > + loff_t pos) > { > struct dir_private_info *p; > > p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); > if (!p) > return NULL; > - p->curr_hash = pos2maj_hash(pos); > - p->curr_minor_hash = pos2min_hash(pos); > + p->curr_hash = pos2maj_hash(filp, pos); > + p->curr_minor_hash = pos2min_hash(filp, pos); > return p; > } > > @@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent, > printk("call_filldir: called with null fname?!?\n"); > return 0; > } > - curr_pos = hash2pos(fname->hash, fname->minor_hash); > + curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); > while (fname) { > error = filldir(dirent, fname->name, > fname->name_len, curr_pos, > @@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp, > int ret; > > if (!info) { > - info = ext3_htree_create_dir_info(filp->f_pos); > + info = ext3_htree_create_dir_info(filp, filp->f_pos); > if (!info) > return -ENOMEM; > filp->private_data = info; > } > > - if (filp->f_pos == EXT3_HTREE_EOF) > + if (filp->f_pos == ext3_get_htree_eof(filp)) > return 0; /* EOF */ > > /* Some one has messed with f_pos; reset the world */ > @@ -440,8 +505,8 @@ static int 
ext3_dx_readdir(struct file * filp, > free_rb_tree_fname(&info->root); > info->curr_node = NULL; > info->extra_fname = NULL; > - info->curr_hash = pos2maj_hash(filp->f_pos); > - info->curr_minor_hash = pos2min_hash(filp->f_pos); > + info->curr_hash = pos2maj_hash(filp, filp->f_pos); > + info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); > } > > /* > @@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp, > if (ret < 0) > return ret; > if (ret == 0) { > - filp->f_pos = EXT3_HTREE_EOF; > + filp->f_pos = ext3_get_htree_eof(filp); > break; > } > info->curr_node = rb_first(&info->root); > @@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp, > info->curr_minor_hash = fname->minor_hash; > } else { > if (info->next_hash == ~0) { > - filp->f_pos = EXT3_HTREE_EOF; > + filp->f_pos = ext3_get_htree_eof(filp); > break; > } > info->curr_hash = info->next_hash; > @@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp) > > return 0; > } > + > +const struct file_operations ext3_dir_operations = { > + .llseek = ext3_dir_llseek, > + .read = generic_read_dir, > + .readdir = ext3_readdir, > + .unlocked_ioctl = ext3_ioctl, > +#ifdef CONFIG_COMPAT > + .compat_ioctl = ext3_compat_ioctl, > +#endif > + .fsync = ext3_sync_file, > + .release = ext3_release_dir, > +}; > diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h > index b6515fd..fe5bef7 100644 > --- a/fs/ext3/ext3.h > +++ b/fs/ext3/ext3.h > @@ -920,7 +920,11 @@ struct dx_hash_info > u32 *seed; > }; > > -#define EXT3_HTREE_EOF 0x7fffffff > + > +/* 32 and 64 bit signed EOF for dx directories */ > +#define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) > +#define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) > + > > /* > * Control parameters used by ext3_htree_next_block > diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c > index d10231d..ede315c 100644 > --- a/fs/ext3/hash.c > +++ b/fs/ext3/hash.c > @@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info 
*hinfo) > return -1; > } > hash = hash & ~1; > - if (hash == (EXT3_HTREE_EOF << 1)) > - hash = (EXT3_HTREE_EOF-1) << 1; > + if (hash == (EXT3_HTREE_EOF_32BIT << 1)) > + hash = (EXT3_HTREE_EOF_32BIT - 1) << 1; > hinfo->hash = hash; > hinfo->minor_hash = minor_hash; > return 0; > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 4/26/12 1:26 PM, J. Bruce Fields wrote: > How are you testing this? Basically as was suggested in the first ext4 patch series: I created a filesystem with 600,000 files in a dir: for x in $(seq 1 600000); do touch $x; done then exported and mounted that fs to localhost: mount -t nfs localhost:/mnt/export /mnt/nfs and then looked for dups: ls -l | dup -d If you have more .... substantial nfs testing I could do I'm all ears :) -Eric > --b. > > On Thu, Apr 26, 2012 at 01:10:39PM -0500, Eric Sandeen wrote: >> This is based on commit d1f5273e9adb40724a85272f248f210dc4ce919a >> ext4: return 32/64-bit dir name hash according to usage type >> by Fan Yong <yong.fan@whamcloud.com> >> >> Traditionally ext2/3/4 has returned a 32-bit hash value from llseek() >> to appease NFSv2, which can only handle a 32-bit cookie for seekdir() >> and telldir(). However, this causes problems if there are 32-bit hash >> collisions, since the NFSv2 server can get stuck resending the same >> entries from the directory repeatedly. >> >> Allow ext3 to return a full 64-bit hash (both major and minor) for >> telldir to decrease the chance of hash collisions. >> >> This patch does implement a new ext3_dir_llseek op, because with 64-bit >> hashes, nfs will attempt to seek to a hash "offset" which is much >> larger than ext3's s_maxbytes. So for dx dirs, we call >> generic_file_llseek_size() with the appropriate max hash value as the >> maximum seekable size. Otherwise we just pass through to >> generic_file_llseek(). 
>> >> Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de> >> Patch-updated-by: Eric Sandeen <sandeen@redhat.com> >> (blame us if something is not correct) >> >> Signed-off-by: Eric Sandeen <sandeen@redhat.com> >> --- >> >> diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c >> index cc761ad..92490e9 100644 >> --- a/fs/ext3/dir.c >> +++ b/fs/ext3/dir.c >> @@ -21,30 +21,15 @@ >> * >> */ >> >> +#include <linux/compat.h> >> #include "ext3.h" >> >> static unsigned char ext3_filetype_table[] = { >> DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK >> }; >> >> -static int ext3_readdir(struct file *, void *, filldir_t); >> static int ext3_dx_readdir(struct file * filp, >> void * dirent, filldir_t filldir); >> -static int ext3_release_dir (struct inode * inode, >> - struct file * filp); >> - >> -const struct file_operations ext3_dir_operations = { >> - .llseek = generic_file_llseek, >> - .read = generic_read_dir, >> - .readdir = ext3_readdir, /* we take BKL. needed?*/ >> - .unlocked_ioctl = ext3_ioctl, >> -#ifdef CONFIG_COMPAT >> - .compat_ioctl = ext3_compat_ioctl, >> -#endif >> - .fsync = ext3_sync_file, /* BKL held */ >> - .release = ext3_release_dir, >> -}; >> - >> >> static unsigned char get_dtype(struct super_block *sb, int filetype) >> { >> @@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) >> return (ext3_filetype_table[filetype]); >> } >> >> +/** >> + * Check if the given dir-inode refers to an htree-indexed directory >> + * (or a directory which chould potentially get coverted to use htree >> + * indexing). 
>> + * >> + * Return 1 if it is a dx dir, 0 if not >> + */ >> +static int is_dx_dir(struct inode *inode) >> +{ >> + struct super_block *sb = inode->i_sb; >> + >> + if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, >> + EXT3_FEATURE_COMPAT_DIR_INDEX) && >> + ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || >> + ((inode->i_size >> sb->s_blocksize_bits) == 1))) >> + return 1; >> + >> + return 0; >> +} >> >> int ext3_check_dir_entry (const char * function, struct inode * dir, >> struct ext3_dir_entry_2 * de, >> @@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp, >> unsigned long offset; >> int i, stored; >> struct ext3_dir_entry_2 *de; >> - struct super_block *sb; >> int err; >> struct inode *inode = filp->f_path.dentry->d_inode; >> + struct super_block *sb = inode->i_sb; >> int ret = 0; >> int dir_has_error = 0; >> >> - sb = inode->i_sb; >> - >> - if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, >> - EXT3_FEATURE_COMPAT_DIR_INDEX) && >> - ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || >> - ((inode->i_size >> sb->s_blocksize_bits) == 1))) { >> + if (is_dx_dir(inode)) { >> err = ext3_dx_readdir(filp, dirent, filldir); >> if (err != ERR_BAD_DX_DIR) { >> ret = err; >> @@ -227,22 +226,87 @@ out: >> return ret; >> } >> >> +static inline int is_32bit_api(void) >> +{ >> +#ifdef CONFIG_COMPAT >> + return is_compat_task(); >> +#else >> + return (BITS_PER_LONG == 32); >> +#endif >> +} >> + >> /* >> * These functions convert from the major/minor hash to an f_pos >> - * value. >> + * value for dx directories >> * >> - * Currently we only use major hash numer. This is unfortunate, but >> - * on 32-bit machines, the same VFS interface is used for lseek and >> - * llseek, so if we use the 64 bit offset, then the 32-bit versions of >> - * lseek/telldir/seekdir will blow out spectacularly, and from within >> - * the ext2 low-level routine, we don't know if we're being called by >> - * a 64-bit version of the system call or the 32-bit version of the >> - * system call. 
Worse yet, NFSv2 only allows for a 32-bit readdir >> - * cookie. Sigh. >> + * Upper layer (for example NFS) should specify FMODE_32BITHASH or >> + * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted >> + * directly on both 32-bit and 64-bit nodes, under such case, neither >> + * FMODE_32BITHASH nor FMODE_64BITHASH is specified. >> */ >> -#define hash2pos(major, minor) (major >> 1) >> -#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) >> -#define pos2min_hash(pos) (0) >> +static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) >> +{ >> + if ((filp->f_mode & FMODE_32BITHASH) || >> + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) >> + return major >> 1; >> + else >> + return ((__u64)(major >> 1) << 32) | (__u64)minor; >> +} >> + >> +static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) >> +{ >> + if ((filp->f_mode & FMODE_32BITHASH) || >> + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) >> + return (pos << 1) & 0xffffffff; >> + else >> + return ((pos >> 32) << 1) & 0xffffffff; >> +} >> + >> +static inline __u32 pos2min_hash(struct file *filp, loff_t pos) >> +{ >> + if ((filp->f_mode & FMODE_32BITHASH) || >> + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) >> + return 0; >> + else >> + return pos & 0xffffffff; >> +} >> + >> +/* >> + * Return 32- or 64-bit end-of-file for dx directories >> + */ >> +static inline loff_t ext3_get_htree_eof(struct file *filp) >> +{ >> + if ((filp->f_mode & FMODE_32BITHASH) || >> + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) >> + return EXT3_HTREE_EOF_32BIT; >> + else >> + return EXT3_HTREE_EOF_64BIT; >> +} >> + >> + >> +/* >> + * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both >> + * non-htree and htree directories, where the "offset" is in terms >> + * of the filename hash value instead of the byte offset. 
>> + * >> + * Because we may return a 64-bit hash that is well beyond s_maxbytes, >> + * we need to pass the max hash as the maximum allowable offset in >> + * the htree directory case. >> + * >> + * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) >> + * will be invalid once the directory was converted into a dx directory >> + */ >> +loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) >> +{ >> + struct inode *inode = file->f_mapping->host; >> + int dx_dir = is_dx_dir(inode); >> + >> + if (likely(dx_dir)) >> + return generic_file_llseek_size(file, offset, origin, >> + ext3_get_htree_eof(file)); >> + else >> + return generic_file_llseek(file, offset, origin); >> +} >> >> /* >> * This structure holds the nodes of the red-black tree used to store >> @@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root) >> } >> >> >> -static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) >> +static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, >> + loff_t pos) >> { >> struct dir_private_info *p; >> >> p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); >> if (!p) >> return NULL; >> - p->curr_hash = pos2maj_hash(pos); >> - p->curr_minor_hash = pos2min_hash(pos); >> + p->curr_hash = pos2maj_hash(filp, pos); >> + p->curr_minor_hash = pos2min_hash(filp, pos); >> return p; >> } >> >> @@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent, >> printk("call_filldir: called with null fname?!?\n"); >> return 0; >> } >> - curr_pos = hash2pos(fname->hash, fname->minor_hash); >> + curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); >> while (fname) { >> error = filldir(dirent, fname->name, >> fname->name_len, curr_pos, >> @@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp, >> int ret; >> >> if (!info) { >> - info = ext3_htree_create_dir_info(filp->f_pos); >> + info = ext3_htree_create_dir_info(filp, filp->f_pos); >> if (!info) >> return 
-ENOMEM; >> filp->private_data = info; >> } >> >> - if (filp->f_pos == EXT3_HTREE_EOF) >> + if (filp->f_pos == ext3_get_htree_eof(filp)) >> return 0; /* EOF */ >> >> /* Some one has messed with f_pos; reset the world */ >> @@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp, >> free_rb_tree_fname(&info->root); >> info->curr_node = NULL; >> info->extra_fname = NULL; >> - info->curr_hash = pos2maj_hash(filp->f_pos); >> - info->curr_minor_hash = pos2min_hash(filp->f_pos); >> + info->curr_hash = pos2maj_hash(filp, filp->f_pos); >> + info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); >> } >> >> /* >> @@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp, >> if (ret < 0) >> return ret; >> if (ret == 0) { >> - filp->f_pos = EXT3_HTREE_EOF; >> + filp->f_pos = ext3_get_htree_eof(filp); >> break; >> } >> info->curr_node = rb_first(&info->root); >> @@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp, >> info->curr_minor_hash = fname->minor_hash; >> } else { >> if (info->next_hash == ~0) { >> - filp->f_pos = EXT3_HTREE_EOF; >> + filp->f_pos = ext3_get_htree_eof(filp); >> break; >> } >> info->curr_hash = info->next_hash; >> @@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp) >> >> return 0; >> } >> + >> +const struct file_operations ext3_dir_operations = { >> + .llseek = ext3_dir_llseek, >> + .read = generic_read_dir, >> + .readdir = ext3_readdir, >> + .unlocked_ioctl = ext3_ioctl, >> +#ifdef CONFIG_COMPAT >> + .compat_ioctl = ext3_compat_ioctl, >> +#endif >> + .fsync = ext3_sync_file, >> + .release = ext3_release_dir, >> +}; >> diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h >> index b6515fd..fe5bef7 100644 >> --- a/fs/ext3/ext3.h >> +++ b/fs/ext3/ext3.h >> @@ -920,7 +920,11 @@ struct dx_hash_info >> u32 *seed; >> }; >> >> -#define EXT3_HTREE_EOF 0x7fffffff >> + >> +/* 32 and 64 bit signed EOF for dx directories */ >> +#define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) >> +#define 
EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) >> + >> >> /* >> * Control parameters used by ext3_htree_next_block >> diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c >> index d10231d..ede315c 100644 >> --- a/fs/ext3/hash.c >> +++ b/fs/ext3/hash.c >> @@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) >> return -1; >> } >> hash = hash & ~1; >> - if (hash == (EXT3_HTREE_EOF << 1)) >> - hash = (EXT3_HTREE_EOF-1) << 1; >> + if (hash == (EXT3_HTREE_EOF_32BIT << 1)) >> + hash = (EXT3_HTREE_EOF_32BIT - 1) << 1; >> hinfo->hash = hash; >> hinfo->minor_hash = minor_hash; >> return 0; >> -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu 26-04-12 13:10:39, Eric Sandeen wrote: > This is based on commit d1f5273e9adb40724a85272f248f210dc4ce919a > ext4: return 32/64-bit dir name hash according to usage type > by Fan Yong <yong.fan@whamcloud.com> > > Traditionally ext2/3/4 has returned a 32-bit hash value from llseek() > to appease NFSv2, which can only handle a 32-bit cookie for seekdir() > and telldir(). However, this causes problems if there are 32-bit hash > collisions, since the NFSv2 server can get stuck resending the same > entries from the directory repeatedly. > > Allow ext3 to return a full 64-bit hash (both major and minor) for > telldir to decrease the chance of hash collisions. > > This patch does implement a new ext3_dir_llseek op, because with 64-bit > hashes, nfs will attempt to seek to a hash "offset" which is much > larger than ext3's s_maxbytes. So for dx dirs, we call > generic_file_llseek_size() with the appropriate max hash value as the > maximum seekable size. Otherwise we just pass through to > generic_file_llseek(). > > Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de> > Patch-updated-by: Eric Sandeen <sandeen@redhat.com> > (blame us if something is not correct) Thanks Eric. The patch looks good so I've added it to my tree. 
Honza > Signed-off-by: Eric Sandeen <sandeen@redhat.com> > --- > > diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c > index cc761ad..92490e9 100644 > --- a/fs/ext3/dir.c > +++ b/fs/ext3/dir.c > @@ -21,30 +21,15 @@ > * > */ > > +#include <linux/compat.h> > #include "ext3.h" > > static unsigned char ext3_filetype_table[] = { > DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK > }; > > -static int ext3_readdir(struct file *, void *, filldir_t); > static int ext3_dx_readdir(struct file * filp, > void * dirent, filldir_t filldir); > -static int ext3_release_dir (struct inode * inode, > - struct file * filp); > - > -const struct file_operations ext3_dir_operations = { > - .llseek = generic_file_llseek, > - .read = generic_read_dir, > - .readdir = ext3_readdir, /* we take BKL. needed?*/ > - .unlocked_ioctl = ext3_ioctl, > -#ifdef CONFIG_COMPAT > - .compat_ioctl = ext3_compat_ioctl, > -#endif > - .fsync = ext3_sync_file, /* BKL held */ > - .release = ext3_release_dir, > -}; > - > > static unsigned char get_dtype(struct super_block *sb, int filetype) > { > @@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) > return (ext3_filetype_table[filetype]); > } > > +/** > + * Check if the given dir-inode refers to an htree-indexed directory > + * (or a directory which chould potentially get coverted to use htree > + * indexing). 
> + * > + * Return 1 if it is a dx dir, 0 if not > + */ > +static int is_dx_dir(struct inode *inode) > +{ > + struct super_block *sb = inode->i_sb; > + > + if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, > + EXT3_FEATURE_COMPAT_DIR_INDEX) && > + ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || > + ((inode->i_size >> sb->s_blocksize_bits) == 1))) > + return 1; > + > + return 0; > +} > > int ext3_check_dir_entry (const char * function, struct inode * dir, > struct ext3_dir_entry_2 * de, > @@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp, > unsigned long offset; > int i, stored; > struct ext3_dir_entry_2 *de; > - struct super_block *sb; > int err; > struct inode *inode = filp->f_path.dentry->d_inode; > + struct super_block *sb = inode->i_sb; > int ret = 0; > int dir_has_error = 0; > > - sb = inode->i_sb; > - > - if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, > - EXT3_FEATURE_COMPAT_DIR_INDEX) && > - ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || > - ((inode->i_size >> sb->s_blocksize_bits) == 1))) { > + if (is_dx_dir(inode)) { > err = ext3_dx_readdir(filp, dirent, filldir); > if (err != ERR_BAD_DX_DIR) { > ret = err; > @@ -227,22 +226,87 @@ out: > return ret; > } > > +static inline int is_32bit_api(void) > +{ > +#ifdef CONFIG_COMPAT > + return is_compat_task(); > +#else > + return (BITS_PER_LONG == 32); > +#endif > +} > + > /* > * These functions convert from the major/minor hash to an f_pos > - * value. > + * value for dx directories > * > - * Currently we only use major hash numer. This is unfortunate, but > - * on 32-bit machines, the same VFS interface is used for lseek and > - * llseek, so if we use the 64 bit offset, then the 32-bit versions of > - * lseek/telldir/seekdir will blow out spectacularly, and from within > - * the ext2 low-level routine, we don't know if we're being called by > - * a 64-bit version of the system call or the 32-bit version of the > - * system call. Worse yet, NFSv2 only allows for a 32-bit readdir > - * cookie. Sigh. 
> + * Upper layer (for example NFS) should specify FMODE_32BITHASH or > + * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted > + * directly on both 32-bit and 64-bit nodes, under such case, neither > + * FMODE_32BITHASH nor FMODE_64BITHASH is specified. > */ > -#define hash2pos(major, minor) (major >> 1) > -#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) > -#define pos2min_hash(pos) (0) > +static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) > +{ > + if ((filp->f_mode & FMODE_32BITHASH) || > + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) > + return major >> 1; > + else > + return ((__u64)(major >> 1) << 32) | (__u64)minor; > +} > + > +static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) > +{ > + if ((filp->f_mode & FMODE_32BITHASH) || > + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) > + return (pos << 1) & 0xffffffff; > + else > + return ((pos >> 32) << 1) & 0xffffffff; > +} > + > +static inline __u32 pos2min_hash(struct file *filp, loff_t pos) > +{ > + if ((filp->f_mode & FMODE_32BITHASH) || > + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) > + return 0; > + else > + return pos & 0xffffffff; > +} > + > +/* > + * Return 32- or 64-bit end-of-file for dx directories > + */ > +static inline loff_t ext3_get_htree_eof(struct file *filp) > +{ > + if ((filp->f_mode & FMODE_32BITHASH) || > + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) > + return EXT3_HTREE_EOF_32BIT; > + else > + return EXT3_HTREE_EOF_64BIT; > +} > + > + > +/* > + * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both > + * non-htree and htree directories, where the "offset" is in terms > + * of the filename hash value instead of the byte offset. > + * > + * Because we may return a 64-bit hash that is well beyond s_maxbytes, > + * we need to pass the max hash as the maximum allowable offset in > + * the htree directory case. 
> + * > + * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) > + * will be invalid once the directory was converted into a dx directory > + */ > +loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) > +{ > + struct inode *inode = file->f_mapping->host; > + int dx_dir = is_dx_dir(inode); > + > + if (likely(dx_dir)) > + return generic_file_llseek_size(file, offset, origin, > + ext3_get_htree_eof(file)); > + else > + return generic_file_llseek(file, offset, origin); > +} > > /* > * This structure holds the nodes of the red-black tree used to store > @@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root) > } > > > -static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) > +static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, > + loff_t pos) > { > struct dir_private_info *p; > > p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); > if (!p) > return NULL; > - p->curr_hash = pos2maj_hash(pos); > - p->curr_minor_hash = pos2min_hash(pos); > + p->curr_hash = pos2maj_hash(filp, pos); > + p->curr_minor_hash = pos2min_hash(filp, pos); > return p; > } > > @@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent, > printk("call_filldir: called with null fname?!?\n"); > return 0; > } > - curr_pos = hash2pos(fname->hash, fname->minor_hash); > + curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); > while (fname) { > error = filldir(dirent, fname->name, > fname->name_len, curr_pos, > @@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp, > int ret; > > if (!info) { > - info = ext3_htree_create_dir_info(filp->f_pos); > + info = ext3_htree_create_dir_info(filp, filp->f_pos); > if (!info) > return -ENOMEM; > filp->private_data = info; > } > > - if (filp->f_pos == EXT3_HTREE_EOF) > + if (filp->f_pos == ext3_get_htree_eof(filp)) > return 0; /* EOF */ > > /* Some one has messed with f_pos; reset the world */ > @@ -440,8 +505,8 @@ static int 
ext3_dx_readdir(struct file * filp, > free_rb_tree_fname(&info->root); > info->curr_node = NULL; > info->extra_fname = NULL; > - info->curr_hash = pos2maj_hash(filp->f_pos); > - info->curr_minor_hash = pos2min_hash(filp->f_pos); > + info->curr_hash = pos2maj_hash(filp, filp->f_pos); > + info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); > } > > /* > @@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp, > if (ret < 0) > return ret; > if (ret == 0) { > - filp->f_pos = EXT3_HTREE_EOF; > + filp->f_pos = ext3_get_htree_eof(filp); > break; > } > info->curr_node = rb_first(&info->root); > @@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp, > info->curr_minor_hash = fname->minor_hash; > } else { > if (info->next_hash == ~0) { > - filp->f_pos = EXT3_HTREE_EOF; > + filp->f_pos = ext3_get_htree_eof(filp); > break; > } > info->curr_hash = info->next_hash; > @@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp) > > return 0; > } > + > +const struct file_operations ext3_dir_operations = { > + .llseek = ext3_dir_llseek, > + .read = generic_read_dir, > + .readdir = ext3_readdir, > + .unlocked_ioctl = ext3_ioctl, > +#ifdef CONFIG_COMPAT > + .compat_ioctl = ext3_compat_ioctl, > +#endif > + .fsync = ext3_sync_file, > + .release = ext3_release_dir, > +}; > diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h > index b6515fd..fe5bef7 100644 > --- a/fs/ext3/ext3.h > +++ b/fs/ext3/ext3.h > @@ -920,7 +920,11 @@ struct dx_hash_info > u32 *seed; > }; > > -#define EXT3_HTREE_EOF 0x7fffffff > + > +/* 32 and 64 bit signed EOF for dx directories */ > +#define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) > +#define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) > + > > /* > * Control parameters used by ext3_htree_next_block > diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c > index d10231d..ede315c 100644 > --- a/fs/ext3/hash.c > +++ b/fs/ext3/hash.c > @@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info 
*hinfo) > return -1; > } > hash = hash & ~1; > - if (hash == (EXT3_HTREE_EOF << 1)) > - hash = (EXT3_HTREE_EOF-1) << 1; > + if (hash == (EXT3_HTREE_EOF_32BIT << 1)) > + hash = (EXT3_HTREE_EOF_32BIT - 1) << 1; > hinfo->hash = hash; > hinfo->minor_hash = minor_hash; > return 0; >
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index cc761ad..92490e9 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -21,30 +21,15 @@ * */ +#include <linux/compat.h> #include "ext3.h" static unsigned char ext3_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; -static int ext3_readdir(struct file *, void *, filldir_t); static int ext3_dx_readdir(struct file * filp, void * dirent, filldir_t filldir); -static int ext3_release_dir (struct inode * inode, - struct file * filp); - -const struct file_operations ext3_dir_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .readdir = ext3_readdir, /* we take BKL. needed?*/ - .unlocked_ioctl = ext3_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext3_compat_ioctl, -#endif - .fsync = ext3_sync_file, /* BKL held */ - .release = ext3_release_dir, -}; - static unsigned char get_dtype(struct super_block *sb, int filetype) { @@ -55,6 +40,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) return (ext3_filetype_table[filetype]); } +/** + * Check if the given dir-inode refers to an htree-indexed directory + * (or a directory which could potentially get converted to use htree + * indexing).
+ * + * Return 1 if it is a dx dir, 0 if not + */ +static int is_dx_dir(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + + if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, + EXT3_FEATURE_COMPAT_DIR_INDEX) && + ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || + ((inode->i_size >> sb->s_blocksize_bits) == 1))) + return 1; + + return 0; +} int ext3_check_dir_entry (const char * function, struct inode * dir, struct ext3_dir_entry_2 * de, @@ -94,18 +98,13 @@ static int ext3_readdir(struct file * filp, unsigned long offset; int i, stored; struct ext3_dir_entry_2 *de; - struct super_block *sb; int err; struct inode *inode = filp->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; int ret = 0; int dir_has_error = 0; - sb = inode->i_sb; - - if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, - EXT3_FEATURE_COMPAT_DIR_INDEX) && - ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || - ((inode->i_size >> sb->s_blocksize_bits) == 1))) { + if (is_dx_dir(inode)) { err = ext3_dx_readdir(filp, dirent, filldir); if (err != ERR_BAD_DX_DIR) { ret = err; @@ -227,22 +226,87 @@ out: return ret; } +static inline int is_32bit_api(void) +{ +#ifdef CONFIG_COMPAT + return is_compat_task(); +#else + return (BITS_PER_LONG == 32); +#endif +} + /* * These functions convert from the major/minor hash to an f_pos - * value. + * value for dx directories * - * Currently we only use major hash numer. This is unfortunate, but - * on 32-bit machines, the same VFS interface is used for lseek and - * llseek, so if we use the 64 bit offset, then the 32-bit versions of - * lseek/telldir/seekdir will blow out spectacularly, and from within - * the ext2 low-level routine, we don't know if we're being called by - * a 64-bit version of the system call or the 32-bit version of the - * system call. Worse yet, NFSv2 only allows for a 32-bit readdir - * cookie. Sigh. + * Upper layer (for example NFS) should specify FMODE_32BITHASH or + * FMODE_64BITHASH explicitly. 
On the other hand, we allow ext3 to be mounted + * directly on both 32-bit and 64-bit nodes; in that case, neither + * FMODE_32BITHASH nor FMODE_64BITHASH is specified. */ -#define hash2pos(major, minor) (major >> 1) -#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -#define pos2min_hash(pos) (0) +static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) +{ + if ((filp->f_mode & FMODE_32BITHASH) || + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) + return major >> 1; + else + return ((__u64)(major >> 1) << 32) | (__u64)minor; +} + +static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) +{ + if ((filp->f_mode & FMODE_32BITHASH) || + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) + return (pos << 1) & 0xffffffff; + else + return ((pos >> 32) << 1) & 0xffffffff; +} + +static inline __u32 pos2min_hash(struct file *filp, loff_t pos) +{ + if ((filp->f_mode & FMODE_32BITHASH) || + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) + return 0; + else + return pos & 0xffffffff; +} + +/* + * Return 32- or 64-bit end-of-file for dx directories + */ +static inline loff_t ext3_get_htree_eof(struct file *filp) +{ + if ((filp->f_mode & FMODE_32BITHASH) || + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) + return EXT3_HTREE_EOF_32BIT; + else + return EXT3_HTREE_EOF_64BIT; +} + + +/* + * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both + * non-htree and htree directories, where the "offset" is in terms + * of the filename hash value instead of the byte offset. + * + * Because we may return a 64-bit hash that is well beyond s_maxbytes, + * we need to pass the max hash as the maximum allowable offset in + * the htree directory case.
+ * + * NOTE: offsets obtained *before* EXT3_INDEX_FL is set on the directory + * will be invalid once the directory has been converted into a dx directory + */ +loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) +{ + struct inode *inode = file->f_mapping->host; + int dx_dir = is_dx_dir(inode); + + if (likely(dx_dir)) + return generic_file_llseek_size(file, offset, origin, + ext3_get_htree_eof(file)); + else + return generic_file_llseek(file, offset, origin); +} /* * This structure holds the nodes of the red-black tree used to store @@ -303,15 +367,16 @@ static void free_rb_tree_fname(struct rb_root *root) } -static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) +static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, + loff_t pos) { struct dir_private_info *p; p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); if (!p) return NULL; - p->curr_hash = pos2maj_hash(pos); - p->curr_minor_hash = pos2min_hash(pos); + p->curr_hash = pos2maj_hash(filp, pos); + p->curr_minor_hash = pos2min_hash(filp, pos); return p; } @@ -401,7 +466,7 @@ static int call_filldir(struct file * filp, void * dirent, printk("call_filldir: called with null fname?!?\n"); return 0; } - curr_pos = hash2pos(fname->hash, fname->minor_hash); + curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); while (fname) { error = filldir(dirent, fname->name, fname->name_len, curr_pos, @@ -426,13 +491,13 @@ static int ext3_dx_readdir(struct file * filp, int ret; if (!info) { - info = ext3_htree_create_dir_info(filp->f_pos); + info = ext3_htree_create_dir_info(filp, filp->f_pos); if (!info) return -ENOMEM; filp->private_data = info; } - if (filp->f_pos == EXT3_HTREE_EOF) + if (filp->f_pos == ext3_get_htree_eof(filp)) return 0; /* EOF */ /* Some one has messed with f_pos; reset the world */ @@ -440,8 +505,8 @@ static int ext3_dx_readdir(struct file * filp, free_rb_tree_fname(&info->root); info->curr_node = NULL; info->extra_fname = NULL; -
info->curr_hash = pos2maj_hash(filp->f_pos); - info->curr_minor_hash = pos2min_hash(filp->f_pos); + info->curr_hash = pos2maj_hash(filp, filp->f_pos); + info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); } /* @@ -473,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp, if (ret < 0) return ret; if (ret == 0) { - filp->f_pos = EXT3_HTREE_EOF; + filp->f_pos = ext3_get_htree_eof(filp); break; } info->curr_node = rb_first(&info->root); @@ -493,7 +558,7 @@ static int ext3_dx_readdir(struct file * filp, info->curr_minor_hash = fname->minor_hash; } else { if (info->next_hash == ~0) { - filp->f_pos = EXT3_HTREE_EOF; + filp->f_pos = ext3_get_htree_eof(filp); break; } info->curr_hash = info->next_hash; @@ -512,3 +577,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp) return 0; } + +const struct file_operations ext3_dir_operations = { + .llseek = ext3_dir_llseek, + .read = generic_read_dir, + .readdir = ext3_readdir, + .unlocked_ioctl = ext3_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext3_compat_ioctl, +#endif + .fsync = ext3_sync_file, + .release = ext3_release_dir, +}; diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h index b6515fd..fe5bef7 100644 --- a/fs/ext3/ext3.h +++ b/fs/ext3/ext3.h @@ -920,7 +920,11 @@ struct dx_hash_info u32 *seed; }; -#define EXT3_HTREE_EOF 0x7fffffff + +/* 32 and 64 bit signed EOF for dx directories */ +#define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) +#define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) + /* * Control parameters used by ext3_htree_next_block diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c index d10231d..ede315c 100644 --- a/fs/ext3/hash.c +++ b/fs/ext3/hash.c @@ -198,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) return -1; } hash = hash & ~1; - if (hash == (EXT3_HTREE_EOF << 1)) - hash = (EXT3_HTREE_EOF-1) << 1; + if (hash == (EXT3_HTREE_EOF_32BIT << 1)) + hash = (EXT3_HTREE_EOF_32BIT - 1) << 1; hinfo->hash = hash; hinfo->minor_hash = minor_hash; return 
0;
This is based on commit d1f5273e9adb40724a85272f248f210dc4ce919a ext4: return 32/64-bit dir name hash according to usage type by Fan Yong <yong.fan@whamcloud.com> Traditionally ext2/3/4 has returned a 32-bit hash value from llseek() to appease NFSv2, which can only handle a 32-bit cookie for seekdir() and telldir(). However, this causes problems if there are 32-bit hash collisions, since the NFSv2 server can get stuck resending the same entries from the directory repeatedly. Allow ext3 to return a full 64-bit hash (both major and minor) for telldir to decrease the chance of hash collisions. This patch does implement a new ext3_dir_llseek op, because with 64-bit hashes, nfs will attempt to seek to a hash "offset" which is much larger than ext3's s_maxbytes. So for dx dirs, we call generic_file_llseek_size() with the appropriate max hash value as the maximum seekable size. Otherwise we just pass through to generic_file_llseek(). Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de> Patch-updated-by: Eric Sandeen <sandeen@redhat.com> (blame us if something is not correct) Signed-off-by: Eric Sandeen <sandeen@redhat.com> --- -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html