Experiment with scanning using openat() rather than chdir()

A slightly different approach. In terms of syscalls it probably doesn't
matter at all, but it's a simpler, less hacky and maybe more efficient
way to work around PATH_MAX limitations than constantly chdir()'ing for
each directory.  Should also fix the chdir("..") failure case in macOS
firmlink scenarios, though I haven't a clue if macOS implements all
these openat()-related calls in the first place.

Downsides:
- Less portable. Everything used is in POSIX, but these interfaces are
  more recent than the ones we used before.
- Requires more open file descriptors; we might hit the limit for deeply
  nested directories.

Haven't done much testing yet, but there's a bunch of TODOs:
- Make refreshing work again (not sure what's going wrong there)
- See if we can report readdir() etc. errors again?
- Make firmlink detection work again?
- See how portable this really is.
- Also update file deletion code
This commit is contained in:
Yorhel 2021-03-04 17:13:37 +01:00
parent 9337cdc99e
commit 783bcb5b78
6 changed files with 87 additions and 173 deletions

View file

@ -28,7 +28,7 @@ AC_CHECK_FUNCS(
[getcwd gettimeofday fnmatch chdir rmdir unlink lstat system getenv],[], [getcwd gettimeofday fnmatch chdir rmdir unlink lstat system getenv],[],
AC_MSG_ERROR([required function missing])) AC_MSG_ERROR([required function missing]))
AC_CHECK_FUNCS(statfs) AC_CHECK_FUNCS(fstatfs)
AC_CHECK_HEADERS([sys/attr.h]) AC_CHECK_HEADERS([sys/attr.h])

View file

@ -113,7 +113,7 @@ void dir_scan_init(const char *path);
extern int dir_import_active; extern int dir_import_active;
int dir_import_init(const char *fn); int dir_import_init(const char *fn);
#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS #if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
extern int exclude_kernfs; extern int exclude_kernfs;
#endif #endif

View file

@ -30,6 +30,7 @@
#include <errno.h> #include <errno.h>
#include <unistd.h> #include <unistd.h>
#include <fcntl.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <dirent.h> #include <dirent.h>
@ -38,7 +39,7 @@
#include <sys/attr.h> #include <sys/attr.h>
#endif #endif
#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS #if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
#include <sys/statfs.h> #include <sys/statfs.h>
#include <linux/magic.h> #include <linux/magic.h>
#endif #endif
@ -59,7 +60,7 @@ static struct dir *buf_dir;
static struct dir_ext buf_ext[1]; static struct dir_ext buf_ext[1];
#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS #if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
int exclude_kernfs; /* Exclude Linux pseudo filesystems */ int exclude_kernfs; /* Exclude Linux pseudo filesystems */
static int is_kernfs(unsigned long type) { static int is_kernfs(unsigned long type) {
@ -138,112 +139,14 @@ static void stat_to_dir(struct stat *fs) {
} }
/* Reads all filenames in the currently chdir'ed directory and stores it as a static int dir_walk(int);
* nul-separated list of filenames. The list ends with an empty filename (i.e.
* two nuls). . and .. are not included. Returned memory should be freed. *err
* is set to 1 if some error occurred. Returns NULL if that error was fatal.
* The reason for reading everything in memory first and then walking through
* the list is to avoid eating too many file descriptors in a deeply recursive
* directory. */
static char *dir_read(int *err) {
DIR *dir;
struct dirent *item;
char *buf = NULL;
size_t buflen = 512;
size_t off = 0;
if((dir = opendir(".")) == NULL) {
*err = 1;
return NULL;
}
buf = xmalloc(buflen);
errno = 0;
while((item = readdir(dir)) != NULL) {
if(item->d_name[0] == '.' && (item->d_name[1] == 0 || (item->d_name[1] == '.' && item->d_name[2] == 0)))
continue;
size_t req = off+3+strlen(item->d_name);
if(req > buflen) {
buflen = req < buflen*2 ? buflen*2 : req;
buf = xrealloc(buf, buflen);
}
strcpy(buf+off, item->d_name);
off += strlen(item->d_name)+1;
}
if(errno)
*err = 1;
if(closedir(dir) < 0)
*err = 1;
buf[off] = 0;
buf[off+1] = 0;
return buf;
}
static int dir_walk(char *);
/* Tries to recurse into the current directory item (buf_dir is assumed to be the current dir) */
static int dir_scan_recurse(const char *name) {
int fail = 0;
char *dir;
if(chdir(name)) {
dir_setlasterr(dir_curpath);
buf_dir->flags |= FF_ERR;
if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
return 0;
}
if((dir = dir_read(&fail)) == NULL) {
dir_setlasterr(dir_curpath);
buf_dir->flags |= FF_ERR;
if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
if(chdir("..")) {
dir_seterr("Error going back to parent directory: %s", strerror(errno));
return 1;
} else
return 0;
}
/* readdir() failed halfway, not fatal. */
if(fail)
buf_dir->flags |= FF_ERR;
if(dir_output.item(buf_dir, name, buf_ext)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
fail = dir_walk(dir);
if(dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
/* Not being able to chdir back is fatal */
if(!fail && chdir("..")) {
dir_seterr("Error going back to parent directory: %s", strerror(errno));
return 1;
}
return fail;
}
/* Scans and adds a single item. Recurses into dir_walk() again if this is a /* Scans and adds a single item. Recurses into dir_walk() again if this is a
* directory. Assumes we're chdir'ed in the directory in which this item * directory. */
* resides. */ static int dir_scan_item(int parfd, const char *name) {
static int dir_scan_item(const char *name) {
static struct stat st, stl; static struct stat st, stl;
int fail = 0; int fail = 0, dirfd = -1;
#ifdef __CYGWIN__ #ifdef __CYGWIN__
/* /proc/registry names may contain slashes */ /* /proc/registry names may contain slashes */
@ -256,15 +159,20 @@ static int dir_scan_item(const char *name) {
if(exclude_match(dir_curpath)) if(exclude_match(dir_curpath))
buf_dir->flags |= FF_EXL; buf_dir->flags |= FF_EXL;
if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && lstat(name, &st)) { if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && fstatat(parfd, name, &st, AT_SYMLINK_NOFOLLOW)) {
buf_dir->flags |= FF_ERR; buf_dir->flags |= FF_ERR;
dir_setlasterr(dir_curpath); dir_setlasterr(dir_curpath);
} }
#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && S_ISDIR(st.st_mode) && (dirfd = openat(parfd, name, O_RDONLY|O_DIRECTORY)) < 0) {
if(exclude_kernfs && !(buf_dir->flags & (FF_ERR|FF_EXL)) && S_ISDIR(st.st_mode)) { buf_dir->flags |= FF_ERR;
dir_setlasterr(dir_curpath);
}
#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
if(exclude_kernfs && dirfd >= 0) {
struct statfs fst; struct statfs fst;
if(statfs(name, &fst)) { if(fstatfs(dirfd, &fst)) {
buf_dir->flags |= FF_ERR; buf_dir->flags |= FF_ERR;
dir_setlasterr(dir_curpath); dir_setlasterr(dir_curpath);
} else if(is_kernfs(fst.f_type)) } else if(is_kernfs(fst.f_type))
@ -272,7 +180,8 @@ static int dir_scan_item(const char *name) {
} }
#endif #endif
#if HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH /* TODO: Completely broken; prolly needs absolute path lookup */
#if 0 && HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH
if(!follow_firmlinks) { if(!follow_firmlinks) {
struct attrlist list = { struct attrlist list = {
.bitmapcount = ATTR_BIT_MAP_COUNT, .bitmapcount = ATTR_BIT_MAP_COUNT,
@ -292,60 +201,79 @@ static int dir_scan_item(const char *name) {
#endif #endif
if(!(buf_dir->flags & (FF_ERR|FF_EXL))) { if(!(buf_dir->flags & (FF_ERR|FF_EXL))) {
if(follow_symlinks && S_ISLNK(st.st_mode) && !stat(name, &stl) && !S_ISDIR(stl.st_mode)) if(follow_symlinks && S_ISLNK(st.st_mode) && !fstatat(parfd, name, &stl, 0) && !S_ISDIR(stl.st_mode))
stat_to_dir(&stl); stat_to_dir(&stl);
else else
stat_to_dir(&st); stat_to_dir(&st);
} }
if(cachedir_tags && (buf_dir->flags & FF_DIR) && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK))) if(cachedir_tags && dirfd >= 0 && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK)))
if(has_cachedir_tag(name)) { if(has_cachedir_tag(dirfd)) {
buf_dir->flags |= FF_EXL; buf_dir->flags |= FF_EXL;
buf_dir->size = buf_dir->asize = 0; buf_dir->size = buf_dir->asize = 0;
} }
/* Recurse into the dir or output the item */ if(dir_output.item(buf_dir, name, buf_ext)) {
if(buf_dir->flags & FF_DIR && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK)))
fail = dir_scan_recurse(name);
else if(buf_dir->flags & FF_DIR) {
if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno)); dir_seterr("Output error: %s", strerror(errno));
fail = 1; fail = 1;
} }
} else if(dir_output.item(buf_dir, name, buf_ext)) {
if(!fail && dirfd >= 0 && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS|FF_KERNFS|FF_FRMLNK))) {
/* XXX: Can't do anything with the return value, since we've already outputted our dir entry item.
* So errors reading dir items will be silently ignored. Not great. */
dir_walk(dirfd);
dirfd = -1;
}
if(!fail && (buf_dir->flags & FF_DIR) && dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno)); dir_seterr("Output error: %s", strerror(errno));
fail = 1; fail = 1;
} }
if(dirfd >= 0)
close(dirfd);
return fail || input_handle(1); return fail || input_handle(1);
} }
/* Walks through the directory that we're currently chdir'ed to. *dir contains /* Recursively walks through the directory descriptor. Will close() the given dirfd. */
* the filenames as returned by dir_read(), and will be freed automatically by static int dir_walk(int dirfd) {
* this function. */
static int dir_walk(char *dir) {
int fail = 0; int fail = 0;
char *cur; DIR *dir;
struct dirent *item;
fail = 0; /* Illegal behavior: We're giving dirfd to fdopendir(), which in turn takes
for(cur=dir; !fail&&cur&&*cur; cur+=strlen(cur)+1) { * control of the fd and we shouldn't be using it again. Yet we do use it
dir_curpath_enter(cur); * later on for openat() calls. I doubt this will be a problem, but may need
* further testing. The alternative is to dup(), but that makes us run out of
* descriptors twice as fast... */
if((dir = fdopendir(dirfd)) == NULL) {
close(dirfd);
return -1;
}
while((item = readdir(dir)) != NULL) {
if(item->d_name[0] == '.' && (item->d_name[1] == 0 || (item->d_name[1] == '.' && item->d_name[2] == 0)))
continue;
dir_curpath_enter(item->d_name);
memset(buf_dir, 0, offsetof(struct dir, name)); memset(buf_dir, 0, offsetof(struct dir, name));
memset(buf_ext, 0, sizeof(struct dir_ext)); memset(buf_ext, 0, sizeof(struct dir_ext));
fail = dir_scan_item(cur); fail |= dir_scan_item(dirfd, item->d_name);
dir_curpath_leave(); dir_curpath_leave();
} }
free(dir); if(errno)
fail = 1;
if(closedir(dir) < 0)
fail = 1;
return fail; return fail;
} }
static int process(void) { static int process(void) {
char *path; char *path;
char *dir; int fail = 0, dirfd = -1;
int fail = 0;
struct stat fs; struct stat fs;
memset(buf_dir, 0, offsetof(struct dir, name)); memset(buf_dir, 0, offsetof(struct dir, name));
@ -361,15 +289,12 @@ static int process(void) {
if(!dir_fatalerr && path_chdir(dir_curpath) < 0) if(!dir_fatalerr && path_chdir(dir_curpath) < 0)
dir_seterr("Error changing directory: %s", strerror(errno)); dir_seterr("Error changing directory: %s", strerror(errno));
/* Can these even fail after a chdir? */ if(!dir_fatalerr && (dirfd = open(".", O_RDONLY|O_DIRECTORY)) < 0)
if(!dir_fatalerr && lstat(".", &fs) != 0)
dir_seterr("Error obtaining directory information: %s", strerror(errno));
if(!dir_fatalerr && !S_ISDIR(fs.st_mode))
dir_seterr("Not a directory");
if(!dir_fatalerr && !(dir = dir_read(&fail)))
dir_seterr("Error reading directory: %s", strerror(errno)); dir_seterr("Error reading directory: %s", strerror(errno));
if(!dir_fatalerr && fstat(dirfd, &fs) != 0)
dir_seterr("Error obtaining directory information: %s", strerror(errno));
if(!dir_fatalerr) { if(!dir_fatalerr) {
curdev = (uint64_t)fs.st_dev; curdev = (uint64_t)fs.st_dev;
if(fail) if(fail)
@ -380,14 +305,19 @@ static int process(void) {
dir_seterr("Output error: %s", strerror(errno)); dir_seterr("Output error: %s", strerror(errno));
fail = 1; fail = 1;
} }
if(!fail) if(!fail) {
fail = dir_walk(dir); fail = dir_walk(dirfd);
dirfd = -1;
}
if(!fail && dir_output.item(NULL, 0, NULL)) { if(!fail && dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno)); dir_seterr("Output error: %s", strerror(errno));
fail = 1; fail = 1;
} }
} }
if(dirfd >= 0)
close(dirfd);
while(dir_fatalerr && !input_handle(0)) while(dir_fatalerr && !input_handle(0))
; ;
return dir_output.final(dir_fatalerr || fail); return dir_output.final(dir_fatalerr || fail);

View file

@ -29,6 +29,8 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <fnmatch.h> #include <fnmatch.h>
#include <unistd.h>
#include <fcntl.h>
static struct exclude { static struct exclude {
@ -105,35 +107,17 @@ void exclude_clear() {
* Exclusion of directories that contain only cached information. * Exclusion of directories that contain only cached information.
* See http://www.brynosaurus.com/cachedir/ * See http://www.brynosaurus.com/cachedir/
*/ */
#define CACHEDIR_TAG_FILENAME "CACHEDIR.TAG"
#define CACHEDIR_TAG_SIGNATURE "Signature: 8a477f597d28d172789f06886806bc55" #define CACHEDIR_TAG_SIGNATURE "Signature: 8a477f597d28d172789f06886806bc55"
int has_cachedir_tag(const char *name) { int has_cachedir_tag(int dirfd) {
static int path_l = 1024; int fd = -1, match = 0;
static char *path = NULL;
int l;
char buf[sizeof CACHEDIR_TAG_SIGNATURE - 1]; char buf[sizeof CACHEDIR_TAG_SIGNATURE - 1];
FILE *f;
int match = 0;
/* Compute the required length for `path`. */ /* Assumption: We won't get a short read() when fetching the tag. */
l = strlen(name) + sizeof CACHEDIR_TAG_FILENAME + 2; match = (fd = openat(dirfd, "CACHEDIR.TAG", O_RDONLY)) >= 0
if(l > path_l || path == NULL) { && read(fd, buf, sizeof buf) == sizeof buf
path_l = path_l * 2; && !memcmp(buf, CACHEDIR_TAG_SIGNATURE, sizeof buf);
if(path_l < l) if(fd >= 0)
path_l = l; close(fd);
/* We don't need to copy the content of `path`, so it's more efficient to
* use `free` + `malloc`. */
free(path);
path = xmalloc(path_l);
}
snprintf(path, path_l, "%s/%s", name, CACHEDIR_TAG_FILENAME);
f = fopen(path, "rb");
if(f != NULL) {
match = ((fread(buf, 1, sizeof buf, f) == sizeof buf) &&
!memcmp(buf, CACHEDIR_TAG_SIGNATURE, sizeof buf));
fclose(f);
}
return match; return match;
} }

View file

@ -30,6 +30,6 @@ void exclude_add(char *);
int exclude_addfile(char *); int exclude_addfile(char *);
int exclude_match(char *); int exclude_match(char *);
void exclude_clear(void); void exclude_clear(void);
int has_cachedir_tag(const char *name); int has_cachedir_tag(int);
#endif #endif

View file

@ -170,7 +170,7 @@ static void argv_parse(int argc, char **argv) {
printf(" -X, --exclude-from FILE Exclude files that match any pattern in FILE\n"); printf(" -X, --exclude-from FILE Exclude files that match any pattern in FILE\n");
printf(" -L, --follow-symlinks Follow symbolic links (excluding directories)\n"); printf(" -L, --follow-symlinks Follow symbolic links (excluding directories)\n");
printf(" --exclude-caches Exclude directories containing CACHEDIR.TAG\n"); printf(" --exclude-caches Exclude directories containing CACHEDIR.TAG\n");
#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS #if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
printf(" --exclude-kernfs Exclude Linux pseudo filesystems (procfs,sysfs,cgroup,...)\n"); printf(" --exclude-kernfs Exclude Linux pseudo filesystems (procfs,sysfs,cgroup,...)\n");
#endif #endif
#if HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH #if HAVE_SYS_ATTR_H && HAVE_GETATTRLIST && HAVE_DECL_ATTR_CMNEXT_NOFIRMLINKPATH
@ -206,7 +206,7 @@ static void argv_parse(int argc, char **argv) {
break; break;
case 2 : /* --exclude-kernfs */ case 2 : /* --exclude-kernfs */
#if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_STATFS #if HAVE_LINUX_MAGIC_H && HAVE_SYS_STATFS_H && HAVE_FSTATFS
exclude_kernfs = 1; break; exclude_kernfs = 1; break;
#else #else
fprintf(stderr, "This feature is not supported on your platform\n"); fprintf(stderr, "This feature is not supported on your platform\n");