From 12291656b135ed788e41dadbd2d15e613ddea9b5 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Tue, 12 Jul 2011 08:35:05 -0600 Subject: [PATCH] save: let iohelper work on O_DIRECT fds Required for a coming patch where iohelper will operate on O_DIRECT fds. There, the user-space memory must be aligned to file system boundaries (at least 512, but using page-aligned works better, and some file systems prefer 64k). Made tougher by the fact that VIR_ALLOC won't work on void *, but posix_memalign won't work on char * and isn't available everywhere. This patch makes some simplifying assumptions - namely, output to an O_DIRECT fd will only be attempted on an empty seekable file (hence, no need to worry about preserving existing data on a partial block, and ftruncate will work to undo the effects of having to round up the size of the last block written), and input from an O_DIRECT fd will only be attempted on a complete seekable file with the only possible short read at EOF. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for posix_memalign. * src/util/iohelper.c (runIO): Use aligned memory, and handle quirks of O_DIRECT on last write. --- configure.ac | 6 +++--- src/util/iohelper.c | 52 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index e9d5be4858..9e39f44e2a 100644 --- a/configure.ac +++ b/configure.ac @@ -121,9 +121,9 @@ AC_CHECK_SIZEOF([long]) dnl Availability of various common functions (non-fatal if missing), dnl and various less common threadsafe functions -AC_CHECK_FUNCS_ONCE([cfmakeraw regexec sched_getaffinity getuid getgid \ - geteuid initgroups posix_fallocate mmap kill \ - getmntent_r getgrnam_r getpwuid_r]) +AC_CHECK_FUNCS_ONCE([cfmakeraw geteuid getgid getgrnam_r getmntent_r \ + getpwuid_r getuid initgroups kill mmap posix_fallocate posix_memalign \ + regexec sched_getaffinity]) dnl Availability of pthread functions (if missing, win32 threading is dnl assumed). Because of $LIB_PTHREAD, we cannot use AC_CHECK_FUNCS_ONCE. diff --git a/src/util/iohelper.c b/src/util/iohelper.c index 502bce586a..9e7bbdef92 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -74,17 +74,32 @@ cleanup: static int runIO(const char *path, int fd, int oflags, unsigned long long length) { - char *buf = NULL; + void *base = NULL; /* Location to be freed */ + char *buf = NULL; /* Aligned location within base */ size_t buflen = 1024*1024; + intptr_t alignMask = 64*1024 - 1; int ret = -1; int fdin, fdout; const char *fdinname, *fdoutname; unsigned long long total = 0; + bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0); + bool shortRead = false; /* true if we hit a short read */ + off_t end = 0; - if (VIR_ALLOC_N(buf, buflen) < 0) { +#if HAVE_POSIX_MEMALIGN + if (posix_memalign(&base, alignMask + 1, buflen)) { virReportOOMError(); goto cleanup; } + buf = base; +#else + if (VIR_ALLOC_N(buf, buflen + alignMask) < 0) { + virReportOOMError(); + goto cleanup; + } + base = buf; + buf = (char *) (((intptr_t) base + alignMask) & alignMask); +#endif switch (oflags & O_ACCMODE) { case O_RDONLY: @@ -92,12 +107,26 @@ runIO(const char *path, int fd, int oflags, unsigned long long length) fdinname = path; fdout = STDOUT_FILENO; fdoutname = "stdout"; + /* To make the implementation simpler, we give up on any + * attempt to use O_DIRECT in a non-trivial manner. */ + if (direct && ((end = lseek(fd, 0, SEEK_CUR)) != 0 || length)) { + virReportSystemError(end < 0 ? errno : EINVAL, "%s", + _("O_DIRECT read needs entire seekable file")); + goto cleanup; + } break; case O_WRONLY: fdin = STDIN_FILENO; fdinname = "stdin"; fdout = fd; fdoutname = path; + /* To make the implementation simpler, we give up on any + * attempt to use O_DIRECT in a non-trivial manner. */ + if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) { + virReportSystemError(end < 0 ? errno : EINVAL, "%s", + _("O_DIRECT write needs empty seekable file")); + goto cleanup; + } break; case O_RDWR: @@ -124,12 +153,29 @@ runIO(const char *path, int fd, int oflags, unsigned long long length) } if (got == 0) break; /* End of file before end of requested data */ + if (got < buflen || (buflen & alignMask)) { + /* O_DIRECT can handle at most one short read, at end of file */ + if (direct && shortRead) { + virReportSystemError(EINVAL, "%s", + _("Too many short reads for O_DIRECT")); + } + shortRead = true; + } total += got; + if (fdout == fd && direct && shortRead) { + end = total; + memset(buf + got, 0, buflen - got); + got = (got + alignMask) & ~alignMask; + } if (safewrite(fdout, buf, got) < 0) { virReportSystemError(errno, _("Unable to write %s"), fdoutname); goto cleanup; } + if (end && ftruncate(fd, end) < 0) { + virReportSystemError(errno, _("Unable to truncate %s"), fdoutname); + goto cleanup; + } } ret = 0; @@ -141,7 +187,7 @@ cleanup: ret = -1; } - VIR_FREE(buf); + VIR_FREE(base); return ret; }