*
*/
+#include <sys/ioctl.h>
#include <pthread.h>
#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include "osdep.h"
+#include "qemu-common.h"
#include "posix-aio-compat.h"
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static pthread_t thread_id;
+static pthread_attr_t attr;
static int max_threads = 64;
static int cur_threads = 0;
static int idle_threads = 0;
static TAILQ_HEAD(, qemu_paiocb) request_list;
+#ifdef HAVE_PREADV
+static int preadv_present = 1;
+#else
+static int preadv_present = 0;
+#endif
+
static void die2(int err, const char *what)
{
fprintf(stderr, "%s failed: %s\n", what, strerror(err));
if (ret) die2(ret, "pthread_create");
}
+static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
+{
+ int ret;
+
+ ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
+ if (ret == -1)
+ return -errno;
+
+ /*
+ * This looks weird, but the aio code only consideres a request
+ * successfull if it has written the number full number of bytes.
+ *
+ * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
+ * so in fact we return the ioctl command here to make posix_aio_read()
+ * happy..
+ */
+ return aiocb->aio_nbytes;
+}
+
+#ifdef HAVE_PREADV
+
+static ssize_t
+qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+ return preadv(fd, iov, nr_iov, offset);
+}
+
+static ssize_t
+qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+ return pwritev(fd, iov, nr_iov, offset);
+}
+
+#else
+
+static ssize_t
+qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+ return -ENOSYS;
+}
+
+static ssize_t
+qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+{
+ return -ENOSYS;
+}
+
+#endif
+
+/*
+ * Check if we need to copy the data in the aiocb into a new
+ * properly aligned buffer.
+ */
+static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
+{
+ if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
+ int i;
+
+ for (i = 0; i < aiocb->aio_niov; i++)
+ if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
+ return 1;
+ }
+
+ return 0;
+}
+
+static size_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
+{
+ size_t offset = 0;
+ ssize_t len;
+
+ do {
+ if (aiocb->aio_type == QEMU_PAIO_WRITE)
+ len = qemu_pwritev(aiocb->aio_fildes,
+ aiocb->aio_iov,
+ aiocb->aio_niov,
+ aiocb->aio_offset + offset);
+ else
+ len = qemu_preadv(aiocb->aio_fildes,
+ aiocb->aio_iov,
+ aiocb->aio_niov,
+ aiocb->aio_offset + offset);
+ } while (len == -1 && errno == EINTR);
+
+ if (len == -1)
+ return -errno;
+ return len;
+}
+
+static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
+{
+ size_t offset = 0;
+ size_t len;
+
+ while (offset < aiocb->aio_nbytes) {
+ if (aiocb->aio_type == QEMU_PAIO_WRITE)
+ len = pwrite(aiocb->aio_fildes,
+ (const char *)buf + offset,
+ aiocb->aio_nbytes - offset,
+ aiocb->aio_offset + offset);
+ else
+ len = pread(aiocb->aio_fildes,
+ buf + offset,
+ aiocb->aio_nbytes - offset,
+ aiocb->aio_offset + offset);
+
+ if (len == -1 && errno == EINTR)
+ continue;
+ else if (len == -1) {
+ offset = -errno;
+ break;
+ } else if (len == 0)
+ break;
+
+ offset += len;
+ }
+
+ return offset;
+}
+
+static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
+{
+ size_t nbytes;
+ char *buf;
+
+ if (!aiocb_needs_copy(aiocb)) {
+ /*
+ * If there is just a single buffer, and it is properly aligned
+ * we can just use plain pread/pwrite without any problems.
+ */
+ if (aiocb->aio_niov == 1)
+ return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
+
+ /*
+ * We have more than one iovec, and all are properly aligned.
+ *
+ * Try preadv/pwritev first and fall back to linearizing the
+ * buffer if it's not supported.
+ */
+ if (preadv_present) {
+ nbytes = handle_aiocb_rw_vector(aiocb);
+ if (nbytes == aiocb->aio_nbytes)
+ return nbytes;
+ if (nbytes < 0 && nbytes != -ENOSYS)
+ return nbytes;
+ preadv_present = 0;
+ }
+
+ /*
+ * XXX(hch): short read/write. no easy way to handle the reminder
+ * using these interfaces. For now retry using plain
+ * pread/pwrite?
+ */
+ }
+
+ /*
+ * Ok, we have to do it the hard way, copy all segments into
+ * a single aligned buffer.
+ */
+ buf = qemu_memalign(512, aiocb->aio_nbytes);
+ if (aiocb->aio_type == QEMU_PAIO_WRITE) {
+ char *p = buf;
+ int i;
+
+ for (i = 0; i < aiocb->aio_niov; ++i) {
+ memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
+ p += aiocb->aio_iov[i].iov_len;
+ }
+ }
+
+ nbytes = handle_aiocb_rw_linear(aiocb, buf);
+ if (aiocb->aio_type != QEMU_PAIO_WRITE) {
+ char *p = buf;
+ size_t count = aiocb->aio_nbytes, copy;
+ int i;
+
+ for (i = 0; i < aiocb->aio_niov && count; ++i) {
+ copy = count;
+ if (copy > aiocb->aio_iov[i].iov_len)
+ copy = aiocb->aio_iov[i].iov_len;
+ memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+ p += copy;
+ count -= copy;
+ }
+ }
+ qemu_vfree(buf);
+
+ return nbytes;
+}
+
static void *aio_thread(void *unused)
{
+ pid_t pid;
sigset_t set;
+ pid = getpid();
+
/* block all signals */
if (sigfillset(&set)) die("sigfillset");
if (sigprocmask(SIG_BLOCK, &set, NULL)) die("sigprocmask");
while (1) {
struct qemu_paiocb *aiocb;
- size_t offset;
- int ret = 0;
+ size_t ret = 0;
qemu_timeval tv;
struct timespec ts;
ret = cond_timedwait(&cond, &lock, &ts);
}
- if (ret == ETIMEDOUT)
+ if (TAILQ_EMPTY(&request_list))
break;
aiocb = TAILQ_FIRST(&request_list);
TAILQ_REMOVE(&request_list, aiocb, node);
-
- offset = 0;
aiocb->active = 1;
-
idle_threads--;
mutex_unlock(&lock);
- while (offset < aiocb->aio_nbytes) {
- ssize_t len;
-
- if (aiocb->is_write)
- len = pwrite(aiocb->aio_fildes,
- (const char *)aiocb->aio_buf + offset,
- aiocb->aio_nbytes - offset,
- aiocb->aio_offset + offset);
- else
- len = pread(aiocb->aio_fildes,
- (char *)aiocb->aio_buf + offset,
- aiocb->aio_nbytes - offset,
- aiocb->aio_offset + offset);
-
- if (len == -1 && errno == EINTR)
- continue;
- else if (len == -1) {
- offset = -errno;
- break;
- } else if (len == 0)
- break;
-
- offset += len;
- }
+ switch (aiocb->aio_type) {
+ case QEMU_PAIO_READ:
+ case QEMU_PAIO_WRITE:
+ ret = handle_aiocb_rw(aiocb);
+ break;
+ case QEMU_PAIO_IOCTL:
+ ret = handle_aiocb_ioctl(aiocb);
+ break;
+ default:
+ fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
+ ret = -EINVAL;
+ break;
+ }
mutex_lock(&lock);
- aiocb->ret = offset;
+ aiocb->ret = ret;
idle_threads++;
mutex_unlock(&lock);
- if (kill(getpid(), aiocb->ev_signo)) die("kill failed");
+ if (kill(pid, aiocb->ev_signo)) die("kill failed");
}
idle_threads--;
static void spawn_thread(void)
{
- int ret;
- pthread_attr_t attr;
-
cur_threads++;
idle_threads++;
-
- ret = pthread_attr_init(&attr);
- if (ret) die2 (ret, "pthread_attr_init");
- ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
- if (ret) die2 (ret, "pthread_attr_setdetachstate");
thread_create(&thread_id, &attr, aio_thread, NULL);
- ret = pthread_attr_destroy(&attr);
- if (ret) die2 (ret, "pthread_attr_destroy");
}
int qemu_paio_init(struct qemu_paioinit *aioinit)
{
+ int ret;
+
+ ret = pthread_attr_init(&attr);
+ if (ret) die2(ret, "pthread_attr_init");
+
+ ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+ if (ret) die2(ret, "pthread_attr_setdetachstate");
+
TAILQ_INIT(&request_list);
return 0;
}
-static int qemu_paio_submit(struct qemu_paiocb *aiocb, int is_write)
+static int qemu_paio_submit(struct qemu_paiocb *aiocb, int type)
{
- aiocb->is_write = is_write;
+ aiocb->aio_type = type;
aiocb->ret = -EINPROGRESS;
aiocb->active = 0;
mutex_lock(&lock);
int qemu_paio_read(struct qemu_paiocb *aiocb)
{
- return qemu_paio_submit(aiocb, 0);
+ return qemu_paio_submit(aiocb, QEMU_PAIO_READ);
}
int qemu_paio_write(struct qemu_paiocb *aiocb)
{
- return qemu_paio_submit(aiocb, 1);
+ return qemu_paio_submit(aiocb, QEMU_PAIO_WRITE);
+}
+
+int qemu_paio_ioctl(struct qemu_paiocb *aiocb)
+{
+ return qemu_paio_submit(aiocb, QEMU_PAIO_IOCTL);
}
ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)