fixed refresh logic (initial patch by Igor Kovalenko)
[qemu] / block-qcow.c
index 34026a4..65e74e7 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Block driver for the QCOW format
  * 
- * Copyright (c) 2004 Fabrice Bellard
+ * Copyright (c) 2004-2006 Fabrice Bellard
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -53,7 +53,7 @@ typedef struct QCowHeader {
 #define L2_CACHE_SIZE 16
 
 typedef struct BDRVQcowState {
-    int fd;
+    BlockDriverState *hd;
     int cluster_bits;
     int cluster_size;
     int cluster_sectors;
@@ -89,20 +89,16 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
         return 0;
 }
 
-static int qcow_open(BlockDriverState *bs, const char *filename)
+static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVQcowState *s = bs->opaque;
-    int fd, len, i, shift;
+    int len, i, shift, ret;
     QCowHeader header;
-    
-    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
-    if (fd < 0) {
-        fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
-        if (fd < 0)
-            return -1;
-    }
-    s->fd = fd;
-    if (read(fd, &header, sizeof(header)) != sizeof(header))
+
+    ret = bdrv_file_open(&s->hd, filename, flags);
+    if (ret < 0)
+        return ret;
+    if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
         goto fail;
     be32_to_cpus(&header.magic);
     be32_to_cpus(&header.version);
@@ -138,8 +134,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename)
     s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
     if (!s->l1_table)
         goto fail;
-    lseek(fd, s->l1_table_offset, SEEK_SET);
-    if (read(fd, s->l1_table, s->l1_size * sizeof(uint64_t)) != 
+    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != 
         s->l1_size * sizeof(uint64_t))
         goto fail;
     for(i = 0;i < s->l1_size; i++) {
@@ -162,8 +157,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename)
         len = header.backing_file_size;
         if (len > 1023)
             len = 1023;
-        lseek(fd, header.backing_file_offset, SEEK_SET);
-        if (read(fd, bs->backing_file, len) != len)
+        if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
             goto fail;
         bs->backing_file[len] = '\0';
     }
@@ -174,7 +168,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename)
     qemu_free(s->l2_cache);
     qemu_free(s->cluster_cache);
     qemu_free(s->cluster_data);
-    close(fd);
+    bdrv_delete(s->hd);
     return -1;
 }
 
@@ -276,14 +270,14 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
         if (!allocate)
             return 0;
         /* allocate a new l2 entry */
-        l2_offset = lseek(s->fd, 0, SEEK_END);
+        l2_offset = bdrv_getlength(s->hd);
         /* round to cluster size */
         l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
         /* update the L1 entry */
         s->l1_table[l1_index] = l2_offset;
         tmp = cpu_to_be64(l2_offset);
-        lseek(s->fd, s->l1_table_offset + l1_index * sizeof(tmp), SEEK_SET);
-        if (write(s->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
+        if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp), 
+                        &tmp, sizeof(tmp)) != sizeof(tmp))
             return 0;
         new_l2_table = 1;
     }
@@ -309,14 +303,13 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
         }
     }
     l2_table = s->l2_cache + (min_index << s->l2_bits);
-    lseek(s->fd, l2_offset, SEEK_SET);
     if (new_l2_table) {
         memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
+        if (bdrv_pwrite(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
             s->l2_size * sizeof(uint64_t))
             return 0;
     } else {
-        if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) != 
+        if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != 
             s->l2_size * sizeof(uint64_t))
             return 0;
     }
@@ -337,21 +330,20 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                overwritten */
             if (decompress_cluster(s, cluster_offset) < 0)
                 return 0;
-            cluster_offset = lseek(s->fd, 0, SEEK_END);
+            cluster_offset = bdrv_getlength(s->hd);
             cluster_offset = (cluster_offset + s->cluster_size - 1) & 
                 ~(s->cluster_size - 1);
             /* write the cluster content */
-            lseek(s->fd, cluster_offset, SEEK_SET);
-            if (write(s->fd, s->cluster_cache, s->cluster_size) != 
+            if (bdrv_pwrite(s->hd, cluster_offset, s->cluster_cache, s->cluster_size) != 
                 s->cluster_size)
                 return -1;
         } else {
-            cluster_offset = lseek(s->fd, 0, SEEK_END);
+            cluster_offset = bdrv_getlength(s->hd);
             if (allocate == 1) {
                 /* round to cluster size */
                 cluster_offset = (cluster_offset + s->cluster_size - 1) & 
                     ~(s->cluster_size - 1);
-                ftruncate(s->fd, cluster_offset + s->cluster_size);
+                bdrv_truncate(s->hd, cluster_offset + s->cluster_size);
                 /* if encrypted, we must initialize the cluster
                    content which won't be written */
                 if (s->crypt_method && 
@@ -365,8 +357,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                                             s->cluster_data, 
                                             s->cluster_data + 512, 1, 1,
                                             &s->aes_encrypt_key);
-                            lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
-                            if (write(s->fd, s->cluster_data, 512) != 512)
+                            if (bdrv_pwrite(s->hd, cluster_offset + i * 512, 
+                                            s->cluster_data, 512) != 512)
                                 return -1;
                         }
                     }
@@ -379,8 +371,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
         /* update L2 table */
         tmp = cpu_to_be64(cluster_offset);
         l2_table[l2_index] = tmp;
-        lseek(s->fd, l2_offset + l2_index * sizeof(tmp), SEEK_SET);
-        if (write(s->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
+        if (bdrv_pwrite(s->hd, 
+                        l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp))
             return 0;
     }
     return cluster_offset;
@@ -438,8 +430,7 @@ static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
     if (s->cluster_cache_offset != coffset) {
         csize = cluster_offset >> (63 - s->cluster_bits);
         csize &= (s->cluster_size - 1);
-        lseek(s->fd, coffset, SEEK_SET);
-        ret = read(s->fd, s->cluster_data, csize);
+        ret = bdrv_pread(s->hd, coffset, s->cluster_data, csize);
         if (ret != csize) 
             return -1;
         if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -451,6 +442,8 @@ static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
     return 0;
 }
 
+#if 0
+
 static int qcow_read(BlockDriverState *bs, int64_t sector_num, 
                      uint8_t *buf, int nb_sectors)
 {
@@ -465,14 +458,20 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
         if (n > nb_sectors)
             n = nb_sectors;
         if (!cluster_offset) {
-            memset(buf, 0, 512 * n);
+            if (bs->backing_hd) {
+                /* read from the base image */
+                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+                if (ret < 0)
+                    return -1;
+            } else {
+                memset(buf, 0, 512 * n);
+            }
         } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
             if (decompress_cluster(s, cluster_offset) < 0)
                 return -1;
             memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
         } else {
-            lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET);
-            ret = read(s->fd, buf, n * 512);
+            ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
             if (ret != n * 512) 
                 return -1;
             if (s->crypt_method) {
@@ -486,6 +485,7 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
     }
     return 0;
 }
+#endif
 
 static int qcow_write(BlockDriverState *bs, int64_t sector_num, 
                      const uint8_t *buf, int nb_sectors)
@@ -504,13 +504,13 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
                                             index_in_cluster + n);
         if (!cluster_offset)
             return -1;
-        lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET);
         if (s->crypt_method) {
             encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1,
                             &s->aes_encrypt_key);
-            ret = write(s->fd, s->cluster_data, n * 512);
+            ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, 
+                              s->cluster_data, n * 512);
         } else {
-            ret = write(s->fd, buf, n * 512);
+            ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
         }
         if (ret != n * 512) 
             return -1;
@@ -522,6 +522,231 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
     return 0;
 }
 
+typedef struct {
+    int64_t sector_num;
+    uint8_t *buf;
+    int nb_sectors;
+    int n;
+    uint64_t cluster_offset;
+    uint8_t *cluster_data; 
+    BlockDriverAIOCB *hd_aiocb;
+    BlockDriverAIOCB *backing_hd_aiocb;
+} QCowAIOCB;
+
+static void qcow_aio_delete(BlockDriverAIOCB *acb);
+
+static int qcow_aio_new(BlockDriverAIOCB *acb)
+{
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+    QCowAIOCB *acb1;
+    acb1 = qemu_mallocz(sizeof(QCowAIOCB));
+    if (!acb1)
+        return -1;
+    acb->opaque = acb1;
+    acb1->hd_aiocb = bdrv_aio_new(s->hd);
+    if (!acb1->hd_aiocb)
+        goto fail;
+    if (bs->backing_hd) {
+        acb1->backing_hd_aiocb = bdrv_aio_new(bs->backing_hd);
+        if (!acb1->backing_hd_aiocb)
+            goto fail;
+    }
+    return 0;
+ fail:
+    qcow_aio_delete(acb);
+    return -1;
+}
+
+static void qcow_aio_read_cb(void *opaque, int ret)
+{
+    BlockDriverAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+    QCowAIOCB *acb1 = acb->opaque;
+    int index_in_cluster;
+
+    if (ret < 0) {
+    fail:
+        acb->cb(acb->cb_opaque, ret);
+        return;
+    }
+
+ redo:
+    /* post process the read buffer */
+    if (!acb1->cluster_offset) {
+        /* nothing to do */
+    } else if (acb1->cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        /* nothing to do */
+    } else {
+        if (s->crypt_method) {
+            encrypt_sectors(s, acb1->sector_num, acb1->buf, acb1->buf, 
+                            acb1->n, 0, 
+                            &s->aes_decrypt_key);
+        }
+    }
+
+    acb1->nb_sectors -= acb1->n;
+    acb1->sector_num += acb1->n;
+    acb1->buf += acb1->n * 512;
+
+    if (acb1->nb_sectors == 0) {
+        /* request completed */
+        acb->cb(acb->cb_opaque, 0);
+        return;
+    }
+    
+    /* prepare next AIO request */
+    acb1->cluster_offset = get_cluster_offset(bs, 
+                                              acb1->sector_num << 9, 
+                                              0, 0, 0, 0);
+    index_in_cluster = acb1->sector_num & (s->cluster_sectors - 1);
+    acb1->n = s->cluster_sectors - index_in_cluster;
+    if (acb1->n > acb1->nb_sectors)
+        acb1->n = acb1->nb_sectors;
+
+    if (!acb1->cluster_offset) {
+        if (bs->backing_hd) {
+            /* read from the base image */
+            ret = bdrv_aio_read(acb1->backing_hd_aiocb, acb1->sector_num, 
+                                acb1->buf, acb1->n, qcow_aio_read_cb, acb);
+            if (ret < 0)
+                goto fail;
+        } else {
+            /* Note: in this case, no need to wait */
+            memset(acb1->buf, 0, 512 * acb1->n);
+            goto redo;
+        }
+    } else if (acb1->cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        /* add AIO support for compressed blocks ? */
+        if (decompress_cluster(s, acb1->cluster_offset) < 0)
+            goto fail;
+        memcpy(acb1->buf, 
+               s->cluster_cache + index_in_cluster * 512, 512 * acb1->n);
+        goto redo;
+    } else {
+        if ((acb1->cluster_offset & 511) != 0) {
+            ret = -EIO;
+            goto fail;
+        }
+        ret = bdrv_aio_read(acb1->hd_aiocb, 
+                            (acb1->cluster_offset >> 9) + index_in_cluster, 
+                            acb1->buf, acb1->n, qcow_aio_read_cb, acb);
+        if (ret < 0)
+            goto fail;
+    }
+}
+
+static int qcow_aio_read(BlockDriverAIOCB *acb, int64_t sector_num, 
+                         uint8_t *buf, int nb_sectors)
+{
+    QCowAIOCB *acb1 = acb->opaque;
+    
+    acb1->sector_num = sector_num;
+    acb1->buf = buf;
+    acb1->nb_sectors = nb_sectors;
+    acb1->n = 0;
+    acb1->cluster_offset = 0;    
+
+    qcow_aio_read_cb(acb, 0);
+}
+
+static void qcow_aio_write_cb(void *opaque, int ret)
+{
+    BlockDriverAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+    QCowAIOCB *acb1 = acb->opaque;
+    int index_in_cluster;
+    uint64_t cluster_offset;
+    const uint8_t *src_buf;
+    
+    if (ret < 0) {
+    fail:
+        acb->cb(acb->cb_opaque, ret);
+        return;
+    }
+
+    acb1->nb_sectors -= acb1->n;
+    acb1->sector_num += acb1->n;
+    acb1->buf += acb1->n * 512;
+
+    if (acb1->nb_sectors == 0) {
+        /* request completed */
+        acb->cb(acb->cb_opaque, 0);
+        return;
+    }
+    
+    index_in_cluster = acb1->sector_num & (s->cluster_sectors - 1);
+    acb1->n = s->cluster_sectors - index_in_cluster;
+    if (acb1->n > acb1->nb_sectors)
+        acb1->n = acb1->nb_sectors;
+    cluster_offset = get_cluster_offset(bs, acb1->sector_num << 9, 1, 0, 
+                                        index_in_cluster, 
+                                        index_in_cluster + acb1->n);
+    if (!cluster_offset || (cluster_offset & 511) != 0) {
+        ret = -EIO;
+        goto fail;
+    }
+    if (s->crypt_method) {
+        if (!acb1->cluster_data) {
+            acb1->cluster_data = qemu_mallocz(s->cluster_size);
+            if (!acb1->cluster_data) {
+                ret = -ENOMEM;
+                goto fail;
+            }
+        }
+        encrypt_sectors(s, acb1->sector_num, acb1->cluster_data, acb1->buf, 
+                        acb1->n, 1, &s->aes_encrypt_key);
+        src_buf = acb1->cluster_data;
+    } else {
+        src_buf = acb1->buf;
+    }
+    ret = bdrv_aio_write(acb1->hd_aiocb, 
+                         (cluster_offset >> 9) + index_in_cluster, 
+                         src_buf, acb1->n, 
+                         qcow_aio_write_cb, acb);
+    if (ret < 0)
+        goto fail;
+}
+
+static int qcow_aio_write(BlockDriverAIOCB *acb, int64_t sector_num, 
+                          const uint8_t *buf, int nb_sectors)
+{
+    QCowAIOCB *acb1 = acb->opaque;
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+    
+    s->cluster_cache_offset = -1; /* disable compressed cache */
+
+    acb1->sector_num = sector_num;
+    acb1->buf = (uint8_t *)buf;
+    acb1->nb_sectors = nb_sectors;
+    acb1->n = 0;
+    
+    qcow_aio_write_cb(acb, 0);
+}
+
+static void qcow_aio_cancel(BlockDriverAIOCB *acb)
+{
+    QCowAIOCB *acb1 = acb->opaque;
+    if (acb1->hd_aiocb)
+        bdrv_aio_cancel(acb1->hd_aiocb);
+    if (acb1->backing_hd_aiocb)
+        bdrv_aio_cancel(acb1->backing_hd_aiocb);
+}
+
+static void qcow_aio_delete(BlockDriverAIOCB *acb)
+{
+    QCowAIOCB *acb1 = acb->opaque;
+    if (acb1->hd_aiocb)
+        bdrv_aio_delete(acb1->hd_aiocb);
+    if (acb1->backing_hd_aiocb)
+        bdrv_aio_delete(acb1->backing_hd_aiocb);
+    qemu_free(acb1->cluster_data);
+    qemu_free(acb1);
+}
+
 static void qcow_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
@@ -529,7 +754,7 @@ static void qcow_close(BlockDriverState *bs)
     qemu_free(s->l2_cache);
     qemu_free(s->cluster_cache);
     qemu_free(s->cluster_data);
-    close(s->fd);
+    bdrv_delete(s->hd);
 }
 
 static int qcow_create(const char *filename, int64_t total_size,
@@ -537,12 +762,9 @@ static int qcow_create(const char *filename, int64_t total_size,
 {
     int fd, header_size, backing_filename_len, l1_size, i, shift;
     QCowHeader header;
-    char backing_filename[1024];
     uint64_t tmp;
-    struct stat st;
 
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 
-              0644);
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
     if (fd < 0)
         return -1;
     memset(&header, 0, sizeof(header));
@@ -552,28 +774,11 @@ static int qcow_create(const char *filename, int64_t total_size,
     header_size = sizeof(header);
     backing_filename_len = 0;
     if (backing_file) {
-       if (strcmp(backing_file, "fat:")) {
-           const char *p;
-           /* XXX: this is a hack: we do not attempt to check for URL
-              like syntax */
-           p = strchr(backing_file, ':');
-           if (p && (p - backing_file) >= 2) {
-               /* URL like but exclude "c:" like filenames */
-               pstrcpy(backing_filename, sizeof(backing_filename),
-                       backing_file);
-           } else {
-               realpath(backing_file, backing_filename);
-               if (stat(backing_filename, &st) != 0) {
-                   return -1;
-               }
-           }
-           header.backing_file_offset = cpu_to_be64(header_size);
-           backing_filename_len = strlen(backing_filename);
-           header.backing_file_size = cpu_to_be32(backing_filename_len);
-           header_size += backing_filename_len;
-       } else
-           backing_file = NULL;
-        header.mtime = cpu_to_be32(st.st_mtime);
+        header.backing_file_offset = cpu_to_be64(header_size);
+        backing_filename_len = strlen(backing_file);
+        header.backing_file_size = cpu_to_be32(backing_filename_len);
+        header_size += backing_filename_len;
+        header.mtime = cpu_to_be32(0);
         header.cluster_bits = 9; /* 512 byte cluster to avoid copying
                                     unmodifyed sectors */
         header.l2_bits = 12; /* 32 KB L2 tables */
@@ -595,7 +800,7 @@ static int qcow_create(const char *filename, int64_t total_size,
     /* write all the data */
     write(fd, &header, sizeof(header));
     if (backing_file) {
-        write(fd, backing_filename, backing_filename_len);
+        write(fd, backing_file, backing_filename_len);
     }
     lseek(fd, header_size, SEEK_SET);
     tmp = 0;
@@ -610,12 +815,14 @@ int qcow_make_empty(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+    int ret;
 
     memset(s->l1_table, 0, l1_length);
-    lseek(s->fd, s->l1_table_offset, SEEK_SET);
-    if (write(s->fd, s->l1_table, l1_length) < 0)
+    if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
        return -1;
-    ftruncate(s->fd, s->l1_table_offset + l1_length);
+    ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
+    if (ret < 0)
+        return ret;
 
     memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
     memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
@@ -682,8 +889,7 @@ int qcow_compress_cluster(BlockDriverState *bs, int64_t sector_num,
         cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, 
                                             out_len, 0, 0);
         cluster_offset &= s->cluster_offset_mask;
-        lseek(s->fd, cluster_offset, SEEK_SET);
-        if (write(s->fd, out_buf, out_len) != out_len) {
+        if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) {
             qemu_free(out_buf);
             return -1;
         }
@@ -693,18 +899,31 @@ int qcow_compress_cluster(BlockDriverState *bs, int64_t sector_num,
     return 0;
 }
 
+static void qcow_flush(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    bdrv_flush(s->hd);
+}
+
 BlockDriver bdrv_qcow = {
     "qcow",
     sizeof(BDRVQcowState),
     qcow_probe,
     qcow_open,
-    qcow_read,
-    qcow_write,
+    NULL,
+    NULL,
     qcow_close,
     qcow_create,
+    qcow_flush,
     qcow_is_allocated,
     qcow_set_key,
-    qcow_make_empty
+    qcow_make_empty,
+
+    .bdrv_aio_new = qcow_aio_new,
+    .bdrv_aio_read = qcow_aio_read,
+    .bdrv_aio_write = qcow_aio_write,
+    .bdrv_aio_cancel = qcow_aio_cancel,
+    .bdrv_aio_delete = qcow_aio_delete,
 };