cbc83b166e49f621da9eea94ec36a4868b9f2c68
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef HOST_BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "monitor.h"
32 #include "block_int.h"
33 #include "module.h"
34
35 #ifdef HOST_BSD
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/ioctl.h>
39 #ifndef __DragonFly__
40 #include <sys/disk.h>
41 #endif
42 #endif
43
44 #ifdef _WIN32
45 #include <windows.h>
46 #endif
47
48 #define SECTOR_BITS 9
49 #define SECTOR_SIZE (1 << SECTOR_BITS)
50
/* Request state used by the AIO emulation layer: bdrv_register() stores
   sizeof(BlockDriverAIOCBSync) as the driver's aiocb_size whenever it installs
   the bdrv_aio_*_em callbacks. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;
    int is_write;
} BlockDriverAIOCBSync;
60
/* Forward declarations of the emulation helpers that bdrv_register() plugs
   into drivers lacking a native AIO or a native synchronous implementation. */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);
72
/* head of the list of named block devices (bdrv_new() appends to it) */
BlockDriverState *bdrv_first;

/* head of the list of registered drivers (bdrv_register() prepends to it) */
static BlockDriver *first_drv;
76
/* Return 1 if 'path' is absolute, 0 otherwise. Everything up to and
   including the first ':' is skipped before the leading separator is
   tested, so "proto:/x" counts as absolute. On Windows a path that starts
   with a slash or backslash is accepted at once, and the backslash is a
   valid separator after the prefix too. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*start == '\\') {
        return 1;
    }
#endif
    return *start == '/';
}
96
/* Build in 'dest' (at most dest_size bytes, always terminated when
   dest_size > 0) the location of 'filename' as seen from 'base_path'.
   An absolute 'filename' is copied unchanged; otherwise the directory part
   of 'base_path' (including any "proto:" prefix, so URLs work) is kept and
   'filename' is appended to it. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *colon, *slash;
    const char *after_proto, *after_dir, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first character after the protocol prefix, if there is one */
    colon = strchr(base_path, ':');
    after_proto = colon ? colon + 1 : base_path;

    /* first character after the last directory separator, if there is one */
    slash = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!slash || backslash > slash) {
            slash = backslash;
        }
    }
#endif
    after_dir = slash ? slash + 1 : base_path;

    /* keep whichever prefix reaches further into base_path */
    prefix_end = (after_dir > after_proto) ? after_dir : after_proto;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
140
141
142 void bdrv_register(BlockDriver *bdrv)
143 {
144     if (!bdrv->bdrv_aio_readv) {
145         /* add AIO emulation layer */
146         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
147         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
148         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
149         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
150     } else if (!bdrv->bdrv_read) {
151         /* add synchronous IO emulation layer */
152         bdrv->bdrv_read = bdrv_read_em;
153         bdrv->bdrv_write = bdrv_write_em;
154     }
155     aio_pool_init(&bdrv->aio_pool, bdrv->aiocb_size, bdrv->bdrv_aio_cancel);
156     bdrv->next = first_drv;
157     first_drv = bdrv;
158 }
159
160 /* create a new block device (by default it is empty) */
161 BlockDriverState *bdrv_new(const char *device_name)
162 {
163     BlockDriverState **pbs, *bs;
164
165     bs = qemu_mallocz(sizeof(BlockDriverState));
166     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
167     if (device_name[0] != '\0') {
168         /* insert at the end */
169         pbs = &bdrv_first;
170         while (*pbs != NULL)
171             pbs = &(*pbs)->next;
172         *pbs = bs;
173     }
174     return bs;
175 }
176
177 BlockDriver *bdrv_find_format(const char *format_name)
178 {
179     BlockDriver *drv1;
180     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
181         if (!strcmp(drv1->format_name, format_name))
182             return drv1;
183     }
184     return NULL;
185 }
186
187 int bdrv_create2(BlockDriver *drv,
188                 const char *filename, int64_t size_in_sectors,
189                 const char *backing_file, const char *backing_format,
190                 int flags)
191 {
192     if (drv->bdrv_create2)
193         return drv->bdrv_create2(filename, size_in_sectors, backing_file,
194                                  backing_format, flags);
195     if (drv->bdrv_create)
196         return drv->bdrv_create(filename, size_in_sectors, backing_file,
197                                 flags);
198     return -ENOTSUP;
199 }
200
201 int bdrv_create(BlockDriver *drv,
202                 const char *filename, int64_t size_in_sectors,
203                 const char *backing_file, int flags)
204 {
205     if (!drv->bdrv_create)
206         return -ENOTSUP;
207     return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
208 }
209
210 #ifdef _WIN32
211 void get_tmp_filename(char *filename, int size)
212 {
213     char temp_dir[MAX_PATH];
214
215     GetTempPath(MAX_PATH, temp_dir);
216     GetTempFileName(temp_dir, "qem", 0, filename);
217 }
218 #else
/* Create an empty temporary file named "vl.XXXXXX" (suffix filled in by
   mkstemp) under $TMPDIR, or /tmp when TMPDIR is unset, and leave its name
   in 'filename' (at most 'size' bytes). The file is created and immediately
   closed; only the name is handed back. */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp returns -1 on failure (e.g. truncated template); do not hand
       that to close() */
    if (fd >= 0)
        close(fd);
}
231 #endif
232
233 #ifdef _WIN32
/* Return 1 if 'filename' begins with an ASCII letter followed by ':'
   (a drive prefix such as "c:"), 0 otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
240
/* Return 1 if 'filename' names a whole Windows drive: either a bare drive
   prefix such as "d:" with nothing after it, or a name starting with
   "\\.\" or "//./". Returns 0 otherwise. */
static int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    return strstart(filename, "//./", NULL) ? 1 : 0;
}
251 #endif
252
253 static BlockDriver *find_protocol(const char *filename)
254 {
255     BlockDriver *drv1;
256     char protocol[128];
257     int len;
258     const char *p;
259
260 #ifdef _WIN32
261     if (is_windows_drive(filename) ||
262         is_windows_drive_prefix(filename))
263         return bdrv_find_format("raw");
264 #endif
265     p = strchr(filename, ':');
266     if (!p)
267         return bdrv_find_format("raw");
268     len = p - filename;
269     if (len > sizeof(protocol) - 1)
270         len = sizeof(protocol) - 1;
271     memcpy(protocol, filename, len);
272     protocol[len] = '\0';
273     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
274         if (drv1->protocol_name &&
275             !strcmp(drv1->protocol_name, protocol))
276             return drv1;
277     }
278     return NULL;
279 }
280
281 /* XXX: force raw format if block or character device ? It would
282    simplify the BSD case */
283 static BlockDriver *find_image_format(const char *filename)
284 {
285     int ret, score, score_max;
286     BlockDriver *drv1, *drv;
287     uint8_t buf[2048];
288     BlockDriverState *bs;
289
290     /* detect host devices. By convention, /dev/cdrom[N] is always
291        recognized as a host CDROM */
292     if (strstart(filename, "/dev/cdrom", NULL))
293         return bdrv_find_format("host_device");
294 #ifdef _WIN32
295     if (is_windows_drive(filename))
296         return bdrv_find_format("host_device");
297 #else
298     {
299         struct stat st;
300         if (stat(filename, &st) >= 0 &&
301             (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
302             return bdrv_find_format("host_device");
303         }
304     }
305 #endif
306
307     drv = find_protocol(filename);
308     /* no need to test disk image formats for vvfat */
309     if (drv && strcmp(drv->format_name, "vvfat") == 0)
310         return drv;
311
312     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
313     if (ret < 0)
314         return NULL;
315     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
316     bdrv_delete(bs);
317     if (ret < 0) {
318         return NULL;
319     }
320
321     score_max = 0;
322     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
323         if (drv1->bdrv_probe) {
324             score = drv1->bdrv_probe(buf, ret, filename);
325             if (score > score_max) {
326                 score_max = score;
327                 drv = drv1;
328             }
329         }
330     }
331     return drv;
332 }
333
334 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
335 {
336     BlockDriverState *bs;
337     int ret;
338
339     bs = bdrv_new("");
340     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
341     if (ret < 0) {
342         bdrv_delete(bs);
343         return ret;
344     }
345     bs->growable = 1;
346     *pbs = bs;
347     return 0;
348 }
349
/* Open 'filename' on 'bs' without forcing a driver: equivalent to
   bdrv_open2() with drv == NULL. Returns what bdrv_open2() returns. */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
{
    return bdrv_open2(bs, filename, flags, NULL);
}
354
355 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
356                BlockDriver *drv)
357 {
358     int ret, open_flags;
359     char tmp_filename[PATH_MAX];
360     char backing_filename[PATH_MAX];
361
362     bs->read_only = 0;
363     bs->is_temporary = 0;
364     bs->encrypted = 0;
365     bs->valid_key = 0;
366     /* buffer_alignment defaulted to 512, drivers can change this value */
367     bs->buffer_alignment = 512;
368
369     if (flags & BDRV_O_SNAPSHOT) {
370         BlockDriverState *bs1;
371         int64_t total_size;
372         int is_protocol = 0;
373
374         /* if snapshot, we create a temporary backing file and open it
375            instead of opening 'filename' directly */
376
377         /* if there is a backing file, use it */
378         bs1 = bdrv_new("");
379         ret = bdrv_open2(bs1, filename, 0, drv);
380         if (ret < 0) {
381             bdrv_delete(bs1);
382             return ret;
383         }
384         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
385
386         if (bs1->drv && bs1->drv->protocol_name)
387             is_protocol = 1;
388
389         bdrv_delete(bs1);
390
391         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
392
393         /* Real path is meaningless for protocols */
394         if (is_protocol)
395             snprintf(backing_filename, sizeof(backing_filename),
396                      "%s", filename);
397         else
398             realpath(filename, backing_filename);
399
400         ret = bdrv_create2(bdrv_find_format("qcow2"), tmp_filename,
401                            total_size, backing_filename, 
402                            (drv ? drv->format_name : NULL), 0);
403         if (ret < 0) {
404             return ret;
405         }
406         filename = tmp_filename;
407         drv = bdrv_find_format("qcow2");
408         bs->is_temporary = 1;
409     }
410
411     pstrcpy(bs->filename, sizeof(bs->filename), filename);
412     if (flags & BDRV_O_FILE) {
413         drv = find_protocol(filename);
414     } else if (!drv) {
415         drv = find_image_format(filename);
416     }
417     if (!drv) {
418         ret = -ENOENT;
419         goto unlink_and_fail;
420     }
421     bs->drv = drv;
422     bs->opaque = qemu_mallocz(drv->instance_size);
423     /* Note: for compatibility, we open disk image files as RDWR, and
424        RDONLY as fallback */
425     if (!(flags & BDRV_O_FILE))
426         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
427     else
428         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
429     ret = drv->bdrv_open(bs, filename, open_flags);
430     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
431         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
432         bs->read_only = 1;
433     }
434     if (ret < 0) {
435         qemu_free(bs->opaque);
436         bs->opaque = NULL;
437         bs->drv = NULL;
438     unlink_and_fail:
439         if (bs->is_temporary)
440             unlink(filename);
441         return ret;
442     }
443     if (drv->bdrv_getlength) {
444         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
445     }
446 #ifndef _WIN32
447     if (bs->is_temporary) {
448         unlink(filename);
449     }
450 #endif
451     if (bs->backing_file[0] != '\0') {
452         /* if there is a backing file, use it */
453         BlockDriver *back_drv = NULL;
454         bs->backing_hd = bdrv_new("");
455         path_combine(backing_filename, sizeof(backing_filename),
456                      filename, bs->backing_file);
457         if (bs->backing_format[0] != '\0')
458             back_drv = bdrv_find_format(bs->backing_format);
459         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
460                          back_drv);
461         if (ret < 0) {
462             bdrv_close(bs);
463             return ret;
464         }
465     }
466
467     if (!bdrv_key_required(bs)) {
468         /* call the change callback */
469         bs->media_changed = 1;
470         if (bs->change_cb)
471             bs->change_cb(bs->change_opaque);
472     }
473     return 0;
474 }
475
476 void bdrv_close(BlockDriverState *bs)
477 {
478     if (bs->drv) {
479         if (bs->backing_hd)
480             bdrv_delete(bs->backing_hd);
481         bs->drv->bdrv_close(bs);
482         qemu_free(bs->opaque);
483 #ifdef _WIN32
484         if (bs->is_temporary) {
485             unlink(bs->filename);
486         }
487 #endif
488         bs->opaque = NULL;
489         bs->drv = NULL;
490
491         /* call the change callback */
492         bs->media_changed = 1;
493         if (bs->change_cb)
494             bs->change_cb(bs->change_opaque);
495     }
496 }
497
498 void bdrv_delete(BlockDriverState *bs)
499 {
500     BlockDriverState **pbs;
501
502     pbs = &bdrv_first;
503     while (*pbs != bs && *pbs != NULL)
504         pbs = &(*pbs)->next;
505     if (*pbs == bs)
506         *pbs = bs->next;
507
508     bdrv_close(bs);
509     qemu_free(bs);
510 }
511
512 /*
513  * Run consistency checks on an image
514  *
515  * Returns the number of errors or -errno when an internal error occurs
516  */
517 int bdrv_check(BlockDriverState *bs)
518 {
519     if (bs->drv->bdrv_check == NULL) {
520         return -ENOTSUP;
521     }
522
523     return bs->drv->bdrv_check(bs);
524 }
525
526 /* commit COW file into the raw image */
527 int bdrv_commit(BlockDriverState *bs)
528 {
529     BlockDriver *drv = bs->drv;
530     int64_t i, total_sectors;
531     int n, j;
532     unsigned char sector[512];
533
534     if (!drv)
535         return -ENOMEDIUM;
536
537     if (bs->read_only) {
538         return -EACCES;
539     }
540
541     if (!bs->backing_hd) {
542         return -ENOTSUP;
543     }
544
545     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
546     for (i = 0; i < total_sectors;) {
547         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
548             for(j = 0; j < n; j++) {
549                 if (bdrv_read(bs, i, sector, 1) != 0) {
550                     return -EIO;
551                 }
552
553                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
554                     return -EIO;
555                 }
556                 i++;
557             }
558         } else {
559             i += n;
560         }
561     }
562
563     if (drv->bdrv_make_empty)
564         return drv->bdrv_make_empty(bs);
565
566     return 0;
567 }
568
569 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
570                                    size_t size)
571 {
572     int64_t len;
573
574     if (!bdrv_is_inserted(bs))
575         return -ENOMEDIUM;
576
577     if (bs->growable)
578         return 0;
579
580     len = bdrv_getlength(bs);
581
582     if (offset < 0)
583         return -EIO;
584
585     if ((offset > len) || (len - offset < size))
586         return -EIO;
587
588     return 0;
589 }
590
591 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
592                               int nb_sectors)
593 {
594     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
595 }
596
597 /* return < 0 if error. See bdrv_write() for the return codes */
598 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
599               uint8_t *buf, int nb_sectors)
600 {
601     BlockDriver *drv = bs->drv;
602
603     if (!drv)
604         return -ENOMEDIUM;
605     if (bdrv_check_request(bs, sector_num, nb_sectors))
606         return -EIO;
607
608     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
609 }
610
611 /* Return < 0 if error. Important errors are:
612   -EIO         generic I/O error (may happen for all errors)
613   -ENOMEDIUM   No media inserted.
614   -EINVAL      Invalid sector number or nb_sectors
615   -EACCES      Trying to write a read-only device
616 */
617 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
618                const uint8_t *buf, int nb_sectors)
619 {
620     BlockDriver *drv = bs->drv;
621     if (!bs->drv)
622         return -ENOMEDIUM;
623     if (bs->read_only)
624         return -EACCES;
625     if (bdrv_check_request(bs, sector_num, nb_sectors))
626         return -EIO;
627
628     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
629 }
630
631 int bdrv_pread(BlockDriverState *bs, int64_t offset,
632                void *buf, int count1)
633 {
634     uint8_t tmp_buf[SECTOR_SIZE];
635     int len, nb_sectors, count;
636     int64_t sector_num;
637
638     count = count1;
639     /* first read to align to sector start */
640     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
641     if (len > count)
642         len = count;
643     sector_num = offset >> SECTOR_BITS;
644     if (len > 0) {
645         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
646             return -EIO;
647         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
648         count -= len;
649         if (count == 0)
650             return count1;
651         sector_num++;
652         buf += len;
653     }
654
655     /* read the sectors "in place" */
656     nb_sectors = count >> SECTOR_BITS;
657     if (nb_sectors > 0) {
658         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
659             return -EIO;
660         sector_num += nb_sectors;
661         len = nb_sectors << SECTOR_BITS;
662         buf += len;
663         count -= len;
664     }
665
666     /* add data from the last sector */
667     if (count > 0) {
668         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
669             return -EIO;
670         memcpy(buf, tmp_buf, count);
671     }
672     return count1;
673 }
674
675 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
676                 const void *buf, int count1)
677 {
678     uint8_t tmp_buf[SECTOR_SIZE];
679     int len, nb_sectors, count;
680     int64_t sector_num;
681
682     count = count1;
683     /* first write to align to sector start */
684     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
685     if (len > count)
686         len = count;
687     sector_num = offset >> SECTOR_BITS;
688     if (len > 0) {
689         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
690             return -EIO;
691         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
692         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
693             return -EIO;
694         count -= len;
695         if (count == 0)
696             return count1;
697         sector_num++;
698         buf += len;
699     }
700
701     /* write the sectors "in place" */
702     nb_sectors = count >> SECTOR_BITS;
703     if (nb_sectors > 0) {
704         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
705             return -EIO;
706         sector_num += nb_sectors;
707         len = nb_sectors << SECTOR_BITS;
708         buf += len;
709         count -= len;
710     }
711
712     /* add data from the last sector */
713     if (count > 0) {
714         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
715             return -EIO;
716         memcpy(tmp_buf, buf, count);
717         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
718             return -EIO;
719     }
720     return count1;
721 }
722
723 /**
724  * Truncate file to 'offset' bytes (needed only for file protocols)
725  */
726 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
727 {
728     BlockDriver *drv = bs->drv;
729     if (!drv)
730         return -ENOMEDIUM;
731     if (!drv->bdrv_truncate)
732         return -ENOTSUP;
733     return drv->bdrv_truncate(bs, offset);
734 }
735
736 /**
737  * Length of a file in bytes. Return < 0 if error or unknown.
738  */
739 int64_t bdrv_getlength(BlockDriverState *bs)
740 {
741     BlockDriver *drv = bs->drv;
742     if (!drv)
743         return -ENOMEDIUM;
744     if (!drv->bdrv_getlength) {
745         /* legacy mode */
746         return bs->total_sectors * SECTOR_SIZE;
747     }
748     return drv->bdrv_getlength(bs);
749 }
750
751 /* return 0 as number of sectors if no device present or error */
752 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
753 {
754     int64_t length;
755     length = bdrv_getlength(bs);
756     if (length < 0)
757         length = 0;
758     else
759         length = length >> SECTOR_BITS;
760     *nb_sectors_ptr = length;
761 }
762
/* One 16-byte entry of the MSDOS partition table; guess_disk_lchs() overlays
   four of these on the first sector starting at byte 0x1be. Packed so that
   the layout has no padding. */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));
775
776 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
777 static int guess_disk_lchs(BlockDriverState *bs,
778                            int *pcylinders, int *pheads, int *psectors)
779 {
780     uint8_t buf[512];
781     int ret, i, heads, sectors, cylinders;
782     struct partition *p;
783     uint32_t nr_sects;
784     uint64_t nb_sectors;
785
786     bdrv_get_geometry(bs, &nb_sectors);
787
788     ret = bdrv_read(bs, 0, buf, 1);
789     if (ret < 0)
790         return -1;
791     /* test msdos magic */
792     if (buf[510] != 0x55 || buf[511] != 0xaa)
793         return -1;
794     for(i = 0; i < 4; i++) {
795         p = ((struct partition *)(buf + 0x1be)) + i;
796         nr_sects = le32_to_cpu(p->nr_sects);
797         if (nr_sects && p->end_head) {
798             /* We make the assumption that the partition terminates on
799                a cylinder boundary */
800             heads = p->end_head + 1;
801             sectors = p->end_sector & 63;
802             if (sectors == 0)
803                 continue;
804             cylinders = nb_sectors / (heads * sectors);
805             if (cylinders < 1 || cylinders > 16383)
806                 continue;
807             *pheads = heads;
808             *psectors = sectors;
809             *pcylinders = cylinders;
810 #if 0
811             printf("guessed geometry: LCHS=%d %d %d\n",
812                    cylinders, heads, sectors);
813 #endif
814             return 0;
815         }
816     }
817     return -1;
818 }
819
820 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
821 {
822     int translation, lba_detected = 0;
823     int cylinders, heads, secs;
824     uint64_t nb_sectors;
825
826     /* if a geometry hint is available, use it */
827     bdrv_get_geometry(bs, &nb_sectors);
828     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
829     translation = bdrv_get_translation_hint(bs);
830     if (cylinders != 0) {
831         *pcyls = cylinders;
832         *pheads = heads;
833         *psecs = secs;
834     } else {
835         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
836             if (heads > 16) {
837                 /* if heads > 16, it means that a BIOS LBA
838                    translation was active, so the default
839                    hardware geometry is OK */
840                 lba_detected = 1;
841                 goto default_geometry;
842             } else {
843                 *pcyls = cylinders;
844                 *pheads = heads;
845                 *psecs = secs;
846                 /* disable any translation to be in sync with
847                    the logical geometry */
848                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
849                     bdrv_set_translation_hint(bs,
850                                               BIOS_ATA_TRANSLATION_NONE);
851                 }
852             }
853         } else {
854         default_geometry:
855             /* if no geometry, use a standard physical disk geometry */
856             cylinders = nb_sectors / (16 * 63);
857
858             if (cylinders > 16383)
859                 cylinders = 16383;
860             else if (cylinders < 2)
861                 cylinders = 2;
862             *pcyls = cylinders;
863             *pheads = 16;
864             *psecs = 63;
865             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
866                 if ((*pcyls * *pheads) <= 131072) {
867                     bdrv_set_translation_hint(bs,
868                                               BIOS_ATA_TRANSLATION_LARGE);
869                 } else {
870                     bdrv_set_translation_hint(bs,
871                                               BIOS_ATA_TRANSLATION_LBA);
872                 }
873             }
874         }
875         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
876     }
877 }
878
/* Store a cylinders/heads/sectors geometry hint on 'bs'; it is read back by
   bdrv_get_geometry_hint(). */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}
886
/* Record the device type on 'bs' and mark it removable exactly when the
   type is BDRV_TYPE_CDROM or BDRV_TYPE_FLOPPY. */
void bdrv_set_type_hint(BlockDriverState *bs, int type)
{
    bs->type = type;
    bs->removable = ((type == BDRV_TYPE_CDROM ||
                      type == BDRV_TYPE_FLOPPY));
}
893
/* Store the BIOS translation hint (a BIOS_ATA_TRANSLATION_* value in the
   callers visible in this file) on 'bs'. */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
898
/* Copy the geometry hint stored on 'bs' into the three output pointers.
   bdrv_guess_geometry() treats *pcyls == 0 as "no hint set". */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
906
/* Return the device type stored by bdrv_set_type_hint(). */
int bdrv_get_type_hint(BlockDriverState *bs)
{
    return bs->type;
}
911
/* Return the translation hint stored by bdrv_set_translation_hint(). */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}
916
/* Return the removable flag of 'bs' (set by bdrv_set_type_hint()). */
int bdrv_is_removable(BlockDriverState *bs)
{
    return bs->removable;
}
921
/* Return the read_only flag of 'bs'; bdrv_open2() sets it when it has to
   fall back to a read-only open. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
926
/* Return the sg flag of 'bs'.
   NOTE(review): presumably marks a SCSI generic device -- confirm against
   the driver that sets it. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
931
/* XXX: no longer used */
/* Register 'change_cb' and the 'opaque' value passed to it. In this file the
   callback is invoked, right after media_changed is set, by bdrv_open2(),
   bdrv_close() and bdrv_set_key(). */
void bdrv_set_change_cb(BlockDriverState *bs,
                        void (*change_cb)(void *opaque), void *opaque)
{
    bs->change_cb = change_cb;
    bs->change_opaque = opaque;
}
939
940 int bdrv_is_encrypted(BlockDriverState *bs)
941 {
942     if (bs->backing_hd && bs->backing_hd->encrypted)
943         return 1;
944     return bs->encrypted;
945 }
946
947 int bdrv_key_required(BlockDriverState *bs)
948 {
949     BlockDriverState *backing_hd = bs->backing_hd;
950
951     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
952         return 1;
953     return (bs->encrypted && !bs->valid_key);
954 }
955
956 int bdrv_set_key(BlockDriverState *bs, const char *key)
957 {
958     int ret;
959     if (bs->backing_hd && bs->backing_hd->encrypted) {
960         ret = bdrv_set_key(bs->backing_hd, key);
961         if (ret < 0)
962             return ret;
963         if (!bs->encrypted)
964             return 0;
965     }
966     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
967         return -1;
968     ret = bs->drv->bdrv_set_key(bs, key);
969     if (ret < 0) {
970         bs->valid_key = 0;
971     } else if (!bs->valid_key) {
972         bs->valid_key = 1;
973         /* call the change callback now, we skipped it on open */
974         bs->media_changed = 1;
975         if (bs->change_cb)
976             bs->change_cb(bs->change_opaque);
977     }
978     return ret;
979 }
980
981 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
982 {
983     if (!bs->drv) {
984         buf[0] = '\0';
985     } else {
986         pstrcpy(buf, buf_size, bs->drv->format_name);
987     }
988 }
989
990 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
991                          void *opaque)
992 {
993     BlockDriver *drv;
994
995     for (drv = first_drv; drv != NULL; drv = drv->next) {
996         it(opaque, drv->format_name);
997     }
998 }
999
1000 BlockDriverState *bdrv_find(const char *name)
1001 {
1002     BlockDriverState *bs;
1003
1004     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1005         if (!strcmp(name, bs->device_name))
1006             return bs;
1007     }
1008     return NULL;
1009 }
1010
1011 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1012 {
1013     BlockDriverState *bs;
1014
1015     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1016         it(opaque, bs);
1017     }
1018 }
1019
/* Return the device name given to bdrv_new(); the string is stored inside
   'bs' itself. */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
1024
1025 void bdrv_flush(BlockDriverState *bs)
1026 {
1027     if (!bs->drv)
1028         return;
1029     if (bs->drv->bdrv_flush)
1030         bs->drv->bdrv_flush(bs);
1031     if (bs->backing_hd)
1032         bdrv_flush(bs->backing_hd);
1033 }
1034
1035 void bdrv_flush_all(void)
1036 {
1037     BlockDriverState *bs;
1038
1039     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1040         if (bs->drv && !bdrv_is_read_only(bs) && 
1041             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1042             bdrv_flush(bs);
1043 }
1044
1045 /*
1046  * Returns true iff the specified sector is present in the disk image. Drivers
1047  * not implementing the functionality are assumed to not support backing files,
1048  * hence all their sectors are reported as allocated.
1049  *
1050  * 'pnum' is set to the number of sectors (including and immediately following
1051  * the specified sector) that are known to be in the same
1052  * allocated/unallocated state.
1053  *
1054  * 'nb_sectors' is the max value 'pnum' should be set to.
1055  */
1056 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1057         int *pnum)
1058 {
1059     int64_t n;
1060     if (!bs->drv->bdrv_is_allocated) {
1061         if (sector_num >= bs->total_sectors) {
1062             *pnum = 0;
1063             return 0;
1064         }
1065         n = bs->total_sectors - sector_num;
1066         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1067         return 1;
1068     }
1069     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1070 }
1071
1072 void bdrv_info(Monitor *mon)
1073 {
1074     BlockDriverState *bs;
1075
1076     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1077         monitor_printf(mon, "%s:", bs->device_name);
1078         monitor_printf(mon, " type=");
1079         switch(bs->type) {
1080         case BDRV_TYPE_HD:
1081             monitor_printf(mon, "hd");
1082             break;
1083         case BDRV_TYPE_CDROM:
1084             monitor_printf(mon, "cdrom");
1085             break;
1086         case BDRV_TYPE_FLOPPY:
1087             monitor_printf(mon, "floppy");
1088             break;
1089         }
1090         monitor_printf(mon, " removable=%d", bs->removable);
1091         if (bs->removable) {
1092             monitor_printf(mon, " locked=%d", bs->locked);
1093         }
1094         if (bs->drv) {
1095             monitor_printf(mon, " file=");
1096             monitor_print_filename(mon, bs->filename);
1097             if (bs->backing_file[0] != '\0') {
1098                 monitor_printf(mon, " backing_file=");
1099                 monitor_print_filename(mon, bs->backing_file);
1100             }
1101             monitor_printf(mon, " ro=%d", bs->read_only);
1102             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1103             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1104         } else {
1105             monitor_printf(mon, " [not inserted]");
1106         }
1107         monitor_printf(mon, "\n");
1108     }
1109 }
1110
1111 /* The "info blockstats" command. */
1112 void bdrv_info_stats(Monitor *mon)
1113 {
1114     BlockDriverState *bs;
1115
1116     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1117         monitor_printf(mon, "%s:"
1118                        " rd_bytes=%" PRIu64
1119                        " wr_bytes=%" PRIu64
1120                        " rd_operations=%" PRIu64
1121                        " wr_operations=%" PRIu64
1122                        "\n",
1123                        bs->device_name,
1124                        bs->rd_bytes, bs->wr_bytes,
1125                        bs->rd_ops, bs->wr_ops);
1126     }
1127 }
1128
1129 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1130 {
1131     if (bs->backing_hd && bs->backing_hd->encrypted)
1132         return bs->backing_file;
1133     else if (bs->encrypted)
1134         return bs->filename;
1135     else
1136         return NULL;
1137 }
1138
1139 void bdrv_get_backing_filename(BlockDriverState *bs,
1140                                char *filename, int filename_size)
1141 {
1142     if (!bs->backing_hd) {
1143         pstrcpy(filename, filename_size, "");
1144     } else {
1145         pstrcpy(filename, filename_size, bs->backing_file);
1146     }
1147 }
1148
1149 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1150                           const uint8_t *buf, int nb_sectors)
1151 {
1152     BlockDriver *drv = bs->drv;
1153     if (!drv)
1154         return -ENOMEDIUM;
1155     if (!drv->bdrv_write_compressed)
1156         return -ENOTSUP;
1157     if (bdrv_check_request(bs, sector_num, nb_sectors))
1158         return -EIO;
1159     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1160 }
1161
1162 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1163 {
1164     BlockDriver *drv = bs->drv;
1165     if (!drv)
1166         return -ENOMEDIUM;
1167     if (!drv->bdrv_get_info)
1168         return -ENOTSUP;
1169     memset(bdi, 0, sizeof(*bdi));
1170     return drv->bdrv_get_info(bs, bdi);
1171 }
1172
1173 int bdrv_put_buffer(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size)
1174 {
1175     BlockDriver *drv = bs->drv;
1176     if (!drv)
1177         return -ENOMEDIUM;
1178     if (!drv->bdrv_put_buffer)
1179         return -ENOTSUP;
1180     return drv->bdrv_put_buffer(bs, buf, pos, size);
1181 }
1182
1183 int bdrv_get_buffer(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size)
1184 {
1185     BlockDriver *drv = bs->drv;
1186     if (!drv)
1187         return -ENOMEDIUM;
1188     if (!drv->bdrv_get_buffer)
1189         return -ENOTSUP;
1190     return drv->bdrv_get_buffer(bs, buf, pos, size);
1191 }
1192
1193 /**************************************************************/
1194 /* handling of snapshots */
1195
1196 int bdrv_snapshot_create(BlockDriverState *bs,
1197                          QEMUSnapshotInfo *sn_info)
1198 {
1199     BlockDriver *drv = bs->drv;
1200     if (!drv)
1201         return -ENOMEDIUM;
1202     if (!drv->bdrv_snapshot_create)
1203         return -ENOTSUP;
1204     return drv->bdrv_snapshot_create(bs, sn_info);
1205 }
1206
1207 int bdrv_snapshot_goto(BlockDriverState *bs,
1208                        const char *snapshot_id)
1209 {
1210     BlockDriver *drv = bs->drv;
1211     if (!drv)
1212         return -ENOMEDIUM;
1213     if (!drv->bdrv_snapshot_goto)
1214         return -ENOTSUP;
1215     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1216 }
1217
1218 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1219 {
1220     BlockDriver *drv = bs->drv;
1221     if (!drv)
1222         return -ENOMEDIUM;
1223     if (!drv->bdrv_snapshot_delete)
1224         return -ENOTSUP;
1225     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1226 }
1227
1228 int bdrv_snapshot_list(BlockDriverState *bs,
1229                        QEMUSnapshotInfo **psn_info)
1230 {
1231     BlockDriver *drv = bs->drv;
1232     if (!drv)
1233         return -ENOMEDIUM;
1234     if (!drv->bdrv_snapshot_list)
1235         return -ENOTSUP;
1236     return drv->bdrv_snapshot_list(bs, psn_info);
1237 }
1238
#define NB_SUFFIXES 4

/*
 * Format 'size' as a short human-readable string in 'buf' (at most
 * 'buf_size' bytes, via snprintf) and return 'buf'.
 *
 * Values up to 999 (including negative ones) are printed as plain integers.
 * Larger values are scaled by powers of 1024 and suffixed with K, M, G or T:
 * one decimal place is shown while the scaled value is below 10, otherwise
 * the value is rounded to the nearest integer.  Anything too large for the
 * other suffixes is expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = { 'K', 'M', 'G', 'T' };
    int64_t base;
    int64_t units;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    base = 1024;
    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            break;
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /*
             * Round to nearest using quotient and remainder.  Adding
             * base / 2 to 'size' first would overflow int64_t for values
             * close to INT64_MAX.
             */
            units = size / base;
            if (size % base >= (base >> 1)) {
                units++;
            }
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     units,
                     suffixes[i]);
            break;
        }
        base = base * 1024;
    }
    return buf;
}
1268
1269 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1270 {
1271     char buf1[128], date_buf[128], clock_buf[128];
1272 #ifdef _WIN32
1273     struct tm *ptm;
1274 #else
1275     struct tm tm;
1276 #endif
1277     time_t ti;
1278     int64_t secs;
1279
1280     if (!sn) {
1281         snprintf(buf, buf_size,
1282                  "%-10s%-20s%7s%20s%15s",
1283                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1284     } else {
1285         ti = sn->date_sec;
1286 #ifdef _WIN32
1287         ptm = localtime(&ti);
1288         strftime(date_buf, sizeof(date_buf),
1289                  "%Y-%m-%d %H:%M:%S", ptm);
1290 #else
1291         localtime_r(&ti, &tm);
1292         strftime(date_buf, sizeof(date_buf),
1293                  "%Y-%m-%d %H:%M:%S", &tm);
1294 #endif
1295         secs = sn->vm_clock_nsec / 1000000000;
1296         snprintf(clock_buf, sizeof(clock_buf),
1297                  "%02d:%02d:%02d.%03d",
1298                  (int)(secs / 3600),
1299                  (int)((secs / 60) % 60),
1300                  (int)(secs % 60),
1301                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1302         snprintf(buf, buf_size,
1303                  "%-10s%-20s%7s%20s%15s",
1304                  sn->id_str, sn->name,
1305                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1306                  date_buf,
1307                  clock_buf);
1308     }
1309     return buf;
1310 }
1311
1312
1313 /**************************************************************/
1314 /* async I/Os */
1315
1316 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1317                                  QEMUIOVector *qiov, int nb_sectors,
1318                                  BlockDriverCompletionFunc *cb, void *opaque)
1319 {
1320     BlockDriver *drv = bs->drv;
1321     BlockDriverAIOCB *ret;
1322
1323     if (!drv)
1324         return NULL;
1325     if (bdrv_check_request(bs, sector_num, nb_sectors))
1326         return NULL;
1327
1328     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1329                               cb, opaque);
1330
1331     if (ret) {
1332         /* Update stats even though technically transfer has not happened. */
1333         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1334         bs->rd_ops ++;
1335     }
1336
1337     return ret;
1338 }
1339
1340 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1341                                   QEMUIOVector *qiov, int nb_sectors,
1342                                   BlockDriverCompletionFunc *cb, void *opaque)
1343 {
1344     BlockDriver *drv = bs->drv;
1345     BlockDriverAIOCB *ret;
1346
1347     if (!drv)
1348         return NULL;
1349     if (bs->read_only)
1350         return NULL;
1351     if (bdrv_check_request(bs, sector_num, nb_sectors))
1352         return NULL;
1353
1354     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1355                                cb, opaque);
1356
1357     if (ret) {
1358         /* Update stats even though technically transfer has not happened. */
1359         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1360         bs->wr_ops ++;
1361     }
1362
1363     return ret;
1364 }
1365
1366 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1367 {
1368     acb->pool->cancel(acb);
1369 }
1370
1371
1372 /**************************************************************/
1373 /* async block device emulation */
1374
1375 static void bdrv_aio_bh_cb(void *opaque)
1376 {
1377     BlockDriverAIOCBSync *acb = opaque;
1378
1379     if (!acb->is_write)
1380         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
1381     qemu_vfree(acb->bounce);
1382     acb->common.cb(acb->common.opaque, acb->ret);
1383
1384     qemu_aio_release(acb);
1385 }
1386
1387 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1388                                             int64_t sector_num,
1389                                             QEMUIOVector *qiov,
1390                                             int nb_sectors,
1391                                             BlockDriverCompletionFunc *cb,
1392                                             void *opaque,
1393                                             int is_write)
1394
1395 {
1396     BlockDriverAIOCBSync *acb;
1397
1398     acb = qemu_aio_get(bs, cb, opaque);
1399     acb->is_write = is_write;
1400     acb->qiov = qiov;
1401     acb->bounce = qemu_blockalign(bs, qiov->size);
1402
1403     if (!acb->bh)
1404         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1405
1406     if (is_write) {
1407         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1408         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
1409     } else {
1410         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
1411     }
1412
1413     qemu_bh_schedule(acb->bh);
1414
1415     return &acb->common;
1416 }
1417
1418 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
1419         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1420         BlockDriverCompletionFunc *cb, void *opaque)
1421 {
1422     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
1423 }
1424
1425 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
1426         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1427         BlockDriverCompletionFunc *cb, void *opaque)
1428 {
1429     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
1430 }
1431
1432
1433 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1434 {
1435     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1436     qemu_bh_cancel(acb->bh);
1437     qemu_aio_release(acb);
1438 }
1439
1440 /**************************************************************/
1441 /* sync block device emulation */
1442
/*
 * Completion callback for the synchronous emulation helpers: 'opaque'
 * points to an int that receives the result 'ret'.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1447
1448 #define NOT_DONE 0x7fffffff
1449
1450 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1451                         uint8_t *buf, int nb_sectors)
1452 {
1453     int async_ret;
1454     BlockDriverAIOCB *acb;
1455     struct iovec iov;
1456     QEMUIOVector qiov;
1457
1458     async_ret = NOT_DONE;
1459     iov.iov_base = (void *)buf;
1460     iov.iov_len = nb_sectors * 512;
1461     qemu_iovec_init_external(&qiov, &iov, 1);
1462     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
1463         bdrv_rw_em_cb, &async_ret);
1464     if (acb == NULL)
1465         return -1;
1466
1467     while (async_ret == NOT_DONE) {
1468         qemu_aio_wait();
1469     }
1470
1471     return async_ret;
1472 }
1473
1474 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1475                          const uint8_t *buf, int nb_sectors)
1476 {
1477     int async_ret;
1478     BlockDriverAIOCB *acb;
1479     struct iovec iov;
1480     QEMUIOVector qiov;
1481
1482     async_ret = NOT_DONE;
1483     iov.iov_base = (void *)buf;
1484     iov.iov_len = nb_sectors * 512;
1485     qemu_iovec_init_external(&qiov, &iov, 1);
1486     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
1487         bdrv_rw_em_cb, &async_ret);
1488     if (acb == NULL)
1489         return -1;
1490     while (async_ret == NOT_DONE) {
1491         qemu_aio_wait();
1492     }
1493     return async_ret;
1494 }
1495
1496 void bdrv_init(void)
1497 {
1498     module_call_init(MODULE_INIT_BLOCK);
1499 }
1500
1501 void aio_pool_init(AIOPool *pool, int aiocb_size,
1502                    void (*cancel)(BlockDriverAIOCB *acb))
1503 {
1504     pool->aiocb_size = aiocb_size;
1505     pool->cancel = cancel;
1506     pool->free_aiocb = NULL;
1507 }
1508
1509 void *qemu_aio_get_pool(AIOPool *pool, BlockDriverState *bs,
1510                         BlockDriverCompletionFunc *cb, void *opaque)
1511 {
1512     BlockDriverAIOCB *acb;
1513
1514     if (pool->free_aiocb) {
1515         acb = pool->free_aiocb;
1516         pool->free_aiocb = acb->next;
1517     } else {
1518         acb = qemu_mallocz(pool->aiocb_size);
1519         acb->pool = pool;
1520     }
1521     acb->bs = bs;
1522     acb->cb = cb;
1523     acb->opaque = opaque;
1524     return acb;
1525 }
1526
1527 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1528                    void *opaque)
1529 {
1530     return qemu_aio_get_pool(&bs->drv->aio_pool, bs, cb, opaque);
1531 }
1532
1533 void qemu_aio_release(void *p)
1534 {
1535     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1536     AIOPool *pool = acb->pool;
1537     acb->next = pool->free_aiocb;
1538     pool->free_aiocb = acb;
1539 }
1540
1541 /**************************************************************/
1542 /* removable device support */
1543
1544 /**
1545  * Return TRUE if the media is present
1546  */
1547 int bdrv_is_inserted(BlockDriverState *bs)
1548 {
1549     BlockDriver *drv = bs->drv;
1550     int ret;
1551     if (!drv)
1552         return 0;
1553     if (!drv->bdrv_is_inserted)
1554         return 1;
1555     ret = drv->bdrv_is_inserted(bs);
1556     return ret;
1557 }
1558
1559 /**
1560  * Return TRUE if the media changed since the last call to this
1561  * function. It is currently only used for floppy disks
1562  */
1563 int bdrv_media_changed(BlockDriverState *bs)
1564 {
1565     BlockDriver *drv = bs->drv;
1566     int ret;
1567
1568     if (!drv || !drv->bdrv_media_changed)
1569         ret = -ENOTSUP;
1570     else
1571         ret = drv->bdrv_media_changed(bs);
1572     if (ret == -ENOTSUP)
1573         ret = bs->media_changed;
1574     bs->media_changed = 0;
1575     return ret;
1576 }
1577
1578 /**
1579  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1580  */
1581 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1582 {
1583     BlockDriver *drv = bs->drv;
1584     int ret;
1585
1586     if (!drv || !drv->bdrv_eject) {
1587         ret = -ENOTSUP;
1588     } else {
1589         ret = drv->bdrv_eject(bs, eject_flag);
1590     }
1591     if (ret == -ENOTSUP) {
1592         if (eject_flag)
1593             bdrv_close(bs);
1594     }
1595 }
1596
1597 int bdrv_is_locked(BlockDriverState *bs)
1598 {
1599     return bs->locked;
1600 }
1601
1602 /**
1603  * Lock or unlock the media (if it is locked, the user won't be able
1604  * to eject it manually).
1605  */
1606 void bdrv_set_locked(BlockDriverState *bs, int locked)
1607 {
1608     BlockDriver *drv = bs->drv;
1609
1610     bs->locked = locked;
1611     if (drv && drv->bdrv_set_locked) {
1612         drv->bdrv_set_locked(bs, locked);
1613     }
1614 }
1615
1616 /* needed for generic scsi interface */
1617
1618 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1619 {
1620     BlockDriver *drv = bs->drv;
1621
1622     if (drv && drv->bdrv_ioctl)
1623         return drv->bdrv_ioctl(bs, req, buf);
1624     return -ENOTSUP;
1625 }
1626
1627 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1628         unsigned long int req, void *buf,
1629         BlockDriverCompletionFunc *cb, void *opaque)
1630 {
1631     BlockDriver *drv = bs->drv;
1632
1633     if (drv && drv->bdrv_aio_ioctl)
1634         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1635     return NULL;
1636 }
1637
1638 void *qemu_blockalign(BlockDriverState *bs, size_t size)
1639 {
1640     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
1641 }