monitor: Rework early disk password inquiry (Jan Kiszka)
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef _BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "console.h"
32 #include "block_int.h"
33
34 #ifdef _BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #include <sys/disk.h>
39 #endif
40
/* Sector granularity used by the helpers in this file:
   SECTOR_SIZE = 1 << SECTOR_BITS = 512 bytes. */
#define SECTOR_BITS 9
#define SECTOR_SIZE (1 << SECTOR_BITS)

/* AIO control block of the AIO emulation layer; bdrv_register() stores
   sizeof(BlockDriverAIOCBSync) in aiocb_size when it installs the
   bdrv_aio_*_em callbacks. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
} BlockDriverAIOCBSync;
49
/* Emulation helpers installed by bdrv_register() for drivers that lack
   either the AIO or the synchronous sector callbacks. */
static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
        int64_t sector_num, uint8_t *buf, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
        int64_t sector_num, const uint8_t *buf, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);

/* head of the list of named block devices (see bdrv_new()) */
BlockDriverState *bdrv_first;

/* head of the list of registered drivers (see bdrv_register()) */
static BlockDriver *first_drv;
65
/* Return non-zero if 'path' is absolute. Everything up to and including
   the first ':' (if any) is skipped before the first character is
   examined, so "proto:/x" counts as absolute. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\')
        return 1;
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    if (start[0] == '\\')
        return 1;
#endif
    return (start[0] == '/');
}
85
/* Write into 'dest' (capacity 'dest_size') the location of 'filename'.
   An absolute filename is copied unchanged. Otherwise it is appended to
   the leading part of 'base_path', which ends right after the last
   directory separator or after the first ':' (protocol prefix),
   whichever is later. URLs are supported. Nothing is written when
   dest_size <= 0. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep;
    int keep_len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past an optional "proto:" prefix */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last directory separator */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash;
        bslash = strrchr(base_path, '\\');
        if (!after_sep || bslash > after_sep)
            after_sep = bslash;
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    if (after_sep > after_proto)
        after_proto = after_sep;

    /* copy the retained prefix, clamped to the destination capacity */
    keep_len = after_proto - base_path;
    if (keep_len > dest_size - 1)
        keep_len = dest_size - 1;
    memcpy(dest, base_path, keep_len);
    dest[keep_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
129
130
131 static void bdrv_register(BlockDriver *bdrv)
132 {
133     if (!bdrv->bdrv_aio_read) {
134         /* add AIO emulation layer */
135         bdrv->bdrv_aio_read = bdrv_aio_read_em;
136         bdrv->bdrv_aio_write = bdrv_aio_write_em;
137         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
138         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
139     } else if (!bdrv->bdrv_read && !bdrv->bdrv_pread) {
140         /* add synchronous IO emulation layer */
141         bdrv->bdrv_read = bdrv_read_em;
142         bdrv->bdrv_write = bdrv_write_em;
143     }
144     bdrv->next = first_drv;
145     first_drv = bdrv;
146 }
147
148 /* create a new block device (by default it is empty) */
149 BlockDriverState *bdrv_new(const char *device_name)
150 {
151     BlockDriverState **pbs, *bs;
152
153     bs = qemu_mallocz(sizeof(BlockDriverState));
154     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
155     if (device_name[0] != '\0') {
156         /* insert at the end */
157         pbs = &bdrv_first;
158         while (*pbs != NULL)
159             pbs = &(*pbs)->next;
160         *pbs = bs;
161     }
162     return bs;
163 }
164
165 BlockDriver *bdrv_find_format(const char *format_name)
166 {
167     BlockDriver *drv1;
168     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
169         if (!strcmp(drv1->format_name, format_name))
170             return drv1;
171     }
172     return NULL;
173 }
174
175 int bdrv_create(BlockDriver *drv,
176                 const char *filename, int64_t size_in_sectors,
177                 const char *backing_file, int flags)
178 {
179     if (!drv->bdrv_create)
180         return -ENOTSUP;
181     return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
182 }
183
#ifdef _WIN32
/* Store the name of a fresh temporary file in 'filename'.
   NOTE(review): 'size' is not honoured here; GetTempFileName() is given
   no length, so the caller's buffer is presumably expected to hold
   MAX_PATH characters -- confirm against callers. */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/* Store in 'filename' (capacity 'size') the name of a temporary file
   created with mkstemp() under $TMPDIR, or /tmp if TMPDIR is unset.
   The file is closed again immediately; only its name is handed back. */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; do not pass that to close() */
    if (fd >= 0)
        close(fd);
}
#endif
206
#ifdef _WIN32
/* Return non-zero if 'filename' begins with an ASCII letter followed by
   ':' (e.g. "c:"). */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];
    int is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

    return (is_letter && filename[1] == ':');
}

/* Return 1 if 'filename' is exactly a drive specification such as "d:",
   or starts with "\\.\" or "//./"; 0 otherwise. */
static int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL))
        return 1;
    if (strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
226
227 static BlockDriver *find_protocol(const char *filename)
228 {
229     BlockDriver *drv1;
230     char protocol[128];
231     int len;
232     const char *p;
233
234 #ifdef _WIN32
235     if (is_windows_drive(filename) ||
236         is_windows_drive_prefix(filename))
237         return &bdrv_raw;
238 #endif
239     p = strchr(filename, ':');
240     if (!p)
241         return &bdrv_raw;
242     len = p - filename;
243     if (len > sizeof(protocol) - 1)
244         len = sizeof(protocol) - 1;
245     memcpy(protocol, filename, len);
246     protocol[len] = '\0';
247     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
248         if (drv1->protocol_name &&
249             !strcmp(drv1->protocol_name, protocol))
250             return drv1;
251     }
252     return NULL;
253 }
254
255 /* XXX: force raw format if block or character device ? It would
256    simplify the BSD case */
257 static BlockDriver *find_image_format(const char *filename)
258 {
259     int ret, score, score_max;
260     BlockDriver *drv1, *drv;
261     uint8_t buf[2048];
262     BlockDriverState *bs;
263
264     /* detect host devices. By convention, /dev/cdrom[N] is always
265        recognized as a host CDROM */
266     if (strstart(filename, "/dev/cdrom", NULL))
267         return &bdrv_host_device;
268 #ifdef _WIN32
269     if (is_windows_drive(filename))
270         return &bdrv_host_device;
271 #else
272     {
273         struct stat st;
274         if (stat(filename, &st) >= 0 &&
275             (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
276             return &bdrv_host_device;
277         }
278     }
279 #endif
280
281     drv = find_protocol(filename);
282     /* no need to test disk image formats for vvfat */
283     if (drv == &bdrv_vvfat)
284         return drv;
285
286     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
287     if (ret < 0)
288         return NULL;
289     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
290     bdrv_delete(bs);
291     if (ret < 0) {
292         return NULL;
293     }
294
295     score_max = 0;
296     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
297         if (drv1->bdrv_probe) {
298             score = drv1->bdrv_probe(buf, ret, filename);
299             if (score > score_max) {
300                 score_max = score;
301                 drv = drv1;
302             }
303         }
304     }
305     return drv;
306 }
307
308 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
309 {
310     BlockDriverState *bs;
311     int ret;
312
313     bs = bdrv_new("");
314     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
315     if (ret < 0) {
316         bdrv_delete(bs);
317         return ret;
318     }
319     bs->growable = 1;
320     *pbs = bs;
321     return 0;
322 }
323
324 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
325 {
326     return bdrv_open2(bs, filename, flags, NULL);
327 }
328
329 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
330                BlockDriver *drv)
331 {
332     int ret, open_flags;
333     char tmp_filename[PATH_MAX];
334     char backing_filename[PATH_MAX];
335
336     bs->read_only = 0;
337     bs->is_temporary = 0;
338     bs->encrypted = 0;
339     bs->valid_key = 0;
340
341     if (flags & BDRV_O_SNAPSHOT) {
342         BlockDriverState *bs1;
343         int64_t total_size;
344         int is_protocol = 0;
345
346         /* if snapshot, we create a temporary backing file and open it
347            instead of opening 'filename' directly */
348
349         /* if there is a backing file, use it */
350         bs1 = bdrv_new("");
351         ret = bdrv_open(bs1, filename, 0);
352         if (ret < 0) {
353             bdrv_delete(bs1);
354             return ret;
355         }
356         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
357
358         if (bs1->drv && bs1->drv->protocol_name)
359             is_protocol = 1;
360
361         bdrv_delete(bs1);
362
363         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
364
365         /* Real path is meaningless for protocols */
366         if (is_protocol)
367             snprintf(backing_filename, sizeof(backing_filename),
368                      "%s", filename);
369         else
370             realpath(filename, backing_filename);
371
372         ret = bdrv_create(&bdrv_qcow2, tmp_filename,
373                           total_size, backing_filename, 0);
374         if (ret < 0) {
375             return ret;
376         }
377         filename = tmp_filename;
378         bs->is_temporary = 1;
379     }
380
381     pstrcpy(bs->filename, sizeof(bs->filename), filename);
382     if (flags & BDRV_O_FILE) {
383         drv = find_protocol(filename);
384     } else if (!drv) {
385         drv = find_image_format(filename);
386     }
387     if (!drv) {
388         ret = -ENOENT;
389         goto unlink_and_fail;
390     }
391     bs->drv = drv;
392     bs->opaque = qemu_mallocz(drv->instance_size);
393     /* Note: for compatibility, we open disk image files as RDWR, and
394        RDONLY as fallback */
395     if (!(flags & BDRV_O_FILE))
396         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
397     else
398         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
399     ret = drv->bdrv_open(bs, filename, open_flags);
400     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
401         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
402         bs->read_only = 1;
403     }
404     if (ret < 0) {
405         qemu_free(bs->opaque);
406         bs->opaque = NULL;
407         bs->drv = NULL;
408     unlink_and_fail:
409         if (bs->is_temporary)
410             unlink(filename);
411         return ret;
412     }
413     if (drv->bdrv_getlength) {
414         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
415     }
416 #ifndef _WIN32
417     if (bs->is_temporary) {
418         unlink(filename);
419     }
420 #endif
421     if (bs->backing_file[0] != '\0') {
422         /* if there is a backing file, use it */
423         bs->backing_hd = bdrv_new("");
424         path_combine(backing_filename, sizeof(backing_filename),
425                      filename, bs->backing_file);
426         ret = bdrv_open(bs->backing_hd, backing_filename, open_flags);
427         if (ret < 0) {
428             bdrv_close(bs);
429             return ret;
430         }
431     }
432
433     /* call the change callback */
434     bs->media_changed = 1;
435     if (bs->change_cb)
436         bs->change_cb(bs->change_opaque);
437
438     return 0;
439 }
440
441 void bdrv_close(BlockDriverState *bs)
442 {
443     if (bs->drv) {
444         if (bs->backing_hd)
445             bdrv_delete(bs->backing_hd);
446         bs->drv->bdrv_close(bs);
447         qemu_free(bs->opaque);
448 #ifdef _WIN32
449         if (bs->is_temporary) {
450             unlink(bs->filename);
451         }
452 #endif
453         bs->opaque = NULL;
454         bs->drv = NULL;
455
456         /* call the change callback */
457         bs->media_changed = 1;
458         if (bs->change_cb)
459             bs->change_cb(bs->change_opaque);
460     }
461 }
462
463 void bdrv_delete(BlockDriverState *bs)
464 {
465     BlockDriverState **pbs;
466
467     pbs = &bdrv_first;
468     while (*pbs != bs && *pbs != NULL)
469         pbs = &(*pbs)->next;
470     if (*pbs == bs)
471         *pbs = bs->next;
472
473     bdrv_close(bs);
474     qemu_free(bs);
475 }
476
477 /* commit COW file into the raw image */
478 int bdrv_commit(BlockDriverState *bs)
479 {
480     BlockDriver *drv = bs->drv;
481     int64_t i, total_sectors;
482     int n, j;
483     unsigned char sector[512];
484
485     if (!drv)
486         return -ENOMEDIUM;
487
488     if (bs->read_only) {
489         return -EACCES;
490     }
491
492     if (!bs->backing_hd) {
493         return -ENOTSUP;
494     }
495
496     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
497     for (i = 0; i < total_sectors;) {
498         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
499             for(j = 0; j < n; j++) {
500                 if (bdrv_read(bs, i, sector, 1) != 0) {
501                     return -EIO;
502                 }
503
504                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
505                     return -EIO;
506                 }
507                 i++;
508             }
509         } else {
510             i += n;
511         }
512     }
513
514     if (drv->bdrv_make_empty)
515         return drv->bdrv_make_empty(bs);
516
517     return 0;
518 }
519
520 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
521                                    size_t size)
522 {
523     int64_t len;
524
525     if (!bdrv_is_inserted(bs))
526         return -ENOMEDIUM;
527
528     if (bs->growable)
529         return 0;
530
531     len = bdrv_getlength(bs);
532
533     if ((offset + size) > len)
534         return -EIO;
535
536     return 0;
537 }
538
539 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
540                               int nb_sectors)
541 {
542     int64_t offset;
543
544     /* Deal with byte accesses */
545     if (sector_num < 0)
546         offset = -sector_num;
547     else
548         offset = sector_num * 512;
549
550     return bdrv_check_byte_request(bs, offset, nb_sectors * 512);
551 }
552
553 /* return < 0 if error. See bdrv_write() for the return codes */
554 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
555               uint8_t *buf, int nb_sectors)
556 {
557     BlockDriver *drv = bs->drv;
558
559     if (!drv)
560         return -ENOMEDIUM;
561     if (bdrv_check_request(bs, sector_num, nb_sectors))
562         return -EIO;
563
564     if (drv->bdrv_pread) {
565         int ret, len;
566         len = nb_sectors * 512;
567         ret = drv->bdrv_pread(bs, sector_num * 512, buf, len);
568         if (ret < 0)
569             return ret;
570         else if (ret != len)
571             return -EINVAL;
572         else {
573             bs->rd_bytes += (unsigned) len;
574             bs->rd_ops ++;
575             return 0;
576         }
577     } else {
578         return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
579     }
580 }
581
582 /* Return < 0 if error. Important errors are:
583   -EIO         generic I/O error (may happen for all errors)
584   -ENOMEDIUM   No media inserted.
585   -EINVAL      Invalid sector number or nb_sectors
586   -EACCES      Trying to write a read-only device
587 */
588 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
589                const uint8_t *buf, int nb_sectors)
590 {
591     BlockDriver *drv = bs->drv;
592     if (!bs->drv)
593         return -ENOMEDIUM;
594     if (bs->read_only)
595         return -EACCES;
596     if (bdrv_check_request(bs, sector_num, nb_sectors))
597         return -EIO;
598
599     if (drv->bdrv_pwrite) {
600         int ret, len, count = 0;
601         len = nb_sectors * 512;
602         do {
603             ret = drv->bdrv_pwrite(bs, sector_num * 512, buf, len - count);
604             if (ret < 0) {
605                 printf("bdrv_write ret=%d\n", ret);
606                 return ret;
607             }
608             count += ret;
609             buf += ret;
610         } while (count != len);
611         bs->wr_bytes += (unsigned) len;
612         bs->wr_ops ++;
613         return 0;
614     }
615     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
616 }
617
618 static int bdrv_pread_em(BlockDriverState *bs, int64_t offset,
619                          uint8_t *buf, int count1)
620 {
621     uint8_t tmp_buf[SECTOR_SIZE];
622     int len, nb_sectors, count;
623     int64_t sector_num;
624
625     count = count1;
626     /* first read to align to sector start */
627     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
628     if (len > count)
629         len = count;
630     sector_num = offset >> SECTOR_BITS;
631     if (len > 0) {
632         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
633             return -EIO;
634         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
635         count -= len;
636         if (count == 0)
637             return count1;
638         sector_num++;
639         buf += len;
640     }
641
642     /* read the sectors "in place" */
643     nb_sectors = count >> SECTOR_BITS;
644     if (nb_sectors > 0) {
645         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
646             return -EIO;
647         sector_num += nb_sectors;
648         len = nb_sectors << SECTOR_BITS;
649         buf += len;
650         count -= len;
651     }
652
653     /* add data from the last sector */
654     if (count > 0) {
655         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
656             return -EIO;
657         memcpy(buf, tmp_buf, count);
658     }
659     return count1;
660 }
661
662 static int bdrv_pwrite_em(BlockDriverState *bs, int64_t offset,
663                           const uint8_t *buf, int count1)
664 {
665     uint8_t tmp_buf[SECTOR_SIZE];
666     int len, nb_sectors, count;
667     int64_t sector_num;
668
669     count = count1;
670     /* first write to align to sector start */
671     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
672     if (len > count)
673         len = count;
674     sector_num = offset >> SECTOR_BITS;
675     if (len > 0) {
676         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
677             return -EIO;
678         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
679         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
680             return -EIO;
681         count -= len;
682         if (count == 0)
683             return count1;
684         sector_num++;
685         buf += len;
686     }
687
688     /* write the sectors "in place" */
689     nb_sectors = count >> SECTOR_BITS;
690     if (nb_sectors > 0) {
691         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
692             return -EIO;
693         sector_num += nb_sectors;
694         len = nb_sectors << SECTOR_BITS;
695         buf += len;
696         count -= len;
697     }
698
699     /* add data from the last sector */
700     if (count > 0) {
701         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
702             return -EIO;
703         memcpy(tmp_buf, buf, count);
704         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
705             return -EIO;
706     }
707     return count1;
708 }
709
710 /**
711  * Read with byte offsets (needed only for file protocols)
712  */
713 int bdrv_pread(BlockDriverState *bs, int64_t offset,
714                void *buf1, int count1)
715 {
716     BlockDriver *drv = bs->drv;
717
718     if (!drv)
719         return -ENOMEDIUM;
720     if (bdrv_check_byte_request(bs, offset, count1))
721         return -EIO;
722
723     if (!drv->bdrv_pread)
724         return bdrv_pread_em(bs, offset, buf1, count1);
725     return drv->bdrv_pread(bs, offset, buf1, count1);
726 }
727
728 /**
729  * Write with byte offsets (needed only for file protocols)
730  */
731 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
732                 const void *buf1, int count1)
733 {
734     BlockDriver *drv = bs->drv;
735
736     if (!drv)
737         return -ENOMEDIUM;
738     if (bdrv_check_byte_request(bs, offset, count1))
739         return -EIO;
740
741     if (!drv->bdrv_pwrite)
742         return bdrv_pwrite_em(bs, offset, buf1, count1);
743     return drv->bdrv_pwrite(bs, offset, buf1, count1);
744 }
745
746 /**
747  * Truncate file to 'offset' bytes (needed only for file protocols)
748  */
749 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
750 {
751     BlockDriver *drv = bs->drv;
752     if (!drv)
753         return -ENOMEDIUM;
754     if (!drv->bdrv_truncate)
755         return -ENOTSUP;
756     return drv->bdrv_truncate(bs, offset);
757 }
758
759 /**
760  * Length of a file in bytes. Return < 0 if error or unknown.
761  */
762 int64_t bdrv_getlength(BlockDriverState *bs)
763 {
764     BlockDriver *drv = bs->drv;
765     if (!drv)
766         return -ENOMEDIUM;
767     if (!drv->bdrv_getlength) {
768         /* legacy mode */
769         return bs->total_sectors * SECTOR_SIZE;
770     }
771     return drv->bdrv_getlength(bs);
772 }
773
774 /* return 0 as number of sectors if no device present or error */
775 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
776 {
777     int64_t length;
778     length = bdrv_getlength(bs);
779     if (length < 0)
780         length = 0;
781     else
782         length = length >> SECTOR_BITS;
783     *nb_sectors_ptr = length;
784 }
785
/* One entry of the MSDOS partition table as stored on disk (packed, 16
   bytes). guess_disk_lchs() reads four of these starting at byte 0x1be
   of sector 0; nr_sects is converted there with le32_to_cpu(). */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));
798
799 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
800 static int guess_disk_lchs(BlockDriverState *bs,
801                            int *pcylinders, int *pheads, int *psectors)
802 {
803     uint8_t buf[512];
804     int ret, i, heads, sectors, cylinders;
805     struct partition *p;
806     uint32_t nr_sects;
807     uint64_t nb_sectors;
808
809     bdrv_get_geometry(bs, &nb_sectors);
810
811     ret = bdrv_read(bs, 0, buf, 1);
812     if (ret < 0)
813         return -1;
814     /* test msdos magic */
815     if (buf[510] != 0x55 || buf[511] != 0xaa)
816         return -1;
817     for(i = 0; i < 4; i++) {
818         p = ((struct partition *)(buf + 0x1be)) + i;
819         nr_sects = le32_to_cpu(p->nr_sects);
820         if (nr_sects && p->end_head) {
821             /* We make the assumption that the partition terminates on
822                a cylinder boundary */
823             heads = p->end_head + 1;
824             sectors = p->end_sector & 63;
825             if (sectors == 0)
826                 continue;
827             cylinders = nb_sectors / (heads * sectors);
828             if (cylinders < 1 || cylinders > 16383)
829                 continue;
830             *pheads = heads;
831             *psectors = sectors;
832             *pcylinders = cylinders;
833 #if 0
834             printf("guessed geometry: LCHS=%d %d %d\n",
835                    cylinders, heads, sectors);
836 #endif
837             return 0;
838         }
839     }
840     return -1;
841 }
842
/* Determine the cylinders/heads/sectors geometry to expose for 'bs' and
   store it in *pcyls, *pheads and *psecs.
   - If a geometry hint is already set (cylinders != 0) it is returned
     unchanged.
   - Otherwise the geometry is guessed from the partition table; a guess
     with more than 16 heads is discarded in favour of the default
     16 heads x 63 sectors geometry.
   - When the translation hint is BIOS_ATA_TRANSLATION_AUTO it may be
     updated to NONE, LARGE or LBA depending on the outcome.
   A newly computed geometry is recorded with bdrv_set_geometry_hint(). */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                /* jumps into the else-branch below */
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            /* clamp the cylinder count to the range 2..16383 */
            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            /* only reached with lba_detected set via the goto above */
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* remember the result so the next call takes the hint path */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
901
902 void bdrv_set_geometry_hint(BlockDriverState *bs,
903                             int cyls, int heads, int secs)
904 {
905     bs->cyls = cyls;
906     bs->heads = heads;
907     bs->secs = secs;
908 }
909
910 void bdrv_set_type_hint(BlockDriverState *bs, int type)
911 {
912     bs->type = type;
913     bs->removable = ((type == BDRV_TYPE_CDROM ||
914                       type == BDRV_TYPE_FLOPPY));
915 }
916
917 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
918 {
919     bs->translation = translation;
920 }
921
922 void bdrv_get_geometry_hint(BlockDriverState *bs,
923                             int *pcyls, int *pheads, int *psecs)
924 {
925     *pcyls = bs->cyls;
926     *pheads = bs->heads;
927     *psecs = bs->secs;
928 }
929
930 int bdrv_get_type_hint(BlockDriverState *bs)
931 {
932     return bs->type;
933 }
934
935 int bdrv_get_translation_hint(BlockDriverState *bs)
936 {
937     return bs->translation;
938 }
939
940 int bdrv_is_removable(BlockDriverState *bs)
941 {
942     return bs->removable;
943 }
944
945 int bdrv_is_read_only(BlockDriverState *bs)
946 {
947     return bs->read_only;
948 }
949
950 int bdrv_is_sg(BlockDriverState *bs)
951 {
952     return bs->sg;
953 }
954
955 /* XXX: no longer used */
956 void bdrv_set_change_cb(BlockDriverState *bs,
957                         void (*change_cb)(void *opaque), void *opaque)
958 {
959     bs->change_cb = change_cb;
960     bs->change_opaque = opaque;
961 }
962
963 int bdrv_is_encrypted(BlockDriverState *bs)
964 {
965     if (bs->backing_hd && bs->backing_hd->encrypted)
966         return 1;
967     return bs->encrypted;
968 }
969
970 int bdrv_key_required(BlockDriverState *bs)
971 {
972     BlockDriverState *backing_hd = bs->backing_hd;
973
974     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
975         return 1;
976     return (bs->encrypted && !bs->valid_key);
977 }
978
979 int bdrv_set_key(BlockDriverState *bs, const char *key)
980 {
981     int ret;
982     if (bs->backing_hd && bs->backing_hd->encrypted) {
983         ret = bdrv_set_key(bs->backing_hd, key);
984         if (ret < 0)
985             return ret;
986         if (!bs->encrypted)
987             return 0;
988     }
989     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
990         return -1;
991     ret = bs->drv->bdrv_set_key(bs, key);
992     bs->valid_key = (ret == 0);
993     return ret;
994 }
995
996 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
997 {
998     if (!bs->drv) {
999         buf[0] = '\0';
1000     } else {
1001         pstrcpy(buf, buf_size, bs->drv->format_name);
1002     }
1003 }
1004
1005 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1006                          void *opaque)
1007 {
1008     BlockDriver *drv;
1009
1010     for (drv = first_drv; drv != NULL; drv = drv->next) {
1011         it(opaque, drv->format_name);
1012     }
1013 }
1014
1015 BlockDriverState *bdrv_find(const char *name)
1016 {
1017     BlockDriverState *bs;
1018
1019     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1020         if (!strcmp(name, bs->device_name))
1021             return bs;
1022     }
1023     return NULL;
1024 }
1025
1026 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1027 {
1028     BlockDriverState *bs;
1029
1030     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1031         it(opaque, bs);
1032     }
1033 }
1034
1035 const char *bdrv_get_device_name(BlockDriverState *bs)
1036 {
1037     return bs->device_name;
1038 }
1039
1040 void bdrv_flush(BlockDriverState *bs)
1041 {
1042     if (bs->drv->bdrv_flush)
1043         bs->drv->bdrv_flush(bs);
1044     if (bs->backing_hd)
1045         bdrv_flush(bs->backing_hd);
1046 }
1047
1048 void bdrv_flush_all(void)
1049 {
1050     BlockDriverState *bs;
1051
1052     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1053         if (bs->drv && !bdrv_is_read_only(bs) && 
1054             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1055             bdrv_flush(bs);
1056 }
1057
1058 /*
1059  * Returns true iff the specified sector is present in the disk image. Drivers
1060  * not implementing the functionality are assumed to not support backing files,
1061  * hence all their sectors are reported as allocated.
1062  *
1063  * 'pnum' is set to the number of sectors (including and immediately following
1064  * the specified sector) that are known to be in the same
1065  * allocated/unallocated state.
1066  *
1067  * 'nb_sectors' is the max value 'pnum' should be set to.
1068  */
1069 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1070         int *pnum)
1071 {
1072     int64_t n;
1073     if (!bs->drv->bdrv_is_allocated) {
1074         if (sector_num >= bs->total_sectors) {
1075             *pnum = 0;
1076             return 0;
1077         }
1078         n = bs->total_sectors - sector_num;
1079         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1080         return 1;
1081     }
1082     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1083 }
1084
1085 void bdrv_info(void)
1086 {
1087     BlockDriverState *bs;
1088
1089     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1090         term_printf("%s:", bs->device_name);
1091         term_printf(" type=");
1092         switch(bs->type) {
1093         case BDRV_TYPE_HD:
1094             term_printf("hd");
1095             break;
1096         case BDRV_TYPE_CDROM:
1097             term_printf("cdrom");
1098             break;
1099         case BDRV_TYPE_FLOPPY:
1100             term_printf("floppy");
1101             break;
1102         }
1103         term_printf(" removable=%d", bs->removable);
1104         if (bs->removable) {
1105             term_printf(" locked=%d", bs->locked);
1106         }
1107         if (bs->drv) {
1108             term_printf(" file=");
1109             term_print_filename(bs->filename);
1110             if (bs->backing_file[0] != '\0') {
1111                 term_printf(" backing_file=");
1112                 term_print_filename(bs->backing_file);
1113             }
1114             term_printf(" ro=%d", bs->read_only);
1115             term_printf(" drv=%s", bs->drv->format_name);
1116             term_printf(" encrypted=%d", bdrv_is_encrypted(bs));
1117         } else {
1118             term_printf(" [not inserted]");
1119         }
1120         term_printf("\n");
1121     }
1122 }
1123
1124 /* The "info blockstats" command. */
1125 void bdrv_info_stats (void)
1126 {
1127     BlockDriverState *bs;
1128     BlockDriverInfo bdi;
1129
1130     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1131         term_printf ("%s:"
1132                      " rd_bytes=%" PRIu64
1133                      " wr_bytes=%" PRIu64
1134                      " rd_operations=%" PRIu64
1135                      " wr_operations=%" PRIu64
1136                      ,
1137                      bs->device_name,
1138                      bs->rd_bytes, bs->wr_bytes,
1139                      bs->rd_ops, bs->wr_ops);
1140         if (bdrv_get_info(bs, &bdi) == 0)
1141             term_printf(" high=%" PRId64
1142                         " bytes_free=%" PRId64,
1143                         bdi.highest_alloc, bdi.num_free_bytes);
1144         term_printf("\n");
1145     }
1146 }
1147
1148 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1149 {
1150     if (bs->backing_hd && bs->backing_hd->encrypted)
1151         return bs->backing_file;
1152     else if (bs->encrypted)
1153         return bs->filename;
1154     else
1155         return NULL;
1156 }
1157
1158 void bdrv_get_backing_filename(BlockDriverState *bs,
1159                                char *filename, int filename_size)
1160 {
1161     if (!bs->backing_hd) {
1162         pstrcpy(filename, filename_size, "");
1163     } else {
1164         pstrcpy(filename, filename_size, bs->backing_file);
1165     }
1166 }
1167
1168 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1169                           const uint8_t *buf, int nb_sectors)
1170 {
1171     BlockDriver *drv = bs->drv;
1172     if (!drv)
1173         return -ENOMEDIUM;
1174     if (!drv->bdrv_write_compressed)
1175         return -ENOTSUP;
1176     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1177 }
1178
1179 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1180 {
1181     BlockDriver *drv = bs->drv;
1182     if (!drv)
1183         return -ENOMEDIUM;
1184     if (!drv->bdrv_get_info)
1185         return -ENOTSUP;
1186     memset(bdi, 0, sizeof(*bdi));
1187     return drv->bdrv_get_info(bs, bdi);
1188 }
1189
1190 /**************************************************************/
1191 /* handling of snapshots */
1192
1193 int bdrv_snapshot_create(BlockDriverState *bs,
1194                          QEMUSnapshotInfo *sn_info)
1195 {
1196     BlockDriver *drv = bs->drv;
1197     if (!drv)
1198         return -ENOMEDIUM;
1199     if (!drv->bdrv_snapshot_create)
1200         return -ENOTSUP;
1201     return drv->bdrv_snapshot_create(bs, sn_info);
1202 }
1203
1204 int bdrv_snapshot_goto(BlockDriverState *bs,
1205                        const char *snapshot_id)
1206 {
1207     BlockDriver *drv = bs->drv;
1208     if (!drv)
1209         return -ENOMEDIUM;
1210     if (!drv->bdrv_snapshot_goto)
1211         return -ENOTSUP;
1212     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1213 }
1214
1215 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1216 {
1217     BlockDriver *drv = bs->drv;
1218     if (!drv)
1219         return -ENOMEDIUM;
1220     if (!drv->bdrv_snapshot_delete)
1221         return -ENOTSUP;
1222     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1223 }
1224
1225 int bdrv_snapshot_list(BlockDriverState *bs,
1226                        QEMUSnapshotInfo **psn_info)
1227 {
1228     BlockDriver *drv = bs->drv;
1229     if (!drv)
1230         return -ENOMEDIUM;
1231     if (!drv->bdrv_snapshot_list)
1232         return -ENOTSUP;
1233     return drv->bdrv_snapshot_list(bs, psn_info);
1234 }
1235
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes, as with snprintf)
 * in a short human readable form and return 'buf'.
 *
 * Values up to 999 (including negative ones) are printed as plain integers.
 * Larger values are scaled by powers of 1024 and tagged K, M, G or T: one
 * decimal digit is shown while the scaled value is below 10, otherwise the
 * value is rounded to the nearest integer. Anything too large for the last
 * suffix is still printed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    /* One extra byte so the initializer's terminating NUL fits. */
    static const char suffixes[NB_SUFFIXES + 1] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
    } else {
        base = 1024;
        for(i = 0; i < NB_SUFFIXES; i++) {
            if (size < (10 * base)) {
                snprintf(buf, buf_size, "%0.1f%c",
                         (double)size / base,
                         suffixes[i]);
                break;
            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
                /*
                 * Round to nearest without computing size + base / 2, which
                 * overflows int64_t for sizes close to INT64_MAX.
                 */
                snprintf(buf, buf_size, "%" PRId64 "%c",
                         size / base + ((size % base) >= (base >> 1)),
                         suffixes[i]);
                break;
            }
            base = base * 1024;
        }
    }
    return buf;
}
1265
1266 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1267 {
1268     char buf1[128], date_buf[128], clock_buf[128];
1269 #ifdef _WIN32
1270     struct tm *ptm;
1271 #else
1272     struct tm tm;
1273 #endif
1274     time_t ti;
1275     int64_t secs;
1276
1277     if (!sn) {
1278         snprintf(buf, buf_size,
1279                  "%-10s%-20s%7s%20s%15s",
1280                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1281     } else {
1282         ti = sn->date_sec;
1283 #ifdef _WIN32
1284         ptm = localtime(&ti);
1285         strftime(date_buf, sizeof(date_buf),
1286                  "%Y-%m-%d %H:%M:%S", ptm);
1287 #else
1288         localtime_r(&ti, &tm);
1289         strftime(date_buf, sizeof(date_buf),
1290                  "%Y-%m-%d %H:%M:%S", &tm);
1291 #endif
1292         secs = sn->vm_clock_nsec / 1000000000;
1293         snprintf(clock_buf, sizeof(clock_buf),
1294                  "%02d:%02d:%02d.%03d",
1295                  (int)(secs / 3600),
1296                  (int)((secs / 60) % 60),
1297                  (int)(secs % 60),
1298                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1299         snprintf(buf, buf_size,
1300                  "%-10s%-20s%7s%20s%15s",
1301                  sn->id_str, sn->name,
1302                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1303                  date_buf,
1304                  clock_buf);
1305     }
1306     return buf;
1307 }
1308
1309
1310 /**************************************************************/
1311 /* async I/Os */
1312
1313 typedef struct VectorTranslationState {
1314     QEMUIOVector *iov;
1315     uint8_t *bounce;
1316     int is_write;
1317     BlockDriverAIOCB *aiocb;
1318     BlockDriverAIOCB *this_aiocb;
1319 } VectorTranslationState;
1320
1321 static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
1322 {
1323     VectorTranslationState *s = opaque;
1324
1325     if (!s->is_write) {
1326         qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
1327     }
1328     qemu_vfree(s->bounce);
1329     s->this_aiocb->cb(s->this_aiocb->opaque, ret);
1330     qemu_aio_release(s->this_aiocb);
1331 }
1332
1333 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1334                                             int64_t sector_num,
1335                                             QEMUIOVector *iov,
1336                                             int nb_sectors,
1337                                             BlockDriverCompletionFunc *cb,
1338                                             void *opaque,
1339                                             int is_write)
1340
1341 {
1342     VectorTranslationState *s = qemu_mallocz(sizeof(*s));
1343     BlockDriverAIOCB *aiocb = qemu_aio_get(bs, cb, opaque);
1344
1345     s->this_aiocb = aiocb;
1346     s->iov = iov;
1347     s->bounce = qemu_memalign(512, nb_sectors * 512);
1348     s->is_write = is_write;
1349     if (is_write) {
1350         qemu_iovec_to_buffer(s->iov, s->bounce);
1351         s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
1352                                   bdrv_aio_rw_vector_cb, s);
1353     } else {
1354         s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
1355                                  bdrv_aio_rw_vector_cb, s);
1356     }
1357     return aiocb;
1358 }
1359
1360 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1361                                  QEMUIOVector *iov, int nb_sectors,
1362                                  BlockDriverCompletionFunc *cb, void *opaque)
1363 {
1364     if (bdrv_check_request(bs, sector_num, nb_sectors))
1365         return NULL;
1366
1367     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1368                               cb, opaque, 0);
1369 }
1370
1371 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1372                                   QEMUIOVector *iov, int nb_sectors,
1373                                   BlockDriverCompletionFunc *cb, void *opaque)
1374 {
1375     if (bdrv_check_request(bs, sector_num, nb_sectors))
1376         return NULL;
1377
1378     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1379                               cb, opaque, 1);
1380 }
1381
1382 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
1383                                 uint8_t *buf, int nb_sectors,
1384                                 BlockDriverCompletionFunc *cb, void *opaque)
1385 {
1386     BlockDriver *drv = bs->drv;
1387     BlockDriverAIOCB *ret;
1388
1389     if (!drv)
1390         return NULL;
1391     if (bdrv_check_request(bs, sector_num, nb_sectors))
1392         return NULL;
1393
1394     ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
1395
1396     if (ret) {
1397         /* Update stats even though technically transfer has not happened. */
1398         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1399         bs->rd_ops ++;
1400     }
1401
1402     return ret;
1403 }
1404
1405 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
1406                                  const uint8_t *buf, int nb_sectors,
1407                                  BlockDriverCompletionFunc *cb, void *opaque)
1408 {
1409     BlockDriver *drv = bs->drv;
1410     BlockDriverAIOCB *ret;
1411
1412     if (!drv)
1413         return NULL;
1414     if (bs->read_only)
1415         return NULL;
1416     if (bdrv_check_request(bs, sector_num, nb_sectors))
1417         return NULL;
1418
1419     ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
1420
1421     if (ret) {
1422         /* Update stats even though technically transfer has not happened. */
1423         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1424         bs->wr_ops ++;
1425     }
1426
1427     return ret;
1428 }
1429
1430 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1431 {
1432     BlockDriver *drv = acb->bs->drv;
1433
1434     if (acb->cb == bdrv_aio_rw_vector_cb) {
1435         VectorTranslationState *s = acb->opaque;
1436         acb = s->aiocb;
1437     }
1438
1439     drv->bdrv_aio_cancel(acb);
1440 }
1441
1442
1443 /**************************************************************/
1444 /* async block device emulation */
1445
1446 static void bdrv_aio_bh_cb(void *opaque)
1447 {
1448     BlockDriverAIOCBSync *acb = opaque;
1449     acb->common.cb(acb->common.opaque, acb->ret);
1450     qemu_aio_release(acb);
1451 }
1452
1453 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
1454         int64_t sector_num, uint8_t *buf, int nb_sectors,
1455         BlockDriverCompletionFunc *cb, void *opaque)
1456 {
1457     BlockDriverAIOCBSync *acb;
1458     int ret;
1459
1460     acb = qemu_aio_get(bs, cb, opaque);
1461     if (!acb->bh)
1462         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1463     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1464     acb->ret = ret;
1465     qemu_bh_schedule(acb->bh);
1466     return &acb->common;
1467 }
1468
1469 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
1470         int64_t sector_num, const uint8_t *buf, int nb_sectors,
1471         BlockDriverCompletionFunc *cb, void *opaque)
1472 {
1473     BlockDriverAIOCBSync *acb;
1474     int ret;
1475
1476     acb = qemu_aio_get(bs, cb, opaque);
1477     if (!acb->bh)
1478         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1479     ret = bdrv_write(bs, sector_num, buf, nb_sectors);
1480     acb->ret = ret;
1481     qemu_bh_schedule(acb->bh);
1482     return &acb->common;
1483 }
1484
1485 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1486 {
1487     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1488     qemu_bh_cancel(acb->bh);
1489     qemu_aio_release(acb);
1490 }
1491
1492 /**************************************************************/
1493 /* sync block device emulation */
1494
/*
 * Completion callback for the synchronous emulation: 'opaque' points to the
 * int that receives the request's result.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1499
1500 #define NOT_DONE 0x7fffffff
1501
1502 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1503                         uint8_t *buf, int nb_sectors)
1504 {
1505     int async_ret;
1506     BlockDriverAIOCB *acb;
1507
1508     async_ret = NOT_DONE;
1509     acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
1510                         bdrv_rw_em_cb, &async_ret);
1511     if (acb == NULL)
1512         return -1;
1513
1514     while (async_ret == NOT_DONE) {
1515         qemu_aio_wait();
1516     }
1517
1518     return async_ret;
1519 }
1520
1521 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1522                          const uint8_t *buf, int nb_sectors)
1523 {
1524     int async_ret;
1525     BlockDriverAIOCB *acb;
1526
1527     async_ret = NOT_DONE;
1528     acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
1529                          bdrv_rw_em_cb, &async_ret);
1530     if (acb == NULL)
1531         return -1;
1532     while (async_ret == NOT_DONE) {
1533         qemu_aio_wait();
1534     }
1535     return async_ret;
1536 }
1537
1538 void bdrv_init(void)
1539 {
1540     bdrv_register(&bdrv_raw);
1541     bdrv_register(&bdrv_host_device);
1542 #ifndef _WIN32
1543     bdrv_register(&bdrv_cow);
1544 #endif
1545     bdrv_register(&bdrv_qcow);
1546     bdrv_register(&bdrv_vmdk);
1547     bdrv_register(&bdrv_cloop);
1548     bdrv_register(&bdrv_dmg);
1549     bdrv_register(&bdrv_bochs);
1550     bdrv_register(&bdrv_vpc);
1551     bdrv_register(&bdrv_vvfat);
1552     bdrv_register(&bdrv_qcow2);
1553     bdrv_register(&bdrv_parallels);
1554     bdrv_register(&bdrv_nbd);
1555 }
1556
1557 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1558                    void *opaque)
1559 {
1560     BlockDriver *drv;
1561     BlockDriverAIOCB *acb;
1562
1563     drv = bs->drv;
1564     if (drv->free_aiocb) {
1565         acb = drv->free_aiocb;
1566         drv->free_aiocb = acb->next;
1567     } else {
1568         acb = qemu_mallocz(drv->aiocb_size);
1569     }
1570     acb->bs = bs;
1571     acb->cb = cb;
1572     acb->opaque = opaque;
1573     return acb;
1574 }
1575
1576 void qemu_aio_release(void *p)
1577 {
1578     BlockDriverAIOCB *acb = p;
1579     BlockDriver *drv = acb->bs->drv;
1580     acb->next = drv->free_aiocb;
1581     drv->free_aiocb = acb;
1582 }
1583
1584 /**************************************************************/
1585 /* removable device support */
1586
1587 /**
1588  * Return TRUE if the media is present
1589  */
1590 int bdrv_is_inserted(BlockDriverState *bs)
1591 {
1592     BlockDriver *drv = bs->drv;
1593     int ret;
1594     if (!drv)
1595         return 0;
1596     if (!drv->bdrv_is_inserted)
1597         return 1;
1598     ret = drv->bdrv_is_inserted(bs);
1599     return ret;
1600 }
1601
1602 /**
1603  * Return TRUE if the media changed since the last call to this
1604  * function. It is currently only used for floppy disks
1605  */
1606 int bdrv_media_changed(BlockDriverState *bs)
1607 {
1608     BlockDriver *drv = bs->drv;
1609     int ret;
1610
1611     if (!drv || !drv->bdrv_media_changed)
1612         ret = -ENOTSUP;
1613     else
1614         ret = drv->bdrv_media_changed(bs);
1615     if (ret == -ENOTSUP)
1616         ret = bs->media_changed;
1617     bs->media_changed = 0;
1618     return ret;
1619 }
1620
1621 /**
1622  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1623  */
1624 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1625 {
1626     BlockDriver *drv = bs->drv;
1627     int ret;
1628
1629     if (!drv || !drv->bdrv_eject) {
1630         ret = -ENOTSUP;
1631     } else {
1632         ret = drv->bdrv_eject(bs, eject_flag);
1633     }
1634     if (ret == -ENOTSUP) {
1635         if (eject_flag)
1636             bdrv_close(bs);
1637     }
1638 }
1639
1640 int bdrv_is_locked(BlockDriverState *bs)
1641 {
1642     return bs->locked;
1643 }
1644
1645 /**
1646  * Lock or unlock the media (if it is locked, the user won't be able
1647  * to eject it manually).
1648  */
1649 void bdrv_set_locked(BlockDriverState *bs, int locked)
1650 {
1651     BlockDriver *drv = bs->drv;
1652
1653     bs->locked = locked;
1654     if (drv && drv->bdrv_set_locked) {
1655         drv->bdrv_set_locked(bs, locked);
1656     }
1657 }
1658
1659 /* needed for generic scsi interface */
1660
1661 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1662 {
1663     BlockDriver *drv = bs->drv;
1664
1665     if (drv && drv->bdrv_ioctl)
1666         return drv->bdrv_ioctl(bs, req, buf);
1667     return -ENOTSUP;
1668 }