Refactor aio callback allocation to use an aiocb pool (Avi Kivity)
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef HOST_BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "monitor.h"
32 #include "block_int.h"
33
34 #ifdef HOST_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
47 #define SECTOR_BITS 9
48 #define SECTOR_SIZE (1 << SECTOR_BITS)
49
/*
 * Request control block whose size bdrv_register() advertises as
 * aiocb_size for drivers that receive the AIO emulation layer.
 */
50 typedef struct BlockDriverAIOCBSync {
51     BlockDriverAIOCB common;  /* generic AIOCB part, placed first */
52     QEMUBH *bh;               /* presumably the bottom half used to deliver completion -- TODO confirm */
53     int ret;                  /* presumably the result handed to the completion callback -- TODO confirm */
54 } BlockDriverAIOCBSync;
55
/*
 * Emulation helpers installed by bdrv_register() for drivers that lack
 * either the asynchronous or the synchronous I/O entry points.
 */
56 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
57         int64_t sector_num, uint8_t *buf, int nb_sectors,
58         BlockDriverCompletionFunc *cb, void *opaque);
59 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
60         int64_t sector_num, const uint8_t *buf, int nb_sectors,
61         BlockDriverCompletionFunc *cb, void *opaque);
62 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
63 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
64                         uint8_t *buf, int nb_sectors);
65 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
66                          const uint8_t *buf, int nb_sectors);
67
/* Head of the list of named block devices; bdrv_new() appends to it. */
68 BlockDriverState *bdrv_first;
69
/* Head of the list of registered drivers; bdrv_register() prepends to it. */
70 static BlockDriver *first_drv;
71
/*
 * Tell whether 'path' is absolute.  Everything up to and including the
 * first ':' (if any) is skipped before the leading separator is tested.
 * Returns 1 for an absolute path and 0 otherwise.
 */
int path_is_absolute(const char *path)
{
    const char *colon = strchr(path, ':');
    const char *start = colon ? colon + 1 : path;

#ifdef _WIN32
    /* names like "\\.\d:" begin with a separator ahead of any ':' */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
    return (start[0] == '/' || start[0] == '\\');
#else
    return (start[0] == '/');
#endif
}
91
/*
 * Write into 'dest' (a buffer of 'dest_size' bytes) the location of
 * 'filename'.  An absolute 'filename' is copied as is.  A relative one is
 * appended to the leading part of 'base_path' that ends just after its
 * first ':' or just after its last path separator, whichever comes
 * later, so URL-like prefixes are preserved.  Nothing is written when
 * 'dest_size' is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *cut, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first candidate: just past the first ':' (else the very start) */
    cut = strchr(base_path, ':');
    cut = cut ? cut + 1 : base_path;

    /* second candidate: just past the last separator (else the start) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    if (after_sep > cut) {
        cut = after_sep;
    }

    /* copy the prefix, truncated so the terminator always fits */
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
135
136
137 static void bdrv_register(BlockDriver *bdrv)
138 {
139     if (!bdrv->bdrv_aio_read) {
140         /* add AIO emulation layer */
141         bdrv->bdrv_aio_read = bdrv_aio_read_em;
142         bdrv->bdrv_aio_write = bdrv_aio_write_em;
143         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
144         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
145     } else if (!bdrv->bdrv_read) {
146         /* add synchronous IO emulation layer */
147         bdrv->bdrv_read = bdrv_read_em;
148         bdrv->bdrv_write = bdrv_write_em;
149     }
150     aio_pool_init(&bdrv->aio_pool, bdrv->aiocb_size, bdrv->bdrv_aio_cancel);
151     bdrv->next = first_drv;
152     first_drv = bdrv;
153 }
154
155 /* create a new block device (by default it is empty) */
156 BlockDriverState *bdrv_new(const char *device_name)
157 {
158     BlockDriverState **pbs, *bs;
159
160     bs = qemu_mallocz(sizeof(BlockDriverState));
161     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
162     if (device_name[0] != '\0') {
163         /* insert at the end */
164         pbs = &bdrv_first;
165         while (*pbs != NULL)
166             pbs = &(*pbs)->next;
167         *pbs = bs;
168     }
169     return bs;
170 }
171
172 BlockDriver *bdrv_find_format(const char *format_name)
173 {
174     BlockDriver *drv1;
175     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
176         if (!strcmp(drv1->format_name, format_name))
177             return drv1;
178     }
179     return NULL;
180 }
181
182 int bdrv_create(BlockDriver *drv,
183                 const char *filename, int64_t size_in_sectors,
184                 const char *backing_file, int flags)
185 {
186     if (!drv->bdrv_create)
187         return -ENOTSUP;
188     return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
189 }
190
#ifdef _WIN32
/*
 * Store in 'filename' (a buffer of 'size' bytes) the name of a newly
 * created temporary file.  On failure 'filename' is set to the empty
 * string so that callers cannot mistake stale buffer contents for a name.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_name[MAX_PATH];

    if (size <= 0) {
        return;
    }
    /* GetTempFileName() may write up to MAX_PATH characters, whatever
       the caller's buffer size is: go through a local buffer and honour
       'size' when copying the result out */
    if (!GetTempPath(MAX_PATH, temp_dir) ||
        !GetTempFileName(temp_dir, "qem", 0, temp_name)) {
        filename[0] = '\0';
        return;
    }
    pstrcpy(filename, size, temp_name);
}
#else
/*
 * Store in 'filename' (a buffer of 'size' bytes) the name of a newly
 * created temporary file located in $TMPDIR, or in /tmp when TMPDIR is
 * unset.  On failure 'filename' is set to the empty string.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }
    /* XXX: race condition possible: the file is created and closed
       here, and only its name is handed back to the caller */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        /* do not hand the unexpanded template back as if it were a file */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
213
#ifdef _WIN32
/* Non-zero when 'filename' starts with an ASCII letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    return ((is_lower || is_upper) && filename[1] == ':');
}

/*
 * Return 1 when 'filename' is exactly a drive prefix such as "c:", or
 * starts with one of the prefixes "\\.\" and "//./"; return 0 otherwise.
 */
static int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
233
234 static BlockDriver *find_protocol(const char *filename)
235 {
236     BlockDriver *drv1;
237     char protocol[128];
238     int len;
239     const char *p;
240
241 #ifdef _WIN32
242     if (is_windows_drive(filename) ||
243         is_windows_drive_prefix(filename))
244         return &bdrv_raw;
245 #endif
246     p = strchr(filename, ':');
247     if (!p)
248         return &bdrv_raw;
249     len = p - filename;
250     if (len > sizeof(protocol) - 1)
251         len = sizeof(protocol) - 1;
252     memcpy(protocol, filename, len);
253     protocol[len] = '\0';
254     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
255         if (drv1->protocol_name &&
256             !strcmp(drv1->protocol_name, protocol))
257             return drv1;
258     }
259     return NULL;
260 }
261
262 /* XXX: force raw format if block or character device ? It would
263    simplify the BSD case */
264 static BlockDriver *find_image_format(const char *filename)
265 {
266     int ret, score, score_max;
267     BlockDriver *drv1, *drv;
268     uint8_t buf[2048];
269     BlockDriverState *bs;
270
271     /* detect host devices. By convention, /dev/cdrom[N] is always
272        recognized as a host CDROM */
273     if (strstart(filename, "/dev/cdrom", NULL))
274         return &bdrv_host_device;
275 #ifdef _WIN32
276     if (is_windows_drive(filename))
277         return &bdrv_host_device;
278 #else
279     {
280         struct stat st;
281         if (stat(filename, &st) >= 0 &&
282             (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
283             return &bdrv_host_device;
284         }
285     }
286 #endif
287
288     drv = find_protocol(filename);
289     /* no need to test disk image formats for vvfat */
290     if (drv == &bdrv_vvfat)
291         return drv;
292
293     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
294     if (ret < 0)
295         return NULL;
296     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
297     bdrv_delete(bs);
298     if (ret < 0) {
299         return NULL;
300     }
301
302     score_max = 0;
303     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
304         if (drv1->bdrv_probe) {
305             score = drv1->bdrv_probe(buf, ret, filename);
306             if (score > score_max) {
307                 score_max = score;
308                 drv = drv1;
309             }
310         }
311     }
312     return drv;
313 }
314
315 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
316 {
317     BlockDriverState *bs;
318     int ret;
319
320     bs = bdrv_new("");
321     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
322     if (ret < 0) {
323         bdrv_delete(bs);
324         return ret;
325     }
326     bs->growable = 1;
327     *pbs = bs;
328     return 0;
329 }
330
331 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
332 {
333     return bdrv_open2(bs, filename, flags, NULL);
334 }
335
336 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
337                BlockDriver *drv)
338 {
339     int ret, open_flags;
340     char tmp_filename[PATH_MAX];
341     char backing_filename[PATH_MAX];
342
343     bs->read_only = 0;
344     bs->is_temporary = 0;
345     bs->encrypted = 0;
346     bs->valid_key = 0;
347
348     if (flags & BDRV_O_SNAPSHOT) {
349         BlockDriverState *bs1;
350         int64_t total_size;
351         int is_protocol = 0;
352
353         /* if snapshot, we create a temporary backing file and open it
354            instead of opening 'filename' directly */
355
356         /* if there is a backing file, use it */
357         bs1 = bdrv_new("");
358         ret = bdrv_open(bs1, filename, 0);
359         if (ret < 0) {
360             bdrv_delete(bs1);
361             return ret;
362         }
363         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
364
365         if (bs1->drv && bs1->drv->protocol_name)
366             is_protocol = 1;
367
368         bdrv_delete(bs1);
369
370         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
371
372         /* Real path is meaningless for protocols */
373         if (is_protocol)
374             snprintf(backing_filename, sizeof(backing_filename),
375                      "%s", filename);
376         else
377             realpath(filename, backing_filename);
378
379         ret = bdrv_create(&bdrv_qcow2, tmp_filename,
380                           total_size, backing_filename, 0);
381         if (ret < 0) {
382             return ret;
383         }
384         filename = tmp_filename;
385         bs->is_temporary = 1;
386     }
387
388     pstrcpy(bs->filename, sizeof(bs->filename), filename);
389     if (flags & BDRV_O_FILE) {
390         drv = find_protocol(filename);
391     } else if (!drv) {
392         drv = find_image_format(filename);
393     }
394     if (!drv) {
395         ret = -ENOENT;
396         goto unlink_and_fail;
397     }
398     bs->drv = drv;
399     bs->opaque = qemu_mallocz(drv->instance_size);
400     /* Note: for compatibility, we open disk image files as RDWR, and
401        RDONLY as fallback */
402     if (!(flags & BDRV_O_FILE))
403         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
404     else
405         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
406     ret = drv->bdrv_open(bs, filename, open_flags);
407     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
408         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
409         bs->read_only = 1;
410     }
411     if (ret < 0) {
412         qemu_free(bs->opaque);
413         bs->opaque = NULL;
414         bs->drv = NULL;
415     unlink_and_fail:
416         if (bs->is_temporary)
417             unlink(filename);
418         return ret;
419     }
420     if (drv->bdrv_getlength) {
421         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
422     }
423 #ifndef _WIN32
424     if (bs->is_temporary) {
425         unlink(filename);
426     }
427 #endif
428     if (bs->backing_file[0] != '\0') {
429         /* if there is a backing file, use it */
430         bs->backing_hd = bdrv_new("");
431         path_combine(backing_filename, sizeof(backing_filename),
432                      filename, bs->backing_file);
433         ret = bdrv_open(bs->backing_hd, backing_filename, open_flags);
434         if (ret < 0) {
435             bdrv_close(bs);
436             return ret;
437         }
438     }
439
440     if (!bdrv_key_required(bs)) {
441         /* call the change callback */
442         bs->media_changed = 1;
443         if (bs->change_cb)
444             bs->change_cb(bs->change_opaque);
445     }
446     return 0;
447 }
448
449 void bdrv_close(BlockDriverState *bs)
450 {
451     if (bs->drv) {
452         if (bs->backing_hd)
453             bdrv_delete(bs->backing_hd);
454         bs->drv->bdrv_close(bs);
455         qemu_free(bs->opaque);
456 #ifdef _WIN32
457         if (bs->is_temporary) {
458             unlink(bs->filename);
459         }
460 #endif
461         bs->opaque = NULL;
462         bs->drv = NULL;
463
464         /* call the change callback */
465         bs->media_changed = 1;
466         if (bs->change_cb)
467             bs->change_cb(bs->change_opaque);
468     }
469 }
470
471 void bdrv_delete(BlockDriverState *bs)
472 {
473     BlockDriverState **pbs;
474
475     pbs = &bdrv_first;
476     while (*pbs != bs && *pbs != NULL)
477         pbs = &(*pbs)->next;
478     if (*pbs == bs)
479         *pbs = bs->next;
480
481     bdrv_close(bs);
482     qemu_free(bs);
483 }
484
485 /* commit COW file into the raw image */
486 int bdrv_commit(BlockDriverState *bs)
487 {
488     BlockDriver *drv = bs->drv;
489     int64_t i, total_sectors;
490     int n, j;
491     unsigned char sector[512];
492
493     if (!drv)
494         return -ENOMEDIUM;
495
496     if (bs->read_only) {
497         return -EACCES;
498     }
499
500     if (!bs->backing_hd) {
501         return -ENOTSUP;
502     }
503
504     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
505     for (i = 0; i < total_sectors;) {
506         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
507             for(j = 0; j < n; j++) {
508                 if (bdrv_read(bs, i, sector, 1) != 0) {
509                     return -EIO;
510                 }
511
512                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
513                     return -EIO;
514                 }
515                 i++;
516             }
517         } else {
518             i += n;
519         }
520     }
521
522     if (drv->bdrv_make_empty)
523         return drv->bdrv_make_empty(bs);
524
525     return 0;
526 }
527
528 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
529                                    size_t size)
530 {
531     int64_t len;
532
533     if (!bdrv_is_inserted(bs))
534         return -ENOMEDIUM;
535
536     if (bs->growable)
537         return 0;
538
539     len = bdrv_getlength(bs);
540
541     if ((offset + size) > len)
542         return -EIO;
543
544     return 0;
545 }
546
547 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
548                               int nb_sectors)
549 {
550     int64_t offset;
551
552     /* Deal with byte accesses */
553     if (sector_num < 0)
554         offset = -sector_num;
555     else
556         offset = sector_num * 512;
557
558     return bdrv_check_byte_request(bs, offset, nb_sectors * 512);
559 }
560
561 /* return < 0 if error. See bdrv_write() for the return codes */
562 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
563               uint8_t *buf, int nb_sectors)
564 {
565     BlockDriver *drv = bs->drv;
566
567     if (!drv)
568         return -ENOMEDIUM;
569     if (bdrv_check_request(bs, sector_num, nb_sectors))
570         return -EIO;
571
572     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
573 }
574
575 /* Return < 0 if error. Important errors are:
576   -EIO         generic I/O error (may happen for all errors)
577   -ENOMEDIUM   No media inserted.
578   -EINVAL      Invalid sector number or nb_sectors
579   -EACCES      Trying to write a read-only device
580 */
581 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
582                const uint8_t *buf, int nb_sectors)
583 {
584     BlockDriver *drv = bs->drv;
585     if (!bs->drv)
586         return -ENOMEDIUM;
587     if (bs->read_only)
588         return -EACCES;
589     if (bdrv_check_request(bs, sector_num, nb_sectors))
590         return -EIO;
591
592     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
593 }
594
595 int bdrv_pread(BlockDriverState *bs, int64_t offset,
596                void *buf, int count1)
597 {
598     uint8_t tmp_buf[SECTOR_SIZE];
599     int len, nb_sectors, count;
600     int64_t sector_num;
601
602     count = count1;
603     /* first read to align to sector start */
604     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
605     if (len > count)
606         len = count;
607     sector_num = offset >> SECTOR_BITS;
608     if (len > 0) {
609         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
610             return -EIO;
611         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
612         count -= len;
613         if (count == 0)
614             return count1;
615         sector_num++;
616         buf += len;
617     }
618
619     /* read the sectors "in place" */
620     nb_sectors = count >> SECTOR_BITS;
621     if (nb_sectors > 0) {
622         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
623             return -EIO;
624         sector_num += nb_sectors;
625         len = nb_sectors << SECTOR_BITS;
626         buf += len;
627         count -= len;
628     }
629
630     /* add data from the last sector */
631     if (count > 0) {
632         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
633             return -EIO;
634         memcpy(buf, tmp_buf, count);
635     }
636     return count1;
637 }
638
639 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
640                 const void *buf, int count1)
641 {
642     uint8_t tmp_buf[SECTOR_SIZE];
643     int len, nb_sectors, count;
644     int64_t sector_num;
645
646     count = count1;
647     /* first write to align to sector start */
648     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
649     if (len > count)
650         len = count;
651     sector_num = offset >> SECTOR_BITS;
652     if (len > 0) {
653         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
654             return -EIO;
655         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
656         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
657             return -EIO;
658         count -= len;
659         if (count == 0)
660             return count1;
661         sector_num++;
662         buf += len;
663     }
664
665     /* write the sectors "in place" */
666     nb_sectors = count >> SECTOR_BITS;
667     if (nb_sectors > 0) {
668         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
669             return -EIO;
670         sector_num += nb_sectors;
671         len = nb_sectors << SECTOR_BITS;
672         buf += len;
673         count -= len;
674     }
675
676     /* add data from the last sector */
677     if (count > 0) {
678         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
679             return -EIO;
680         memcpy(tmp_buf, buf, count);
681         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
682             return -EIO;
683     }
684     return count1;
685 }
686
687 /**
688  * Truncate file to 'offset' bytes (needed only for file protocols)
689  */
690 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
691 {
692     BlockDriver *drv = bs->drv;
693     if (!drv)
694         return -ENOMEDIUM;
695     if (!drv->bdrv_truncate)
696         return -ENOTSUP;
697     return drv->bdrv_truncate(bs, offset);
698 }
699
700 /**
701  * Length of a file in bytes. Return < 0 if error or unknown.
702  */
703 int64_t bdrv_getlength(BlockDriverState *bs)
704 {
705     BlockDriver *drv = bs->drv;
706     if (!drv)
707         return -ENOMEDIUM;
708     if (!drv->bdrv_getlength) {
709         /* legacy mode */
710         return bs->total_sectors * SECTOR_SIZE;
711     }
712     return drv->bdrv_getlength(bs);
713 }
714
715 /* return 0 as number of sectors if no device present or error */
716 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
717 {
718     int64_t length;
719     length = bdrv_getlength(bs);
720     if (length < 0)
721         length = 0;
722     else
723         length = length >> SECTOR_BITS;
724     *nb_sectors_ptr = length;
725 }
726
/*
 * One entry of the MSDOS partition table.  guess_disk_lchs() overlays
 * four of these on the first sector, starting at byte 0x1be.  The struct
 * is packed, and nr_sects is read through le32_to_cpu().
 */
727 struct partition {
728         uint8_t boot_ind;           /* 0x80 - active */
729         uint8_t head;               /* starting head */
730         uint8_t sector;             /* starting sector */
731         uint8_t cyl;                /* starting cylinder */
732         uint8_t sys_ind;            /* What partition type */
733         uint8_t end_head;           /* end head */
734         uint8_t end_sector;         /* end sector */
735         uint8_t end_cyl;            /* end cylinder */
736         uint32_t start_sect;        /* starting sector counting from 0 */
737         uint32_t nr_sects;          /* nr of sectors in partition */
738 } __attribute__((packed));
739
740 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
741 static int guess_disk_lchs(BlockDriverState *bs,
742                            int *pcylinders, int *pheads, int *psectors)
743 {
744     uint8_t buf[512];
745     int ret, i, heads, sectors, cylinders;
746     struct partition *p;
747     uint32_t nr_sects;
748     uint64_t nb_sectors;
749
750     bdrv_get_geometry(bs, &nb_sectors);
751
752     ret = bdrv_read(bs, 0, buf, 1);
753     if (ret < 0)
754         return -1;
755     /* test msdos magic */
756     if (buf[510] != 0x55 || buf[511] != 0xaa)
757         return -1;
758     for(i = 0; i < 4; i++) {
759         p = ((struct partition *)(buf + 0x1be)) + i;
760         nr_sects = le32_to_cpu(p->nr_sects);
761         if (nr_sects && p->end_head) {
762             /* We make the assumption that the partition terminates on
763                a cylinder boundary */
764             heads = p->end_head + 1;
765             sectors = p->end_sector & 63;
766             if (sectors == 0)
767                 continue;
768             cylinders = nb_sectors / (heads * sectors);
769             if (cylinders < 1 || cylinders > 16383)
770                 continue;
771             *pheads = heads;
772             *psectors = sectors;
773             *pcylinders = cylinders;
774 #if 0
775             printf("guessed geometry: LCHS=%d %d %d\n",
776                    cylinders, heads, sectors);
777 #endif
778             return 0;
779         }
780     }
781     return -1;
782 }
783
784 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
785 {
786     int translation, lba_detected = 0;
787     int cylinders, heads, secs;
788     uint64_t nb_sectors;
789
790     /* if a geometry hint is available, use it */
791     bdrv_get_geometry(bs, &nb_sectors);
792     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
793     translation = bdrv_get_translation_hint(bs);
794     if (cylinders != 0) {
795         *pcyls = cylinders;
796         *pheads = heads;
797         *psecs = secs;
798     } else {
799         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
800             if (heads > 16) {
801                 /* if heads > 16, it means that a BIOS LBA
802                    translation was active, so the default
803                    hardware geometry is OK */
804                 lba_detected = 1;
805                 goto default_geometry;
806             } else {
807                 *pcyls = cylinders;
808                 *pheads = heads;
809                 *psecs = secs;
810                 /* disable any translation to be in sync with
811                    the logical geometry */
812                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
813                     bdrv_set_translation_hint(bs,
814                                               BIOS_ATA_TRANSLATION_NONE);
815                 }
816             }
817         } else {
818         default_geometry:
819             /* if no geometry, use a standard physical disk geometry */
820             cylinders = nb_sectors / (16 * 63);
821
822             if (cylinders > 16383)
823                 cylinders = 16383;
824             else if (cylinders < 2)
825                 cylinders = 2;
826             *pcyls = cylinders;
827             *pheads = 16;
828             *psecs = 63;
829             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
830                 if ((*pcyls * *pheads) <= 131072) {
831                     bdrv_set_translation_hint(bs,
832                                               BIOS_ATA_TRANSLATION_LARGE);
833                 } else {
834                     bdrv_set_translation_hint(bs,
835                                               BIOS_ATA_TRANSLATION_LBA);
836                 }
837             }
838         }
839         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
840     }
841 }
842
843 void bdrv_set_geometry_hint(BlockDriverState *bs,
844                             int cyls, int heads, int secs)
845 {
846     bs->cyls = cyls;
847     bs->heads = heads;
848     bs->secs = secs;
849 }
850
851 void bdrv_set_type_hint(BlockDriverState *bs, int type)
852 {
853     bs->type = type;
854     bs->removable = ((type == BDRV_TYPE_CDROM ||
855                       type == BDRV_TYPE_FLOPPY));
856 }
857
858 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
859 {
860     bs->translation = translation;
861 }
862
863 void bdrv_get_geometry_hint(BlockDriverState *bs,
864                             int *pcyls, int *pheads, int *psecs)
865 {
866     *pcyls = bs->cyls;
867     *pheads = bs->heads;
868     *psecs = bs->secs;
869 }
870
871 int bdrv_get_type_hint(BlockDriverState *bs)
872 {
873     return bs->type;
874 }
875
876 int bdrv_get_translation_hint(BlockDriverState *bs)
877 {
878     return bs->translation;
879 }
880
881 int bdrv_is_removable(BlockDriverState *bs)
882 {
883     return bs->removable;
884 }
885
886 int bdrv_is_read_only(BlockDriverState *bs)
887 {
888     return bs->read_only;
889 }
890
891 int bdrv_is_sg(BlockDriverState *bs)
892 {
893     return bs->sg;
894 }
895
896 /* XXX: no longer used */
897 void bdrv_set_change_cb(BlockDriverState *bs,
898                         void (*change_cb)(void *opaque), void *opaque)
899 {
900     bs->change_cb = change_cb;
901     bs->change_opaque = opaque;
902 }
903
904 int bdrv_is_encrypted(BlockDriverState *bs)
905 {
906     if (bs->backing_hd && bs->backing_hd->encrypted)
907         return 1;
908     return bs->encrypted;
909 }
910
911 int bdrv_key_required(BlockDriverState *bs)
912 {
913     BlockDriverState *backing_hd = bs->backing_hd;
914
915     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
916         return 1;
917     return (bs->encrypted && !bs->valid_key);
918 }
919
920 int bdrv_set_key(BlockDriverState *bs, const char *key)
921 {
922     int ret;
923     if (bs->backing_hd && bs->backing_hd->encrypted) {
924         ret = bdrv_set_key(bs->backing_hd, key);
925         if (ret < 0)
926             return ret;
927         if (!bs->encrypted)
928             return 0;
929     }
930     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
931         return -1;
932     ret = bs->drv->bdrv_set_key(bs, key);
933     if (ret < 0) {
934         bs->valid_key = 0;
935     } else if (!bs->valid_key) {
936         bs->valid_key = 1;
937         /* call the change callback now, we skipped it on open */
938         bs->media_changed = 1;
939         if (bs->change_cb)
940             bs->change_cb(bs->change_opaque);
941     }
942     return ret;
943 }
944
945 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
946 {
947     if (!bs->drv) {
948         buf[0] = '\0';
949     } else {
950         pstrcpy(buf, buf_size, bs->drv->format_name);
951     }
952 }
953
954 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
955                          void *opaque)
956 {
957     BlockDriver *drv;
958
959     for (drv = first_drv; drv != NULL; drv = drv->next) {
960         it(opaque, drv->format_name);
961     }
962 }
963
964 BlockDriverState *bdrv_find(const char *name)
965 {
966     BlockDriverState *bs;
967
968     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
969         if (!strcmp(name, bs->device_name))
970             return bs;
971     }
972     return NULL;
973 }
974
975 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
976 {
977     BlockDriverState *bs;
978
979     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
980         it(opaque, bs);
981     }
982 }
983
984 const char *bdrv_get_device_name(BlockDriverState *bs)
985 {
986     return bs->device_name;
987 }
988
989 void bdrv_flush(BlockDriverState *bs)
990 {
991     if (bs->drv->bdrv_flush)
992         bs->drv->bdrv_flush(bs);
993     if (bs->backing_hd)
994         bdrv_flush(bs->backing_hd);
995 }
996
997 void bdrv_flush_all(void)
998 {
999     BlockDriverState *bs;
1000
1001     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1002         if (bs->drv && !bdrv_is_read_only(bs) && 
1003             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1004             bdrv_flush(bs);
1005 }
1006
1007 /*
1008  * Returns true iff the specified sector is present in the disk image. Drivers
1009  * not implementing the functionality are assumed to not support backing files,
1010  * hence all their sectors are reported as allocated.
1011  *
1012  * 'pnum' is set to the number of sectors (including and immediately following
1013  * the specified sector) that are known to be in the same
1014  * allocated/unallocated state.
1015  *
1016  * 'nb_sectors' is the max value 'pnum' should be set to.
1017  */
1018 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1019         int *pnum)
1020 {
1021     int64_t n;
1022     if (!bs->drv->bdrv_is_allocated) {
1023         if (sector_num >= bs->total_sectors) {
1024             *pnum = 0;
1025             return 0;
1026         }
1027         n = bs->total_sectors - sector_num;
1028         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1029         return 1;
1030     }
1031     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1032 }
1033
1034 void bdrv_info(Monitor *mon)
1035 {
1036     BlockDriverState *bs;
1037
1038     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1039         monitor_printf(mon, "%s:", bs->device_name);
1040         monitor_printf(mon, " type=");
1041         switch(bs->type) {
1042         case BDRV_TYPE_HD:
1043             monitor_printf(mon, "hd");
1044             break;
1045         case BDRV_TYPE_CDROM:
1046             monitor_printf(mon, "cdrom");
1047             break;
1048         case BDRV_TYPE_FLOPPY:
1049             monitor_printf(mon, "floppy");
1050             break;
1051         }
1052         monitor_printf(mon, " removable=%d", bs->removable);
1053         if (bs->removable) {
1054             monitor_printf(mon, " locked=%d", bs->locked);
1055         }
1056         if (bs->drv) {
1057             monitor_printf(mon, " file=");
1058             monitor_print_filename(mon, bs->filename);
1059             if (bs->backing_file[0] != '\0') {
1060                 monitor_printf(mon, " backing_file=");
1061                 monitor_print_filename(mon, bs->backing_file);
1062             }
1063             monitor_printf(mon, " ro=%d", bs->read_only);
1064             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1065             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1066         } else {
1067             monitor_printf(mon, " [not inserted]");
1068         }
1069         monitor_printf(mon, "\n");
1070     }
1071 }
1072
1073 /* The "info blockstats" command. */
1074 void bdrv_info_stats(Monitor *mon)
1075 {
1076     BlockDriverState *bs;
1077
1078     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1079         monitor_printf(mon, "%s:"
1080                        " rd_bytes=%" PRIu64
1081                        " wr_bytes=%" PRIu64
1082                        " rd_operations=%" PRIu64
1083                        " wr_operations=%" PRIu64
1084                        "\n",
1085                        bs->device_name,
1086                        bs->rd_bytes, bs->wr_bytes,
1087                        bs->rd_ops, bs->wr_ops);
1088     }
1089 }
1090
1091 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1092 {
1093     if (bs->backing_hd && bs->backing_hd->encrypted)
1094         return bs->backing_file;
1095     else if (bs->encrypted)
1096         return bs->filename;
1097     else
1098         return NULL;
1099 }
1100
1101 void bdrv_get_backing_filename(BlockDriverState *bs,
1102                                char *filename, int filename_size)
1103 {
1104     if (!bs->backing_hd) {
1105         pstrcpy(filename, filename_size, "");
1106     } else {
1107         pstrcpy(filename, filename_size, bs->backing_file);
1108     }
1109 }
1110
1111 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1112                           const uint8_t *buf, int nb_sectors)
1113 {
1114     BlockDriver *drv = bs->drv;
1115     if (!drv)
1116         return -ENOMEDIUM;
1117     if (!drv->bdrv_write_compressed)
1118         return -ENOTSUP;
1119     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1120 }
1121
1122 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1123 {
1124     BlockDriver *drv = bs->drv;
1125     if (!drv)
1126         return -ENOMEDIUM;
1127     if (!drv->bdrv_get_info)
1128         return -ENOTSUP;
1129     memset(bdi, 0, sizeof(*bdi));
1130     return drv->bdrv_get_info(bs, bdi);
1131 }
1132
1133 /**************************************************************/
1134 /* handling of snapshots */
1135
1136 int bdrv_snapshot_create(BlockDriverState *bs,
1137                          QEMUSnapshotInfo *sn_info)
1138 {
1139     BlockDriver *drv = bs->drv;
1140     if (!drv)
1141         return -ENOMEDIUM;
1142     if (!drv->bdrv_snapshot_create)
1143         return -ENOTSUP;
1144     return drv->bdrv_snapshot_create(bs, sn_info);
1145 }
1146
1147 int bdrv_snapshot_goto(BlockDriverState *bs,
1148                        const char *snapshot_id)
1149 {
1150     BlockDriver *drv = bs->drv;
1151     if (!drv)
1152         return -ENOMEDIUM;
1153     if (!drv->bdrv_snapshot_goto)
1154         return -ENOTSUP;
1155     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1156 }
1157
1158 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1159 {
1160     BlockDriver *drv = bs->drv;
1161     if (!drv)
1162         return -ENOMEDIUM;
1163     if (!drv->bdrv_snapshot_delete)
1164         return -ENOTSUP;
1165     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1166 }
1167
1168 int bdrv_snapshot_list(BlockDriverState *bs,
1169                        QEMUSnapshotInfo **psn_info)
1170 {
1171     BlockDriver *drv = bs->drv;
1172     if (!drv)
1173         return -ENOMEDIUM;
1174     if (!drv->bdrv_snapshot_list)
1175         return -ENOTSUP;
1176     return drv->bdrv_snapshot_list(bs, psn_info);
1177 }
1178
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes, via snprintf) as a
 * short human-readable string and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and tagged with K, M, G or T: one decimal digit is
 * printed while the scaled value is below 10, a rounded integer otherwise.
 * T is the largest unit, so very large values print as a big T count.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Move to the next unit while the value needs four or more digits. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 suffixes[idx]);
    } else {
        /* add half a unit so the integer division rounds to nearest */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 ((size + (unit >> 1)) / unit),
                 suffixes[idx]);
    }
    return buf;
}
1208
1209 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1210 {
1211     char buf1[128], date_buf[128], clock_buf[128];
1212 #ifdef _WIN32
1213     struct tm *ptm;
1214 #else
1215     struct tm tm;
1216 #endif
1217     time_t ti;
1218     int64_t secs;
1219
1220     if (!sn) {
1221         snprintf(buf, buf_size,
1222                  "%-10s%-20s%7s%20s%15s",
1223                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1224     } else {
1225         ti = sn->date_sec;
1226 #ifdef _WIN32
1227         ptm = localtime(&ti);
1228         strftime(date_buf, sizeof(date_buf),
1229                  "%Y-%m-%d %H:%M:%S", ptm);
1230 #else
1231         localtime_r(&ti, &tm);
1232         strftime(date_buf, sizeof(date_buf),
1233                  "%Y-%m-%d %H:%M:%S", &tm);
1234 #endif
1235         secs = sn->vm_clock_nsec / 1000000000;
1236         snprintf(clock_buf, sizeof(clock_buf),
1237                  "%02d:%02d:%02d.%03d",
1238                  (int)(secs / 3600),
1239                  (int)((secs / 60) % 60),
1240                  (int)(secs % 60),
1241                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1242         snprintf(buf, buf_size,
1243                  "%-10s%-20s%7s%20s%15s",
1244                  sn->id_str, sn->name,
1245                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1246                  date_buf,
1247                  clock_buf);
1248     }
1249     return buf;
1250 }
1251
1252
1253 /**************************************************************/
1254 /* async I/Os */
1255
1256 typedef struct VectorTranslationState {
1257     QEMUIOVector *iov;
1258     uint8_t *bounce;
1259     int is_write;
1260     BlockDriverAIOCB *aiocb;
1261     BlockDriverAIOCB *this_aiocb;
1262 } VectorTranslationState;
1263
1264 static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
1265 {
1266     VectorTranslationState *s = opaque;
1267
1268     if (!s->is_write) {
1269         qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
1270     }
1271     qemu_vfree(s->bounce);
1272     s->this_aiocb->cb(s->this_aiocb->opaque, ret);
1273     qemu_aio_release(s->this_aiocb);
1274 }
1275
1276 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1277                                             int64_t sector_num,
1278                                             QEMUIOVector *iov,
1279                                             int nb_sectors,
1280                                             BlockDriverCompletionFunc *cb,
1281                                             void *opaque,
1282                                             int is_write)
1283
1284 {
1285     VectorTranslationState *s = qemu_mallocz(sizeof(*s));
1286     BlockDriverAIOCB *aiocb = qemu_aio_get(bs, cb, opaque);
1287
1288     s->this_aiocb = aiocb;
1289     s->iov = iov;
1290     s->bounce = qemu_memalign(512, nb_sectors * 512);
1291     s->is_write = is_write;
1292     if (is_write) {
1293         qemu_iovec_to_buffer(s->iov, s->bounce);
1294         s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
1295                                   bdrv_aio_rw_vector_cb, s);
1296     } else {
1297         s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
1298                                  bdrv_aio_rw_vector_cb, s);
1299     }
1300     return aiocb;
1301 }
1302
1303 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1304                                  QEMUIOVector *iov, int nb_sectors,
1305                                  BlockDriverCompletionFunc *cb, void *opaque)
1306 {
1307     if (bdrv_check_request(bs, sector_num, nb_sectors))
1308         return NULL;
1309
1310     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1311                               cb, opaque, 0);
1312 }
1313
1314 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1315                                   QEMUIOVector *iov, int nb_sectors,
1316                                   BlockDriverCompletionFunc *cb, void *opaque)
1317 {
1318     if (bdrv_check_request(bs, sector_num, nb_sectors))
1319         return NULL;
1320
1321     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1322                               cb, opaque, 1);
1323 }
1324
1325 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
1326                                 uint8_t *buf, int nb_sectors,
1327                                 BlockDriverCompletionFunc *cb, void *opaque)
1328 {
1329     BlockDriver *drv = bs->drv;
1330     BlockDriverAIOCB *ret;
1331
1332     if (!drv)
1333         return NULL;
1334     if (bdrv_check_request(bs, sector_num, nb_sectors))
1335         return NULL;
1336
1337     ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
1338
1339     if (ret) {
1340         /* Update stats even though technically transfer has not happened. */
1341         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1342         bs->rd_ops ++;
1343     }
1344
1345     return ret;
1346 }
1347
1348 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
1349                                  const uint8_t *buf, int nb_sectors,
1350                                  BlockDriverCompletionFunc *cb, void *opaque)
1351 {
1352     BlockDriver *drv = bs->drv;
1353     BlockDriverAIOCB *ret;
1354
1355     if (!drv)
1356         return NULL;
1357     if (bs->read_only)
1358         return NULL;
1359     if (bdrv_check_request(bs, sector_num, nb_sectors))
1360         return NULL;
1361
1362     ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
1363
1364     if (ret) {
1365         /* Update stats even though technically transfer has not happened. */
1366         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1367         bs->wr_ops ++;
1368     }
1369
1370     return ret;
1371 }
1372
1373 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1374 {
1375     if (acb->cb == bdrv_aio_rw_vector_cb) {
1376         VectorTranslationState *s = acb->opaque;
1377         acb = s->aiocb;
1378     }
1379
1380     acb->pool->cancel(acb);
1381 }
1382
1383
1384 /**************************************************************/
1385 /* async block device emulation */
1386
1387 static void bdrv_aio_bh_cb(void *opaque)
1388 {
1389     BlockDriverAIOCBSync *acb = opaque;
1390     acb->common.cb(acb->common.opaque, acb->ret);
1391     qemu_aio_release(acb);
1392 }
1393
1394 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
1395         int64_t sector_num, uint8_t *buf, int nb_sectors,
1396         BlockDriverCompletionFunc *cb, void *opaque)
1397 {
1398     BlockDriverAIOCBSync *acb;
1399     int ret;
1400
1401     acb = qemu_aio_get(bs, cb, opaque);
1402     if (!acb->bh)
1403         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1404     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1405     acb->ret = ret;
1406     qemu_bh_schedule(acb->bh);
1407     return &acb->common;
1408 }
1409
1410 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
1411         int64_t sector_num, const uint8_t *buf, int nb_sectors,
1412         BlockDriverCompletionFunc *cb, void *opaque)
1413 {
1414     BlockDriverAIOCBSync *acb;
1415     int ret;
1416
1417     acb = qemu_aio_get(bs, cb, opaque);
1418     if (!acb->bh)
1419         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1420     ret = bdrv_write(bs, sector_num, buf, nb_sectors);
1421     acb->ret = ret;
1422     qemu_bh_schedule(acb->bh);
1423     return &acb->common;
1424 }
1425
1426 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1427 {
1428     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1429     qemu_bh_cancel(acb->bh);
1430     qemu_aio_release(acb);
1431 }
1432
1433 /**************************************************************/
1434 /* sync block device emulation */
1435
/*
 * Completion callback for the synchronous emulation: 'opaque' points to an
 * int that receives the request's result.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1440
1441 #define NOT_DONE 0x7fffffff
1442
1443 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1444                         uint8_t *buf, int nb_sectors)
1445 {
1446     int async_ret;
1447     BlockDriverAIOCB *acb;
1448
1449     async_ret = NOT_DONE;
1450     acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
1451                         bdrv_rw_em_cb, &async_ret);
1452     if (acb == NULL)
1453         return -1;
1454
1455     while (async_ret == NOT_DONE) {
1456         qemu_aio_wait();
1457     }
1458
1459     return async_ret;
1460 }
1461
1462 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1463                          const uint8_t *buf, int nb_sectors)
1464 {
1465     int async_ret;
1466     BlockDriverAIOCB *acb;
1467
1468     async_ret = NOT_DONE;
1469     acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
1470                          bdrv_rw_em_cb, &async_ret);
1471     if (acb == NULL)
1472         return -1;
1473     while (async_ret == NOT_DONE) {
1474         qemu_aio_wait();
1475     }
1476     return async_ret;
1477 }
1478
1479 void bdrv_init(void)
1480 {
1481     bdrv_register(&bdrv_raw);
1482     bdrv_register(&bdrv_host_device);
1483 #ifndef _WIN32
1484     bdrv_register(&bdrv_cow);
1485 #endif
1486     bdrv_register(&bdrv_qcow);
1487     bdrv_register(&bdrv_vmdk);
1488     bdrv_register(&bdrv_cloop);
1489     bdrv_register(&bdrv_dmg);
1490     bdrv_register(&bdrv_bochs);
1491     bdrv_register(&bdrv_vpc);
1492     bdrv_register(&bdrv_vvfat);
1493     bdrv_register(&bdrv_qcow2);
1494     bdrv_register(&bdrv_parallels);
1495     bdrv_register(&bdrv_nbd);
1496 }
1497
1498 void aio_pool_init(AIOPool *pool, int aiocb_size,
1499                    void (*cancel)(BlockDriverAIOCB *acb))
1500 {
1501     pool->aiocb_size = aiocb_size;
1502     pool->cancel = cancel;
1503     pool->free_aiocb = NULL;
1504 }
1505
1506 void *qemu_aio_get_pool(AIOPool *pool, BlockDriverState *bs,
1507                         BlockDriverCompletionFunc *cb, void *opaque)
1508 {
1509     BlockDriverAIOCB *acb;
1510
1511     if (pool->free_aiocb) {
1512         acb = pool->free_aiocb;
1513         pool->free_aiocb = acb->next;
1514     } else {
1515         acb = qemu_mallocz(pool->aiocb_size);
1516         acb->pool = pool;
1517     }
1518     acb->bs = bs;
1519     acb->cb = cb;
1520     acb->opaque = opaque;
1521     return acb;
1522 }
1523
1524 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1525                    void *opaque)
1526 {
1527     return qemu_aio_get_pool(&bs->drv->aio_pool, bs, cb, opaque);
1528 }
1529
1530 void qemu_aio_release(void *p)
1531 {
1532     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1533     AIOPool *pool = acb->pool;
1534     acb->next = pool->free_aiocb;
1535     pool->free_aiocb = acb;
1536 }
1537
1538 /**************************************************************/
1539 /* removable device support */
1540
1541 /**
1542  * Return TRUE if the media is present
1543  */
1544 int bdrv_is_inserted(BlockDriverState *bs)
1545 {
1546     BlockDriver *drv = bs->drv;
1547     int ret;
1548     if (!drv)
1549         return 0;
1550     if (!drv->bdrv_is_inserted)
1551         return 1;
1552     ret = drv->bdrv_is_inserted(bs);
1553     return ret;
1554 }
1555
1556 /**
1557  * Return TRUE if the media changed since the last call to this
1558  * function. It is currently only used for floppy disks
1559  */
1560 int bdrv_media_changed(BlockDriverState *bs)
1561 {
1562     BlockDriver *drv = bs->drv;
1563     int ret;
1564
1565     if (!drv || !drv->bdrv_media_changed)
1566         ret = -ENOTSUP;
1567     else
1568         ret = drv->bdrv_media_changed(bs);
1569     if (ret == -ENOTSUP)
1570         ret = bs->media_changed;
1571     bs->media_changed = 0;
1572     return ret;
1573 }
1574
1575 /**
1576  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1577  */
1578 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1579 {
1580     BlockDriver *drv = bs->drv;
1581     int ret;
1582
1583     if (!drv || !drv->bdrv_eject) {
1584         ret = -ENOTSUP;
1585     } else {
1586         ret = drv->bdrv_eject(bs, eject_flag);
1587     }
1588     if (ret == -ENOTSUP) {
1589         if (eject_flag)
1590             bdrv_close(bs);
1591     }
1592 }
1593
1594 int bdrv_is_locked(BlockDriverState *bs)
1595 {
1596     return bs->locked;
1597 }
1598
1599 /**
1600  * Lock or unlock the media (if it is locked, the user won't be able
1601  * to eject it manually).
1602  */
1603 void bdrv_set_locked(BlockDriverState *bs, int locked)
1604 {
1605     BlockDriver *drv = bs->drv;
1606
1607     bs->locked = locked;
1608     if (drv && drv->bdrv_set_locked) {
1609         drv->bdrv_set_locked(bs, locked);
1610     }
1611 }
1612
1613 /* needed for generic scsi interface */
1614
1615 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1616 {
1617     BlockDriver *drv = bs->drv;
1618
1619     if (drv && drv->bdrv_ioctl)
1620         return drv->bdrv_ioctl(bs, req, buf);
1621     return -ENOTSUP;
1622 }
1623
1624 int bdrv_sg_send_command(BlockDriverState *bs, void *buf, int count)
1625 {
1626     return bs->drv->bdrv_sg_send_command(bs, buf, count);
1627 }
1628
1629 int bdrv_sg_recv_response(BlockDriverState *bs, void *buf, int count)
1630 {
1631     return bs->drv->bdrv_sg_recv_response(bs, buf, count);
1632 }
1633
1634 BlockDriverAIOCB *bdrv_sg_aio_read(BlockDriverState *bs, void *buf, int count,
1635                                    BlockDriverCompletionFunc *cb, void *opaque)
1636 {
1637     return bs->drv->bdrv_sg_aio_read(bs, buf, count, cb, opaque);
1638 }
1639
1640 BlockDriverAIOCB *bdrv_sg_aio_write(BlockDriverState *bs, void *buf, int count,
1641                                     BlockDriverCompletionFunc *cb, void *opaque)
1642 {
1643     return bs->drv->bdrv_sg_aio_write(bs, buf, count, cb, opaque);
1644 }