Fix savevm after BDRV_FILE size enforcement
[qemu] / block.c
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #ifdef HOST_BSD
26 /* include native header before sys-queue.h */
27 #include <sys/queue.h>
28 #endif
29
30 #include "qemu-common.h"
31 #include "monitor.h"
32 #include "block_int.h"
33
34 #ifdef HOST_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46
47 #define SECTOR_BITS 9
48 #define SECTOR_SIZE (1 << SECTOR_BITS)
49
50 static AIOPool vectored_aio_pool;
51
52 typedef struct BlockDriverAIOCBSync {
53     BlockDriverAIOCB common;
54     QEMUBH *bh;
55     int ret;
56 } BlockDriverAIOCBSync;
57
58 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
59         int64_t sector_num, uint8_t *buf, int nb_sectors,
60         BlockDriverCompletionFunc *cb, void *opaque);
61 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
62         int64_t sector_num, const uint8_t *buf, int nb_sectors,
63         BlockDriverCompletionFunc *cb, void *opaque);
64 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
65 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
66                         uint8_t *buf, int nb_sectors);
67 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
68                          const uint8_t *buf, int nb_sectors);
69
70 BlockDriverState *bdrv_first;
71
72 static BlockDriver *first_drv;
73
/*
 * Return non-zero if 'path' is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped, and
 * the path is absolute when the next character is a directory
 * separator.  On Windows both '/' and '\\' count as separators, and a
 * path that starts with a separator is always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
93
/*
 * Build in 'dest' (at most 'dest_size' bytes, including the terminator)
 * the path designated by 'filename'.  An absolute 'filename' is copied
 * as is; otherwise it is appended to the directory part of 'base_path',
 * i.e. everything up to and including the last directory separator or
 * the first ':' of 'base_path', whichever comes later.  URLs are
 * supported.  Nothing is written when 'dest_size' <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *colon, *last_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just after the first ':' of base_path, if any */
    colon = strchr(base_path, ':');
    dir_end = colon ? colon + 1 : base_path;

    /* position just after the last directory separator, if any */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_bslash = strrchr(base_path, '\\');
        if (!last_sep || last_bslash > last_sep) {
            last_sep = last_bslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;

    /* keep whichever candidate lies further into base_path */
    if (last_sep > dir_end) {
        dir_end = last_sep;
    }

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
137
138
139 static void bdrv_register(BlockDriver *bdrv)
140 {
141     if (!bdrv->bdrv_aio_read) {
142         /* add AIO emulation layer */
143         bdrv->bdrv_aio_read = bdrv_aio_read_em;
144         bdrv->bdrv_aio_write = bdrv_aio_write_em;
145         bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
146         bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
147     } else if (!bdrv->bdrv_read) {
148         /* add synchronous IO emulation layer */
149         bdrv->bdrv_read = bdrv_read_em;
150         bdrv->bdrv_write = bdrv_write_em;
151     }
152     aio_pool_init(&bdrv->aio_pool, bdrv->aiocb_size, bdrv->bdrv_aio_cancel);
153     bdrv->next = first_drv;
154     first_drv = bdrv;
155 }
156
157 /* create a new block device (by default it is empty) */
158 BlockDriverState *bdrv_new(const char *device_name)
159 {
160     BlockDriverState **pbs, *bs;
161
162     bs = qemu_mallocz(sizeof(BlockDriverState));
163     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
164     if (device_name[0] != '\0') {
165         /* insert at the end */
166         pbs = &bdrv_first;
167         while (*pbs != NULL)
168             pbs = &(*pbs)->next;
169         *pbs = bs;
170     }
171     return bs;
172 }
173
174 BlockDriver *bdrv_find_format(const char *format_name)
175 {
176     BlockDriver *drv1;
177     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
178         if (!strcmp(drv1->format_name, format_name))
179             return drv1;
180     }
181     return NULL;
182 }
183
184 int bdrv_create2(BlockDriver *drv,
185                 const char *filename, int64_t size_in_sectors,
186                 const char *backing_file, const char *backing_format,
187                 int flags)
188 {
189     if (drv->bdrv_create2)
190         return drv->bdrv_create2(filename, size_in_sectors, backing_file,
191                                  backing_format, flags);
192     if (drv->bdrv_create)
193         return drv->bdrv_create(filename, size_in_sectors, backing_file,
194                                 flags);
195     return -ENOTSUP;
196 }
197
198 int bdrv_create(BlockDriver *drv,
199                 const char *filename, int64_t size_in_sectors,
200                 const char *backing_file, int flags)
201 {
202     if (!drv->bdrv_create)
203         return -ENOTSUP;
204     return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
205 }
206
207 #ifdef _WIN32
208 void get_tmp_filename(char *filename, int size)
209 {
210     char temp_dir[MAX_PATH];
211
212     GetTempPath(MAX_PATH, temp_dir);
213     GetTempFileName(temp_dir, "qem", 0, filename);
214 }
215 #else
/*
 * POSIX variant: create an empty temporary file and store its name in
 * 'filename' (buffer of 'size' bytes).
 *
 * The file is created with mkstemp() under $TMPDIR, or "/tmp" when
 * TMPDIR is not set, and its descriptor is closed right away; the
 * caller reopens the file by name.  When the file cannot be created,
 * 'filename' is set to the empty string so that the un-substituted
 * template is never used as a real (predictable) file name.  Nothing
 * is done when 'size' <= 0.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        /* creation failed: do not hand back the raw template */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
228 #endif
229
230 #ifdef _WIN32
/*
 * Return non-zero if 'filename' starts with an ASCII letter followed by
 * a colon (e.g. "c:" or "D:\dir").
 */
static int is_windows_drive_prefix(const char *filename)
{
    char first = filename[0];
    int is_letter = (first >= 'a' && first <= 'z') ||
                    (first >= 'A' && first <= 'Z');

    /* filename[1] is only inspected when the first byte is a letter */
    return is_letter && filename[1] == ':';
}
237
/*
 * Return 1 if 'filename' is exactly a drive letter followed by ':'
 * (e.g. "d:"), or starts with "\\.\" or "//./"; 0 otherwise.
 */
static int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
248 #endif
249
250 static BlockDriver *find_protocol(const char *filename)
251 {
252     BlockDriver *drv1;
253     char protocol[128];
254     int len;
255     const char *p;
256
257 #ifdef _WIN32
258     if (is_windows_drive(filename) ||
259         is_windows_drive_prefix(filename))
260         return &bdrv_raw;
261 #endif
262     p = strchr(filename, ':');
263     if (!p)
264         return &bdrv_raw;
265     len = p - filename;
266     if (len > sizeof(protocol) - 1)
267         len = sizeof(protocol) - 1;
268     memcpy(protocol, filename, len);
269     protocol[len] = '\0';
270     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
271         if (drv1->protocol_name &&
272             !strcmp(drv1->protocol_name, protocol))
273             return drv1;
274     }
275     return NULL;
276 }
277
278 /* XXX: force raw format if block or character device ? It would
279    simplify the BSD case */
280 static BlockDriver *find_image_format(const char *filename)
281 {
282     int ret, score, score_max;
283     BlockDriver *drv1, *drv;
284     uint8_t buf[2048];
285     BlockDriverState *bs;
286
287     /* detect host devices. By convention, /dev/cdrom[N] is always
288        recognized as a host CDROM */
289     if (strstart(filename, "/dev/cdrom", NULL))
290         return &bdrv_host_device;
291 #ifdef _WIN32
292     if (is_windows_drive(filename))
293         return &bdrv_host_device;
294 #else
295     {
296         struct stat st;
297         if (stat(filename, &st) >= 0 &&
298             (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
299             return &bdrv_host_device;
300         }
301     }
302 #endif
303
304     drv = find_protocol(filename);
305     /* no need to test disk image formats for vvfat */
306     if (drv == &bdrv_vvfat)
307         return drv;
308
309     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
310     if (ret < 0)
311         return NULL;
312     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
313     bdrv_delete(bs);
314     if (ret < 0) {
315         return NULL;
316     }
317
318     score_max = 0;
319     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
320         if (drv1->bdrv_probe) {
321             score = drv1->bdrv_probe(buf, ret, filename);
322             if (score > score_max) {
323                 score_max = score;
324                 drv = drv1;
325             }
326         }
327     }
328     return drv;
329 }
330
331 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
332 {
333     BlockDriverState *bs;
334     int ret;
335
336     bs = bdrv_new("");
337     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
338     if (ret < 0) {
339         bdrv_delete(bs);
340         return ret;
341     }
342     bs->growable = 1;
343     *pbs = bs;
344     return 0;
345 }
346
347 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
348 {
349     return bdrv_open2(bs, filename, flags, NULL);
350 }
351
352 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
353                BlockDriver *drv)
354 {
355     int ret, open_flags;
356     char tmp_filename[PATH_MAX];
357     char backing_filename[PATH_MAX];
358
359     bs->read_only = 0;
360     bs->is_temporary = 0;
361     bs->encrypted = 0;
362     bs->valid_key = 0;
363
364     if (flags & BDRV_O_SNAPSHOT) {
365         BlockDriverState *bs1;
366         int64_t total_size;
367         int is_protocol = 0;
368
369         /* if snapshot, we create a temporary backing file and open it
370            instead of opening 'filename' directly */
371
372         /* if there is a backing file, use it */
373         bs1 = bdrv_new("");
374         ret = bdrv_open2(bs1, filename, 0, drv);
375         if (ret < 0) {
376             bdrv_delete(bs1);
377             return ret;
378         }
379         total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
380
381         if (bs1->drv && bs1->drv->protocol_name)
382             is_protocol = 1;
383
384         bdrv_delete(bs1);
385
386         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
387
388         /* Real path is meaningless for protocols */
389         if (is_protocol)
390             snprintf(backing_filename, sizeof(backing_filename),
391                      "%s", filename);
392         else
393             realpath(filename, backing_filename);
394
395         ret = bdrv_create2(&bdrv_qcow2, tmp_filename,
396                            total_size, backing_filename, 
397                            (drv ? drv->format_name : NULL), 0);
398         if (ret < 0) {
399             return ret;
400         }
401         filename = tmp_filename;
402         drv = &bdrv_qcow2;
403         bs->is_temporary = 1;
404     }
405
406     pstrcpy(bs->filename, sizeof(bs->filename), filename);
407     if (flags & BDRV_O_FILE) {
408         drv = find_protocol(filename);
409     } else if (!drv) {
410         drv = find_image_format(filename);
411     }
412     if (!drv) {
413         ret = -ENOENT;
414         goto unlink_and_fail;
415     }
416     bs->drv = drv;
417     bs->opaque = qemu_mallocz(drv->instance_size);
418     /* Note: for compatibility, we open disk image files as RDWR, and
419        RDONLY as fallback */
420     if (!(flags & BDRV_O_FILE))
421         open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
422     else
423         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
424     ret = drv->bdrv_open(bs, filename, open_flags);
425     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
426         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
427         bs->read_only = 1;
428     }
429     if (ret < 0) {
430         qemu_free(bs->opaque);
431         bs->opaque = NULL;
432         bs->drv = NULL;
433     unlink_and_fail:
434         if (bs->is_temporary)
435             unlink(filename);
436         return ret;
437     }
438     if (drv->bdrv_getlength) {
439         bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
440     }
441 #ifndef _WIN32
442     if (bs->is_temporary) {
443         unlink(filename);
444     }
445 #endif
446     if (bs->backing_file[0] != '\0') {
447         /* if there is a backing file, use it */
448         BlockDriver *back_drv = NULL;
449         bs->backing_hd = bdrv_new("");
450         path_combine(backing_filename, sizeof(backing_filename),
451                      filename, bs->backing_file);
452         if (bs->backing_format[0] != '\0')
453             back_drv = bdrv_find_format(bs->backing_format);
454         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
455                          back_drv);
456         if (ret < 0) {
457             bdrv_close(bs);
458             return ret;
459         }
460     }
461
462     if (!bdrv_key_required(bs)) {
463         /* call the change callback */
464         bs->media_changed = 1;
465         if (bs->change_cb)
466             bs->change_cb(bs->change_opaque);
467     }
468     return 0;
469 }
470
471 void bdrv_close(BlockDriverState *bs)
472 {
473     if (bs->drv) {
474         if (bs->backing_hd)
475             bdrv_delete(bs->backing_hd);
476         bs->drv->bdrv_close(bs);
477         qemu_free(bs->opaque);
478 #ifdef _WIN32
479         if (bs->is_temporary) {
480             unlink(bs->filename);
481         }
482 #endif
483         bs->opaque = NULL;
484         bs->drv = NULL;
485
486         /* call the change callback */
487         bs->media_changed = 1;
488         if (bs->change_cb)
489             bs->change_cb(bs->change_opaque);
490     }
491 }
492
493 void bdrv_delete(BlockDriverState *bs)
494 {
495     BlockDriverState **pbs;
496
497     pbs = &bdrv_first;
498     while (*pbs != bs && *pbs != NULL)
499         pbs = &(*pbs)->next;
500     if (*pbs == bs)
501         *pbs = bs->next;
502
503     bdrv_close(bs);
504     qemu_free(bs);
505 }
506
507 /* commit COW file into the raw image */
508 int bdrv_commit(BlockDriverState *bs)
509 {
510     BlockDriver *drv = bs->drv;
511     int64_t i, total_sectors;
512     int n, j;
513     unsigned char sector[512];
514
515     if (!drv)
516         return -ENOMEDIUM;
517
518     if (bs->read_only) {
519         return -EACCES;
520     }
521
522     if (!bs->backing_hd) {
523         return -ENOTSUP;
524     }
525
526     total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
527     for (i = 0; i < total_sectors;) {
528         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
529             for(j = 0; j < n; j++) {
530                 if (bdrv_read(bs, i, sector, 1) != 0) {
531                     return -EIO;
532                 }
533
534                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
535                     return -EIO;
536                 }
537                 i++;
538             }
539         } else {
540             i += n;
541         }
542     }
543
544     if (drv->bdrv_make_empty)
545         return drv->bdrv_make_empty(bs);
546
547     return 0;
548 }
549
550 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
551                                    size_t size)
552 {
553     int64_t len;
554
555     if (!bdrv_is_inserted(bs))
556         return -ENOMEDIUM;
557
558     if (bs->growable)
559         return 0;
560
561     len = bdrv_getlength(bs);
562
563     if ((offset + size) > len)
564         return -EIO;
565
566     return 0;
567 }
568
569 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
570                               int nb_sectors)
571 {
572     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
573 }
574
575 /* return < 0 if error. See bdrv_write() for the return codes */
576 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
577               uint8_t *buf, int nb_sectors)
578 {
579     BlockDriver *drv = bs->drv;
580
581     if (!drv)
582         return -ENOMEDIUM;
583     if (bdrv_check_request(bs, sector_num, nb_sectors))
584         return -EIO;
585
586     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
587 }
588
589 /* Return < 0 if error. Important errors are:
590   -EIO         generic I/O error (may happen for all errors)
591   -ENOMEDIUM   No media inserted.
592   -EINVAL      Invalid sector number or nb_sectors
593   -EACCES      Trying to write a read-only device
594 */
595 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
596                const uint8_t *buf, int nb_sectors)
597 {
598     BlockDriver *drv = bs->drv;
599     if (!bs->drv)
600         return -ENOMEDIUM;
601     if (bs->read_only)
602         return -EACCES;
603     if (bdrv_check_request(bs, sector_num, nb_sectors))
604         return -EIO;
605
606     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
607 }
608
609 int bdrv_pread(BlockDriverState *bs, int64_t offset,
610                void *buf, int count1)
611 {
612     uint8_t tmp_buf[SECTOR_SIZE];
613     int len, nb_sectors, count;
614     int64_t sector_num;
615
616     count = count1;
617     /* first read to align to sector start */
618     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
619     if (len > count)
620         len = count;
621     sector_num = offset >> SECTOR_BITS;
622     if (len > 0) {
623         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
624             return -EIO;
625         memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
626         count -= len;
627         if (count == 0)
628             return count1;
629         sector_num++;
630         buf += len;
631     }
632
633     /* read the sectors "in place" */
634     nb_sectors = count >> SECTOR_BITS;
635     if (nb_sectors > 0) {
636         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
637             return -EIO;
638         sector_num += nb_sectors;
639         len = nb_sectors << SECTOR_BITS;
640         buf += len;
641         count -= len;
642     }
643
644     /* add data from the last sector */
645     if (count > 0) {
646         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
647             return -EIO;
648         memcpy(buf, tmp_buf, count);
649     }
650     return count1;
651 }
652
653 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
654                 const void *buf, int count1)
655 {
656     uint8_t tmp_buf[SECTOR_SIZE];
657     int len, nb_sectors, count;
658     int64_t sector_num;
659
660     count = count1;
661     /* first write to align to sector start */
662     len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
663     if (len > count)
664         len = count;
665     sector_num = offset >> SECTOR_BITS;
666     if (len > 0) {
667         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
668             return -EIO;
669         memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
670         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
671             return -EIO;
672         count -= len;
673         if (count == 0)
674             return count1;
675         sector_num++;
676         buf += len;
677     }
678
679     /* write the sectors "in place" */
680     nb_sectors = count >> SECTOR_BITS;
681     if (nb_sectors > 0) {
682         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
683             return -EIO;
684         sector_num += nb_sectors;
685         len = nb_sectors << SECTOR_BITS;
686         buf += len;
687         count -= len;
688     }
689
690     /* add data from the last sector */
691     if (count > 0) {
692         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
693             return -EIO;
694         memcpy(tmp_buf, buf, count);
695         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
696             return -EIO;
697     }
698     return count1;
699 }
700
701 /**
702  * Truncate file to 'offset' bytes (needed only for file protocols)
703  */
704 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
705 {
706     BlockDriver *drv = bs->drv;
707     if (!drv)
708         return -ENOMEDIUM;
709     if (!drv->bdrv_truncate)
710         return -ENOTSUP;
711     return drv->bdrv_truncate(bs, offset);
712 }
713
714 /**
715  * Length of a file in bytes. Return < 0 if error or unknown.
716  */
717 int64_t bdrv_getlength(BlockDriverState *bs)
718 {
719     BlockDriver *drv = bs->drv;
720     if (!drv)
721         return -ENOMEDIUM;
722     if (!drv->bdrv_getlength) {
723         /* legacy mode */
724         return bs->total_sectors * SECTOR_SIZE;
725     }
726     return drv->bdrv_getlength(bs);
727 }
728
729 /* return 0 as number of sectors if no device present or error */
730 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
731 {
732     int64_t length;
733     length = bdrv_getlength(bs);
734     if (length < 0)
735         length = 0;
736     else
737         length = length >> SECTOR_BITS;
738     *nb_sectors_ptr = length;
739 }
740
/*
 * One entry of the MSDOS partition table, as read by guess_disk_lchs()
 * from offset 0x1be of the first sector.  The layout is packed; the
 * 32-bit fields are converted with le32_to_cpu() by the reader.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
753
754 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
755 static int guess_disk_lchs(BlockDriverState *bs,
756                            int *pcylinders, int *pheads, int *psectors)
757 {
758     uint8_t buf[512];
759     int ret, i, heads, sectors, cylinders;
760     struct partition *p;
761     uint32_t nr_sects;
762     uint64_t nb_sectors;
763
764     bdrv_get_geometry(bs, &nb_sectors);
765
766     ret = bdrv_read(bs, 0, buf, 1);
767     if (ret < 0)
768         return -1;
769     /* test msdos magic */
770     if (buf[510] != 0x55 || buf[511] != 0xaa)
771         return -1;
772     for(i = 0; i < 4; i++) {
773         p = ((struct partition *)(buf + 0x1be)) + i;
774         nr_sects = le32_to_cpu(p->nr_sects);
775         if (nr_sects && p->end_head) {
776             /* We make the assumption that the partition terminates on
777                a cylinder boundary */
778             heads = p->end_head + 1;
779             sectors = p->end_sector & 63;
780             if (sectors == 0)
781                 continue;
782             cylinders = nb_sectors / (heads * sectors);
783             if (cylinders < 1 || cylinders > 16383)
784                 continue;
785             *pheads = heads;
786             *psectors = sectors;
787             *pcylinders = cylinders;
788 #if 0
789             printf("guessed geometry: LCHS=%d %d %d\n",
790                    cylinders, heads, sectors);
791 #endif
792             return 0;
793         }
794     }
795     return -1;
796 }
797
798 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
799 {
800     int translation, lba_detected = 0;
801     int cylinders, heads, secs;
802     uint64_t nb_sectors;
803
804     /* if a geometry hint is available, use it */
805     bdrv_get_geometry(bs, &nb_sectors);
806     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
807     translation = bdrv_get_translation_hint(bs);
808     if (cylinders != 0) {
809         *pcyls = cylinders;
810         *pheads = heads;
811         *psecs = secs;
812     } else {
813         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
814             if (heads > 16) {
815                 /* if heads > 16, it means that a BIOS LBA
816                    translation was active, so the default
817                    hardware geometry is OK */
818                 lba_detected = 1;
819                 goto default_geometry;
820             } else {
821                 *pcyls = cylinders;
822                 *pheads = heads;
823                 *psecs = secs;
824                 /* disable any translation to be in sync with
825                    the logical geometry */
826                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
827                     bdrv_set_translation_hint(bs,
828                                               BIOS_ATA_TRANSLATION_NONE);
829                 }
830             }
831         } else {
832         default_geometry:
833             /* if no geometry, use a standard physical disk geometry */
834             cylinders = nb_sectors / (16 * 63);
835
836             if (cylinders > 16383)
837                 cylinders = 16383;
838             else if (cylinders < 2)
839                 cylinders = 2;
840             *pcyls = cylinders;
841             *pheads = 16;
842             *psecs = 63;
843             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
844                 if ((*pcyls * *pheads) <= 131072) {
845                     bdrv_set_translation_hint(bs,
846                                               BIOS_ATA_TRANSLATION_LARGE);
847                 } else {
848                     bdrv_set_translation_hint(bs,
849                                               BIOS_ATA_TRANSLATION_LBA);
850                 }
851             }
852         }
853         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
854     }
855 }
856
857 void bdrv_set_geometry_hint(BlockDriverState *bs,
858                             int cyls, int heads, int secs)
859 {
860     bs->cyls = cyls;
861     bs->heads = heads;
862     bs->secs = secs;
863 }
864
865 void bdrv_set_type_hint(BlockDriverState *bs, int type)
866 {
867     bs->type = type;
868     bs->removable = ((type == BDRV_TYPE_CDROM ||
869                       type == BDRV_TYPE_FLOPPY));
870 }
871
872 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
873 {
874     bs->translation = translation;
875 }
876
877 void bdrv_get_geometry_hint(BlockDriverState *bs,
878                             int *pcyls, int *pheads, int *psecs)
879 {
880     *pcyls = bs->cyls;
881     *pheads = bs->heads;
882     *psecs = bs->secs;
883 }
884
885 int bdrv_get_type_hint(BlockDriverState *bs)
886 {
887     return bs->type;
888 }
889
890 int bdrv_get_translation_hint(BlockDriverState *bs)
891 {
892     return bs->translation;
893 }
894
895 int bdrv_is_removable(BlockDriverState *bs)
896 {
897     return bs->removable;
898 }
899
900 int bdrv_is_read_only(BlockDriverState *bs)
901 {
902     return bs->read_only;
903 }
904
905 int bdrv_is_sg(BlockDriverState *bs)
906 {
907     return bs->sg;
908 }
909
910 /* XXX: no longer used */
911 void bdrv_set_change_cb(BlockDriverState *bs,
912                         void (*change_cb)(void *opaque), void *opaque)
913 {
914     bs->change_cb = change_cb;
915     bs->change_opaque = opaque;
916 }
917
918 int bdrv_is_encrypted(BlockDriverState *bs)
919 {
920     if (bs->backing_hd && bs->backing_hd->encrypted)
921         return 1;
922     return bs->encrypted;
923 }
924
925 int bdrv_key_required(BlockDriverState *bs)
926 {
927     BlockDriverState *backing_hd = bs->backing_hd;
928
929     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
930         return 1;
931     return (bs->encrypted && !bs->valid_key);
932 }
933
934 int bdrv_set_key(BlockDriverState *bs, const char *key)
935 {
936     int ret;
937     if (bs->backing_hd && bs->backing_hd->encrypted) {
938         ret = bdrv_set_key(bs->backing_hd, key);
939         if (ret < 0)
940             return ret;
941         if (!bs->encrypted)
942             return 0;
943     }
944     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
945         return -1;
946     ret = bs->drv->bdrv_set_key(bs, key);
947     if (ret < 0) {
948         bs->valid_key = 0;
949     } else if (!bs->valid_key) {
950         bs->valid_key = 1;
951         /* call the change callback now, we skipped it on open */
952         bs->media_changed = 1;
953         if (bs->change_cb)
954             bs->change_cb(bs->change_opaque);
955     }
956     return ret;
957 }
958
959 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
960 {
961     if (!bs->drv) {
962         buf[0] = '\0';
963     } else {
964         pstrcpy(buf, buf_size, bs->drv->format_name);
965     }
966 }
967
968 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
969                          void *opaque)
970 {
971     BlockDriver *drv;
972
973     for (drv = first_drv; drv != NULL; drv = drv->next) {
974         it(opaque, drv->format_name);
975     }
976 }
977
978 BlockDriverState *bdrv_find(const char *name)
979 {
980     BlockDriverState *bs;
981
982     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
983         if (!strcmp(name, bs->device_name))
984             return bs;
985     }
986     return NULL;
987 }
988
989 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
990 {
991     BlockDriverState *bs;
992
993     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
994         it(opaque, bs);
995     }
996 }
997
998 const char *bdrv_get_device_name(BlockDriverState *bs)
999 {
1000     return bs->device_name;
1001 }
1002
1003 void bdrv_flush(BlockDriverState *bs)
1004 {
1005     if (!bs->drv)
1006         return;
1007     if (bs->drv->bdrv_flush)
1008         bs->drv->bdrv_flush(bs);
1009     if (bs->backing_hd)
1010         bdrv_flush(bs->backing_hd);
1011 }
1012
1013 void bdrv_flush_all(void)
1014 {
1015     BlockDriverState *bs;
1016
1017     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1018         if (bs->drv && !bdrv_is_read_only(bs) && 
1019             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1020             bdrv_flush(bs);
1021 }
1022
1023 /*
1024  * Returns true iff the specified sector is present in the disk image. Drivers
1025  * not implementing the functionality are assumed to not support backing files,
1026  * hence all their sectors are reported as allocated.
1027  *
1028  * 'pnum' is set to the number of sectors (including and immediately following
1029  * the specified sector) that are known to be in the same
1030  * allocated/unallocated state.
1031  *
1032  * 'nb_sectors' is the max value 'pnum' should be set to.
1033  */
1034 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1035         int *pnum)
1036 {
1037     int64_t n;
1038     if (!bs->drv->bdrv_is_allocated) {
1039         if (sector_num >= bs->total_sectors) {
1040             *pnum = 0;
1041             return 0;
1042         }
1043         n = bs->total_sectors - sector_num;
1044         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1045         return 1;
1046     }
1047     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1048 }
1049
1050 void bdrv_info(Monitor *mon)
1051 {
1052     BlockDriverState *bs;
1053
1054     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1055         monitor_printf(mon, "%s:", bs->device_name);
1056         monitor_printf(mon, " type=");
1057         switch(bs->type) {
1058         case BDRV_TYPE_HD:
1059             monitor_printf(mon, "hd");
1060             break;
1061         case BDRV_TYPE_CDROM:
1062             monitor_printf(mon, "cdrom");
1063             break;
1064         case BDRV_TYPE_FLOPPY:
1065             monitor_printf(mon, "floppy");
1066             break;
1067         }
1068         monitor_printf(mon, " removable=%d", bs->removable);
1069         if (bs->removable) {
1070             monitor_printf(mon, " locked=%d", bs->locked);
1071         }
1072         if (bs->drv) {
1073             monitor_printf(mon, " file=");
1074             monitor_print_filename(mon, bs->filename);
1075             if (bs->backing_file[0] != '\0') {
1076                 monitor_printf(mon, " backing_file=");
1077                 monitor_print_filename(mon, bs->backing_file);
1078             }
1079             monitor_printf(mon, " ro=%d", bs->read_only);
1080             monitor_printf(mon, " drv=%s", bs->drv->format_name);
1081             monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1082         } else {
1083             monitor_printf(mon, " [not inserted]");
1084         }
1085         monitor_printf(mon, "\n");
1086     }
1087 }
1088
1089 /* The "info blockstats" command. */
1090 void bdrv_info_stats(Monitor *mon)
1091 {
1092     BlockDriverState *bs;
1093
1094     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1095         monitor_printf(mon, "%s:"
1096                        " rd_bytes=%" PRIu64
1097                        " wr_bytes=%" PRIu64
1098                        " rd_operations=%" PRIu64
1099                        " wr_operations=%" PRIu64
1100                        "\n",
1101                        bs->device_name,
1102                        bs->rd_bytes, bs->wr_bytes,
1103                        bs->rd_ops, bs->wr_ops);
1104     }
1105 }
1106
1107 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1108 {
1109     if (bs->backing_hd && bs->backing_hd->encrypted)
1110         return bs->backing_file;
1111     else if (bs->encrypted)
1112         return bs->filename;
1113     else
1114         return NULL;
1115 }
1116
1117 void bdrv_get_backing_filename(BlockDriverState *bs,
1118                                char *filename, int filename_size)
1119 {
1120     if (!bs->backing_hd) {
1121         pstrcpy(filename, filename_size, "");
1122     } else {
1123         pstrcpy(filename, filename_size, bs->backing_file);
1124     }
1125 }
1126
1127 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1128                           const uint8_t *buf, int nb_sectors)
1129 {
1130     BlockDriver *drv = bs->drv;
1131     if (!drv)
1132         return -ENOMEDIUM;
1133     if (!drv->bdrv_write_compressed)
1134         return -ENOTSUP;
1135     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1136 }
1137
1138 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1139 {
1140     BlockDriver *drv = bs->drv;
1141     if (!drv)
1142         return -ENOMEDIUM;
1143     if (!drv->bdrv_get_info)
1144         return -ENOTSUP;
1145     memset(bdi, 0, sizeof(*bdi));
1146     return drv->bdrv_get_info(bs, bdi);
1147 }
1148
1149 int bdrv_put_buffer(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size)
1150 {
1151     BlockDriver *drv = bs->drv;
1152     if (!drv)
1153         return -ENOMEDIUM;
1154     if (!drv->bdrv_put_buffer)
1155         return -ENOTSUP;
1156     return drv->bdrv_put_buffer(bs, buf, pos, size);
1157 }
1158
1159 int bdrv_get_buffer(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size)
1160 {
1161     BlockDriver *drv = bs->drv;
1162     if (!drv)
1163         return -ENOMEDIUM;
1164     if (!drv->bdrv_get_buffer)
1165         return -ENOTSUP;
1166     return drv->bdrv_get_buffer(bs, buf, pos, size);
1167 }
1168
1169 /**************************************************************/
1170 /* handling of snapshots */
1171
1172 int bdrv_snapshot_create(BlockDriverState *bs,
1173                          QEMUSnapshotInfo *sn_info)
1174 {
1175     BlockDriver *drv = bs->drv;
1176     if (!drv)
1177         return -ENOMEDIUM;
1178     if (!drv->bdrv_snapshot_create)
1179         return -ENOTSUP;
1180     return drv->bdrv_snapshot_create(bs, sn_info);
1181 }
1182
1183 int bdrv_snapshot_goto(BlockDriverState *bs,
1184                        const char *snapshot_id)
1185 {
1186     BlockDriver *drv = bs->drv;
1187     if (!drv)
1188         return -ENOMEDIUM;
1189     if (!drv->bdrv_snapshot_goto)
1190         return -ENOTSUP;
1191     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1192 }
1193
1194 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1195 {
1196     BlockDriver *drv = bs->drv;
1197     if (!drv)
1198         return -ENOMEDIUM;
1199     if (!drv->bdrv_snapshot_delete)
1200         return -ENOTSUP;
1201     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1202 }
1203
1204 int bdrv_snapshot_list(BlockDriverState *bs,
1205                        QEMUSnapshotInfo **psn_info)
1206 {
1207     BlockDriver *drv = bs->drv;
1208     if (!drv)
1209         return -ENOMEDIUM;
1210     if (!drv->bdrv_snapshot_list)
1211         return -ENOTSUP;
1212     return drv->bdrv_snapshot_list(bs, psn_info);
1213 }
1214
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes, via snprintf) using
 * binary K/M/G/T suffixes, and return 'buf'.
 *
 *  - values up to 999 are printed as a plain integer;
 *  - values below ten units of the chosen suffix get one decimal digit;
 *  - anything larger is rounded to the nearest whole unit.
 *
 * The largest suffix ('T') is used for everything that does not fit a
 * smaller one.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Pick the smallest suffix under which the value stays below 1000. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 suffixes[idx]);
    } else {
        /* Adding half a unit rounds to nearest instead of truncating. */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit,
                 suffixes[idx]);
    }
    return buf;
}
1244
1245 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1246 {
1247     char buf1[128], date_buf[128], clock_buf[128];
1248 #ifdef _WIN32
1249     struct tm *ptm;
1250 #else
1251     struct tm tm;
1252 #endif
1253     time_t ti;
1254     int64_t secs;
1255
1256     if (!sn) {
1257         snprintf(buf, buf_size,
1258                  "%-10s%-20s%7s%20s%15s",
1259                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1260     } else {
1261         ti = sn->date_sec;
1262 #ifdef _WIN32
1263         ptm = localtime(&ti);
1264         strftime(date_buf, sizeof(date_buf),
1265                  "%Y-%m-%d %H:%M:%S", ptm);
1266 #else
1267         localtime_r(&ti, &tm);
1268         strftime(date_buf, sizeof(date_buf),
1269                  "%Y-%m-%d %H:%M:%S", &tm);
1270 #endif
1271         secs = sn->vm_clock_nsec / 1000000000;
1272         snprintf(clock_buf, sizeof(clock_buf),
1273                  "%02d:%02d:%02d.%03d",
1274                  (int)(secs / 3600),
1275                  (int)((secs / 60) % 60),
1276                  (int)(secs % 60),
1277                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1278         snprintf(buf, buf_size,
1279                  "%-10s%-20s%7s%20s%15s",
1280                  sn->id_str, sn->name,
1281                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1282                  date_buf,
1283                  clock_buf);
1284     }
1285     return buf;
1286 }
1287
1288
1289 /**************************************************************/
1290 /* async I/Os */
1291
/*
 * State of one vectored AIO request that is carried out through a single
 * linear bounce buffer (see bdrv_aio_rw_vector).
 */
typedef struct VectorTranslationAIOCB {
    BlockDriverAIOCB common;    /* generic AIOCB; container_of() anchor */
    QEMUIOVector *iov;          /* I/O vector passed in by the caller */
    uint8_t *bounce;            /* linear buffer from qemu_memalign() */
    int is_write;               /* non-zero for writes, zero for reads */
    BlockDriverAIOCB *aiocb;    /* underlying linear request */
} VectorTranslationAIOCB;
1299
1300 static void bdrv_aio_cancel_vector(BlockDriverAIOCB *_acb)
1301 {
1302     VectorTranslationAIOCB *acb
1303         = container_of(_acb, VectorTranslationAIOCB, common);
1304
1305     bdrv_aio_cancel(acb->aiocb);
1306 }
1307
1308 static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
1309 {
1310     VectorTranslationAIOCB *s = (VectorTranslationAIOCB *)opaque;
1311
1312     if (!s->is_write) {
1313         qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
1314     }
1315     qemu_vfree(s->bounce);
1316     s->common.cb(s->common.opaque, ret);
1317     qemu_aio_release(s);
1318 }
1319
1320 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1321                                             int64_t sector_num,
1322                                             QEMUIOVector *iov,
1323                                             int nb_sectors,
1324                                             BlockDriverCompletionFunc *cb,
1325                                             void *opaque,
1326                                             int is_write)
1327
1328 {
1329     VectorTranslationAIOCB *s = qemu_aio_get_pool(&vectored_aio_pool, bs,
1330                                                   cb, opaque);
1331
1332     s->iov = iov;
1333     s->bounce = qemu_memalign(512, nb_sectors * 512);
1334     s->is_write = is_write;
1335     if (is_write) {
1336         qemu_iovec_to_buffer(s->iov, s->bounce);
1337         s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
1338                                   bdrv_aio_rw_vector_cb, s);
1339     } else {
1340         s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
1341                                  bdrv_aio_rw_vector_cb, s);
1342     }
1343     if (!s->aiocb) {
1344         qemu_vfree(s->bounce);
1345         qemu_aio_release(s);
1346         return NULL;
1347     }
1348     return &s->common;
1349 }
1350
1351 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1352                                  QEMUIOVector *iov, int nb_sectors,
1353                                  BlockDriverCompletionFunc *cb, void *opaque)
1354 {
1355     if (bdrv_check_request(bs, sector_num, nb_sectors))
1356         return NULL;
1357
1358     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1359                               cb, opaque, 0);
1360 }
1361
1362 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1363                                   QEMUIOVector *iov, int nb_sectors,
1364                                   BlockDriverCompletionFunc *cb, void *opaque)
1365 {
1366     if (bdrv_check_request(bs, sector_num, nb_sectors))
1367         return NULL;
1368
1369     return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
1370                               cb, opaque, 1);
1371 }
1372
1373 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
1374                                 uint8_t *buf, int nb_sectors,
1375                                 BlockDriverCompletionFunc *cb, void *opaque)
1376 {
1377     BlockDriver *drv = bs->drv;
1378     BlockDriverAIOCB *ret;
1379
1380     if (!drv)
1381         return NULL;
1382     if (bdrv_check_request(bs, sector_num, nb_sectors))
1383         return NULL;
1384
1385     ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
1386
1387     if (ret) {
1388         /* Update stats even though technically transfer has not happened. */
1389         bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1390         bs->rd_ops ++;
1391     }
1392
1393     return ret;
1394 }
1395
1396 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
1397                                  const uint8_t *buf, int nb_sectors,
1398                                  BlockDriverCompletionFunc *cb, void *opaque)
1399 {
1400     BlockDriver *drv = bs->drv;
1401     BlockDriverAIOCB *ret;
1402
1403     if (!drv)
1404         return NULL;
1405     if (bs->read_only)
1406         return NULL;
1407     if (bdrv_check_request(bs, sector_num, nb_sectors))
1408         return NULL;
1409
1410     ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
1411
1412     if (ret) {
1413         /* Update stats even though technically transfer has not happened. */
1414         bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1415         bs->wr_ops ++;
1416     }
1417
1418     return ret;
1419 }
1420
1421 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1422 {
1423     acb->pool->cancel(acb);
1424 }
1425
1426
1427 /**************************************************************/
1428 /* async block device emulation */
1429
1430 static void bdrv_aio_bh_cb(void *opaque)
1431 {
1432     BlockDriverAIOCBSync *acb = opaque;
1433     acb->common.cb(acb->common.opaque, acb->ret);
1434     qemu_aio_release(acb);
1435 }
1436
1437 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
1438         int64_t sector_num, uint8_t *buf, int nb_sectors,
1439         BlockDriverCompletionFunc *cb, void *opaque)
1440 {
1441     BlockDriverAIOCBSync *acb;
1442     int ret;
1443
1444     acb = qemu_aio_get(bs, cb, opaque);
1445     if (!acb->bh)
1446         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1447     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1448     acb->ret = ret;
1449     qemu_bh_schedule(acb->bh);
1450     return &acb->common;
1451 }
1452
1453 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
1454         int64_t sector_num, const uint8_t *buf, int nb_sectors,
1455         BlockDriverCompletionFunc *cb, void *opaque)
1456 {
1457     BlockDriverAIOCBSync *acb;
1458     int ret;
1459
1460     acb = qemu_aio_get(bs, cb, opaque);
1461     if (!acb->bh)
1462         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1463     ret = bdrv_write(bs, sector_num, buf, nb_sectors);
1464     acb->ret = ret;
1465     qemu_bh_schedule(acb->bh);
1466     return &acb->common;
1467 }
1468
1469 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1470 {
1471     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1472     qemu_bh_cancel(acb->bh);
1473     qemu_aio_release(acb);
1474 }
1475
1476 /**************************************************************/
1477 /* sync block device emulation */
1478
/*
 * Completion callback of the emulated synchronous path: stores 'ret' in
 * the int that 'opaque' points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1483
1484 #define NOT_DONE 0x7fffffff
1485
1486 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1487                         uint8_t *buf, int nb_sectors)
1488 {
1489     int async_ret;
1490     BlockDriverAIOCB *acb;
1491
1492     async_ret = NOT_DONE;
1493     acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
1494                         bdrv_rw_em_cb, &async_ret);
1495     if (acb == NULL)
1496         return -1;
1497
1498     while (async_ret == NOT_DONE) {
1499         qemu_aio_wait();
1500     }
1501
1502     return async_ret;
1503 }
1504
1505 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1506                          const uint8_t *buf, int nb_sectors)
1507 {
1508     int async_ret;
1509     BlockDriverAIOCB *acb;
1510
1511     async_ret = NOT_DONE;
1512     acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
1513                          bdrv_rw_em_cb, &async_ret);
1514     if (acb == NULL)
1515         return -1;
1516     while (async_ret == NOT_DONE) {
1517         qemu_aio_wait();
1518     }
1519     return async_ret;
1520 }
1521
/*
 * Set up the block layer: initialise the AIOCB pool used for vectored
 * requests and register the built-in block drivers.
 */
void bdrv_init(void)
{
    /* Vectored requests are cancelled through bdrv_aio_cancel_vector. */
    aio_pool_init(&vectored_aio_pool, sizeof(VectorTranslationAIOCB),
                  bdrv_aio_cancel_vector);

    /* NOTE(review): registration order may be significant - confirm in
     * bdrv_register() before reordering. */
    bdrv_register(&bdrv_raw);
    bdrv_register(&bdrv_host_device);
    /* The cow driver is not registered on Windows hosts. */
#ifndef _WIN32
    bdrv_register(&bdrv_cow);
#endif
    bdrv_register(&bdrv_qcow);
    bdrv_register(&bdrv_vmdk);
    bdrv_register(&bdrv_cloop);
    bdrv_register(&bdrv_dmg);
    bdrv_register(&bdrv_bochs);
    bdrv_register(&bdrv_vpc);
    bdrv_register(&bdrv_vvfat);
    bdrv_register(&bdrv_qcow2);
    bdrv_register(&bdrv_parallels);
    bdrv_register(&bdrv_nbd);
}
1543
1544 void aio_pool_init(AIOPool *pool, int aiocb_size,
1545                    void (*cancel)(BlockDriverAIOCB *acb))
1546 {
1547     pool->aiocb_size = aiocb_size;
1548     pool->cancel = cancel;
1549     pool->free_aiocb = NULL;
1550 }
1551
1552 void *qemu_aio_get_pool(AIOPool *pool, BlockDriverState *bs,
1553                         BlockDriverCompletionFunc *cb, void *opaque)
1554 {
1555     BlockDriverAIOCB *acb;
1556
1557     if (pool->free_aiocb) {
1558         acb = pool->free_aiocb;
1559         pool->free_aiocb = acb->next;
1560     } else {
1561         acb = qemu_mallocz(pool->aiocb_size);
1562         acb->pool = pool;
1563     }
1564     acb->bs = bs;
1565     acb->cb = cb;
1566     acb->opaque = opaque;
1567     return acb;
1568 }
1569
1570 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1571                    void *opaque)
1572 {
1573     return qemu_aio_get_pool(&bs->drv->aio_pool, bs, cb, opaque);
1574 }
1575
1576 void qemu_aio_release(void *p)
1577 {
1578     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1579     AIOPool *pool = acb->pool;
1580     acb->next = pool->free_aiocb;
1581     pool->free_aiocb = acb;
1582 }
1583
1584 /**************************************************************/
1585 /* removable device support */
1586
1587 /**
1588  * Return TRUE if the media is present
1589  */
1590 int bdrv_is_inserted(BlockDriverState *bs)
1591 {
1592     BlockDriver *drv = bs->drv;
1593     int ret;
1594     if (!drv)
1595         return 0;
1596     if (!drv->bdrv_is_inserted)
1597         return 1;
1598     ret = drv->bdrv_is_inserted(bs);
1599     return ret;
1600 }
1601
1602 /**
1603  * Return TRUE if the media changed since the last call to this
1604  * function. It is currently only used for floppy disks
1605  */
1606 int bdrv_media_changed(BlockDriverState *bs)
1607 {
1608     BlockDriver *drv = bs->drv;
1609     int ret;
1610
1611     if (!drv || !drv->bdrv_media_changed)
1612         ret = -ENOTSUP;
1613     else
1614         ret = drv->bdrv_media_changed(bs);
1615     if (ret == -ENOTSUP)
1616         ret = bs->media_changed;
1617     bs->media_changed = 0;
1618     return ret;
1619 }
1620
1621 /**
1622  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1623  */
1624 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1625 {
1626     BlockDriver *drv = bs->drv;
1627     int ret;
1628
1629     if (!drv || !drv->bdrv_eject) {
1630         ret = -ENOTSUP;
1631     } else {
1632         ret = drv->bdrv_eject(bs, eject_flag);
1633     }
1634     if (ret == -ENOTSUP) {
1635         if (eject_flag)
1636             bdrv_close(bs);
1637     }
1638 }
1639
1640 int bdrv_is_locked(BlockDriverState *bs)
1641 {
1642     return bs->locked;
1643 }
1644
1645 /**
1646  * Lock or unlock the media (if it is locked, the user won't be able
1647  * to eject it manually).
1648  */
1649 void bdrv_set_locked(BlockDriverState *bs, int locked)
1650 {
1651     BlockDriver *drv = bs->drv;
1652
1653     bs->locked = locked;
1654     if (drv && drv->bdrv_set_locked) {
1655         drv->bdrv_set_locked(bs, locked);
1656     }
1657 }
1658
1659 /* needed for generic scsi interface */
1660
1661 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1662 {
1663     BlockDriver *drv = bs->drv;
1664
1665     if (drv && drv->bdrv_ioctl)
1666         return drv->bdrv_ioctl(bs, req, buf);
1667     return -ENOTSUP;
1668 }
1669
1670 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1671         unsigned long int req, void *buf,
1672         BlockDriverCompletionFunc *cb, void *opaque)
1673 {
1674     BlockDriver *drv = bs->drv;
1675
1676     if (drv && drv->bdrv_aio_ioctl)
1677         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1678     return NULL;
1679 }