Add PowerPC power-management state check callback.
[qemu] / hw / cirrus_vga.c
index 6305c94..85bf4a2 100644 (file)
@@ -1,9 +1,9 @@
 /*
  * QEMU Cirrus CLGD 54xx VGA Emulator.
- * 
+ *
  * Copyright (c) 2004 Fabrice Bellard
  * Copyright (c) 2004 Makoto Suzuki (suzu)
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
  * in the Software without restriction, including without limitation the rights
 #include "vl.h"
 #include "vga_int.h"
 
+/*
+ * TODO:
+ *    - destination write mask support not complete (bits 5..7)
+ *    - optimize linear mappings
+ *    - optimize bitblt functions
+ */
+
 //#define DEBUG_CIRRUS
 //#define DEBUG_BITBLT
 
 #define CIRRUS_ID_CLGD5436  (0x2B<<2)
 #define CIRRUS_ID_CLGD5446  (0x2E<<2)
 
-/* this define is used to select the exact CLGD implementation we
-   emulate. */
-//#define CIRRUS_ID CIRRUS_ID_CLGD5430
-#define CIRRUS_ID CIRRUS_ID_CLGD5446
-
 // sequencer 0x07
 #define CIRRUS_SR7_BPP_VGA            0x00
 #define CIRRUS_SR7_BPP_SVGA           0x01
 #define CIRRUS_BLT_START                0x02
 #define CIRRUS_BLT_RESET                0x04
 #define CIRRUS_BLT_FIFOUSED             0x10
+#define CIRRUS_BLT_AUTOSTART            0x80
 
 // control 0x32
 #define CIRRUS_ROP_0                    0x00
 #define CIRRUS_ROP_NOTSRC_OR_DST        0xd6
 #define CIRRUS_ROP_NOTSRC_AND_NOTDST    0xda
 
+#define CIRRUS_ROP_NOP_INDEX 2
+#define CIRRUS_ROP_SRC_INDEX 5
+
 // control 0x33
-#define CIRRUS_BLTMODEEXT_SOLIDFILL     0x04
+#define CIRRUS_BLTMODEEXT_SOLIDFILL        0x04
+#define CIRRUS_BLTMODEEXT_COLOREXPINV      0x02
+#define CIRRUS_BLTMODEEXT_DWORDGRANULARITY 0x01
 
 // memory-mapped IO
 #define CIRRUS_MMIO_BLTBGCOLOR        0x00     // dword
 
 // PCI 0x00: vendor, 0x02: device
 #define PCI_VENDOR_CIRRUS             0x1013
-#define PCI_DEVICE_ID                 CIRRUS_ID
 #define PCI_DEVICE_CLGD5462           0x00d0
 #define PCI_DEVICE_CLGD5465           0x00d6
 
 #define CIRRUS_HOOK_NOT_HANDLED 0
 #define CIRRUS_HOOK_HANDLED 1
 
-typedef void (*cirrus_bitblt_rop_t) (uint8_t * dst, const uint8_t * src,
+struct CirrusVGAState;
+typedef void (*cirrus_bitblt_rop_t) (struct CirrusVGAState *s,
+                                     uint8_t * dst, const uint8_t * src,
                                     int dstpitch, int srcpitch,
                                     int bltwidth, int bltheight);
-
-typedef void (*cirrus_bitblt_handler_t) (void *opaque);
+typedef void (*cirrus_fill_t)(struct CirrusVGAState *s,
+                              uint8_t *dst, int dst_pitch, int width, int height);
 
 typedef struct CirrusVGAState {
     VGA_STATE_COMMON
 
     int cirrus_linear_io_addr;
+    int cirrus_linear_bitblt_io_addr;
     int cirrus_mmio_io_addr;
     uint32_t cirrus_addr_mask;
+    uint32_t linear_mmio_mask;
     uint8_t cirrus_shadow_gr0;
     uint8_t cirrus_shadow_gr1;
     uint8_t cirrus_hidden_dac_lockindex;
@@ -229,27 +240,33 @@ typedef struct CirrusVGAState {
     uint32_t cirrus_bank_base[2];
     uint32_t cirrus_bank_limit[2];
     uint8_t cirrus_hidden_palette[48];
-    uint32_t cirrus_hw_cursor_x;
-    uint32_t cirrus_hw_cursor_y;
+    uint32_t hw_cursor_x;
+    uint32_t hw_cursor_y;
     int cirrus_blt_pixelwidth;
     int cirrus_blt_width;
     int cirrus_blt_height;
     int cirrus_blt_dstpitch;
     int cirrus_blt_srcpitch;
+    uint32_t cirrus_blt_fgcol;
+    uint32_t cirrus_blt_bgcol;
     uint32_t cirrus_blt_dstaddr;
     uint32_t cirrus_blt_srcaddr;
     uint8_t cirrus_blt_mode;
+    uint8_t cirrus_blt_modeext;
     cirrus_bitblt_rop_t cirrus_rop;
-#define CIRRUS_BLTBUFSIZE 256
+#define CIRRUS_BLTBUFSIZE (2048 * 4) /* one line width */
     uint8_t cirrus_bltbuf[CIRRUS_BLTBUFSIZE];
     uint8_t *cirrus_srcptr;
     uint8_t *cirrus_srcptr_end;
     uint32_t cirrus_srccounter;
-    uint8_t *cirrus_dstptr;
-    uint8_t *cirrus_dstptr_end;
-    uint32_t cirrus_dstcounter;
-    cirrus_bitblt_handler_t cirrus_blt_handler;
-    int cirrus_blt_horz_counter;
+    /* hwcursor display state */
+    int last_hw_cursor_size;
+    int last_hw_cursor_x;
+    int last_hw_cursor_y;
+    int last_hw_cursor_y_start;
+    int last_hw_cursor_y_end;
+    int real_vram_size; /* XXX: remove this field */
+    CPUWriteMemoryFunc **cirrus_linear_write;
 } CirrusVGAState;
 
 typedef struct PCICirrusVGAState {
@@ -257,6 +274,8 @@ typedef struct PCICirrusVGAState {
     CirrusVGAState cirrus_vga;
 } PCICirrusVGAState;
 
+static uint8_t rop_to_index[256];
+
 /***************************************
  *
  *  prototypes.
@@ -264,7 +283,8 @@ typedef struct PCICirrusVGAState {
  ***************************************/
 
 
-static void cirrus_bitblt_reset(CirrusVGAState * s);
+static void cirrus_bitblt_reset(CirrusVGAState *s);
+static void cirrus_update_memory_access(CirrusVGAState *s);
 
 /***************************************
  *
@@ -272,343 +292,337 @@ static void cirrus_bitblt_reset(CirrusVGAState * s);
  *
  ***************************************/
 
-#define IMPLEMENT_BITBLT(name,opline) \
-  static void \
-  cirrus_bitblt_rop_fwd_##name( \
-    uint8_t *dst,const uint8_t *src, \
-    int dstpitch,int srcpitch, \
-    int bltwidth,int bltheight) \
-  { \
-    int x,y; \
-    dstpitch -= bltwidth; \
-    srcpitch -= bltwidth; \
-    for (y = 0; y < bltheight; y++) { \
-      for (x = 0; x < bltwidth; x++) { \
-        opline; \
-        dst++; \
-        src++; \
-        } \
-      dst += dstpitch; \
-      src += srcpitch; \
-      } \
-    } \
- \
-  static void \
-  cirrus_bitblt_rop_bkwd_##name( \
-    uint8_t *dst,const uint8_t *src, \
-    int dstpitch,int srcpitch, \
-    int bltwidth,int bltheight) \
-  { \
-    int x,y; \
-    dstpitch += bltwidth; \
-    srcpitch += bltwidth; \
-    for (y = 0; y < bltheight; y++) { \
-      for (x = 0; x < bltwidth; x++) { \
-        opline; \
-        dst--; \
-        src--; \
-      } \
-      dst += dstpitch; \
-      src += srcpitch; \
-    } \
-  }
-
-IMPLEMENT_BITBLT(0, *dst = 0)
-IMPLEMENT_BITBLT(src_and_dst, *dst = (*src) & (*dst))
-IMPLEMENT_BITBLT(nop, (void) 0)
-IMPLEMENT_BITBLT(src_and_notdst, *dst = (*src) & (~(*dst)))
-IMPLEMENT_BITBLT(notdst, *dst = ~(*dst))
-IMPLEMENT_BITBLT(src, *dst = *src)
-IMPLEMENT_BITBLT(1, *dst = 0xff)
-IMPLEMENT_BITBLT(notsrc_and_dst, *dst = (~(*src)) & (*dst))
-IMPLEMENT_BITBLT(src_xor_dst, *dst = (*src) ^ (*dst))
-IMPLEMENT_BITBLT(src_or_dst, *dst = (*src) | (*dst))
-IMPLEMENT_BITBLT(notsrc_or_notdst, *dst = (~(*src)) | (~(*dst)))
-IMPLEMENT_BITBLT(src_notxor_dst, *dst = ~((*src) ^ (*dst)))
-IMPLEMENT_BITBLT(src_or_notdst, *dst = (*src) | (~(*dst)))
-IMPLEMENT_BITBLT(notsrc, *dst = (~(*src)))
-IMPLEMENT_BITBLT(notsrc_or_dst, *dst = (~(*src)) | (*dst))
-IMPLEMENT_BITBLT(notsrc_and_notdst, *dst = (~(*src)) & (~(*dst)))
-
-static cirrus_bitblt_rop_t cirrus_get_fwd_rop_handler(uint8_t rop)
-{
-    cirrus_bitblt_rop_t rop_handler = cirrus_bitblt_rop_fwd_nop;
-
-    switch (rop) {
-    case CIRRUS_ROP_0:
-       rop_handler = cirrus_bitblt_rop_fwd_0;
-       break;
-    case CIRRUS_ROP_SRC_AND_DST:
-       rop_handler = cirrus_bitblt_rop_fwd_src_and_dst;
-       break;
-    case CIRRUS_ROP_NOP:
-       rop_handler = cirrus_bitblt_rop_fwd_nop;
-       break;
-    case CIRRUS_ROP_SRC_AND_NOTDST:
-       rop_handler = cirrus_bitblt_rop_fwd_src_and_notdst;
-       break;
-    case CIRRUS_ROP_NOTDST:
-       rop_handler = cirrus_bitblt_rop_fwd_notdst;
-       break;
-    case CIRRUS_ROP_SRC:
-       rop_handler = cirrus_bitblt_rop_fwd_src;
-       break;
-    case CIRRUS_ROP_1:
-       rop_handler = cirrus_bitblt_rop_fwd_1;
-       break;
-    case CIRRUS_ROP_NOTSRC_AND_DST:
-       rop_handler = cirrus_bitblt_rop_fwd_notsrc_and_dst;
-       break;
-    case CIRRUS_ROP_SRC_XOR_DST:
-       rop_handler = cirrus_bitblt_rop_fwd_src_xor_dst;
-       break;
-    case CIRRUS_ROP_SRC_OR_DST:
-       rop_handler = cirrus_bitblt_rop_fwd_src_or_dst;
-       break;
-    case CIRRUS_ROP_NOTSRC_OR_NOTDST:
-       rop_handler = cirrus_bitblt_rop_fwd_notsrc_or_notdst;
-       break;
-    case CIRRUS_ROP_SRC_NOTXOR_DST:
-       rop_handler = cirrus_bitblt_rop_fwd_src_notxor_dst;
-       break;
-    case CIRRUS_ROP_SRC_OR_NOTDST:
-       rop_handler = cirrus_bitblt_rop_fwd_src_or_notdst;
-       break;
-    case CIRRUS_ROP_NOTSRC:
-       rop_handler = cirrus_bitblt_rop_fwd_notsrc;
-       break;
-    case CIRRUS_ROP_NOTSRC_OR_DST:
-       rop_handler = cirrus_bitblt_rop_fwd_notsrc_or_dst;
-       break;
-    case CIRRUS_ROP_NOTSRC_AND_NOTDST:
-       rop_handler = cirrus_bitblt_rop_fwd_notsrc_and_notdst;
-       break;
-    default:
-#ifdef DEBUG_CIRRUS
-       printf("unknown ROP %02x\n", rop);
-#endif
-       break;
-    }
-
-    return rop_handler;
+static void cirrus_bitblt_rop_nop(CirrusVGAState *s,
+                                  uint8_t *dst,const uint8_t *src,
+                                  int dstpitch,int srcpitch,
+                                  int bltwidth,int bltheight)
+{
 }
 
-static cirrus_bitblt_rop_t cirrus_get_bkwd_rop_handler(uint8_t rop)
+static void cirrus_bitblt_fill_nop(CirrusVGAState *s,
+                                   uint8_t *dst,
+                                   int dstpitch, int bltwidth,int bltheight)
 {
-    cirrus_bitblt_rop_t rop_handler = cirrus_bitblt_rop_bkwd_nop;
+}
 
-    switch (rop) {
-    case CIRRUS_ROP_0:
-       rop_handler = cirrus_bitblt_rop_bkwd_0;
-       break;
-    case CIRRUS_ROP_SRC_AND_DST:
-       rop_handler = cirrus_bitblt_rop_bkwd_src_and_dst;
-       break;
-    case CIRRUS_ROP_NOP:
-       rop_handler = cirrus_bitblt_rop_bkwd_nop;
-       break;
-    case CIRRUS_ROP_SRC_AND_NOTDST:
-       rop_handler = cirrus_bitblt_rop_bkwd_src_and_notdst;
-       break;
-    case CIRRUS_ROP_NOTDST:
-       rop_handler = cirrus_bitblt_rop_bkwd_notdst;
-       break;
-    case CIRRUS_ROP_SRC:
-       rop_handler = cirrus_bitblt_rop_bkwd_src;
-       break;
-    case CIRRUS_ROP_1:
-       rop_handler = cirrus_bitblt_rop_bkwd_1;
-       break;
-    case CIRRUS_ROP_NOTSRC_AND_DST:
-       rop_handler = cirrus_bitblt_rop_bkwd_notsrc_and_dst;
-       break;
-    case CIRRUS_ROP_SRC_XOR_DST:
-       rop_handler = cirrus_bitblt_rop_bkwd_src_xor_dst;
-       break;
-    case CIRRUS_ROP_SRC_OR_DST:
-       rop_handler = cirrus_bitblt_rop_bkwd_src_or_dst;
-       break;
-    case CIRRUS_ROP_NOTSRC_OR_NOTDST:
-       rop_handler = cirrus_bitblt_rop_bkwd_notsrc_or_notdst;
-       break;
-    case CIRRUS_ROP_SRC_NOTXOR_DST:
-       rop_handler = cirrus_bitblt_rop_bkwd_src_notxor_dst;
-       break;
-    case CIRRUS_ROP_SRC_OR_NOTDST:
-       rop_handler = cirrus_bitblt_rop_bkwd_src_or_notdst;
-       break;
-    case CIRRUS_ROP_NOTSRC:
-       rop_handler = cirrus_bitblt_rop_bkwd_notsrc;
-       break;
-    case CIRRUS_ROP_NOTSRC_OR_DST:
-       rop_handler = cirrus_bitblt_rop_bkwd_notsrc_or_dst;
-       break;
-    case CIRRUS_ROP_NOTSRC_AND_NOTDST:
-       rop_handler = cirrus_bitblt_rop_bkwd_notsrc_and_notdst;
-       break;
-    default:
-#ifdef DEBUG_CIRRUS
-       printf("unknown ROP %02x\n", rop);
-#endif
-       break;
-    }
+#define ROP_NAME 0
+#define ROP_OP(d, s) d = 0
+#include "cirrus_vga_rop.h"
 
-    return rop_handler;
-}
+#define ROP_NAME src_and_dst
+#define ROP_OP(d, s) d = (s) & (d)
+#include "cirrus_vga_rop.h"
 
-/***************************************
- *
- *  color expansion
- *
- ***************************************/
+#define ROP_NAME src_and_notdst
+#define ROP_OP(d, s) d = (s) & (~(d))
+#include "cirrus_vga_rop.h"
 
-static void
-cirrus_colorexpand_8(CirrusVGAState * s, uint8_t * dst,
-                    const uint8_t * src, int count)
-{
-    int x;
-    uint8_t colors[2];
-    unsigned bits;
-    unsigned bitmask;
-    int srcskipleft = 0;
-
-    colors[0] = s->cirrus_shadow_gr0;
-    colors[1] = s->cirrus_shadow_gr1;
-
-    bitmask = 0x80 >> srcskipleft;
-    bits = *src++;
-    for (x = 0; x < count; x++) {
-       if ((bitmask & 0xff) == 0) {
-           bitmask = 0x80;
-           bits = *src++;
-       }
-       *dst++ = colors[!!(bits & bitmask)];
-       bitmask >>= 1;
-    }
-}
+#define ROP_NAME notdst
+#define ROP_OP(d, s) d = ~(d)
+#include "cirrus_vga_rop.h"
 
-static void
-cirrus_colorexpand_16(CirrusVGAState * s, uint8_t * dst,
-                     const uint8_t * src, int count)
-{
-    int x;
-    uint8_t colors[2][2];
-    unsigned bits;
-    unsigned bitmask;
-    unsigned index;
-    int srcskipleft = 0;
-
-    colors[0][0] = s->cirrus_shadow_gr0;
-    colors[0][1] = s->gr[0x10];
-    colors[1][0] = s->cirrus_shadow_gr1;
-    colors[1][1] = s->gr[0x11];
-
-    bitmask = 0x80 >> srcskipleft;
-    bits = *src++;
-    for (x = 0; x < count; x++) {
-       if ((bitmask & 0xff) == 0) {
-           bitmask = 0x80;
-           bits = *src++;
-       }
-       index = !!(bits & bitmask);
-       *dst++ = colors[index][0];
-       *dst++ = colors[index][1];
-       bitmask >>= 1;
-    }
-}
+#define ROP_NAME src
+#define ROP_OP(d, s) d = s
+#include "cirrus_vga_rop.h"
 
-static void
-cirrus_colorexpand_24(CirrusVGAState * s, uint8_t * dst,
-                     const uint8_t * src, int count)
-{
-    int x;
-    uint8_t colors[2][3];
-    unsigned bits;
-    unsigned bitmask;
-    unsigned index;
-    int srcskipleft = 0;
-
-    colors[0][0] = s->cirrus_shadow_gr0;
-    colors[0][1] = s->gr[0x10];
-    colors[0][2] = s->gr[0x12];
-    colors[1][0] = s->cirrus_shadow_gr1;
-    colors[1][1] = s->gr[0x11];
-    colors[1][2] = s->gr[0x13];
-
-    bitmask = 0x80 << srcskipleft;
-    bits = *src++;
-    for (x = 0; x < count; x++) {
-       if ((bitmask & 0xff) == 0) {
-           bitmask = 0x80;
-           bits = *src++;
-       }
-       index = !!(bits & bitmask);
-       *dst++ = colors[index][0];
-       *dst++ = colors[index][1];
-       *dst++ = colors[index][2];
-       bitmask >>= 1;
-    }
-}
+#define ROP_NAME 1
+#define ROP_OP(d, s) d = ~0
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc_and_dst
+#define ROP_OP(d, s) d = (~(s)) & (d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_xor_dst
+#define ROP_OP(d, s) d = (s) ^ (d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_or_dst
+#define ROP_OP(d, s) d = (s) | (d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc_or_notdst
+#define ROP_OP(d, s) d = (~(s)) | (~(d))
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_notxor_dst
+#define ROP_OP(d, s) d = ~((s) ^ (d))
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_or_notdst
+#define ROP_OP(d, s) d = (s) | (~(d))
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc
+#define ROP_OP(d, s) d = (~(s))
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc_or_dst
+#define ROP_OP(d, s) d = (~(s)) | (d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc_and_notdst
+#define ROP_OP(d, s) d = (~(s)) & (~(d))
+#include "cirrus_vga_rop.h"
+
+static const cirrus_bitblt_rop_t cirrus_fwd_rop[16] = {
+    cirrus_bitblt_rop_fwd_0,
+    cirrus_bitblt_rop_fwd_src_and_dst,
+    cirrus_bitblt_rop_nop,
+    cirrus_bitblt_rop_fwd_src_and_notdst,
+    cirrus_bitblt_rop_fwd_notdst,
+    cirrus_bitblt_rop_fwd_src,
+    cirrus_bitblt_rop_fwd_1,
+    cirrus_bitblt_rop_fwd_notsrc_and_dst,
+    cirrus_bitblt_rop_fwd_src_xor_dst,
+    cirrus_bitblt_rop_fwd_src_or_dst,
+    cirrus_bitblt_rop_fwd_notsrc_or_notdst,
+    cirrus_bitblt_rop_fwd_src_notxor_dst,
+    cirrus_bitblt_rop_fwd_src_or_notdst,
+    cirrus_bitblt_rop_fwd_notsrc,
+    cirrus_bitblt_rop_fwd_notsrc_or_dst,
+    cirrus_bitblt_rop_fwd_notsrc_and_notdst,
+};
+
+static const cirrus_bitblt_rop_t cirrus_bkwd_rop[16] = {
+    cirrus_bitblt_rop_bkwd_0,
+    cirrus_bitblt_rop_bkwd_src_and_dst,
+    cirrus_bitblt_rop_nop,
+    cirrus_bitblt_rop_bkwd_src_and_notdst,
+    cirrus_bitblt_rop_bkwd_notdst,
+    cirrus_bitblt_rop_bkwd_src,
+    cirrus_bitblt_rop_bkwd_1,
+    cirrus_bitblt_rop_bkwd_notsrc_and_dst,
+    cirrus_bitblt_rop_bkwd_src_xor_dst,
+    cirrus_bitblt_rop_bkwd_src_or_dst,
+    cirrus_bitblt_rop_bkwd_notsrc_or_notdst,
+    cirrus_bitblt_rop_bkwd_src_notxor_dst,
+    cirrus_bitblt_rop_bkwd_src_or_notdst,
+    cirrus_bitblt_rop_bkwd_notsrc,
+    cirrus_bitblt_rop_bkwd_notsrc_or_dst,
+    cirrus_bitblt_rop_bkwd_notsrc_and_notdst,
+};
+
+#define TRANSP_ROP(name) {\
+    name ## _8,\
+    name ## _16,\
+        }
+#define TRANSP_NOP(func) {\
+    func,\
+    func,\
+        }
+
+static const cirrus_bitblt_rop_t cirrus_fwd_transp_rop[16][2] = {
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_0),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_src_and_dst),
+    TRANSP_NOP(cirrus_bitblt_rop_nop),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_src_and_notdst),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_notdst),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_src),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_1),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_notsrc_and_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_src_xor_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_src_or_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_notsrc_or_notdst),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_src_notxor_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_src_or_notdst),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_notsrc),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_notsrc_or_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_fwd_transp_notsrc_and_notdst),
+};
+
+static const cirrus_bitblt_rop_t cirrus_bkwd_transp_rop[16][2] = {
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_0),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_src_and_dst),
+    TRANSP_NOP(cirrus_bitblt_rop_nop),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_src_and_notdst),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_notdst),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_src),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_1),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_notsrc_and_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_src_xor_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_src_or_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_notsrc_or_notdst),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_src_notxor_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_src_or_notdst),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_notsrc),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_notsrc_or_dst),
+    TRANSP_ROP(cirrus_bitblt_rop_bkwd_transp_notsrc_and_notdst),
+};
+
+#define ROP2(name) {\
+    name ## _8,\
+    name ## _16,\
+    name ## _24,\
+    name ## _32,\
+        }
 
-static void
-cirrus_colorexpand_32(CirrusVGAState * s, uint8_t * dst,
-                     const uint8_t * src, int count)
+#define ROP_NOP2(func) {\
+    func,\
+    func,\
+    func,\
+    func,\
+        }
+
+static const cirrus_bitblt_rop_t cirrus_patternfill[16][4] = {
+    ROP2(cirrus_patternfill_0),
+    ROP2(cirrus_patternfill_src_and_dst),
+    ROP_NOP2(cirrus_bitblt_rop_nop),
+    ROP2(cirrus_patternfill_src_and_notdst),
+    ROP2(cirrus_patternfill_notdst),
+    ROP2(cirrus_patternfill_src),
+    ROP2(cirrus_patternfill_1),
+    ROP2(cirrus_patternfill_notsrc_and_dst),
+    ROP2(cirrus_patternfill_src_xor_dst),
+    ROP2(cirrus_patternfill_src_or_dst),
+    ROP2(cirrus_patternfill_notsrc_or_notdst),
+    ROP2(cirrus_patternfill_src_notxor_dst),
+    ROP2(cirrus_patternfill_src_or_notdst),
+    ROP2(cirrus_patternfill_notsrc),
+    ROP2(cirrus_patternfill_notsrc_or_dst),
+    ROP2(cirrus_patternfill_notsrc_and_notdst),
+};
+
+static const cirrus_bitblt_rop_t cirrus_colorexpand_transp[16][4] = {
+    ROP2(cirrus_colorexpand_transp_0),
+    ROP2(cirrus_colorexpand_transp_src_and_dst),
+    ROP_NOP2(cirrus_bitblt_rop_nop),
+    ROP2(cirrus_colorexpand_transp_src_and_notdst),
+    ROP2(cirrus_colorexpand_transp_notdst),
+    ROP2(cirrus_colorexpand_transp_src),
+    ROP2(cirrus_colorexpand_transp_1),
+    ROP2(cirrus_colorexpand_transp_notsrc_and_dst),
+    ROP2(cirrus_colorexpand_transp_src_xor_dst),
+    ROP2(cirrus_colorexpand_transp_src_or_dst),
+    ROP2(cirrus_colorexpand_transp_notsrc_or_notdst),
+    ROP2(cirrus_colorexpand_transp_src_notxor_dst),
+    ROP2(cirrus_colorexpand_transp_src_or_notdst),
+    ROP2(cirrus_colorexpand_transp_notsrc),
+    ROP2(cirrus_colorexpand_transp_notsrc_or_dst),
+    ROP2(cirrus_colorexpand_transp_notsrc_and_notdst),
+};
+
+static const cirrus_bitblt_rop_t cirrus_colorexpand[16][4] = {
+    ROP2(cirrus_colorexpand_0),
+    ROP2(cirrus_colorexpand_src_and_dst),
+    ROP_NOP2(cirrus_bitblt_rop_nop),
+    ROP2(cirrus_colorexpand_src_and_notdst),
+    ROP2(cirrus_colorexpand_notdst),
+    ROP2(cirrus_colorexpand_src),
+    ROP2(cirrus_colorexpand_1),
+    ROP2(cirrus_colorexpand_notsrc_and_dst),
+    ROP2(cirrus_colorexpand_src_xor_dst),
+    ROP2(cirrus_colorexpand_src_or_dst),
+    ROP2(cirrus_colorexpand_notsrc_or_notdst),
+    ROP2(cirrus_colorexpand_src_notxor_dst),
+    ROP2(cirrus_colorexpand_src_or_notdst),
+    ROP2(cirrus_colorexpand_notsrc),
+    ROP2(cirrus_colorexpand_notsrc_or_dst),
+    ROP2(cirrus_colorexpand_notsrc_and_notdst),
+};
+
+static const cirrus_bitblt_rop_t cirrus_colorexpand_pattern_transp[16][4] = {
+    ROP2(cirrus_colorexpand_pattern_transp_0),
+    ROP2(cirrus_colorexpand_pattern_transp_src_and_dst),
+    ROP_NOP2(cirrus_bitblt_rop_nop),
+    ROP2(cirrus_colorexpand_pattern_transp_src_and_notdst),
+    ROP2(cirrus_colorexpand_pattern_transp_notdst),
+    ROP2(cirrus_colorexpand_pattern_transp_src),
+    ROP2(cirrus_colorexpand_pattern_transp_1),
+    ROP2(cirrus_colorexpand_pattern_transp_notsrc_and_dst),
+    ROP2(cirrus_colorexpand_pattern_transp_src_xor_dst),
+    ROP2(cirrus_colorexpand_pattern_transp_src_or_dst),
+    ROP2(cirrus_colorexpand_pattern_transp_notsrc_or_notdst),
+    ROP2(cirrus_colorexpand_pattern_transp_src_notxor_dst),
+    ROP2(cirrus_colorexpand_pattern_transp_src_or_notdst),
+    ROP2(cirrus_colorexpand_pattern_transp_notsrc),
+    ROP2(cirrus_colorexpand_pattern_transp_notsrc_or_dst),
+    ROP2(cirrus_colorexpand_pattern_transp_notsrc_and_notdst),
+};
+
+static const cirrus_bitblt_rop_t cirrus_colorexpand_pattern[16][4] = {
+    ROP2(cirrus_colorexpand_pattern_0),
+    ROP2(cirrus_colorexpand_pattern_src_and_dst),
+    ROP_NOP2(cirrus_bitblt_rop_nop),
+    ROP2(cirrus_colorexpand_pattern_src_and_notdst),
+    ROP2(cirrus_colorexpand_pattern_notdst),
+    ROP2(cirrus_colorexpand_pattern_src),
+    ROP2(cirrus_colorexpand_pattern_1),
+    ROP2(cirrus_colorexpand_pattern_notsrc_and_dst),
+    ROP2(cirrus_colorexpand_pattern_src_xor_dst),
+    ROP2(cirrus_colorexpand_pattern_src_or_dst),
+    ROP2(cirrus_colorexpand_pattern_notsrc_or_notdst),
+    ROP2(cirrus_colorexpand_pattern_src_notxor_dst),
+    ROP2(cirrus_colorexpand_pattern_src_or_notdst),
+    ROP2(cirrus_colorexpand_pattern_notsrc),
+    ROP2(cirrus_colorexpand_pattern_notsrc_or_dst),
+    ROP2(cirrus_colorexpand_pattern_notsrc_and_notdst),
+};
+
+static const cirrus_fill_t cirrus_fill[16][4] = {
+    ROP2(cirrus_fill_0),
+    ROP2(cirrus_fill_src_and_dst),
+    ROP_NOP2(cirrus_bitblt_fill_nop),
+    ROP2(cirrus_fill_src_and_notdst),
+    ROP2(cirrus_fill_notdst),
+    ROP2(cirrus_fill_src),
+    ROP2(cirrus_fill_1),
+    ROP2(cirrus_fill_notsrc_and_dst),
+    ROP2(cirrus_fill_src_xor_dst),
+    ROP2(cirrus_fill_src_or_dst),
+    ROP2(cirrus_fill_notsrc_or_notdst),
+    ROP2(cirrus_fill_src_notxor_dst),
+    ROP2(cirrus_fill_src_or_notdst),
+    ROP2(cirrus_fill_notsrc),
+    ROP2(cirrus_fill_notsrc_or_dst),
+    ROP2(cirrus_fill_notsrc_and_notdst),
+};
+
+static inline void cirrus_bitblt_fgcol(CirrusVGAState *s)
 {
-    int x;
-    uint8_t colors[2][4];
-    unsigned bits;
-    unsigned bitmask;
-    unsigned index;
-    int srcskipleft = 0;
-
-    colors[0][0] = s->cirrus_shadow_gr0;
-    colors[0][1] = s->gr[0x10];
-    colors[0][2] = s->gr[0x12];
-    colors[0][3] = s->gr[0x14];
-    colors[1][0] = s->cirrus_shadow_gr1;
-    colors[1][1] = s->gr[0x11];
-    colors[1][2] = s->gr[0x13];
-    colors[1][3] = s->gr[0x15];
-
-    bitmask = 0x80 << srcskipleft;
-    bits = *src++;
-    for (x = 0; x < count; x++) {
-       if ((bitmask & 0xff) == 0) {
-           bitmask = 0x80;
-           bits = *src++;
-       }
-       index = !!(bits & bitmask);
-       *dst++ = colors[index][0];
-       *dst++ = colors[index][1];
-       *dst++ = colors[index][2];
-       *dst++ = colors[index][3];
-       bitmask >>= 1;
+    unsigned int color;
+    switch (s->cirrus_blt_pixelwidth) {
+    case 1:
+        s->cirrus_blt_fgcol = s->cirrus_shadow_gr1;
+        break;
+    case 2:
+        color = s->cirrus_shadow_gr1 | (s->gr[0x11] << 8);
+        s->cirrus_blt_fgcol = le16_to_cpu(color);
+        break;
+    case 3:
+        s->cirrus_blt_fgcol = s->cirrus_shadow_gr1 |
+            (s->gr[0x11] << 8) | (s->gr[0x13] << 16);
+        break;
+    default:
+    case 4:
+        color = s->cirrus_shadow_gr1 | (s->gr[0x11] << 8) |
+            (s->gr[0x13] << 16) | (s->gr[0x15] << 24);
+        s->cirrus_blt_fgcol = le32_to_cpu(color);
+        break;
     }
 }
 
-static void
-cirrus_colorexpand(CirrusVGAState * s, uint8_t * dst, const uint8_t * src,
-                  int count)
+static inline void cirrus_bitblt_bgcol(CirrusVGAState *s)
 {
+    unsigned int color;
     switch (s->cirrus_blt_pixelwidth) {
     case 1:
-       cirrus_colorexpand_8(s, dst, src, count);
-       break;
+        s->cirrus_blt_bgcol = s->cirrus_shadow_gr0;
+        break;
     case 2:
-       cirrus_colorexpand_16(s, dst, src, count);
-       break;
+        color = s->cirrus_shadow_gr0 | (s->gr[0x10] << 8);
+        s->cirrus_blt_bgcol = le16_to_cpu(color);
+        break;
     case 3:
-       cirrus_colorexpand_24(s, dst, src, count);
-       break;
-    case 4:
-       cirrus_colorexpand_32(s, dst, src, count);
-       break;
+        s->cirrus_blt_bgcol = s->cirrus_shadow_gr0 |
+            (s->gr[0x10] << 8) | (s->gr[0x12] << 16);
+        break;
     default:
-#ifdef DEBUG_CIRRUS
-       printf("cirrus: COLOREXPAND pixelwidth %d - unimplemented\n",
-              s->cirrus_blt_pixelwidth);
-#endif
-       break;
+    case 4:
+        color = s->cirrus_shadow_gr0 | (s->gr[0x10] << 8) |
+            (s->gr[0x12] << 16) | (s->gr[0x14] << 24);
+        s->cirrus_blt_bgcol = le32_to_cpu(color);
+        break;
     }
 }
 
@@ -632,157 +646,31 @@ static void cirrus_invalidate_region(CirrusVGAState * s, int off_begin,
     }
 }
 
-
-
 static int cirrus_bitblt_common_patterncopy(CirrusVGAState * s,
                                            const uint8_t * src)
 {
-    uint8_t work_colorexp[256];
     uint8_t *dst;
-    uint8_t *dstc;
-    int x, y;
-    int tilewidth, tileheight;
-    int patternbytes = s->cirrus_blt_pixelwidth * 8;
-
-    if (s->cirrus_blt_mode & CIRRUS_BLTMODE_COLOREXPAND) {
-       cirrus_colorexpand(s, work_colorexp, src, 8 * 8);
-       src = work_colorexp;
-       s->cirrus_blt_mode &= ~CIRRUS_BLTMODE_COLOREXPAND;
-    }
-    if (s->cirrus_blt_mode & ~CIRRUS_BLTMODE_PATTERNCOPY) {
-#ifdef DEBUG_CIRRUS
-       printf("cirrus: blt mode %02x (pattercopy) - unimplemented\n",
-              s->cirrus_blt_mode);
-#endif
-       return 0;
-    }
-    
+
     dst = s->vram_ptr + s->cirrus_blt_dstaddr;
-    for (y = 0; y < s->cirrus_blt_height; y += 8) {
-       dstc = dst;
-       tileheight = qemu_MIN(8, s->cirrus_blt_height - y);
-       for (x = 0; x < s->cirrus_blt_width; x += patternbytes) {
-           tilewidth = qemu_MIN(patternbytes, s->cirrus_blt_width - x);
-           (*s->cirrus_rop) (dstc, src,
-                             s->cirrus_blt_dstpitch, patternbytes,
-                             tilewidth, tileheight);
-           dstc += patternbytes;
-       }
-       dst += s->cirrus_blt_dstpitch * 8;
-    }
+    (*s->cirrus_rop) (s, dst, src,
+                      s->cirrus_blt_dstpitch, 0,
+                      s->cirrus_blt_width, s->cirrus_blt_height);
     cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
-                            s->cirrus_blt_dstpitch, s->cirrus_blt_width,
-                            s->cirrus_blt_height);
+                             s->cirrus_blt_dstpitch, s->cirrus_blt_width,
+                             s->cirrus_blt_height);
     return 1;
 }
 
 /* fill */
 
-static void cirrus_fill_8(CirrusVGAState *s,
-                          uint8_t *dst, int dst_pitch, int width, int height)
-{
-    uint8_t *d, *d1;
-    uint32_t val;
-    int x, y;
-
-    val = s->cirrus_shadow_gr1;
-
-    d1 = dst;
-    for(y = 0; y < height; y++) {
-        d = d1;
-        for(x = 0; x < width; x++) {
-            *d++ = val;
-        }
-        d1 += dst_pitch;
-    }
-}
-
-static void cirrus_fill_16(CirrusVGAState *s,
-                           uint8_t *dst, int dst_pitch, int width, int height)
-{
-    uint8_t *d, *d1;
-    uint32_t val;
-    int x, y;
-
-    val = s->cirrus_shadow_gr1 | (s->gr[0x11] << 8);
-    val = le16_to_cpu(val);
-    width >>= 1;
-
-    d1 = dst;
-    for(y = 0; y < height; y++) {
-        d = d1;
-        for(x = 0; x < width; x++) {
-            ((uint16_t *)d)[0] = val;
-            d += 2;
-        }
-        d1 += dst_pitch;
-    }
-}
-
-static void cirrus_fill_24(CirrusVGAState *s,
-                           uint8_t *dst, int dst_pitch, int width, int height)
-{
-    uint8_t *d, *d1;
-    int x, y;
-
-    d1 = dst;
-    for(y = 0; y < height; y++) {
-        d = d1;
-        for(x = 0; x < width; x += 3) {
-            *d++ = s->cirrus_shadow_gr1;
-            *d++ = s->gr[0x11];
-            *d++ = s->gr[0x13];
-        }
-        d1 += dst_pitch;
-    }
-}
-
-static void cirrus_fill_32(CirrusVGAState *s,
-                           uint8_t *dst, int dst_pitch, int width, int height)
+static int cirrus_bitblt_solidfill(CirrusVGAState *s, int blt_rop)
 {
-    uint8_t *d, *d1;
-    uint32_t val;
-    int x, y;
-
-    val = s->cirrus_shadow_gr1 | (s->gr[0x11] << 8) | 
-        (s->gr[0x13] << 8) | (s->gr[0x15] << 8);
-    val = le32_to_cpu(val);
-    width >>= 2;
-
-    d1 = dst;
-    for(y = 0; y < height; y++) {
-        d = d1;
-        for(x = 0; x < width; x++) {
-            ((uint32_t *)d)[0] = val;
-            d += 4;
-        }
-        d1 += dst_pitch;
-    }
-}
+    cirrus_fill_t rop_func;
 
-static int cirrus_bitblt_solidfill(CirrusVGAState *s)
-{
-    uint8_t *dst;
-    dst = s->vram_ptr + s->cirrus_blt_dstaddr;
-    switch (s->cirrus_blt_pixelwidth) {
-    case 1:
-       cirrus_fill_8(s, dst, s->cirrus_blt_dstpitch,
-                      s->cirrus_blt_width, s->cirrus_blt_height);
-       break;
-    case 2:
-       cirrus_fill_16(s, dst, s->cirrus_blt_dstpitch,
-                       s->cirrus_blt_width, s->cirrus_blt_height);
-       break;
-    case 3:
-       cirrus_fill_24(s, dst, s->cirrus_blt_dstpitch,
-                       s->cirrus_blt_width, s->cirrus_blt_height);
-       break;
-    default:
-    case 4:
-       cirrus_fill_32(s, dst, s->cirrus_blt_dstpitch,
-                       s->cirrus_blt_width, s->cirrus_blt_height);
-       break;
-    }
+    rop_func = cirrus_fill[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+    rop_func(s, s->vram_ptr + s->cirrus_blt_dstaddr,
+             s->cirrus_blt_dstpitch,
+             s->cirrus_blt_width, s->cirrus_blt_height);
     cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
                             s->cirrus_blt_dstpitch, s->cirrus_blt_width,
                             s->cirrus_blt_height);
@@ -800,165 +688,134 @@ static int cirrus_bitblt_videotovideo_patterncopy(CirrusVGAState * s)
 {
     return cirrus_bitblt_common_patterncopy(s,
                                            s->vram_ptr +
-                                           s->cirrus_blt_srcaddr);
+                                            (s->cirrus_blt_srcaddr & ~7));
 }
 
-static int cirrus_bitblt_videotovideo_copy(CirrusVGAState * s)
+static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h)
 {
-    if ((s->cirrus_blt_mode & CIRRUS_BLTMODE_COLOREXPAND) != 0) {
-#ifdef DEBUG_CIRRUS
-       printf("cirrus: CIRRUS_BLTMODE_COLOREXPAND - unimplemented\n");
-#endif
-       return 0;
+    int sx, sy;
+    int dx, dy;
+    int width, height;
+    int depth;
+    int notify = 0;
+
+    depth = s->get_bpp((VGAState *)s) / 8;
+    s->get_resolution((VGAState *)s, &width, &height);
+
+    /* extract x, y */
+    sx = (src % (width * depth)) / depth;
+    sy = (src / (width * depth));
+    dx = (dst % (width *depth)) / depth;
+    dy = (dst / (width * depth));
+
+    /* normalize width */
+    w /= depth;
+
+    /* if we're doing a backward copy, we have to adjust
+       our x/y to be the upper left corner (instead of the lower
+       right corner) */
+    if (s->cirrus_blt_dstpitch < 0) {
+       sx -= (s->cirrus_blt_width / depth) - 1;
+       dx -= (s->cirrus_blt_width / depth) - 1;
+       sy -= s->cirrus_blt_height - 1;
+       dy -= s->cirrus_blt_height - 1;
     }
-    if ((s->cirrus_blt_mode & (~CIRRUS_BLTMODE_BACKWARDS)) != 0) {
-#ifdef DEBUG_CIRRUS
-       printf("cirrus: blt mode %02x - unimplemented\n",
-              s->cirrus_blt_mode);
-#endif
-       return 0;
+
+    /* are we in the visible portion of memory? */
+    if (sx >= 0 && sy >= 0 && dx >= 0 && dy >= 0 &&
+       (sx + w) <= width && (sy + h) <= height &&
+       (dx + w) <= width && (dy + h) <= height) {
+       notify = 1;
     }
 
-    (*s->cirrus_rop) (s->vram_ptr + s->cirrus_blt_dstaddr,
+    /* make sure to only copy if it's a plain copy ROP */
+    if (*s->cirrus_rop != cirrus_bitblt_rop_fwd_src &&
+       *s->cirrus_rop != cirrus_bitblt_rop_bkwd_src)
+       notify = 0;
+
+    /* we have to flush all pending changes so that the copy
+       is generated at the appropriate moment in time */
+    if (notify)
+       vga_hw_update();
+
+    (*s->cirrus_rop) (s, s->vram_ptr + s->cirrus_blt_dstaddr,
                      s->vram_ptr + s->cirrus_blt_srcaddr,
                      s->cirrus_blt_dstpitch, s->cirrus_blt_srcpitch,
                      s->cirrus_blt_width, s->cirrus_blt_height);
-    cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
-                            s->cirrus_blt_dstpitch, s->cirrus_blt_width,
-                            s->cirrus_blt_height);
-    return 1;
-}
 
-/***************************************
- *
- *  bitblt (cpu-to-video)
- *
- ***************************************/
-
-static void cirrus_bitblt_cputovideo_patterncopy(void *opaque)
-{
-    CirrusVGAState *s = (CirrusVGAState *) opaque;
-    int data_count;
+    if (notify)
+       s->ds->dpy_copy(s->ds,
+                       sx, sy, dx, dy,
+                       s->cirrus_blt_width / depth,
+                       s->cirrus_blt_height);
 
-    data_count = s->cirrus_srcptr - &s->cirrus_bltbuf[0];
+    /* we don't have to notify the display that this portion has
+       changed since dpy_copy implies this */
 
-    if (data_count > 0) {
-       if (data_count != s->cirrus_srccounter) {
-#ifdef DEBUG_CIRRUS
-           printf("cirrus: internal error\n");
-#endif
-       } else {
-           cirrus_bitblt_common_patterncopy(s, &s->cirrus_bltbuf[0]);
-       }
-       cirrus_bitblt_reset(s);
-    }
+    if (!notify)
+       cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
+                                s->cirrus_blt_dstpitch, s->cirrus_blt_width,
+                                s->cirrus_blt_height);
 }
 
-static void cirrus_bitblt_cputovideo_copy(void *opaque)
+static int cirrus_bitblt_videotovideo_copy(CirrusVGAState * s)
 {
-    CirrusVGAState *s = (CirrusVGAState *) opaque;
-    int data_count;
-    int data_avail;
-    uint8_t work_colorexp[256];
-    uint8_t *src_ptr = NULL;
-    int src_avail = 0;
-    int src_processing;
-    int src_linepad = 0;
-
-    if (s->cirrus_blt_height <= 0) {
-       s->cirrus_srcptr = s->cirrus_srcptr_end;
-       return;
+    if (s->ds->dpy_copy) {
+       cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->start_addr,
+                      s->cirrus_blt_srcaddr - s->start_addr,
+                      s->cirrus_blt_width, s->cirrus_blt_height);
+    } else {
+       (*s->cirrus_rop) (s, s->vram_ptr + s->cirrus_blt_dstaddr,
+                         s->vram_ptr + s->cirrus_blt_srcaddr,
+                         s->cirrus_blt_dstpitch, s->cirrus_blt_srcpitch,
+                         s->cirrus_blt_width, s->cirrus_blt_height);
+
+       cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
+                                s->cirrus_blt_dstpitch, s->cirrus_blt_width,
+                                s->cirrus_blt_height);
     }
 
-    s->cirrus_srcptr = &s->cirrus_bltbuf[0];
-    while (1) {
-       /* get BLT source. */
-       if (src_avail <= 0) {
-           data_count = s->cirrus_srcptr_end - s->cirrus_srcptr;
-           if (data_count <= 0)
-               break;
-
-           if (s->cirrus_blt_mode & CIRRUS_BLTMODE_COLOREXPAND) {
-               if (s->cirrus_blt_mode & ~CIRRUS_BLTMODE_COLOREXPAND) {
-#ifdef DEBUG_CIRRUS
-                   printf("cirrus: unsupported\n");
-#endif
-                   cirrus_bitblt_reset(s);
-                   return;
-               }
-               data_avail = qemu_MIN(data_count, 256 / 32);
-               cirrus_colorexpand(s, work_colorexp, s->cirrus_srcptr,
-                                  data_avail * 8);
-               src_ptr = &work_colorexp[0];
-               src_avail = data_avail * 8 * s->cirrus_blt_pixelwidth;
-               s->cirrus_srcptr += data_avail;
-               src_linepad =
-                   ((s->cirrus_blt_width + 7) / 8) * 8 -
-                   s->cirrus_blt_width;
-               src_linepad *= s->cirrus_blt_pixelwidth;
-           } else {
-               if (s->cirrus_blt_mode != 0) {
-#ifdef DEBUG_CIRRUS
-                   printf("cirrus: unsupported\n");
-#endif
-                   cirrus_bitblt_reset(s);
-                   return;
-               }
-               src_ptr = s->cirrus_srcptr;
-               src_avail =
-                   data_count / s->cirrus_blt_pixelwidth *
-                   s->cirrus_blt_pixelwidth;
-               s->cirrus_srcptr += src_avail;
-           }
-           if (src_avail <= 0)
-               break;
-       }
-
-       /* 1-line BLT */
-       src_processing =
-           s->cirrus_blt_srcpitch - s->cirrus_blt_horz_counter;
-       src_processing = qemu_MIN(src_avail, src_processing);
-       (*s->cirrus_rop) (s->vram_ptr + s->cirrus_blt_dstaddr,
-                         src_ptr, 0, 0, src_processing, 1);
-       cirrus_invalidate_region(s, s->cirrus_blt_dstaddr, 0,
-                                src_processing, 1);
-
-       s->cirrus_blt_dstaddr += src_processing;
-       src_ptr += src_processing;
-       src_avail -= src_processing;
-       s->cirrus_blt_horz_counter += src_processing;
-       if (s->cirrus_blt_horz_counter >= s->cirrus_blt_srcpitch) {
-           src_ptr += src_linepad;
-           src_avail -= src_linepad;
-           s->cirrus_blt_dstaddr +=
-               s->cirrus_blt_dstpitch - s->cirrus_blt_srcpitch;
-           s->cirrus_blt_horz_counter = 0;
-           s->cirrus_blt_height--;
-           if (s->cirrus_blt_height <= 0) {
-               s->cirrus_srcptr = s->cirrus_srcptr_end;
-               return;
-           }
-       }
-    }
+    return 1;
 }
 
+/***************************************
+ *
+ *  bitblt (cpu-to-video)
+ *
+ ***************************************/
+
 static void cirrus_bitblt_cputovideo_next(CirrusVGAState * s)
 {
     int copy_count;
-    int avail_count;
-
-    s->cirrus_blt_handler(s);
+    uint8_t *end_ptr;
 
     if (s->cirrus_srccounter > 0) {
-       s->cirrus_srccounter -= s->cirrus_srcptr - &s->cirrus_bltbuf[0];
-       copy_count = s->cirrus_srcptr_end - s->cirrus_srcptr;
-       memmove(&s->cirrus_bltbuf[0], s->cirrus_srcptr, copy_count);
-       avail_count = qemu_MIN(CIRRUS_BLTBUFSIZE, s->cirrus_srccounter);
-       s->cirrus_srcptr = &s->cirrus_bltbuf[0];
-       s->cirrus_srcptr_end = s->cirrus_srcptr + avail_count;
-       if (s->cirrus_srccounter <= 0) {
-           cirrus_bitblt_reset(s);
-       }
+        if (s->cirrus_blt_mode & CIRRUS_BLTMODE_PATTERNCOPY) {
+            cirrus_bitblt_common_patterncopy(s, s->cirrus_bltbuf);
+        the_end:
+            s->cirrus_srccounter = 0;
+            cirrus_bitblt_reset(s);
+        } else {
+            /* at least one scan line */
+            do {
+                (*s->cirrus_rop)(s, s->vram_ptr + s->cirrus_blt_dstaddr,
+                                 s->cirrus_bltbuf, 0, 0, s->cirrus_blt_width, 1);
+                cirrus_invalidate_region(s, s->cirrus_blt_dstaddr, 0,
+                                         s->cirrus_blt_width, 1);
+                s->cirrus_blt_dstaddr += s->cirrus_blt_dstpitch;
+                s->cirrus_srccounter -= s->cirrus_blt_srcpitch;
+                if (s->cirrus_srccounter <= 0)
+                    goto the_end;
+                /* more bytes than needed can be transferred because of
+                   word alignment, so we keep them for the next line */
+                /* XXX: keep alignment to speed up transfer */
+                end_ptr = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
+                copy_count = s->cirrus_srcptr_end - end_ptr;
+                memmove(s->cirrus_bltbuf, end_ptr, copy_count);
+                s->cirrus_srcptr = s->cirrus_bltbuf + copy_count;
+                s->cirrus_srcptr_end = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
+            } while (s->cirrus_srcptr >= s->cirrus_srcptr_end);
+        }
     }
 }
 
@@ -975,52 +832,48 @@ static void cirrus_bitblt_reset(CirrusVGAState * s)
     s->cirrus_srcptr = &s->cirrus_bltbuf[0];
     s->cirrus_srcptr_end = &s->cirrus_bltbuf[0];
     s->cirrus_srccounter = 0;
-    s->cirrus_dstptr = &s->cirrus_bltbuf[0];
-    s->cirrus_dstptr_end = &s->cirrus_bltbuf[0];
-    s->cirrus_dstcounter = 0;
-    s->cirrus_blt_handler = NULL;
+    cirrus_update_memory_access(s);
 }
 
 static int cirrus_bitblt_cputovideo(CirrusVGAState * s)
 {
+    int w;
+
     s->cirrus_blt_mode &= ~CIRRUS_BLTMODE_MEMSYSSRC;
     s->cirrus_srcptr = &s->cirrus_bltbuf[0];
     s->cirrus_srcptr_end = &s->cirrus_bltbuf[0];
 
     if (s->cirrus_blt_mode & CIRRUS_BLTMODE_PATTERNCOPY) {
        if (s->cirrus_blt_mode & CIRRUS_BLTMODE_COLOREXPAND) {
-           s->cirrus_srccounter = 8;
+           s->cirrus_blt_srcpitch = 8;
        } else {
-           s->cirrus_srccounter = 8 * 8 * s->cirrus_blt_pixelwidth;
+            /* XXX: check for 24 bpp */
+           s->cirrus_blt_srcpitch = 8 * 8 * s->cirrus_blt_pixelwidth;
        }
-       s->cirrus_blt_srcpitch = 0;
-       s->cirrus_blt_handler = cirrus_bitblt_cputovideo_patterncopy;
+       s->cirrus_srccounter = s->cirrus_blt_srcpitch;
     } else {
        if (s->cirrus_blt_mode & CIRRUS_BLTMODE_COLOREXPAND) {
-           s->cirrus_srccounter =
-               ((s->cirrus_blt_width + 7) / 8) * s->cirrus_blt_height;
-           s->cirrus_blt_srcpitch =
-               s->cirrus_blt_width * s->cirrus_blt_pixelwidth;
+            w = s->cirrus_blt_width / s->cirrus_blt_pixelwidth;
+            if (s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_DWORDGRANULARITY)
+                s->cirrus_blt_srcpitch = ((w + 31) >> 5);
+            else
+                s->cirrus_blt_srcpitch = ((w + 7) >> 3);
        } else {
-           s->cirrus_srccounter =
-               s->cirrus_blt_width * s->cirrus_blt_height;
-           s->cirrus_blt_srcpitch = s->cirrus_blt_width;
+            /* always align input size to 32 bits */
+           s->cirrus_blt_srcpitch = (s->cirrus_blt_width + 3) & ~3;
        }
-       /* 4-byte alignment */
-       s->cirrus_srccounter = (s->cirrus_srccounter + 3) & (~3);
-
-       s->cirrus_blt_handler = cirrus_bitblt_cputovideo_copy;
-       s->cirrus_blt_horz_counter = 0;
+        s->cirrus_srccounter = s->cirrus_blt_srcpitch * s->cirrus_blt_height;
     }
-
-    cirrus_bitblt_cputovideo_next(s);
+    s->cirrus_srcptr = s->cirrus_bltbuf;
+    s->cirrus_srcptr_end = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
+    cirrus_update_memory_access(s);
     return 1;
 }
 
 static int cirrus_bitblt_videotocpu(CirrusVGAState * s)
 {
     /* XXX */
-#ifdef DEBUG_CIRRUS
+#ifdef DEBUG_BITBLT
     printf("cirrus: bitblt (video to cpu) is not implemented yet\n");
 #endif
     return 0;
@@ -1035,7 +888,6 @@ static int cirrus_bitblt_videotovideo(CirrusVGAState * s)
     } else {
        ret = cirrus_bitblt_videotovideo_copy(s);
     }
-
     if (ret)
        cirrus_bitblt_reset(s);
     return ret;
@@ -1045,6 +897,8 @@ static void cirrus_bitblt_start(CirrusVGAState * s)
 {
     uint8_t blt_rop;
 
+    s->gr[0x31] |= CIRRUS_BLT_BUSY;
+
     s->cirrus_blt_width = (s->gr[0x20] | (s->gr[0x21] << 8)) + 1;
     s->cirrus_blt_height = (s->gr[0x22] | (s->gr[0x23] << 8)) + 1;
     s->cirrus_blt_dstpitch = (s->gr[0x24] | (s->gr[0x25] << 8));
@@ -1054,19 +908,21 @@ static void cirrus_bitblt_start(CirrusVGAState * s)
     s->cirrus_blt_srcaddr =
        (s->gr[0x2c] | (s->gr[0x2d] << 8) | (s->gr[0x2e] << 16));
     s->cirrus_blt_mode = s->gr[0x30];
+    s->cirrus_blt_modeext = s->gr[0x33];
     blt_rop = s->gr[0x32];
 
 #ifdef DEBUG_BITBLT
-    printf("rop=%02x mode=%02x modeext=%02x w=%d h=%d dpitch=%d spicth=%d daddr=%08x saddr=%08x\n",
-           blt_rop, 
+    printf("rop=0x%02x mode=0x%02x modeext=0x%02x w=%d h=%d dpitch=%d spitch=%d daddr=0x%08x saddr=0x%08x writemask=0x%02x\n",
+           blt_rop,
            s->cirrus_blt_mode,
-           s->gr[0x33],
+           s->cirrus_blt_modeext,
            s->cirrus_blt_width,
            s->cirrus_blt_height,
            s->cirrus_blt_dstpitch,
            s->cirrus_blt_srcpitch,
            s->cirrus_blt_dstaddr,
-           s->cirrus_blt_srcaddr);
+           s->cirrus_blt_srcaddr,
+           s->gr[0x2f]);
 #endif
 
     switch (s->cirrus_blt_mode & CIRRUS_BLTMODE_PIXELWIDTHMASK) {
@@ -1083,7 +939,7 @@ static void cirrus_bitblt_start(CirrusVGAState * s)
        s->cirrus_blt_pixelwidth = 4;
        break;
     default:
-#ifdef DEBUG_CIRRUS
+#ifdef DEBUG_BITBLT
        printf("cirrus: bitblt - pixel width is unknown\n");
 #endif
        goto bitblt_ignore;
@@ -1094,28 +950,75 @@ static void cirrus_bitblt_start(CirrusVGAState * s)
         cirrus_blt_mode & (CIRRUS_BLTMODE_MEMSYSSRC |
                            CIRRUS_BLTMODE_MEMSYSDEST))
        == (CIRRUS_BLTMODE_MEMSYSSRC | CIRRUS_BLTMODE_MEMSYSDEST)) {
-#ifdef DEBUG_CIRRUS
+#ifdef DEBUG_BITBLT
        printf("cirrus: bitblt - memory-to-memory copy is requested\n");
 #endif
        goto bitblt_ignore;
     }
 
-    if ((s->gr[0x33] & CIRRUS_BLTMODEEXT_SOLIDFILL) &&
-        (s->cirrus_blt_mode & (CIRRUS_BLTMODE_MEMSYSDEST | 
+    if ((s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_SOLIDFILL) &&
+        (s->cirrus_blt_mode & (CIRRUS_BLTMODE_MEMSYSDEST |
                                CIRRUS_BLTMODE_TRANSPARENTCOMP |
-                               CIRRUS_BLTMODE_PATTERNCOPY | 
-                               CIRRUS_BLTMODE_COLOREXPAND)) == 
+                               CIRRUS_BLTMODE_PATTERNCOPY |
+                               CIRRUS_BLTMODE_COLOREXPAND)) ==
          (CIRRUS_BLTMODE_PATTERNCOPY | CIRRUS_BLTMODE_COLOREXPAND)) {
-        cirrus_bitblt_solidfill(s);
+        cirrus_bitblt_fgcol(s);
+        cirrus_bitblt_solidfill(s, blt_rop);
     } else {
-        if (s->cirrus_blt_mode & CIRRUS_BLTMODE_BACKWARDS) {
-            s->cirrus_blt_dstpitch = -s->cirrus_blt_dstpitch;
-            s->cirrus_blt_srcpitch = -s->cirrus_blt_srcpitch;
-            s->cirrus_rop = cirrus_get_bkwd_rop_handler(blt_rop);
+        if ((s->cirrus_blt_mode & (CIRRUS_BLTMODE_COLOREXPAND |
+                                   CIRRUS_BLTMODE_PATTERNCOPY)) ==
+            CIRRUS_BLTMODE_COLOREXPAND) {
+
+            if (s->cirrus_blt_mode & CIRRUS_BLTMODE_TRANSPARENTCOMP) {
+                if (s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_COLOREXPINV)
+                    cirrus_bitblt_bgcol(s);
+                else
+                    cirrus_bitblt_fgcol(s);
+                s->cirrus_rop = cirrus_colorexpand_transp[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+            } else {
+                cirrus_bitblt_fgcol(s);
+                cirrus_bitblt_bgcol(s);
+                s->cirrus_rop = cirrus_colorexpand[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+            }
+        } else if (s->cirrus_blt_mode & CIRRUS_BLTMODE_PATTERNCOPY) {
+            if (s->cirrus_blt_mode & CIRRUS_BLTMODE_COLOREXPAND) {
+                if (s->cirrus_blt_mode & CIRRUS_BLTMODE_TRANSPARENTCOMP) {
+                    if (s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_COLOREXPINV)
+                        cirrus_bitblt_bgcol(s);
+                    else
+                        cirrus_bitblt_fgcol(s);
+                    s->cirrus_rop = cirrus_colorexpand_pattern_transp[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+                } else {
+                    cirrus_bitblt_fgcol(s);
+                    cirrus_bitblt_bgcol(s);
+                    s->cirrus_rop = cirrus_colorexpand_pattern[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+                }
+            } else {
+                s->cirrus_rop = cirrus_patternfill[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+            }
         } else {
-            s->cirrus_rop = cirrus_get_fwd_rop_handler(blt_rop);
-        }
-        
+           if (s->cirrus_blt_mode & CIRRUS_BLTMODE_TRANSPARENTCOMP) {
+               if (s->cirrus_blt_pixelwidth > 2) {
+                   printf("src transparent without colorexpand must be 8bpp or 16bpp\n");
+                   goto bitblt_ignore;
+               }
+               if (s->cirrus_blt_mode & CIRRUS_BLTMODE_BACKWARDS) {
+                   s->cirrus_blt_dstpitch = -s->cirrus_blt_dstpitch;
+                   s->cirrus_blt_srcpitch = -s->cirrus_blt_srcpitch;
+                   s->cirrus_rop = cirrus_bkwd_transp_rop[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+               } else {
+                   s->cirrus_rop = cirrus_fwd_transp_rop[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+               }
+           } else {
+               if (s->cirrus_blt_mode & CIRRUS_BLTMODE_BACKWARDS) {
+                   s->cirrus_blt_dstpitch = -s->cirrus_blt_dstpitch;
+                   s->cirrus_blt_srcpitch = -s->cirrus_blt_srcpitch;
+                   s->cirrus_rop = cirrus_bkwd_rop[rop_to_index[blt_rop]];
+               } else {
+                   s->cirrus_rop = cirrus_fwd_rop[rop_to_index[blt_rop]];
+               }
+           }
+       }
         // setup bitblt engine.
         if (s->cirrus_blt_mode & CIRRUS_BLTMODE_MEMSYSSRC) {
             if (!cirrus_bitblt_cputovideo(s))
@@ -1145,7 +1048,6 @@ static void cirrus_write_bitblt(CirrusVGAState * s, unsigned reg_value)
        cirrus_bitblt_reset(s);
     } else if (((old_value & CIRRUS_BLT_START) == 0) &&
               ((reg_value & CIRRUS_BLT_START) != 0)) {
-       s->gr[0x31] |= CIRRUS_BLT_BUSY;
        cirrus_bitblt_start(s);
     }
 }
@@ -1157,13 +1059,13 @@ static void cirrus_write_bitblt(CirrusVGAState * s, unsigned reg_value)
  *
  ***************************************/
 
-static void cirrus_get_offsets(VGAState *s1, 
-                                   uint32_t *pline_offset,
-                                   uint32_t *pstart_addr)
+static void cirrus_get_offsets(VGAState *s1,
+                               uint32_t *pline_offset,
+                               uint32_t *pstart_addr,
+                               uint32_t *pline_compare)
 {
     CirrusVGAState * s = (CirrusVGAState *)s1;
-    uint32_t start_addr;
-    uint32_t line_offset;
+    uint32_t start_addr, line_offset, line_compare;
 
     line_offset = s->cr[0x13]
        | ((s->cr[0x1b] & 0x10) << 4);
@@ -1176,6 +1078,11 @@ static void cirrus_get_offsets(VGAState *s1,
        | ((s->cr[0x1b] & 0x0c) << 15)
        | ((s->cr[0x1d] & 0x80) << 12);
     *pstart_addr = start_addr;
+
+    line_compare = s->cr[0x18] |
+        ((s->cr[0x07] & 0x10) << 4) |
+        ((s->cr[0x09] & 0x40) << 3);
+    *pline_compare = line_compare;
 }
 
 static uint32_t cirrus_get_bpp16_depth(CirrusVGAState * s)
@@ -1238,6 +1145,22 @@ static int cirrus_get_bpp(VGAState *s1)
     return ret;
 }
 
+static void cirrus_get_resolution(VGAState *s, int *pwidth, int *pheight)
+{
+    int width, height;
+
+    width = (s->cr[0x01] + 1) * 8;
+    height = s->cr[0x12] |
+        ((s->cr[0x07] & 0x02) << 7) |
+        ((s->cr[0x07] & 0x40) << 3);
+    height = (height + 1);
+    /* interlace support */
+    if (s->cr[0x1a] & 0x01)
+        height = height * 2;
+    *pwidth = width;
+    *pheight = height;
+}
+
 /***************************************
  *
  * bank memory
@@ -1259,10 +1182,10 @@ static void cirrus_update_bank_ptr(CirrusVGAState * s, unsigned bank_index)
     else
        offset <<= 12;
 
-    if (s->vram_size <= offset)
+    if (s->real_vram_size <= offset)
        limit = 0;
     else
-       limit = s->vram_size - offset;
+       limit = s->real_vram_size - offset;
 
     if (((s->gr[0x0b] & 0x01) == 0) && (bank_index != 0)) {
        if (limit > 0x8000) {
@@ -1318,6 +1241,7 @@ cirrus_hook_read_sr(CirrusVGAState * s, unsigned reg_index, int *reg_value)
     case 0x91:
     case 0xb1:
     case 0xd1:
+    case 0xf1:                 // Graphics Cursor Y
        *reg_value = s->sr[0x11];
        break;
     case 0x05:                 // ???
@@ -1330,7 +1254,6 @@ cirrus_hook_read_sr(CirrusVGAState * s, unsigned reg_index, int *reg_value)
     case 0x0d:                 // VCLK 2
     case 0x0e:                 // VCLK 3
     case 0x0f:                 // DRAM Control
-    case 0xf1:                 // Graphics Cursor Y
     case 0x12:                 // Graphics Cursor Attribute
     case 0x13:                 // Graphics Cursor Pattern Address
     case 0x14:                 // Scratch Register 2
@@ -1388,7 +1311,7 @@ cirrus_hook_write_sr(CirrusVGAState * s, unsigned reg_index, int reg_value)
     case 0xd0:
     case 0xf0:                 // Graphics Cursor X
        s->sr[0x10] = reg_value;
-       s->cirrus_hw_cursor_x = ((reg_index << 3) & 0x700) | reg_value;
+       s->hw_cursor_x = (reg_value << 3) | (reg_index >> 5);
        break;
     case 0x11:
     case 0x31:
@@ -1399,7 +1322,7 @@ cirrus_hook_write_sr(CirrusVGAState * s, unsigned reg_index, int reg_value)
     case 0xd1:
     case 0xf1:                 // Graphics Cursor Y
        s->sr[0x11] = reg_value;
-       s->cirrus_hw_cursor_y = ((reg_index << 3) & 0x700) | reg_value;
+       s->hw_cursor_y = (reg_value << 3) | (reg_index >> 5);
        break;
     case 0x07:                 // Extended Sequencer Mode
     case 0x08:                 // EEPROM Control
@@ -1415,7 +1338,6 @@ cirrus_hook_write_sr(CirrusVGAState * s, unsigned reg_index, int reg_value)
     case 0x14:                 // Scratch Register 2
     case 0x15:                 // Scratch Register 3
     case 0x16:                 // Performance Tuning Register
-    case 0x17:                 // Configuration Readback and Extended Control
     case 0x18:                 // Signature Generator Control
     case 0x19:                 // Signature Generator Result
     case 0x1a:                 // Signature Generator Result
@@ -1430,6 +1352,10 @@ cirrus_hook_write_sr(CirrusVGAState * s, unsigned reg_index, int reg_value)
               reg_index, reg_value);
 #endif
        break;
+    case 0x17:                 // Configuration Readback and Extended Control
+       s->sr[reg_index] = (s->sr[reg_index] & 0x38) | (reg_value & 0xc7);
+        cirrus_update_memory_access(s);
+        break;
     default:
 #ifdef DEBUG_CIRRUS
        printf("cirrus: outport sr_index %02x, sr_value %02x\n", reg_index,
@@ -1477,13 +1403,9 @@ static int cirrus_hook_read_palette(CirrusVGAState * s, int *reg_value)
 {
     if (!(s->sr[0x12] & CIRRUS_CURSOR_HIDDENPEL))
        return CIRRUS_HOOK_NOT_HANDLED;
-    if (s->dac_read_index < 0x10) {
-       *reg_value =
-           s->cirrus_hidden_palette[s->dac_read_index * 3 +
-                                    s->dac_sub_index];
-    } else {
-       *reg_value = 0xff;      /* XXX */
-    }
+    *reg_value =
+        s->cirrus_hidden_palette[(s->dac_read_index & 0x0f) * 3 +
+                                 s->dac_sub_index];
     if (++s->dac_sub_index == 3) {
        s->dac_sub_index = 0;
        s->dac_read_index++;
@@ -1497,11 +1419,9 @@ static int cirrus_hook_write_palette(CirrusVGAState * s, int reg_value)
        return CIRRUS_HOOK_NOT_HANDLED;
     s->dac_cache[s->dac_sub_index] = reg_value;
     if (++s->dac_sub_index == 3) {
-       if (s->dac_read_index < 0x10) {
-           memcpy(&s->cirrus_hidden_palette[s->dac_write_index * 3],
-                  s->dac_cache, 3);
-           /* XXX update cursor */
-       }
+        memcpy(&s->cirrus_hidden_palette[(s->dac_write_index & 0x0f) * 3],
+               s->dac_cache, 3);
+        /* XXX update cursor */
        s->dac_sub_index = 0;
        s->dac_write_index++;
     }
@@ -1551,6 +1471,9 @@ cirrus_hook_read_gr(CirrusVGAState * s, unsigned reg_index, int *reg_value)
 static int
 cirrus_hook_write_gr(CirrusVGAState * s, unsigned reg_index, int reg_value)
 {
+#if defined(DEBUG_BITBLT) && 0
+    printf("gr%02x: %02x\n", reg_index, reg_value);
+#endif
     switch (reg_index) {
     case 0x00:                 // Standard VGA, BGCOLOR 0x000000ff
        s->cirrus_shadow_gr0 = reg_value;
@@ -1567,13 +1490,19 @@ cirrus_hook_write_gr(CirrusVGAState * s, unsigned reg_index, int reg_value)
        return CIRRUS_HOOK_NOT_HANDLED;
     case 0x05:                 // Standard VGA, Cirrus extended mode
        s->gr[reg_index] = reg_value & 0x7f;
+        cirrus_update_memory_access(s);
        break;
     case 0x09:                 // bank offset #0
     case 0x0A:                 // bank offset #1
+       s->gr[reg_index] = reg_value;
+       cirrus_update_bank_ptr(s, 0);
+       cirrus_update_bank_ptr(s, 1);
+        break;
     case 0x0B:
        s->gr[reg_index] = reg_value;
        cirrus_update_bank_ptr(s, 0);
        cirrus_update_bank_ptr(s, 1);
+        cirrus_update_memory_access(s);
        break;
     case 0x10:                 // BGCOLOR 0x0000ff00
     case 0x11:                 // FGCOLOR 0x0000ff00
@@ -1589,6 +1518,7 @@ cirrus_hook_write_gr(CirrusVGAState * s, unsigned reg_index, int reg_value)
     case 0x29:                 // BLT DEST ADDR 0x00ff00
     case 0x2c:                 // BLT SRC ADDR 0x0000ff
     case 0x2d:                 // BLT SRC ADDR 0x00ff00
+    case 0x2f:                  // BLT WRITEMASK
     case 0x30:                 // BLT MODE
     case 0x32:                 // RASTER OP
     case 0x33:                 // BLT MODEEXT
@@ -1605,6 +1535,12 @@ cirrus_hook_write_gr(CirrusVGAState * s, unsigned reg_index, int reg_value)
        s->gr[reg_index] = reg_value & 0x1f;
        break;
     case 0x2a:                 // BLT DEST ADDR 0x3f0000
+       s->gr[reg_index] = reg_value & 0x3f;
+        /* in auto start mode, start the bit blt now */
+        if (s->gr[0x31] & CIRRUS_BLT_AUTOSTART) {
+            cirrus_bitblt_start(s);
+        }
+       break;
     case 0x2e:                 // BLT SRC ADDR 0x3f0000
        s->gr[reg_index] = reg_value & 0x3f;
        break;
@@ -1717,6 +1653,7 @@ cirrus_hook_write_cr(CirrusVGAState * s, unsigned reg_index, int reg_value)
     case 0x1a:                 // Miscellaneous Control
     case 0x1b:                 // Extended Display Control
     case 0x1c:                 // Sync Adjust and Genlock
+    case 0x1d:                 // Overlay Extended Control
        s->cr[reg_index] = reg_value;
 #ifdef DEBUG_CIRRUS
        printf("cirrus: handled outport cr_index %02x, cr_value %02x\n",
@@ -1728,7 +1665,6 @@ cirrus_hook_write_cr(CirrusVGAState * s, unsigned reg_index, int reg_value)
     case 0x26:                 // Attribute Controller Index Readback (R)
     case 0x27:                 // Part ID (R)
        break;
-    case 0x1d:                 // Overlay Extended Control
     case 0x25:                 // Part Status
     default:
 #ifdef DEBUG_CIRRUS
@@ -1984,11 +1920,12 @@ static void cirrus_mem_writeb_mode4and5_8bpp(CirrusVGAState * s,
     dst = s->vram_ptr + offset;
     for (x = 0; x < 8; x++) {
        if (val & 0x80) {
-           *dst++ = s->cirrus_shadow_gr1;
+           *dst = s->cirrus_shadow_gr1;
        } else if (mode == 5) {
-           *dst++ = s->cirrus_shadow_gr0;
+           *dst = s->cirrus_shadow_gr0;
        }
        val <<= 1;
+       dst++;
     }
     cpu_physical_memory_set_dirty(s->vram_offset + offset);
     cpu_physical_memory_set_dirty(s->vram_offset + offset + 7);
@@ -2006,13 +1943,14 @@ static void cirrus_mem_writeb_mode4and5_16bpp(CirrusVGAState * s,
     dst = s->vram_ptr + offset;
     for (x = 0; x < 8; x++) {
        if (val & 0x80) {
-           *dst++ = s->cirrus_shadow_gr1;
-           *dst++ = s->gr[0x11];
+           *dst = s->cirrus_shadow_gr1;
+           *(dst + 1) = s->gr[0x11];
        } else if (mode == 5) {
-           *dst++ = s->cirrus_shadow_gr0;
-           *dst++ = s->gr[0x10];
+           *dst = s->cirrus_shadow_gr0;
+           *(dst + 1) = s->gr[0x10];
        }
        val <<= 1;
+       dst += 2;
     }
     cpu_physical_memory_set_dirty(s->vram_offset + offset);
     cpu_physical_memory_set_dirty(s->vram_offset + offset + 15);
@@ -2098,7 +2036,7 @@ static uint32_t cirrus_vga_mem_readl(void *opaque, target_phys_addr_t addr)
     return v;
 }
 
-static void cirrus_vga_mem_writeb(void *opaque, target_phys_addr_t addr, 
+static void cirrus_vga_mem_writeb(void *opaque, target_phys_addr_t addr,
                                   uint32_t mem_value)
 {
     CirrusVGAState *s = opaque;
@@ -2117,7 +2055,7 @@ static void cirrus_vga_mem_writeb(void *opaque, target_phys_addr_t addr,
        if (s->cirrus_srcptr != s->cirrus_srcptr_end) {
            /* bitblt */
            *s->cirrus_srcptr++ = (uint8_t) mem_value;
-           if (s->cirrus_srcptr == s->cirrus_srcptr_end) {
+           if (s->cirrus_srcptr >= s->cirrus_srcptr_end) {
                cirrus_bitblt_cputovideo_next(s);
            }
        } else {
@@ -2202,6 +2140,176 @@ static CPUWriteMemoryFunc *cirrus_vga_mem_write[3] = {
 
 /***************************************
  *
+ *  hardware cursor
+ *
+ ***************************************/
+
+/* Invalidate the scanlines covered by the last recorded hardware cursor.
+   Nothing is done when the recorded cursor size is 0. */
+static inline void invalidate_cursor1(CirrusVGAState *s)
+{
+    if (!s->last_hw_cursor_size)
+        return;
+
+    vga_invalidate_scanlines((VGAState *)s,
+                             s->last_hw_cursor_y + s->last_hw_cursor_y_start,
+                             s->last_hw_cursor_y + s->last_hw_cursor_y_end);
+}
+
+/* Scan the hardware cursor bitmap, located in the last 16KB of video
+   memory, and store in last_hw_cursor_y_start / last_hw_cursor_y_end the
+   row range [start, end) holding at least one non-zero bit. Both fields
+   are set to 0 when every scanned row is empty. */
+static inline void cirrus_cursor_compute_yrange(CirrusVGAState *s)
+{
+    const uint8_t *row;
+    const uint32_t *words;
+    uint32_t bits;
+    int line, first, last;
+
+    first = -1;
+    last = -1;
+    row = s->vram_ptr + s->real_vram_size - 16 * 1024;
+    if (s->sr[0x12] & CIRRUS_CURSOR_LARGE) {
+        /* 64 rows, 16 bytes (four dwords) per row */
+        row += (s->sr[0x13] & 0x3c) * 256;
+        for (line = 0; line < 64; line++, row += 16) {
+            words = (const uint32_t *)row;
+            bits = words[0] | words[1] | words[2] | words[3];
+            if (!bits)
+                continue;
+            if (first < 0)
+                first = line;
+            last = line;
+        }
+    } else {
+        /* 32 rows, one dword per row plus the dword 128 bytes further */
+        row += (s->sr[0x13] & 0x3f) * 256;
+        for (line = 0; line < 32; line++, row += 4) {
+            bits = *(const uint32_t *)row | *(const uint32_t *)(row + 128);
+            if (!bits)
+                continue;
+            if (first < 0)
+                first = line;
+            last = line;
+        }
+    }
+
+    if (last < 0) {
+        /* empty bitmap */
+        s->last_hw_cursor_y_start = 0;
+        s->last_hw_cursor_y_end = 0;
+    } else {
+        s->last_hw_cursor_y_start = first;
+        s->last_hw_cursor_y_end = last + 1;
+    }
+}
+
+/* Invalidate the screen areas affected by a hardware cursor change.
+   The cursor size is 0 when CIRRUS_CURSOR_SHOW is clear in SR12, else 64
+   when CIRRUS_CURSOR_LARGE is set and 32 otherwise. When the size or the
+   position differs from the last recorded state, the old cursor area is
+   invalidated, the new state is recorded, the non-empty row range of the
+   bitmap is recomputed and the new area is invalidated.
+   NOTE: we do not currently handle the cursor bitmap change, so we
+   update the cursor only if it moves. */
+static void cirrus_cursor_invalidate(VGAState *s1)
+{
+    CirrusVGAState *s = (CirrusVGAState *)s1;
+    int size;
+
+    /* the parentheses are required: '!' binds tighter than '&' */
+    if (!(s->sr[0x12] & CIRRUS_CURSOR_SHOW)) {
+        size = 0;
+    } else {
+        if (s->sr[0x12] & CIRRUS_CURSOR_LARGE)
+            size = 64;
+        else
+            size = 32;
+    }
+    /* invalidate last cursor and new cursor if any change */
+    if (s->last_hw_cursor_size != size ||
+        s->last_hw_cursor_x != s->hw_cursor_x ||
+        s->last_hw_cursor_y != s->hw_cursor_y) {
+
+        invalidate_cursor1(s);
+
+        s->last_hw_cursor_size = size;
+        s->last_hw_cursor_x = s->hw_cursor_x;
+        s->last_hw_cursor_y = s->hw_cursor_y;
+        /* compute the real cursor min and max y */
+        cirrus_cursor_compute_yrange(s);
+        invalidate_cursor1(s);
+    }
+}
+/* Draw the part of the hardware cursor that intersects scanline scr_y.
+ * Returns without drawing when the cursor is not shown (SR12), when scr_y
+ * is outside [hw_cursor_y, hw_cursor_y + height), when the bitmap row is
+ * all zero, or when the cursor starts at or beyond last_scr_width.
+ * The two cursor colors come from entries 0x0 and 0xf of
+ * cirrus_hidden_palette, expanded with c6_to_8() and converted with
+ * s->rgb_to_pixel(). d1 is advanced by hw_cursor_x pixels before drawing,
+ * so it presumably points at the start of the destination scanline
+ * (TODO confirm with the caller).
+ */
+static void cirrus_cursor_draw_line(VGAState *s1, uint8_t *d1, int scr_y)
+{
+    CirrusVGAState *s = (CirrusVGAState *)s1;
+    int w, h, bpp, x1, x2, poffset;
+    unsigned int color0, color1;
+    const uint8_t *palette, *src;
+    uint32_t content;
+
+    if (!(s->sr[0x12] & CIRRUS_CURSOR_SHOW))
+        return;
+    /* fast test to see if the cursor intersects with the scan line */
+    if (s->sr[0x12] & CIRRUS_CURSOR_LARGE) {
+        h = 64;
+    } else {
+        h = 32;
+    }
+    if (scr_y < s->hw_cursor_y ||
+        scr_y >= (s->hw_cursor_y + h))
+        return;
+
+    src = s->vram_ptr + s->real_vram_size - 16 * 1024; /* cursor data: last 16KB of video memory */
+    if (s->sr[0x12] & CIRRUS_CURSOR_LARGE) {
+        src += (s->sr[0x13] & 0x3c) * 256;
+        src += (scr_y - s->hw_cursor_y) * 16; /* 16 bytes per cursor row */
+        poffset = 8;
+        content = ((uint32_t *)src)[0] |
+            ((uint32_t *)src)[1] |
+            ((uint32_t *)src)[2] |
+            ((uint32_t *)src)[3];
+    } else {
+        src += (s->sr[0x13] & 0x3f) * 256;
+        src += (scr_y - s->hw_cursor_y) * 4; /* 4 bytes per cursor row */
+        poffset = 128;
+        content = ((uint32_t *)src)[0] |
+            ((uint32_t *)(src + 128))[0];
+    }
+    /* if nothing to draw, no need to continue */
+    if (!content)
+        return;
+    w = h; /* the cursor is as wide as it is high */
+
+    x1 = s->hw_cursor_x;
+    if (x1 >= s->last_scr_width)
+        return;
+    x2 = s->hw_cursor_x + w;
+    if (x2 > s->last_scr_width)
+        x2 = s->last_scr_width; /* clip at the right edge */
+    w = x2 - x1;
+    palette = s->cirrus_hidden_palette;
+    color0 = s->rgb_to_pixel(c6_to_8(palette[0x0 * 3]),
+                             c6_to_8(palette[0x0 * 3 + 1]),
+                             c6_to_8(palette[0x0 * 3 + 2]));
+    color1 = s->rgb_to_pixel(c6_to_8(palette[0xf * 3]),
+                             c6_to_8(palette[0xf * 3 + 1]),
+                             c6_to_8(palette[0xf * 3 + 2]));
+    bpp = ((s->ds->depth + 7) >> 3); /* bytes per pixel, rounded up */
+    d1 += x1 * bpp;
+    switch(s->ds->depth) {
+    default: /* any other depth: nothing is drawn */
+        break;
+    case 8:
+        vga_draw_cursor_line_8(d1, src, poffset, w, color0, color1, 0xff);
+        break;
+    case 15:
+        vga_draw_cursor_line_16(d1, src, poffset, w, color0, color1, 0x7fff);
+        break;
+    case 16:
+        vga_draw_cursor_line_16(d1, src, poffset, w, color0, color1, 0xffff);
+        break;
+    case 32:
+        vga_draw_cursor_line_32(d1, src, poffset, w, color0, color1, 0xffffff);
+        break;
+    }
+}
+
+/***************************************
+ *
  *  LFB memory access
  *
  ***************************************/
@@ -2211,10 +2319,10 @@ static uint32_t cirrus_linear_readb(void *opaque, target_phys_addr_t addr)
     CirrusVGAState *s = (CirrusVGAState *) opaque;
     uint32_t ret;
 
-    /* XXX: s->vram_size must be a power of two */
     addr &= s->cirrus_addr_mask;
 
-    if (((s->sr[0x17] & 0x44) == 0x44) && ((addr & 0x1fff00) == 0x1fff00)) {
+    if (((s->sr[0x17] & 0x44) == 0x44) &&
+        ((addr & s->linear_mmio_mask) == s->linear_mmio_mask)) {
        /* memory-mapped I/O */
        ret = cirrus_mmio_blt_read(s, addr & 0xff);
     } else if (0) {
@@ -2272,13 +2380,14 @@ static void cirrus_linear_writeb(void *opaque, target_phys_addr_t addr,
 
     addr &= s->cirrus_addr_mask;
 
-    if (((s->sr[0x17] & 0x44) == 0x44) && ((addr & 0x1fff00) == 0x1fff00)) {
+    if (((s->sr[0x17] & 0x44) == 0x44) &&
+        ((addr & s->linear_mmio_mask) ==  s->linear_mmio_mask)) {
        /* memory-mapped I/O */
        cirrus_mmio_blt_write(s, addr & 0xff, val);
     } else if (s->cirrus_srcptr != s->cirrus_srcptr_end) {
        /* bitblt */
        *s->cirrus_srcptr++ = (uint8_t) val;
-       if (s->cirrus_srcptr == s->cirrus_srcptr_end) {
+       if (s->cirrus_srcptr >= s->cirrus_srcptr_end) {
            cirrus_bitblt_cputovideo_next(s);
        }
     } else {
@@ -2345,6 +2454,168 @@ static CPUWriteMemoryFunc *cirrus_linear_write[3] = {
     cirrus_linear_writel,
 };
 
+/* Direct byte write to video memory: the address is wrapped with
+   cirrus_addr_mask and the written location is marked dirty. */
+static void cirrus_linear_mem_writeb(void *opaque, target_phys_addr_t addr,
+                                     uint32_t val)
+{
+    CirrusVGAState *s = (CirrusVGAState *) opaque;
+    target_phys_addr_t off = addr & s->cirrus_addr_mask;
+
+    s->vram_ptr[off] = val;
+    cpu_physical_memory_set_dirty(s->vram_offset + off);
+}
+
+/* Direct 16 bit write to video memory, stored with cpu_to_le16w(): the
+   address is wrapped with cirrus_addr_mask and the written location is
+   marked dirty. */
+static void cirrus_linear_mem_writew(void *opaque, target_phys_addr_t addr,
+                                     uint32_t val)
+{
+    CirrusVGAState *s = (CirrusVGAState *) opaque;
+    target_phys_addr_t off = addr & s->cirrus_addr_mask;
+
+    cpu_to_le16w((uint16_t *)(s->vram_ptr + off), val);
+    cpu_physical_memory_set_dirty(s->vram_offset + off);
+}
+
+/* Direct 32 bit write to video memory, stored with cpu_to_le32w(): the
+   address is wrapped with cirrus_addr_mask and the written location is
+   marked dirty. */
+static void cirrus_linear_mem_writel(void *opaque, target_phys_addr_t addr,
+                                     uint32_t val)
+{
+    CirrusVGAState *s = (CirrusVGAState *) opaque;
+    target_phys_addr_t off = addr & s->cirrus_addr_mask;
+
+    cpu_to_le32w((uint32_t *)(s->vram_ptr + off), val);
+    cpu_physical_memory_set_dirty(s->vram_offset + off);
+}
+
+/***************************************
+ *
+ *  system to screen memory access
+ *
+ ***************************************/
+
+
+/* Byte read from the bitblt region: not implemented yet, every read
+   returns 0xff whatever the address. */
+static uint32_t cirrus_linear_bitblt_readb(void *opaque, target_phys_addr_t addr)
+{
+    /* XXX handle bitblt */
+    return 0xff;
+}
+
+/* 16 bit read from the bitblt region, assembled from two byte reads in
+   the byte order of the target. */
+static uint32_t cirrus_linear_bitblt_readw(void *opaque, target_phys_addr_t addr)
+{
+    uint32_t first, second;
+
+    first = cirrus_linear_bitblt_readb(opaque, addr);
+    second = cirrus_linear_bitblt_readb(opaque, addr + 1);
+#ifdef TARGET_WORDS_BIGENDIAN
+    return (first << 8) | second;
+#else
+    return first | (second << 8);
+#endif
+}
+
+/* 32 bit read from the bitblt region, assembled from four byte reads in
+   the byte order of the target. */
+static uint32_t cirrus_linear_bitblt_readl(void *opaque, target_phys_addr_t addr)
+{
+    uint32_t b0, b1, b2, b3;
+
+    b0 = cirrus_linear_bitblt_readb(opaque, addr);
+    b1 = cirrus_linear_bitblt_readb(opaque, addr + 1);
+    b2 = cirrus_linear_bitblt_readb(opaque, addr + 2);
+    b3 = cirrus_linear_bitblt_readb(opaque, addr + 3);
+#ifdef TARGET_WORDS_BIGENDIAN
+    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+#else
+    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+#endif
+}
+
+/* Byte write to the bitblt region. The byte is appended to the bitblt
+   source buffer when cirrus_srcptr has not reached cirrus_srcptr_end, and
+   cirrus_bitblt_cputovideo_next() is called once the buffer is full.
+   The write is dropped otherwise; the address is not used. */
+static void cirrus_linear_bitblt_writeb(void *opaque, target_phys_addr_t addr,
+                                uint32_t val)
+{
+    CirrusVGAState *s = (CirrusVGAState *) opaque;
+
+    if (s->cirrus_srcptr == s->cirrus_srcptr_end)
+        return;
+
+    /* bitblt */
+    *s->cirrus_srcptr++ = (uint8_t) val;
+    if (s->cirrus_srcptr >= s->cirrus_srcptr_end)
+        cirrus_bitblt_cputovideo_next(s);
+}
+
+/* 16 bit write to the bitblt region, issued as two byte writes at
+   increasing addresses in the byte order of the target. */
+static void cirrus_linear_bitblt_writew(void *opaque, target_phys_addr_t addr,
+                                uint32_t val)
+{
+    const uint32_t low = val & 0xff;
+    const uint32_t high = (val >> 8) & 0xff;
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    cirrus_linear_bitblt_writeb(opaque, addr, high);
+    cirrus_linear_bitblt_writeb(opaque, addr + 1, low);
+#else
+    cirrus_linear_bitblt_writeb(opaque, addr, low);
+    cirrus_linear_bitblt_writeb(opaque, addr + 1, high);
+#endif
+}
+
+/* 32 bit write to the bitblt region, issued as four byte writes at
+   increasing addresses in the byte order of the target. */
+static void cirrus_linear_bitblt_writel(void *opaque, target_phys_addr_t addr,
+                                uint32_t val)
+{
+    /* b0 is the least significant byte of val */
+    const uint32_t b0 = val & 0xff;
+    const uint32_t b1 = (val >> 8) & 0xff;
+    const uint32_t b2 = (val >> 16) & 0xff;
+    const uint32_t b3 = (val >> 24) & 0xff;
+
+#ifdef TARGET_WORDS_BIGENDIAN
+    cirrus_linear_bitblt_writeb(opaque, addr, b3);
+    cirrus_linear_bitblt_writeb(opaque, addr + 1, b2);
+    cirrus_linear_bitblt_writeb(opaque, addr + 2, b1);
+    cirrus_linear_bitblt_writeb(opaque, addr + 3, b0);
+#else
+    cirrus_linear_bitblt_writeb(opaque, addr, b0);
+    cirrus_linear_bitblt_writeb(opaque, addr + 1, b1);
+    cirrus_linear_bitblt_writeb(opaque, addr + 2, b2);
+    cirrus_linear_bitblt_writeb(opaque, addr + 3, b3);
+#endif
+}
+
+/* Read handlers of the bitblt I/O memory region, in the order byte,
+   word, long; passed to cpu_register_io_memory() in
+   cirrus_init_common(). */
+static CPUReadMemoryFunc *cirrus_linear_bitblt_read[3] = {
+    cirrus_linear_bitblt_readb,
+    cirrus_linear_bitblt_readw,
+    cirrus_linear_bitblt_readl,
+};
+/* Write handlers of the bitblt I/O memory region, in the order byte,
+   word, long; passed to cpu_register_io_memory() in
+   cirrus_init_common(). */
+static CPUWriteMemoryFunc *cirrus_linear_bitblt_write[3] = {
+    cirrus_linear_bitblt_writeb,
+    cirrus_linear_bitblt_writew,
+    cirrus_linear_bitblt_writel,
+};
+
+/* Compute the memory access functions.
+   The direct video memory writers (cirrus_linear_mem_write*) are installed
+   in s->cirrus_linear_write[] only when all of the following hold:
+     - SR17 bits 0x44 are not both set,
+     - cirrus_srcptr equals cirrus_srcptr_end (no bitblt data expected),
+     - GR0B bits 0x14 are not both set and GR0B bit 0x02 is clear,
+     - the write mode (GR5 & 7) is neither 4 nor 5, or GR0B bit 0x04 is
+       clear.
+   In every other case the generic cirrus_linear_write* handlers are
+   installed. */
+static void cirrus_update_memory_access(CirrusVGAState *s)
+{
+    unsigned mode;
+    int generic_io;
+
+    generic_io = (s->sr[0x17] & 0x44) == 0x44 ||
+        s->cirrus_srcptr != s->cirrus_srcptr_end ||
+        (s->gr[0x0B] & 0x14) == 0x14 ||
+        (s->gr[0x0B] & 0x02) != 0;
+
+    if (!generic_io) {
+        mode = s->gr[0x05] & 0x7;
+        generic_io = (mode == 4 || mode == 5) && (s->gr[0x0B] & 0x4) != 0;
+    }
+
+    if (generic_io) {
+        s->cirrus_linear_write[0] = cirrus_linear_writeb;
+        s->cirrus_linear_write[1] = cirrus_linear_writew;
+        s->cirrus_linear_write[2] = cirrus_linear_writel;
+    } else {
+        s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
+        s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
+        s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
+    }
+}
+
+
 /* I/O ports */
 
 static uint32_t vga_ioport_read(void *opaque, uint32_t addr)
@@ -2393,7 +2664,11 @@ static uint32_t vga_ioport_read(void *opaque, uint32_t addr)
        case 0x3c7:
            val = s->dac_state;
            break;
-       case 0x3c9:
+       case 0x3c8:
+           val = s->dac_write_index;
+           s->cirrus_hidden_dac_lockindex = 0;
+           break;
+        case 0x3c9:
            if (cirrus_hook_read_palette(s, &val))
                break;
            val = s->palette[s->dac_read_index * 3 + s->dac_sub_index];
@@ -2557,7 +2832,7 @@ static void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
        printf("vga: write CR%x = 0x%02x\n", s->cr_index, val);
 #endif
        /* handle CR0-7 protection */
-       if ((s->cr[11] & 0x80) && s->cr_index <= 7) {
+       if ((s->cr[0x11] & 0x80) && s->cr_index <= 7) {
            /* can always write bit 4 of CR7 */
            if (s->cr_index == 7)
                s->cr[7] = (s->cr[7] & ~0x10) | (val & 0x10);
@@ -2569,7 +2844,7 @@ static void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
        case 0x09:
        case 0x0c:
        case 0x0d:
-       case 0x12:              /* veritcal display end */
+       case 0x12:              /* vertical display end */
            s->cr[s->cr_index] = val;
            break;
 
@@ -2689,15 +2964,137 @@ static CPUWriteMemoryFunc *cirrus_mmio_write[3] = {
     cirrus_mmio_writel,
 };
 
+/* load/save state */
+/* Write the device state to f. The fields are emitted in a fixed order
+ * that cirrus_vga_load() reads back in the same sequence, so any change
+ * here must be mirrored there. The PCI device state comes first, and only
+ * when a PCI device is attached. gr[0] and gr[1] are not written
+ * directly: the shadow copies are saved and the loader rebuilds gr[0] and
+ * gr[1] from their low 4 bits.
+ */
+static void cirrus_vga_save(QEMUFile *f, void *opaque)
+{
+    CirrusVGAState *s = opaque;
+
+    if (s->pci_dev)
+        pci_device_save(s->pci_dev, f);
+
+    qemu_put_be32s(f, &s->latch);
+    qemu_put_8s(f, &s->sr_index);
+    qemu_put_buffer(f, s->sr, 256);
+    qemu_put_8s(f, &s->gr_index);
+    qemu_put_8s(f, &s->cirrus_shadow_gr0);
+    qemu_put_8s(f, &s->cirrus_shadow_gr1);
+    qemu_put_buffer(f, s->gr + 2, 254); /* 254 bytes starting at gr[2] */
+    qemu_put_8s(f, &s->ar_index);
+    qemu_put_buffer(f, s->ar, 21);
+    qemu_put_be32s(f, &s->ar_flip_flop);
+    qemu_put_8s(f, &s->cr_index);
+    qemu_put_buffer(f, s->cr, 256);
+    qemu_put_8s(f, &s->msr);
+    qemu_put_8s(f, &s->fcr);
+    qemu_put_8s(f, &s->st00);
+    qemu_put_8s(f, &s->st01);
+
+    qemu_put_8s(f, &s->dac_state);
+    qemu_put_8s(f, &s->dac_sub_index);
+    qemu_put_8s(f, &s->dac_read_index);
+    qemu_put_8s(f, &s->dac_write_index);
+    qemu_put_buffer(f, s->dac_cache, 3);
+    qemu_put_buffer(f, s->palette, 768);
+
+    qemu_put_be32s(f, &s->bank_offset);
+
+    qemu_put_8s(f, &s->cirrus_hidden_dac_lockindex);
+    qemu_put_8s(f, &s->cirrus_hidden_dac_data);
+
+    qemu_put_be32s(f, &s->hw_cursor_x);
+    qemu_put_be32s(f, &s->hw_cursor_y);
+    /* XXX: we do not save the bitblt state - we assume we do not save
+       the state when the blitter is active */
+}
+
+/* Restore the device state written by cirrus_vga_save(); the fields are
+   read in the order they were written.
+   Only version_id <= 2 is accepted, -EINVAL is returned otherwise. The
+   PCI device state is read only when a PCI device is attached and
+   version_id >= 2; a negative result of pci_device_load() is returned
+   unchanged. Returns 0 on success. */
+static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
+{
+    CirrusVGAState *s = opaque;
+    int ret;
+
+    if (version_id > 2)
+        return -EINVAL;
+
+    if (s->pci_dev && version_id >= 2) {
+        ret = pci_device_load(s->pci_dev, f);
+        if (ret < 0)
+            return ret;
+    }
+
+    qemu_get_be32s(f, &s->latch);
+    qemu_get_8s(f, &s->sr_index);
+    qemu_get_buffer(f, s->sr, 256);
+    qemu_get_8s(f, &s->gr_index);
+    qemu_get_8s(f, &s->cirrus_shadow_gr0);
+    qemu_get_8s(f, &s->cirrus_shadow_gr1);
+    /* gr[0] and gr[1] are not in the stream: rebuild them from the low
+       4 bits of the shadow copies */
+    s->gr[0x00] = s->cirrus_shadow_gr0 & 0x0f;
+    s->gr[0x01] = s->cirrus_shadow_gr1 & 0x0f;
+    qemu_get_buffer(f, s->gr + 2, 254);
+    qemu_get_8s(f, &s->ar_index);
+    qemu_get_buffer(f, s->ar, 21);
+    qemu_get_be32s(f, &s->ar_flip_flop);
+    qemu_get_8s(f, &s->cr_index);
+    qemu_get_buffer(f, s->cr, 256);
+    qemu_get_8s(f, &s->msr);
+    qemu_get_8s(f, &s->fcr);
+    qemu_get_8s(f, &s->st00);
+    qemu_get_8s(f, &s->st01);
+
+    qemu_get_8s(f, &s->dac_state);
+    qemu_get_8s(f, &s->dac_sub_index);
+    qemu_get_8s(f, &s->dac_read_index);
+    qemu_get_8s(f, &s->dac_write_index);
+    qemu_get_buffer(f, s->dac_cache, 3);
+    qemu_get_buffer(f, s->palette, 768);
+
+    qemu_get_be32s(f, &s->bank_offset);
+
+    qemu_get_8s(f, &s->cirrus_hidden_dac_lockindex);
+    qemu_get_8s(f, &s->cirrus_hidden_dac_data);
+
+    qemu_get_be32s(f, &s->hw_cursor_x);
+    qemu_get_be32s(f, &s->hw_cursor_y);
+
+    /* the linear write handlers are selected from SR17, GR5 and GR0B,
+       which have just been reloaded: select them again so that they
+       match the restored registers */
+    cirrus_update_memory_access(s);
+    /* force refresh */
+    s->graphic_mode = -1;
+    cirrus_update_bank_ptr(s, 0);
+    cirrus_update_bank_ptr(s, 1);
+    return 0;
+}
+
 /***************************************
  *
  *  initialize
  *
  ***************************************/
 
-static void cirrus_init_common(CirrusVGAState * s)
-{
-    int vga_io_memory;
+static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci)
+{
+    int vga_io_memory, i;
+    static int inited;
+
+    if (!inited) {
+        inited = 1;
+        for(i = 0;i < 256; i++)
+            rop_to_index[i] = CIRRUS_ROP_NOP_INDEX; /* nop rop */
+        rop_to_index[CIRRUS_ROP_0] = 0;
+        rop_to_index[CIRRUS_ROP_SRC_AND_DST] = 1;
+        rop_to_index[CIRRUS_ROP_NOP] = 2;
+        rop_to_index[CIRRUS_ROP_SRC_AND_NOTDST] = 3;
+        rop_to_index[CIRRUS_ROP_NOTDST] = 4;
+        rop_to_index[CIRRUS_ROP_SRC] = 5;
+        rop_to_index[CIRRUS_ROP_1] = 6;
+        rop_to_index[CIRRUS_ROP_NOTSRC_AND_DST] = 7;
+        rop_to_index[CIRRUS_ROP_SRC_XOR_DST] = 8;
+        rop_to_index[CIRRUS_ROP_SRC_OR_DST] = 9;
+        rop_to_index[CIRRUS_ROP_NOTSRC_OR_NOTDST] = 10;
+        rop_to_index[CIRRUS_ROP_SRC_NOTXOR_DST] = 11;
+        rop_to_index[CIRRUS_ROP_SRC_OR_NOTDST] = 12;
+        rop_to_index[CIRRUS_ROP_NOTSRC] = 13;
+        rop_to_index[CIRRUS_ROP_NOTSRC_OR_DST] = 14;
+        rop_to_index[CIRRUS_ROP_NOTSRC_AND_NOTDST] = 15;
+    }
 
     register_ioport_write(0x3c0, 16, 1, vga_ioport_write, s);
 
@@ -2713,16 +3110,42 @@ static void cirrus_init_common(CirrusVGAState * s)
     register_ioport_read(0x3ba, 1, 1, vga_ioport_read, s);
     register_ioport_read(0x3da, 1, 1, vga_ioport_read, s);
 
-    vga_io_memory = cpu_register_io_memory(0, cirrus_vga_mem_read, 
+    vga_io_memory = cpu_register_io_memory(0, cirrus_vga_mem_read,
                                            cirrus_vga_mem_write, s);
-    cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000, 
+    cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
                                  vga_io_memory);
 
     s->sr[0x06] = 0x0f;
-    s->sr[0x0F] = CIRRUS_MEMSIZE_2M;
-    s->sr[0x1F] = 0x22;                // MemClock
+    if (device_id == CIRRUS_ID_CLGD5446) {
+        /* 4MB 64 bit memory config, always PCI */
+        s->sr[0x1F] = 0x2d;            // MemClock
+        s->gr[0x18] = 0x0f;             // fastest memory configuration
+#if 1
+        s->sr[0x0f] = 0x98;
+        s->sr[0x17] = 0x20;
+        s->sr[0x15] = 0x04; /* memory size, 3=2MB, 4=4MB */
+        s->real_vram_size = 4096 * 1024;
+#else
+        s->sr[0x0f] = 0x18;
+        s->sr[0x17] = 0x20;
+        s->sr[0x15] = 0x03; /* memory size, 3=2MB, 4=4MB */
+        s->real_vram_size = 2048 * 1024;
+#endif
+    } else {
+        s->sr[0x1F] = 0x22;            // MemClock
+        s->sr[0x0F] = CIRRUS_MEMSIZE_2M;
+        if (is_pci)
+            s->sr[0x17] = CIRRUS_BUSTYPE_PCI;
+        else
+            s->sr[0x17] = CIRRUS_BUSTYPE_ISA;
+        s->real_vram_size = 2048 * 1024;
+        s->sr[0x15] = 0x03; /* memory size, 3=2MB, 4=4MB */
+    }
+    s->cr[0x27] = device_id;
 
-    s->cr[0x27] = CIRRUS_ID;
+    /* Win2K seems to assume that the pattern buffer is at 0xff
+       initially ! */
+    memset(s->vram_ptr, 0xff, s->real_vram_size);
 
     s->cirrus_hidden_dac_lockindex = 5;
     s->cirrus_hidden_dac_data = 0;
@@ -2731,15 +3154,28 @@ static void cirrus_init_common(CirrusVGAState * s)
     s->cirrus_linear_io_addr =
        cpu_register_io_memory(0, cirrus_linear_read, cirrus_linear_write,
                               s);
+    s->cirrus_linear_write = cpu_get_io_memory_write(s->cirrus_linear_io_addr);
+
+    /* I/O handler for the linear bitblt (system to screen) region */
+    s->cirrus_linear_bitblt_io_addr =
+       cpu_register_io_memory(0, cirrus_linear_bitblt_read, cirrus_linear_bitblt_write,
+                              s);
+
     /* I/O handler for memory-mapped I/O */
     s->cirrus_mmio_io_addr =
        cpu_register_io_memory(0, cirrus_mmio_read, cirrus_mmio_write, s);
 
     /* XXX: s->vram_size must be a power of two */
-    s->cirrus_addr_mask = s->vram_size - 1;
+    s->cirrus_addr_mask = s->real_vram_size - 1;
+    s->linear_mmio_mask = s->real_vram_size - 256;
 
     s->get_bpp = cirrus_get_bpp;
     s->get_offsets = cirrus_get_offsets;
+    s->get_resolution = cirrus_get_resolution;
+    s->cursor_invalidate = cirrus_cursor_invalidate;
+    s->cursor_draw_line = cirrus_cursor_draw_line;
+
+    register_savevm("cirrus_vga", 0, 2, cirrus_vga_save, cirrus_vga_load, s);
 }
 
 /***************************************
@@ -2748,17 +3184,16 @@ static void cirrus_init_common(CirrusVGAState * s)
  *
  ***************************************/
 
-void isa_cirrus_vga_init(DisplayState *ds, uint8_t *vga_ram_base, 
+void isa_cirrus_vga_init(DisplayState *ds, uint8_t *vga_ram_base,
                          unsigned long vga_ram_offset, int vga_ram_size)
 {
     CirrusVGAState *s;
 
     s = qemu_mallocz(sizeof(CirrusVGAState));
-    
-    vga_common_init((VGAState *)s, 
+
+    vga_common_init((VGAState *)s,
                     ds, vga_ram_base, vga_ram_offset, vga_ram_size);
-    cirrus_init_common(s);
-    s->sr[0x17] = CIRRUS_BUSTYPE_ISA;
+    cirrus_init_common(s, CIRRUS_ID_CLGD5430, 0);
     /* XXX ISA-LFB support */
 }
 
@@ -2773,8 +3208,11 @@ static void cirrus_pci_lfb_map(PCIDevice *d, int region_num,
 {
     CirrusVGAState *s = &((PCICirrusVGAState *)d)->cirrus_vga;
 
+    /* XXX: add byte swapping apertures */
     cpu_register_physical_memory(addr, s->vram_size,
                                 s->cirrus_linear_io_addr);
+    cpu_register_physical_memory(addr + 0x1000000, 0x400000,
+                                s->cirrus_linear_bitblt_io_addr);
 }
 
 static void cirrus_pci_mmio_map(PCIDevice *d, int region_num,
@@ -2786,22 +3224,25 @@ static void cirrus_pci_mmio_map(PCIDevice *d, int region_num,
                                 s->cirrus_mmio_io_addr);
 }
 
-void pci_cirrus_vga_init(DisplayState *ds, uint8_t *vga_ram_base, 
+void pci_cirrus_vga_init(PCIBus *bus, DisplayState *ds, uint8_t *vga_ram_base,
                          unsigned long vga_ram_offset, int vga_ram_size)
 {
     PCICirrusVGAState *d;
     uint8_t *pci_conf;
     CirrusVGAState *s;
+    int device_id;
+
+    device_id = CIRRUS_ID_CLGD5446;
 
     /* setup PCI configuration registers */
-    d = (PCICirrusVGAState *)pci_register_device("Cirrus VGA", 
-                                                 sizeof(PCICirrusVGAState), 
-                                                 0, -1, NULL, NULL);
+    d = (PCICirrusVGAState *)pci_register_device(bus, "Cirrus VGA",
+                                                 sizeof(PCICirrusVGAState),
+                                                 -1, NULL, NULL);
     pci_conf = d->dev.config;
     pci_conf[0x00] = (uint8_t) (PCI_VENDOR_CIRRUS & 0xff);
     pci_conf[0x01] = (uint8_t) (PCI_VENDOR_CIRRUS >> 8);
-    pci_conf[0x02] = (uint8_t) (PCI_DEVICE_ID & 0xff);
-    pci_conf[0x03] = (uint8_t) (PCI_DEVICE_ID >> 8);
+    pci_conf[0x02] = (uint8_t) (device_id & 0xff);
+    pci_conf[0x03] = (uint8_t) (device_id >> 8);
     pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
     pci_conf[0x0a] = PCI_CLASS_SUB_VGA;
     pci_conf[0x0b] = PCI_CLASS_BASE_DISPLAY;
@@ -2809,18 +3250,21 @@ void pci_cirrus_vga_init(DisplayState *ds, uint8_t *vga_ram_base,
 
     /* setup VGA */
     s = &d->cirrus_vga;
-    vga_common_init((VGAState *)s, 
+    vga_common_init((VGAState *)s,
                     ds, vga_ram_base, vga_ram_offset, vga_ram_size);
-    cirrus_init_common(s);
-    s->sr[0x17] = CIRRUS_BUSTYPE_PCI;
+    cirrus_init_common(s, device_id, 1);
+
+    graphic_console_init(s->ds, s->update, s->invalidate, s->screen_dump, s);
+
+    s->pci_dev = (PCIDevice *)d;
 
     /* setup memory space */
     /* memory #0 LFB */
     /* memory #1 memory-mapped I/O */
     /* XXX: s->vram_size must be a power of two */
-    pci_register_io_region((PCIDevice *)d, 0, s->vram_size,
+    pci_register_io_region((PCIDevice *)d, 0, 0x2000000,
                           PCI_ADDRESS_SPACE_MEM_PREFETCH, cirrus_pci_lfb_map);
-    if (CIRRUS_ID == CIRRUS_ID_CLGD5446) {
+    if (device_id == CIRRUS_ID_CLGD5446) {
         pci_register_io_region((PCIDevice *)d, 1, CIRRUS_PNPMMIO_SIZE,
                                PCI_ADDRESS_SPACE_MEM, cirrus_pci_mmio_map);
     }