Maemo patchset 20101501+0m5
[h-e-n] drivers/usb/musb/musb_gadget.c
index d6a802c..70d6f91 100644
@@ -106,50 +106,207 @@ __acquires(ep->musb->lock)
 {
        struct musb_request     *req;
        struct musb             *musb;
-       int                     busy = ep->busy;
 
        req = to_musb_request(request);
+       req->complete = false;
 
        list_del(&request->list);
        if (req->request.status == -EINPROGRESS)
                req->request.status = status;
        musb = req->musb;
 
-       ep->busy = 1;
        spin_unlock(&musb->lock);
-       if (is_dma_capable()) {
-               if (req->mapped) {
-                       dma_unmap_single(musb->controller,
-                                       req->request.dma,
-                                       req->request.length,
-                                       req->tx
-                                               ? DMA_TO_DEVICE
-                                               : DMA_FROM_DEVICE);
-                       req->request.dma = DMA_ADDR_INVALID;
-                       req->mapped = 0;
-               } else if (req->request.dma != DMA_ADDR_INVALID)
-                       dma_sync_single_for_cpu(musb->controller,
-                                       req->request.dma,
-                                       req->request.length,
-                                       req->tx
-                                               ? DMA_TO_DEVICE
-                                               : DMA_FROM_DEVICE);
-       }
-       if (request->status == 0)
+       if (request->status == 0) {
                DBG(5, "%s done request %p,  %d/%d\n",
-                               ep->end_point.name, request,
-                               req->request.actual, req->request.length);
-       else
+                   ep->name, request, req->request.actual,
+                   req->request.length);
+       } else
                DBG(2, "%s request %p, %d/%d fault %d\n",
-                               ep->end_point.name, request,
+                               ep->name, request,
                                req->request.actual, req->request.length,
                                request->status);
        req->request.complete(&req->ep->end_point, &req->request);
        spin_lock(&musb->lock);
-       ep->busy = busy;
 }
 
-/* ----------------------------------------------------------------------- */
+/**
+ * start_dma - starts dma for a transfer
+ * @musb:      musb controller pointer
+ * @req:       musb request to be transferred
+ *
+ * Context: controller locked, IRQs blocked, endpoint selected
+ */
+static int start_dma(struct musb *musb, struct musb_request *req)
+{
+       struct musb_ep          *musb_ep = req->ep;
+       struct dma_controller   *cntr = musb->dma_controller;
+       struct musb_hw_ep       *hw_ep = musb_ep->hw_ep;
+       struct dma_channel      *dma;
+       void __iomem            *epio;
+       size_t                  transfer_size;
+       int                     packet_sz;
+       u16                     csr;
+
+       if (!musb->use_dma || musb->dma_controller == NULL)
+               return -1;
+
+       if (musb_ep->type == USB_ENDPOINT_XFER_INT) {
+               DBG(5, "not allocating dma for interrupt endpoint\n");
+               return -1;
+       }
+
+       if (((unsigned long) req->request.buf) & 0x01) {
+               DBG(5, "unaligned buffer %p for %s\n", req->request.buf,
+                   musb_ep->name);
+               return -1;
+       }
+
+       packet_sz = musb_ep->packet_sz;
+       transfer_size = req->request.length;
+
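+       /* Transfers smaller than one packet, or a single small packet, are
+        * left to pio: the DMA setup overhead likely outweighs the benefit.
+        */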
+       if (transfer_size < packet_sz ||
+           (transfer_size == packet_sz && packet_sz < 512)) {
+               DBG(4, "small transfer, using pio\n");
+               return -1;
+       }
+
+       epio = musb->endpoints[musb_ep->current_epnum].regs;
+       if (!musb_ep->is_in) {
+               csr = musb_readw(epio, MUSB_RXCSR);
+
+               /* If RXPKTRDY is set, something may already be waiting in
+                * the fifo. If that something is less than packet_sz, it's
+                * only a short packet, so we unload it with pio.
+                */
+               if (csr & MUSB_RXCSR_RXPKTRDY) {
+                       u16 count;
+
+                       count = musb_readw(epio, MUSB_RXCOUNT);
+                       if (count < packet_sz) {
+                               DBG(4, "small packet in FIFO (%d bytes), "
+                                   "using PIO\n", count);
+                               return -1;
+                       }
+               }
+       }
+
+       dma = cntr->channel_alloc(cntr, hw_ep, musb_ep->is_in);
+       if (dma == NULL) {
+               DBG(4, "unable to allocate dma channel for %s\n",
+                   musb_ep->name);
+               return -1;
+       }
+
+       if (transfer_size > dma->max_len)
+               transfer_size = dma->max_len;
+
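+       /* Map the buffer for DMA unless the gadget driver already supplied
+        * a DMA address; stop_dma() later undoes the mapping (or syncs the
+        * buffer back).
+        */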
+       if (req->request.dma == DMA_ADDR_INVALID) {
+               req->request.dma = dma_map_single(musb->controller,
+                                                 req->request.buf,
+                                                 transfer_size,
+                                                 musb_ep->is_in ?
+                                                 DMA_TO_DEVICE :
+                                                 DMA_FROM_DEVICE);
+               req->mapped = 1;
+       } else {
+               dma_sync_single_for_device(musb->controller,
+                                          req->request.dma,
+                                          transfer_size,
+                                          musb_ep->is_in ? DMA_TO_DEVICE :
+                                          DMA_FROM_DEVICE);
+               req->mapped = 0;
+       }
+
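+       /* Program the endpoint for DMA mode 1: with AUTOSET (TX) or
+        * AUTOCLEAR (RX) the core handles the packet-ready bits itself, so
+        * the transfer can proceed without per-packet interrupts.
+        */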
+       if (musb_ep->is_in) {
+               csr = musb_readw(epio, MUSB_TXCSR);
+               csr |= MUSB_TXCSR_DMAENAB | MUSB_TXCSR_DMAMODE;
+               csr |= MUSB_TXCSR_AUTOSET | MUSB_TXCSR_MODE;
+               csr &= ~MUSB_TXCSR_P_UNDERRUN;
+               musb_writew(epio, MUSB_TXCSR, csr);
+       } else {
+               /* We only use mode1 dma and assume we never know, in
+                * advance, the size of the data we're receiving. Anything
+                * else falls back to pio.
+                */
+
+               /* this special sequence is necessary to get DMAReq to
+                * activate
+                */
+               csr = musb_readw(epio, MUSB_RXCSR);
+               csr |= MUSB_RXCSR_AUTOCLEAR;
+               musb_writew(epio, MUSB_RXCSR, csr);
+
+               csr |= MUSB_RXCSR_DMAENAB;
+               musb_writew(epio, MUSB_RXCSR, csr);
+
+               csr |= MUSB_RXCSR_DMAMODE;
+               musb_writew(epio, MUSB_RXCSR, csr);
+               musb_writew(epio, MUSB_RXCSR, csr);
+
+               csr = musb_readw(epio, MUSB_RXCSR);
+       }
+
+       musb_ep->dma = dma;
+
+       (void) cntr->channel_program(dma, packet_sz, true, req->request.dma,
+                                    transfer_size);
+
+       DBG(4, "%s dma started (addr 0x%08x, len %u, CSR %04x)\n",
+           musb_ep->name, req->request.dma, transfer_size, csr);
+
+       return 0;
+}
+
+/**
+ * stop_dma - stops a dma transfer and unmaps a buffer
+ * @musb:      the musb controller pointer
+ * @ep:                the endpoint being used
+ * @req:       the request to stop
+ */
+static void stop_dma(struct musb *musb, struct musb_ep *ep,
+                       struct musb_request *req)
+{
+       void __iomem *epio;
+
+       DBG(4, "%s dma stopped (addr 0x%08x, len %d)\n", ep->name,
+                       req->request.dma, req->request.actual);
+
+       if (req->mapped) {
+               dma_unmap_single(musb->controller, req->request.dma,
+                                req->request.actual, req->tx ?
+                                DMA_TO_DEVICE : DMA_FROM_DEVICE);
+               req->request.dma = DMA_ADDR_INVALID;
+               req->mapped = 0;
+       } else {
+               dma_sync_single_for_cpu(musb->controller, req->request.dma,
+                                       req->request.actual, req->tx ?
+                                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
+       }
+
+       epio = musb->endpoints[ep->current_epnum].regs;
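+       /* Disable DMA on the endpoint. DMAENAB is cleared first and DMAMODE
+        * only in a second write, since DMAMODE must not be cleared before
+        * or together with DMAENAB.
+        */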
+       if (req->tx) {
+               u16 csr;
+
+               csr = musb_readw(epio, MUSB_TXCSR);
+               csr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_AUTOSET);
+               musb_writew(epio, MUSB_TXCSR, csr | MUSB_TXCSR_P_WZC_BITS);
+               csr &= ~MUSB_TXCSR_DMAMODE;
+               musb_writew(epio, MUSB_TXCSR, csr | MUSB_TXCSR_P_WZC_BITS);
+       } else {
+               u16 csr;
+
+               csr = musb_readw(epio, MUSB_RXCSR);
+               csr &= ~(MUSB_RXCSR_DMAENAB | MUSB_RXCSR_AUTOCLEAR);
+               musb_writew(epio, MUSB_RXCSR, csr | MUSB_RXCSR_P_WZC_BITS);
+               csr &= ~MUSB_RXCSR_DMAMODE;
+               musb_writew(epio, MUSB_RXCSR, csr | MUSB_RXCSR_P_WZC_BITS);
+       }
+
+       musb->dma_controller->channel_release(ep->dma);
+       ep->dma = NULL;
+}
 
 /*
  * Abort requests queued to an endpoint using the status. Synchronous.
@@ -157,31 +314,55 @@ __acquires(ep->musb->lock)
  */
 static void nuke(struct musb_ep *ep, const int status)
 {
+       void __iomem            *epio;
        struct musb_request     *req = NULL;
-       void __iomem *epio = ep->musb->endpoints[ep->current_epnum].regs;
+       struct musb             *musb;
 
+       musb = ep->musb;
+       epio = musb->endpoints[ep->current_epnum].regs;
        ep->busy = 1;
 
-       if (is_dma_capable() && ep->dma) {
-               struct dma_controller   *c = ep->musb->dma_controller;
-               int value;
+       DBG(2, "%s nuke, DMA %p RxCSR %04x TxCSR %04x\n", ep->name, ep->dma,
+           musb_readw(epio, MUSB_RXCSR), musb_readw(epio, MUSB_TXCSR));
+       if (ep->dma) {
+               struct dma_controller   *c = musb->dma_controller;
+
+               BUG_ON(next_request(ep) == NULL);
+               req = to_musb_request(next_request(ep));
+               (void) c->channel_abort(ep->dma);
+               stop_dma(musb, ep, req);
+
                if (ep->is_in) {
-                       musb_writew(epio, MUSB_TXCSR,
-                                       0 | MUSB_TXCSR_FLUSHFIFO);
-                       musb_writew(epio, MUSB_TXCSR,
-                                       0 | MUSB_TXCSR_FLUSHFIFO);
+                       u16 csr;
+
+                       csr = musb_readw(epio, MUSB_TXCSR);
+                       musb_writew(epio, MUSB_TXCSR, MUSB_TXCSR_DMAENAB
+                                       | MUSB_TXCSR_FLUSHFIFO);
+                       musb_writew(epio, MUSB_TXCSR, MUSB_TXCSR_FLUSHFIFO);
+                       if (csr & MUSB_TXCSR_TXPKTRDY) {
+                               /* If TxPktRdy was set, an extra IRQ was just
+                                * generated. This IRQ will confuse things if
+                        * we don't handle it before a new TX request
+                                * is started. So we clear it here, in a bit
+                                * unsafe fashion (if nuke() is called outside
+                                * musb_interrupt(), we might have a delay in
+                                * handling other TX EPs.) */
+                               musb->int_tx |= musb_readw(musb->mregs,
+                                                          MUSB_INTRTX);
+                               musb->int_tx &= ~(1 << ep->current_epnum);
+                       }
                } else {
-                       musb_writew(epio, MUSB_RXCSR,
-                                       0 | MUSB_RXCSR_FLUSHFIFO);
-                       musb_writew(epio, MUSB_RXCSR,
-                                       0 | MUSB_RXCSR_FLUSHFIFO);
+                       musb_writew(epio, MUSB_RXCSR, MUSB_RXCSR_DMAENAB
+                                       | MUSB_RXCSR_FLUSHFIFO);
+                       musb_writew(epio, MUSB_RXCSR, MUSB_RXCSR_FLUSHFIFO);
                }
-
-               value = c->channel_abort(ep->dma);
-               DBG(value ? 1 : 6, "%s: abort DMA --> %d\n", ep->name, value);
-               c->channel_release(ep->dma);
-               ep->dma = NULL;
        }
+       if (ep->is_in)
+               musb_writew(epio, MUSB_TXCSR, 0);
+       else
+               musb_writew(epio, MUSB_RXCSR, 0);
+
+       ep->rx_pending = false;
 
        while (!list_empty(&(ep->req_list))) {
                req = container_of(ep->req_list.next, struct musb_request,
@@ -207,81 +388,43 @@ static inline int max_ep_writesize(struct musb *musb, struct musb_ep *ep)
                return ep->packet_sz;
 }
 
-
-#ifdef CONFIG_USB_INVENTRA_DMA
-
-/* Peripheral tx (IN) using Mentor DMA works as follows:
-       Only mode 0 is used for transfers <= wPktSize,
-       mode 1 is used for larger transfers,
-
-       One of the following happens:
-       - Host sends IN token which causes an endpoint interrupt
-               -> TxAvail
-                       -> if DMA is currently busy, exit.
-                       -> if queue is non-empty, txstate().
-
-       - Request is queued by the gadget driver.
-               -> if queue was previously empty, txstate()
-
-       txstate()
-               -> start
-                 /\    -> setup DMA
-                 |     (data is transferred to the FIFO, then sent out when
-                 |     IN token(s) are recd from Host.
-                 |             -> DMA interrupt on completion
-                 |                calls TxAvail.
-                 |                   -> stop DMA, ~DmaEenab,
-                 |                   -> set TxPktRdy for last short pkt or zlp
-                 |                   -> Complete Request
-                 |                   -> Continue next request (call txstate)
-                 |___________________________________|
-
- * Non-Mentor DMA engines can of course work differently, such as by
- * upleveling from irq-per-packet to irq-per-buffer.
- */
-
-#endif
-
-/*
- * An endpoint is transmitting data. This can be called either from
- * the IRQ routine or from ep.queue() to kickstart a request on an
- * endpoint.
+/**
+ * do_pio_tx - kicks TX pio transfer
+ * @musb:      musb controller pointer
+ * @req:       the request to be transferred via pio
+ *
+ * An endpoint is transmitting data. This can be called from
+ * the IRQ routine.
  *
  * Context: controller locked, IRQs blocked, endpoint selected
  */
-static void txstate(struct musb *musb, struct musb_request *req)
+static void do_pio_tx(struct musb *musb, struct musb_request *req)
 {
        u8                      epnum = req->epnum;
        struct musb_ep          *musb_ep;
        void __iomem            *epio = musb->endpoints[epnum].regs;
        struct usb_request      *request;
        u16                     fifo_count = 0, csr;
-       int                     use_dma = 0;
 
        musb_ep = req->ep;
 
-       /* we shouldn't get here while DMA is active ... but we do ... */
-       if (dma_channel_status(musb_ep->dma) == MUSB_DMA_STATUS_BUSY) {
-               DBG(4, "dma pending...\n");
-               return;
-       }
-
        /* read TXCSR before */
        csr = musb_readw(epio, MUSB_TXCSR);
 
        request = &req->request;
+
        fifo_count = min(max_ep_writesize(musb, musb_ep),
                        (int)(request->length - request->actual));
 
        if (csr & MUSB_TXCSR_TXPKTRDY) {
                DBG(5, "%s old packet still ready , txcsr %03x\n",
-                               musb_ep->end_point.name, csr);
+                               musb_ep->name, csr);
                return;
        }
 
        if (csr & MUSB_TXCSR_P_SENDSTALL) {
                DBG(5, "%s stalling, txcsr %03x\n",
-                               musb_ep->end_point.name, csr);
+                               musb_ep->name, csr);
                return;
        }
 
@@ -289,107 +432,17 @@ static void txstate(struct musb *musb, struct musb_request *req)
                        epnum, musb_ep->packet_sz, fifo_count,
                        csr);
 
-#ifndef        CONFIG_MUSB_PIO_ONLY
-       if (is_dma_capable() && musb_ep->dma) {
-               struct dma_controller   *c = musb->dma_controller;
-
-               use_dma = (request->dma != DMA_ADDR_INVALID);
-
-               /* MUSB_TXCSR_P_ISO is still set correctly */
-
-#ifdef CONFIG_USB_INVENTRA_DMA
-               {
-                       size_t request_size;
-
-                       /* setup DMA, then program endpoint CSR */
-                       request_size = min(request->length,
-                                               musb_ep->dma->max_len);
-                       if (request_size <= musb_ep->packet_sz)
-                               musb_ep->dma->desired_mode = 0;
-                       else
-                               musb_ep->dma->desired_mode = 1;
-
-                       use_dma = use_dma && c->channel_program(
-                                       musb_ep->dma, musb_ep->packet_sz,
-                                       musb_ep->dma->desired_mode,
-                                       request->dma, request_size);
-                       if (use_dma) {
-                               if (musb_ep->dma->desired_mode == 0) {
-                                       /* ASSERT: DMAENAB is clear */
-                                       csr &= ~(MUSB_TXCSR_AUTOSET |
-                                                       MUSB_TXCSR_DMAMODE);
-                                       csr |= (MUSB_TXCSR_DMAENAB |
-                                                       MUSB_TXCSR_MODE);
-                                       /* against programming guide */
-                               } else
-                                       csr |= (MUSB_TXCSR_AUTOSET
-                                                       | MUSB_TXCSR_DMAENAB
-                                                       | MUSB_TXCSR_DMAMODE
-                                                       | MUSB_TXCSR_MODE);
-
-                               csr &= ~MUSB_TXCSR_P_UNDERRUN;
-                               musb_writew(epio, MUSB_TXCSR, csr);
-                       }
-               }
-
-#elif defined(CONFIG_USB_TI_CPPI_DMA)
-               /* program endpoint CSR first, then setup DMA */
-               csr &= ~(MUSB_TXCSR_AUTOSET
-                               | MUSB_TXCSR_DMAMODE
-                               | MUSB_TXCSR_P_UNDERRUN
-                               | MUSB_TXCSR_TXPKTRDY);
-               csr |= MUSB_TXCSR_MODE | MUSB_TXCSR_DMAENAB;
-               musb_writew(epio, MUSB_TXCSR,
-                       (MUSB_TXCSR_P_WZC_BITS & ~MUSB_TXCSR_P_UNDERRUN)
-                               | csr);
-
-               /* ensure writebuffer is empty */
-               csr = musb_readw(epio, MUSB_TXCSR);
-
-               /* NOTE host side sets DMAENAB later than this; both are
-                * OK since the transfer dma glue (between CPPI and Mentor
-                * fifos) just tells CPPI it could start.  Data only moves
-                * to the USB TX fifo when both fifos are ready.
-                */
-
-               /* "mode" is irrelevant here; handle terminating ZLPs like
-                * PIO does, since the hardware RNDIS mode seems unreliable
-                * except for the last-packet-is-already-short case.
-                */
-               use_dma = use_dma && c->channel_program(
-                               musb_ep->dma, musb_ep->packet_sz,
-                               0,
-                               request->dma,
-                               request->length);
-               if (!use_dma) {
-                       c->channel_release(musb_ep->dma);
-                       musb_ep->dma = NULL;
-                       /* ASSERT: DMAENAB clear */
-                       csr &= ~(MUSB_TXCSR_DMAMODE | MUSB_TXCSR_MODE);
-                       /* invariant: prequest->buf is non-null */
-               }
-#elif defined(CONFIG_USB_TUSB_OMAP_DMA)
-               use_dma = use_dma && c->channel_program(
-                               musb_ep->dma, musb_ep->packet_sz,
-                               request->zero,
-                               request->dma,
-                               request->length);
-#endif
-       }
-#endif
-
-       if (!use_dma) {
-               musb_write_fifo(musb_ep->hw_ep, fifo_count,
-                               (u8 *) (request->buf + request->actual));
-               request->actual += fifo_count;
-               csr |= MUSB_TXCSR_TXPKTRDY;
-               csr &= ~MUSB_TXCSR_P_UNDERRUN;
-               musb_writew(epio, MUSB_TXCSR, csr);
-       }
+       musb_write_fifo(musb_ep->hw_ep, fifo_count,
+                       (u8 *) (request->buf + request->actual));
+       request->actual += fifo_count;
+       csr |= MUSB_TXCSR_TXPKTRDY;
+       /* REVISIT wasn't this cleared by musb_g_tx() ? */
+       csr &= ~MUSB_TXCSR_P_UNDERRUN;
+       musb_writew(epio, MUSB_TXCSR, csr);
 
        /* host may already have the data when this message shows... */
-       DBG(3, "%s TX/IN %s len %d/%d, txcsr %04x, fifo %d/%d\n",
-                       musb_ep->end_point.name, use_dma ? "dma" : "pio",
+       DBG(3, "%s TX/IN pio len %d/%d, txcsr %04x, fifo %d/%d\n",
+                       musb_ep->name,
                        request->actual, request->length,
                        musb_readw(epio, MUSB_TXCSR),
                        fifo_count,
@@ -397,341 +450,241 @@ static void txstate(struct musb *musb, struct musb_request *req)
 }
 
 /*
+ * Context: controller locked, IRQs blocked.
+ */
+static void musb_ep_restart(struct musb *musb, struct musb_request *req)
+{
+       DBG(3, "<== TX/IN request %p len %u on hw_ep%d%s\n",
+               &req->request, req->request.length, req->epnum,
+               req->ep->dma ? " (dma)" : " (pio)");
+
+       musb_ep_select(musb->mregs, req->epnum);
+
+       if (start_dma(musb, req) < 0)
+               do_pio_tx(musb, req);
+}
+
+/*
  * FIFO state update (e.g. data ready).
  * Called from IRQ,  with controller locked.
  */
 void musb_g_tx(struct musb *musb, u8 epnum)
 {
        u16                     csr;
+       struct musb_request     *req;
        struct usb_request      *request;
        u8 __iomem              *mbase = musb->mregs;
        struct musb_ep          *musb_ep = &musb->endpoints[epnum].ep_in;
        void __iomem            *epio = musb->endpoints[epnum].regs;
        struct dma_channel      *dma;
+       int                     count;
 
        musb_ep_select(mbase, epnum);
        request = next_request(musb_ep);
 
        csr = musb_readw(epio, MUSB_TXCSR);
-       DBG(4, "<== %s, txcsr %04x\n", musb_ep->end_point.name, csr);
+       dma = musb_ep->dma;
+       DBG(4, "<== %s, TxCSR %04x, DMA %p\n", musb_ep->name, csr, dma);
 
-       dma = is_dma_capable() ? musb_ep->dma : NULL;
-       do {
-               /* REVISIT for high bandwidth, MUSB_TXCSR_P_INCOMPTX
-                * probably rates reporting as a host error
-                */
-               if (csr & MUSB_TXCSR_P_SENTSTALL) {
-                       csr |= MUSB_TXCSR_P_WZC_BITS;
-                       csr &= ~MUSB_TXCSR_P_SENTSTALL;
-                       musb_writew(epio, MUSB_TXCSR, csr);
-                       if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) {
-                               dma->status = MUSB_DMA_STATUS_CORE_ABORT;
-                               musb->dma_controller->channel_abort(dma);
-                       }
-
-                       if (request)
-                               musb_g_giveback(musb_ep, request, -EPIPE);
+       if (csr & MUSB_TXCSR_P_SENDSTALL) {
+               DBG(5, "%s stalling, txcsr %04x\n",
+                               musb_ep->name, csr);
+               return;
+       }
 
-                       break;
+       /* REVISIT for high bandwidth, MUSB_TXCSR_P_INCOMPTX
+        * probably rates reporting as a host error
+        */
+       if (csr & MUSB_TXCSR_P_SENTSTALL) {
+               DBG(5, "ep%d is halted, cannot transfer\n", epnum);
+               csr |= MUSB_TXCSR_P_WZC_BITS;
+               csr &= ~MUSB_TXCSR_P_SENTSTALL;
+               musb_writew(epio, MUSB_TXCSR, csr);
+               if (dma != NULL) {
+                       BUG_ON(request == NULL);
+                       dma->status = MUSB_DMA_STATUS_CORE_ABORT;
+                       musb->dma_controller->channel_abort(dma);
+                       stop_dma(musb, musb_ep, to_musb_request(request));
+                       dma = NULL;
                }
 
-               if (csr & MUSB_TXCSR_P_UNDERRUN) {
-                       /* we NAKed, no big deal ... little reason to care */
-                       csr |= MUSB_TXCSR_P_WZC_BITS;
-                       csr &= ~(MUSB_TXCSR_P_UNDERRUN
-                                       | MUSB_TXCSR_TXPKTRDY);
-                       musb_writew(epio, MUSB_TXCSR, csr);
-                       DBG(20, "underrun on ep%d, req %p\n", epnum, request);
-               }
+               if (request && musb_ep->stalled)
+                       musb_g_giveback(musb_ep, request, -EPIPE);
 
-               if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) {
-                       /* SHOULD NOT HAPPEN ... has with cppi though, after
-                        * changing SENDSTALL (and other cases); harmless?
-                        */
-                       DBG(5, "%s dma still busy?\n", musb_ep->end_point.name);
-                       break;
+               return;
+       }
+
+       if (csr & MUSB_TXCSR_P_UNDERRUN) {
+               /* we NAKed, no big deal ... little reason to care */
+               csr |= MUSB_TXCSR_P_WZC_BITS;
+               csr &= ~MUSB_TXCSR_P_UNDERRUN;
+               musb_writew(epio, MUSB_TXCSR, csr);
+               DBG(2, "underrun on ep%d, req %p\n", epnum, request);
+       }
+
+       /* The interrupt is generated when this bit gets cleared; if we get
+        * here while TxPktRdy is still set, something is really messed up.
+        * One such case seems to be caused by the HW itself -- sometimes
+        * the IRQ is generated early.
+        */
+       count = 0;
+       while (csr & MUSB_TXCSR_TXPKTRDY) {
+               count++;
+               if (count == 1000) {
+                       DBG(1, "TX IRQ while TxPktRdy still set "
+                           "(CSR %04x)\n", csr);
+                       return;
                }
+               csr = musb_readw(epio, MUSB_TXCSR);
+       }
 
-               if (request) {
-                       u8      is_dma = 0;
-
-                       if (dma && (csr & MUSB_TXCSR_DMAENAB)) {
-                               is_dma = 1;
-                               csr |= MUSB_TXCSR_P_WZC_BITS;
-                               csr &= ~(MUSB_TXCSR_DMAENAB
-                                               | MUSB_TXCSR_P_UNDERRUN
-                                               | MUSB_TXCSR_TXPKTRDY);
-                               musb_writew(epio, MUSB_TXCSR, csr);
-                               /* ensure writebuffer is empty */
-                               csr = musb_readw(epio, MUSB_TXCSR);
-                               request->actual += musb_ep->dma->actual_len;
-                               DBG(4, "TXCSR%d %04x, dma off, "
-                                               "len %zu, req %p\n",
-                                       epnum, csr,
-                                       musb_ep->dma->actual_len,
-                                       request);
-                       }
+       if (dma != NULL && dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) {
+               /* SHOULD NOT HAPPEN ... has with cppi though, after
+                * changing SENDSTALL (and other cases); harmless?
+                */
+               DBG(3, "%s dma still busy?\n", musb_ep->name);
+               return;
+       }
 
-                       if (is_dma || request->actual == request->length) {
-
-                               /* First, maybe a terminating short packet.
-                                * Some DMA engines might handle this by
-                                * themselves.
-                                */
-                               if ((request->zero
-                                               && request->length
-                                               && (request->length
-                                                       % musb_ep->packet_sz)
-                                                       == 0)
-#ifdef CONFIG_USB_INVENTRA_DMA
-                                       || (is_dma &&
-                                               ((!dma->desired_mode) ||
-                                                   (request->actual &
-                                                   (musb_ep->packet_sz - 1))))
-#endif
-                               ) {
-                                       /* on dma completion, fifo may not
-                                        * be available yet ...
-                                        */
-                                       if (csr & MUSB_TXCSR_TXPKTRDY)
-                                               break;
-
-                                       DBG(4, "sending zero pkt\n");
-                                       musb_writew(epio, MUSB_TXCSR,
-                                                       MUSB_TXCSR_MODE
-                                                       | MUSB_TXCSR_TXPKTRDY);
-                                       request->zero = 0;
-                               }
-
-                               /* ... or if not, then complete it */
-                               musb_g_giveback(musb_ep, request, 0);
-
-                               /* kickstart next transfer if appropriate;
-                                * the packet that just completed might not
-                                * be transmitted for hours or days.
-                                * REVISIT for double buffering...
-                                * FIXME revisit for stalls too...
-                                */
-                               musb_ep_select(mbase, epnum);
+       if (request == NULL) {
+               DBG(2, "%s, spurious TX IRQ\n", musb_ep->name);
+               return;
+       }
+
+       req = to_musb_request(request);
+
+       if (dma) {
+               int short_packet = 0;
+
+               BUG_ON(!(csr & MUSB_TXCSR_DMAENAB));
+
+               request->actual += dma->actual_len;
+               DBG(4, "TxCSR%d %04x, dma finished, len %zu, req %p\n",
+                   epnum, csr, dma->actual_len, request);
+
+               stop_dma(musb, musb_ep, req);
+
+               WARN(request->actual != request->length,
+                    "actual %d length %d\n", request->actual,
+                    request->length);
+
+               if (request->length % musb_ep->packet_sz)
+                       short_packet = 1;
+
+               req->complete = true;
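+               /* If a short packet or ZLP is still owed, queue it now; the
+                * request is then given back on the next TX interrupt.
+                */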
+               if (request->zero || short_packet) {
+                       csr = musb_readw(epio, MUSB_TXCSR);
+                       DBG(4, "sending zero pkt, DMA, TxCSR %04x\n", csr);
+                       musb_writew(epio, MUSB_TXCSR,
+                                   csr | MUSB_TXCSR_TXPKTRDY);
+                       return;
+               }
+       }
+
+       if (request->actual == request->length) {
+               if (!req->complete) {
+                       /* Maybe we have to send a zero length packet */
+                       if (request->zero && request->length &&
+                           (request->length % musb_ep->packet_sz) == 0) {
                                csr = musb_readw(epio, MUSB_TXCSR);
-                               if (csr & MUSB_TXCSR_FIFONOTEMPTY)
-                                       break;
-                               request = musb_ep->desc
-                                               ? next_request(musb_ep)
-                                               : NULL;
-                               if (!request) {
-                                       DBG(4, "%s idle now\n",
-                                               musb_ep->end_point.name);
-                                       break;
-                               }
+                               DBG(4, "sending zero pkt, TxCSR %04x\n", csr);
+                               musb_writew(epio, MUSB_TXCSR,
+                                           csr | MUSB_TXCSR_TXPKTRDY);
+                               req->complete = true;
+                               return;
                        }
+               }
+               musb_ep->busy = 1;
+               musb_g_giveback(musb_ep, request, 0);
+               musb_ep->busy = 0;
 
-                       txstate(musb, to_musb_request(request));
+               request = musb_ep->desc ? next_request(musb_ep) : NULL;
+               if (!request) {
+                       DBG(4, "%s idle now\n", musb_ep->name);
+                       return;
                }
+               musb_ep_restart(musb, to_musb_request(request));
+               return;
+       }
 
-       } while (0);
+       do_pio_tx(musb, to_musb_request(request));
 }
 
 /* ------------------------------------------------------------ */
 
-#ifdef CONFIG_USB_INVENTRA_DMA
-
-/* Peripheral rx (OUT) using Mentor DMA works as follows:
-       - Only mode 0 is used.
-
-       - Request is queued by the gadget class driver.
-               -> if queue was previously empty, rxstate()
-
-       - Host sends OUT token which causes an endpoint interrupt
-         /\      -> RxReady
-         |           -> if request queued, call rxstate
-         |             /\      -> setup DMA
-         |             |            -> DMA interrupt on completion
-         |             |               -> RxReady
-         |             |                     -> stop DMA
-         |             |                     -> ack the read
-         |             |                     -> if data recd = max expected
-         |             |                               by the request, or host
-         |             |                               sent a short packet,
-         |             |                               complete the request,
-         |             |                               and start the next one.
-         |             |_____________________________________|
-         |                                      else just wait for the host
-         |                                         to send the next OUT token.
-         |__________________________________________________|
-
- * Non-Mentor DMA engines can of course work differently.
- */
-
-#endif
-
-/*
+/**
+ * do_pio_rx - kicks RX pio transfer
+ * @musb:      musb controller pointer
+ * @req:       the request to be transferred via pio
+ *
  * Context: controller locked, IRQs blocked, endpoint selected
  */
-static void rxstate(struct musb *musb, struct musb_request *req)
+static void do_pio_rx(struct musb *musb, struct musb_request *req)
 {
        u16                     csr = 0;
        const u8                epnum = req->epnum;
        struct usb_request      *request = &req->request;
        struct musb_ep          *musb_ep = &musb->endpoints[epnum].ep_out;
        void __iomem            *epio = musb->endpoints[epnum].regs;
-       u16                     fifo_count = 0;
-       u16                     len = musb_ep->packet_sz;
+       unsigned                fifo_count = 0;
+       u16                     count = musb_ep->packet_sz;
+       int                     retries = 1000;
 
        csr = musb_readw(epio, MUSB_RXCSR);
 
-       if (is_cppi_enabled() && musb_ep->dma) {
-               struct dma_controller   *c = musb->dma_controller;
-               struct dma_channel      *channel = musb_ep->dma;
-
-               /* NOTE:  CPPI won't actually stop advancing the DMA
-                * queue after short packet transfers, so this is almost
-                * always going to run as IRQ-per-packet DMA so that
-                * faults will be handled correctly.
-                */
-               if (c->channel_program(channel,
-                               musb_ep->packet_sz,
-                               !request->short_not_ok,
-                               request->dma + request->actual,
-                               request->length - request->actual)) {
-
-                       /* make sure that if an rxpkt arrived after the irq,
-                        * the cppi engine will be ready to take it as soon
-                        * as DMA is enabled
-                        */
-                       csr &= ~(MUSB_RXCSR_AUTOCLEAR
-                                       | MUSB_RXCSR_DMAMODE);
-                       csr |= MUSB_RXCSR_DMAENAB | MUSB_RXCSR_P_WZC_BITS;
-                       musb_writew(epio, MUSB_RXCSR, csr);
-                       return;
+       /* RxPktRdy should be the only possibility here.
+        * Sometimes the IRQ is generated before
+        * RxPktRdy gets set, so we'll wait a while. */
+       while (!(csr & MUSB_RXCSR_RXPKTRDY)) {
+               if (retries-- == 0) {
+                       DBG(1, "RxPktRdy did not get set (CSR %04x)\n", csr);
+                       BUG_ON(!(csr & MUSB_RXCSR_RXPKTRDY));
                }
+               csr = musb_readw(epio, MUSB_RXCSR);
        }
 
-       if (csr & MUSB_RXCSR_RXPKTRDY) {
-               len = musb_readw(epio, MUSB_RXCOUNT);
-               if (request->actual < request->length) {
-#ifdef CONFIG_USB_INVENTRA_DMA
-                       if (is_dma_capable() && musb_ep->dma) {
-                               struct dma_controller   *c;
-                               struct dma_channel      *channel;
-                               int                     use_dma = 0;
-
-                               c = musb->dma_controller;
-                               channel = musb_ep->dma;
-
-       /* We use DMA Req mode 0 in rx_csr, and DMA controller operates in
-        * mode 0 only. So we do not get endpoint interrupts due to DMA
-        * completion. We only get interrupts from DMA controller.
-        *
-        * We could operate in DMA mode 1 if we knew the size of the tranfer
-        * in advance. For mass storage class, request->length = what the host
-        * sends, so that'd work.  But for pretty much everything else,
-        * request->length is routinely more than what the host sends. For
-        * most these gadgets, end of is signified either by a short packet,
-        * or filling the last byte of the buffer.  (Sending extra data in
-        * that last pckate should trigger an overflow fault.)  But in mode 1,
-        * we don't get DMA completion interrrupt for short packets.
-        *
-        * Theoretically, we could enable DMAReq irq (MUSB_RXCSR_DMAMODE = 1),
-        * to get endpoint interrupt on every DMA req, but that didn't seem
-        * to work reliably.
-        *
-        * REVISIT an updated g_file_storage can set req->short_not_ok, which
-        * then becomes usable as a runtime "use mode 1" hint...
-        */
+       musb_ep->busy = 1;
 
-                               csr |= MUSB_RXCSR_DMAENAB;
-#ifdef USE_MODE1
-                               csr |= MUSB_RXCSR_AUTOCLEAR;
-                               /* csr |= MUSB_RXCSR_DMAMODE; */
-
-                               /* this special sequence (enabling and then
-                                * disabling MUSB_RXCSR_DMAMODE) is required
-                                * to get DMAReq to activate
-                                */
-                               musb_writew(epio, MUSB_RXCSR,
-                                       csr | MUSB_RXCSR_DMAMODE);
-#endif
-                               musb_writew(epio, MUSB_RXCSR, csr);
-
-                               if (request->actual < request->length) {
-                                       int transfer_size = 0;
-#ifdef USE_MODE1
-                                       transfer_size = min(request->length,
-                                                       channel->max_len);
-#else
-                                       transfer_size = len;
-#endif
-                                       if (transfer_size <= musb_ep->packet_sz)
-                                               musb_ep->dma->desired_mode = 0;
-                                       else
-                                               musb_ep->dma->desired_mode = 1;
-
-                                       use_dma = c->channel_program(
-                                                       channel,
-                                                       musb_ep->packet_sz,
-                                                       channel->desired_mode,
-                                                       request->dma
-                                                       + request->actual,
-                                                       transfer_size);
-                               }
-
-                               if (use_dma)
-                                       return;
-                       }
-#endif /* Mentor's DMA */
-
-                       fifo_count = request->length - request->actual;
-                       DBG(3, "%s OUT/RX pio fifo %d/%d, maxpacket %d\n",
-                                       musb_ep->end_point.name,
-                                       len, fifo_count,
-                                       musb_ep->packet_sz);
-
-                       fifo_count = min(len, fifo_count);
-
-#ifdef CONFIG_USB_TUSB_OMAP_DMA
-                       if (tusb_dma_omap() && musb_ep->dma) {
-                               struct dma_controller *c = musb->dma_controller;
-                               struct dma_channel *channel = musb_ep->dma;
-                               u32 dma_addr = request->dma + request->actual;
-                               int ret;
-
-                               ret = c->channel_program(channel,
-                                               musb_ep->packet_sz,
-                                               channel->desired_mode,
-                                               dma_addr,
-                                               fifo_count);
-                               if (ret)
-                                       return;
-                       }
-#endif
+       count = musb_readw(epio, MUSB_RXCOUNT);
+       if (request->actual < request->length) {
+               fifo_count = request->length - request->actual;
+               DBG(3, "%s OUT/RX pio fifo %d/%d, maxpacket %d\n",
+                               musb_ep->name,
+                               count, fifo_count,
+                               musb_ep->packet_sz);
 
-                       musb_read_fifo(musb_ep->hw_ep, fifo_count, (u8 *)
-                                       (request->buf + request->actual));
-                       request->actual += fifo_count;
+               fifo_count = min_t(unsigned, count, fifo_count);
 
-                       /* REVISIT if we left anything in the fifo, flush
-                        * it and report -EOVERFLOW
-                        */
+               musb_read_fifo(musb_ep->hw_ep, fifo_count,
+                              (u8 *) (request->buf + request->actual));
+               request->actual += fifo_count;
 
-                       /* ack the read! */
-                       csr |= MUSB_RXCSR_P_WZC_BITS;
-                       csr &= ~MUSB_RXCSR_RXPKTRDY;
-                       musb_writew(epio, MUSB_RXCSR, csr);
-               }
+               /* REVISIT if we left anything in the fifo, flush
+                * it and report -EOVERFLOW
+                */
+
+               /* ack the read! */
+               csr |= MUSB_RXCSR_P_WZC_BITS;
+               csr &= ~MUSB_RXCSR_RXPKTRDY;
+               musb_writew(epio, MUSB_RXCSR, csr);
        }
 
-       /* reach the end or short packet detected */
-       if (request->actual == request->length || len < musb_ep->packet_sz)
+       musb_ep->busy = 0;
+
+       /* The transfer is complete, or the host sent a short packet;
+        * either way the request can be given back now.
+        */
+       if (request->actual == request->length || count < musb_ep->packet_sz)
                musb_g_giveback(musb_ep, request, 0);
 }
 
 /*
  * Data ready for a request; called from IRQ
  */
-void musb_g_rx(struct musb *musb, u8 epnum)
+void musb_g_rx(struct musb *musb, u8 epnum, bool is_dma)
 {
        u16                     csr;
+       struct musb_request     *req;
        struct usb_request      *request;
        void __iomem            *mbase = musb->mregs;
        struct musb_ep          *musb_ep = &musb->endpoints[epnum].ep_out;
@@ -740,28 +693,42 @@ void musb_g_rx(struct musb *musb, u8 epnum)
 
        musb_ep_select(mbase, epnum);
 
+       csr = musb_readw(epio, MUSB_RXCSR);
+restart:
+       if (csr == 0) {
+               DBG(3, "spurious IRQ\n");
+               return;
+       }
+
        request = next_request(musb_ep);
+       if (!request) {
+               DBG(1, "waiting for request for %s (csr %04x)\n",
+                               musb_ep->name, csr);
+               musb_ep->rx_pending = true;
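+               /* rx_pending presumably lets this packet be picked up once
+                * a request gets queued for the endpoint.
+                */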
+               return;
+       }
 
-       csr = musb_readw(epio, MUSB_RXCSR);
-       dma = is_dma_capable() ? musb_ep->dma : NULL;
+       dma = musb_ep->dma;
 
-       DBG(4, "<== %s, rxcsr %04x%s %p\n", musb_ep->end_point.name,
-                       csr, dma ? " (dma)" : "", request);
+       DBG(4, "<== %s, rxcsr %04x %p (dma %s, %s)\n", musb_ep->name,
+           csr, request, dma ? "enabled" : "disabled",
+           is_dma ? "true" : "false");
 
        if (csr & MUSB_RXCSR_P_SENTSTALL) {
-               if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) {
-                       dma->status = MUSB_DMA_STATUS_CORE_ABORT;
-                       (void) musb->dma_controller->channel_abort(dma);
-                       request->actual += musb_ep->dma->actual_len;
-               }
-
+               DBG(5, "ep%d is halted, cannot transfer\n", epnum);
                csr |= MUSB_RXCSR_P_WZC_BITS;
                csr &= ~MUSB_RXCSR_P_SENTSTALL;
                musb_writew(epio, MUSB_RXCSR, csr);
 
-               if (request)
+               if (dma != NULL &&
+                   dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) {
+                       dma->status = MUSB_DMA_STATUS_CORE_ABORT;
+                       musb->dma_controller->channel_abort(dma);
+               }
+
+               if (musb_ep->stalled)
                        musb_g_giveback(musb_ep, request, -EPIPE);
-               goto done;
+               return;
        }
 
        if (csr & MUSB_RXCSR_P_OVERRUN) {
@@ -770,76 +737,64 @@ void musb_g_rx(struct musb *musb, u8 epnum)
                musb_writew(epio, MUSB_RXCSR, csr);
 
                DBG(3, "%s iso overrun on %p\n", musb_ep->name, request);
-               if (request && request->status == -EINPROGRESS)
+               if (request->status == -EINPROGRESS)
                        request->status = -EOVERFLOW;
        }
+
        if (csr & MUSB_RXCSR_INCOMPRX) {
                /* REVISIT not necessarily an error */
-               DBG(4, "%s, incomprx\n", musb_ep->end_point.name);
+               DBG(4, "%s, incomprx\n", musb_ep->name);
        }
 
-       if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) {
-               /* "should not happen"; likely RXPKTRDY pending for DMA */
-               DBG((csr & MUSB_RXCSR_DMAENAB) ? 4 : 1,
-                       "%s busy, csr %04x\n",
-                       musb_ep->end_point.name, csr);
-               goto done;
-       }
+       req = to_musb_request(request);
 
-       if (dma && (csr & MUSB_RXCSR_DMAENAB)) {
-               csr &= ~(MUSB_RXCSR_AUTOCLEAR
-                               | MUSB_RXCSR_DMAENAB
-                               | MUSB_RXCSR_DMAMODE);
-               musb_writew(epio, MUSB_RXCSR,
-                       MUSB_RXCSR_P_WZC_BITS | csr);
+       BUG_ON(dma == NULL && (csr & MUSB_RXCSR_DMAENAB));
 
-               request->actual += musb_ep->dma->actual_len;
+       if (dma != NULL) {
+               u32 len;
 
-               DBG(4, "RXCSR%d %04x, dma off, %04x, len %zu, req %p\n",
-                       epnum, csr,
-                       musb_readw(epio, MUSB_RXCSR),
-                       musb_ep->dma->actual_len, request);
-
-#if defined(CONFIG_USB_INVENTRA_DMA) || defined(CONFIG_USB_TUSB_OMAP_DMA)
-               /* Autoclear doesn't clear RxPktRdy for short packets */
-               if ((dma->desired_mode == 0)
-                               || (dma->actual_len
-                                       & (musb_ep->packet_sz - 1))) {
-                       /* ack the read! */
-                       csr &= ~MUSB_RXCSR_RXPKTRDY;
-                       musb_writew(epio, MUSB_RXCSR, csr);
-               }
+               /* We don't handle stalls yet. */
+               BUG_ON(csr & MUSB_RXCSR_P_SENDSTALL);
 
-               /* incomplete, and not short? wait for next IN packet */
-               if ((request->actual < request->length)
-                               && (musb_ep->dma->actual_len
-                                       == musb_ep->packet_sz))
-                       goto done;
-#endif
-               musb_g_giveback(musb_ep, request, 0);
+               /* We abort() so dma->actual_len gets updated */
+               musb->dma_controller->channel_abort(dma);
 
-               request = next_request(musb_ep);
-               if (!request)
-                       goto done;
+               /* We only expect full packets. */
+               BUG_ON(dma->actual_len & (musb_ep->packet_sz - 1));
 
-               /* don't start more i/o till the stall clears */
-               musb_ep_select(mbase, epnum);
-               csr = musb_readw(epio, MUSB_RXCSR);
-               if (csr & MUSB_RXCSR_P_SENDSTALL)
-                       goto done;
-       }
+               request->actual += dma->actual_len;
+               len = dma->actual_len;
 
+               stop_dma(musb, musb_ep, req);
+               dma = NULL;
 
-       /* analyze request if the ep is hot */
-       if (request)
-               rxstate(musb, to_musb_request(request));
-       else
-               DBG(3, "packet waiting for %s%s request\n",
-                               musb_ep->desc ? "" : "inactive ",
-                               musb_ep->end_point.name);
+               DBG(4, "RXCSR%d %04x, dma off, %04x, len %zu, req %p\n",
+                   epnum, csr, musb_readw(epio, MUSB_RXCSR), len, request);
 
-done:
-       return;
+               if (!is_dma) {
+                       /* Unload with pio */
+                       do_pio_rx(musb, req);
+               } else {
+                       BUG_ON(request->actual != request->length);
+                       musb_g_giveback(musb_ep, request, 0);
+               }
+               return;
+       }
+
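+       /* No DMA transfer was in flight: try to start one for this request,
+        * otherwise unload the FIFO with pio and re-check for more data.
+        */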
+       if (dma == NULL && musb->use_dma) {
+               if (start_dma(musb, req) == 0)
+                       dma = musb_ep->dma;
+       }
+
+       if (dma == NULL) {
+               do_pio_rx(musb, req);
+               csr = musb_readw(epio, MUSB_RXCSR);
+               if (csr & MUSB_RXCSR_RXPKTRDY) {
+                       DBG(2, "new packet in FIFO, restarting RX "
+                           "(CSR %04x)\n", csr);
+                       goto restart;
+               }
+       }
 }
 
 /* ------------------------------------------------------------ */
@@ -854,13 +809,15 @@ static int musb_gadget_enable(struct usb_ep *ep,
        struct musb             *musb;
        void __iomem    *mbase;
        u8              epnum;
-       u16             csr;
+       u16             csr = 0;
        unsigned        tmp;
        int             status = -EINVAL;
 
        if (!ep || !desc)
                return -EINVAL;
 
+       DBG(1, "===> enabling %s\n", ep->name);
+
        musb_ep = to_musb_ep(ep);
        hw_ep = musb_ep->hw_ep;
        regs = hw_ep->regs;
@@ -874,10 +831,10 @@ static int musb_gadget_enable(struct usb_ep *ep,
                status = -EBUSY;
                goto fail;
        }
-       musb_ep->type = desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK;
+       musb_ep->type = usb_endpoint_type(desc);
 
        /* check direction and (later) maxpacket size against endpoint */
-       if ((desc->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK) != epnum)
+       if (usb_endpoint_num(desc) != epnum)
                goto fail;
 
        /* REVISIT this rules out high bandwidth periodic transfers */
@@ -890,7 +847,7 @@ static int musb_gadget_enable(struct usb_ep *ep,
         * packet size (or fail), set the mode, clear the fifo
         */
        musb_ep_select(mbase, epnum);
-       if (desc->bEndpointAddress & USB_DIR_IN) {
+       if (usb_endpoint_dir_in(desc)) {
                u16 int_txe = musb_readw(mbase, MUSB_INTRTXE);
 
                if (hw_ep->is_shared_fifo)
@@ -908,18 +865,15 @@ static int musb_gadget_enable(struct usb_ep *ep,
                 */
                musb_writew(regs, MUSB_TXMAXP, tmp);
 
+               /* clear DATAx toggle */
                csr = MUSB_TXCSR_MODE | MUSB_TXCSR_CLRDATATOG;
+
                if (musb_readw(regs, MUSB_TXCSR)
                                & MUSB_TXCSR_FIFONOTEMPTY)
                        csr |= MUSB_TXCSR_FLUSHFIFO;
-               if (musb_ep->type == USB_ENDPOINT_XFER_ISOC)
+               if (usb_endpoint_xfer_isoc(desc))
                        csr |= MUSB_TXCSR_P_ISO;
-
-               /* set twice in case of double buffering */
-               musb_writew(regs, MUSB_TXCSR, csr);
-               /* REVISIT may be inappropriate w/o FIFONOTEMPTY ... */
                musb_writew(regs, MUSB_TXCSR, csr);
-
        } else {
                u16 int_rxe = musb_readw(mbase, MUSB_INTRRXE);
 
@@ -945,34 +899,26 @@ static int musb_gadget_enable(struct usb_ep *ep,
                        musb_writew(regs, MUSB_TXCSR, csr);
                }
 
+               /* clear DATAx toggle */
                csr = MUSB_RXCSR_FLUSHFIFO | MUSB_RXCSR_CLRDATATOG;
-               if (musb_ep->type == USB_ENDPOINT_XFER_ISOC)
+
+               if (usb_endpoint_xfer_isoc(desc))
                        csr |= MUSB_RXCSR_P_ISO;
-               else if (musb_ep->type == USB_ENDPOINT_XFER_INT)
+               else if (usb_endpoint_xfer_int(desc))
                        csr |= MUSB_RXCSR_DISNYET;
-
-               /* set twice in case of double buffering */
-               musb_writew(regs, MUSB_RXCSR, csr);
                musb_writew(regs, MUSB_RXCSR, csr);
        }
 
        /* NOTE:  all the I/O code _should_ work fine without DMA, in case
         * for some reason you run out of channels here.
         */
-       if (is_dma_capable() && musb->dma_controller) {
-               struct dma_controller   *c = musb->dma_controller;
-
-               musb_ep->dma = c->channel_alloc(c, hw_ep,
-                               (desc->bEndpointAddress & USB_DIR_IN));
-       } else
-               musb_ep->dma = NULL;
-
+       musb_ep->dma = NULL;
        musb_ep->desc = desc;
        musb_ep->busy = 0;
        status = 0;
 
        pr_debug("%s periph: enabled %s for %s %s, %smaxpacket %d\n",
-                       musb_driver_name, musb_ep->end_point.name,
+                       musb_driver_name, musb_ep->name,
                        ({ char *s; switch (musb_ep->type) {
                        case USB_ENDPOINT_XFER_BULK:    s = "bulk"; break;
                        case USB_ENDPOINT_XFER_INT:     s = "int"; break;
@@ -985,6 +931,7 @@ static int musb_gadget_enable(struct usb_ep *ep,
        schedule_work(&musb->irq_work);
 
 fail:
+       musb_ep_select(mbase, 0);
        spin_unlock_irqrestore(&musb->lock, flags);
        return status;
 }
@@ -1002,6 +949,7 @@ static int musb_gadget_disable(struct usb_ep *ep)
        int             status = 0;
 
        musb_ep = to_musb_ep(ep);
+       DBG(4, "disabling %s\n", musb_ep->name);
        musb = musb_ep->musb;
        epnum = musb_ep->current_epnum;
        epio = musb->endpoints[epnum].regs;
@@ -1015,11 +963,13 @@ static int musb_gadget_disable(struct usb_ep *ep)
                int_txe &= ~(1 << epnum);
                musb_writew(musb->mregs, MUSB_INTRTXE, int_txe);
                musb_writew(epio, MUSB_TXMAXP, 0);
+               musb_writew(epio, MUSB_TXCSR, 0);
        } else {
                u16 int_rxe = musb_readw(musb->mregs, MUSB_INTRRXE);
                int_rxe &= ~(1 << epnum);
                musb_writew(musb->mregs, MUSB_INTRRXE, int_rxe);
                musb_writew(epio, MUSB_RXMAXP, 0);
+               musb_writew(epio, MUSB_RXCSR, 0);
        }
 
        musb_ep->desc = NULL;
@@ -1031,7 +981,7 @@ static int musb_gadget_disable(struct usb_ep *ep)
 
        spin_unlock_irqrestore(&(musb->lock), flags);
 
-       DBG(2, "%s\n", musb_ep->end_point.name);
+       DBG(2, "%s\n", musb_ep->name);
 
        return status;
 }
@@ -1043,16 +993,20 @@ static int musb_gadget_disable(struct usb_ep *ep)
 struct usb_request *musb_alloc_request(struct usb_ep *ep, gfp_t gfp_flags)
 {
        struct musb_ep          *musb_ep = to_musb_ep(ep);
+       struct musb             *musb = musb_ep->musb;
        struct musb_request     *request = NULL;
 
        request = kzalloc(sizeof *request, gfp_flags);
-       if (request) {
-               INIT_LIST_HEAD(&request->request.list);
-               request->request.dma = DMA_ADDR_INVALID;
-               request->epnum = musb_ep->current_epnum;
-               request->ep = musb_ep;
+       if (!request) {
+               dev_err(musb->controller, "not enough memory\n");
+               return NULL;
        }
 
+       INIT_LIST_HEAD(&request->request.list);
+       request->request.dma = DMA_ADDR_INVALID;
+       request->epnum = musb_ep->current_epnum;
+       request->ep = musb_ep;
+
        return &request->request;
 }
 
@@ -1074,22 +1028,6 @@ struct free_record {
        dma_addr_t              dma;
 };
 
-/*
- * Context: controller locked, IRQs blocked.
- */
-static void musb_ep_restart(struct musb *musb, struct musb_request *req)
-{
-       DBG(3, "<== %s request %p len %u on hw_ep%d\n",
-               req->tx ? "TX/IN" : "RX/OUT",
-               &req->request, req->request.length, req->epnum);
-
-       musb_ep_select(musb->mregs, req->epnum);
-       if (req->tx)
-               txstate(musb, req);
-       else
-               rxstate(musb, req);
-}
-
 static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req,
                        gfp_t gfp_flags)
 {
@@ -1113,37 +1051,14 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req,
        if (request->ep != musb_ep)
                return -EINVAL;
 
-       DBG(4, "<== to %s request=%p\n", ep->name, req);
+       DBG(4, "<== to %s request %p length %d\n", ep->name, req, req->length);
 
        /* request is mine now... */
        request->request.actual = 0;
        request->request.status = -EINPROGRESS;
        request->epnum = musb_ep->current_epnum;
        request->tx = musb_ep->is_in;
-
-       if (is_dma_capable() && musb_ep->dma) {
-               if (request->request.dma == DMA_ADDR_INVALID) {
-                       request->request.dma = dma_map_single(
-                                       musb->controller,
-                                       request->request.buf,
-                                       request->request.length,
-                                       request->tx
-                                               ? DMA_TO_DEVICE
-                                               : DMA_FROM_DEVICE);
-                       request->mapped = 1;
-               } else {
-                       dma_sync_single_for_device(musb->controller,
-                                       request->request.dma,
-                                       request->request.length,
-                                       request->tx
-                                               ? DMA_TO_DEVICE
-                                               : DMA_FROM_DEVICE);
-                       request->mapped = 0;
-               }
-       } else if (!req->buf) {
-               return -ENODATA;
-       } else
-               request->mapped = 0;
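+       /* buffers are no longer DMA-mapped at queue time; mapping, if any,
+        * presumably happens when the transfer itself is started
+        */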
+       request->mapped = 0;
 
        spin_lock_irqsave(&musb->lock, lockflags);
 
@@ -1158,9 +1073,23 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req,
        /* add request to the list */
        list_add_tail(&(request->request.list), &(musb_ep->req_list));
 
-       /* it this is the head of the queue, start i/o ... */
-       if (!musb_ep->busy && &request->request.list == musb_ep->req_list.next)
+       /* we can only start I/O if this is the head of the queue and
+        * the endpoint is not stalled (halted) or busy
+        */
+       if (!musb_ep->stalled && !musb_ep->busy &&
+           &request->request.list == musb_ep->req_list.next &&
+           request->tx) {
+               DBG(1, "restarting\n");
                musb_ep_restart(musb, request);
+       }
+
+       /* if we received an RX packet before the request was queued,
+        * process it here. */
+       if (!request->tx && musb_ep->rx_pending) {
+               DBG(1, "processing pending RX\n");
+               musb_ep->rx_pending = false;
+               musb_g_rx(musb, musb_ep->current_epnum, false);
+       }
 
 cleanup:
        spin_unlock_irqrestore(&musb->lock, lockflags);
@@ -1175,6 +1104,7 @@ static int musb_gadget_dequeue(struct usb_ep *ep, struct usb_request *request)
        int                     status = 0;
        struct musb             *musb = musb_ep->musb;
 
+       DBG(4, "%s, dequeueing request %p\n", ep->name, request);
        if (!ep || !request || to_musb_request(request)->ep != musb_ep)
                return -EINVAL;
 
@@ -1191,11 +1121,10 @@ static int musb_gadget_dequeue(struct usb_ep *ep, struct usb_request *request)
        }
 
        /* if the hardware doesn't have the request, easy ... */
-       if (musb_ep->req_list.next != &request->list || musb_ep->busy)
+       if (musb_ep->req_list.next != &request->list) {
                musb_g_giveback(musb_ep, request, -ECONNRESET);
-
        /* ... else abort the dma transfer ... */
-       else if (is_dma_capable() && musb_ep->dma) {
+       } else if (musb_ep->dma) {
                struct dma_controller   *c = musb->dma_controller;
 
                musb_ep_select(musb->mregs, musb_ep->current_epnum);
@@ -1203,6 +1132,7 @@ static int musb_gadget_dequeue(struct usb_ep *ep, struct usb_request *request)
                        status = c->channel_abort(musb_ep->dma);
                else
                        status = -EBUSY;
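+               /* presumably tears down any DMA state set up for this request */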
+               stop_dma(musb, musb_ep, to_musb_request(request));
                if (status == 0)
                        musb_g_giveback(musb_ep, request, -ECONNRESET);
        } else {
@@ -1288,10 +1218,12 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value)
                musb_writew(epio, MUSB_RXCSR, csr);
        }
 
+       musb_ep->stalled = value;
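+       /* queued requests are not (re)started while the endpoint is stalled */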
+
 done:
 
        /* maybe start the first request in the queue */
-       if (!musb_ep->busy && !value && request) {
+       if (!musb_ep->stalled && request) {
                DBG(3, "restarting the request\n");
                musb_ep_restart(musb, request);
        }
@@ -1394,7 +1326,7 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget)
 
        spin_lock_irqsave(&musb->lock, flags);
 
-       switch (musb->xceiv.state) {
+       switch (musb->xceiv->state) {
        case OTG_STATE_B_PERIPHERAL:
                /* NOTE:  OTG state machine doesn't include B_SUSPENDED;
                 * that's part of the standard usb 1.1 state machine, and
@@ -1466,10 +1398,39 @@ static void musb_pullup(struct musb *musb, int is_on)
        u8 power;
 
        power = musb_readb(musb->mregs, MUSB_POWER);
-       if (is_on)
+       /* UGLY UGLY HACK: works around Windows problems with
+        * multiple configurations.
+        *
+        * This is necessary to prevent a RESET irq from arriving
+        * when we fake a USB disconnection in order to change the
+        * configuration in the gadget driver.
+        */
+       if (is_on) {
+               u8 r;
                power |= MUSB_POWER_SOFTCONN;
-       else
+
+               r = musb_readb(musb->mregs, MUSB_INTRUSBE);
+               /* disable RESET interrupt */
+               musb_writeb(musb->mregs, MUSB_INTRUSBE, ~(r & BIT(1)));
+
+               /* send resume */
+               r = musb_readb(musb->mregs, MUSB_POWER);
+               r |= MUSB_POWER_RESUME;
+               musb_writeb(musb->mregs, MUSB_POWER, r);
+
+               /* ...for 10 ms */
+               mdelay(10);
+               r &= ~MUSB_POWER_RESUME;
+               musb_writeb(musb->mregs, MUSB_POWER, r);
+
+               /* enable interrupts */
+               musb_writeb(musb->mregs, MUSB_INTRUSBE, 0xf7);
+
+               /* some delay required for this to work */
+               mdelay(10);
+       } else {
                power &= ~MUSB_POWER_SOFTCONN;
+       }
 
        /* FIXME if on, HdrcStart; if off, HdrcStop */
 
@@ -1496,9 +1457,13 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA)
 {
        struct musb     *musb = gadget_to_musb(gadget);
 
-       if (!musb->xceiv.set_power)
+       if (!musb->xceiv->set_power)
                return -EOPNOTSUPP;
-       return otg_set_power(&musb->xceiv, mA);
+
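+       /* remember how much current the host allows; the irq worker
+        * presumably re-evaluates charging/power state from it
+        */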
+       musb->power_draw = mA;
+       schedule_work(&musb->irq_work);
+
+       return otg_set_power(musb->xceiv, mA);
 }
 
 static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
@@ -1633,7 +1598,7 @@ int __init musb_gadget_setup(struct musb *musb)
        musb->g.speed = USB_SPEED_UNKNOWN;
 
        /* this "gadget" abstracts/virtualizes the controller */
-       strcpy(musb->g.dev.bus_id, "gadget");
+       dev_set_name(&musb->g.dev, "gadget");
        musb->g.dev.parent = musb->controller;
        musb->g.dev.dma_mask = musb->controller->dma_mask;
        musb->g.dev.release = musb_gadget_release;
@@ -1711,6 +1676,12 @@ int usb_gadget_register_driver(struct usb_gadget_driver *driver)
        spin_unlock_irqrestore(&musb->lock, flags);
 
        if (retval == 0) {
+               /* With OFF mode in use, the clocks need to be turned on explicitly */
+               if (musb->set_clock)
+                       musb->set_clock(musb->clock, 1);
+               else
+                       clk_enable(musb->clock);
+
                retval = driver->bind(&musb->g);
                if (retval != 0) {
                        DBG(3, "bind to driver %s failed --> %d\n",
@@ -1724,8 +1695,8 @@ int usb_gadget_register_driver(struct usb_gadget_driver *driver)
                /* REVISIT always use otg_set_peripheral(), handling
                 * issues including the root hub one below ...
                 */
-               musb->xceiv.gadget = &musb->g;
-               musb->xceiv.state = OTG_STATE_B_IDLE;
+               musb->xceiv->gadget = &musb->g;
+               musb->xceiv->state = OTG_STATE_B_IDLE;
                musb->is_active = 1;
 
                /* FIXME this ignores the softconnect flag.  Drivers are
@@ -1750,14 +1721,15 @@ int usb_gadget_register_driver(struct usb_gadget_driver *driver)
                        if (retval < 0) {
                                DBG(1, "add_hcd failed, %d\n", retval);
                                spin_lock_irqsave(&musb->lock, flags);
-                               musb->xceiv.gadget = NULL;
-                               musb->xceiv.state = OTG_STATE_UNDEFINED;
+                               musb->xceiv->gadget = NULL;
+                               musb->xceiv->state = OTG_STATE_UNDEFINED;
                                musb->gadget_driver = NULL;
                                musb->g.dev.driver = NULL;
                                spin_unlock_irqrestore(&musb->lock, flags);
                        }
                }
        }
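+       /* save the controller register context (presumably restored after OFF mode) */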
+       musb_save_ctx(musb);
 
        return retval;
 }
@@ -1826,6 +1798,11 @@ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver)
 
        spin_lock_irqsave(&musb->lock, flags);
 
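+       /* make sure the controller clocks are running before touching registers */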
+       if (musb->set_clock)
+               musb->set_clock(musb->clock, 1);
+       else
+               clk_enable(musb->clock);
+
 #ifdef CONFIG_USB_MUSB_OTG
        musb_hnp_stop(musb);
 #endif
@@ -1834,7 +1811,7 @@ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver)
 
                (void) musb_gadget_vbus_draw(&musb->g, 0);
 
-               musb->xceiv.state = OTG_STATE_UNDEFINED;
+               musb->xceiv->state = OTG_STATE_UNDEFINED;
                stop_activity(musb, driver);
 
                DBG(3, "unregistering driver %s\n", driver->function);
@@ -1858,6 +1835,7 @@ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver)
                 * that currently misbehaves.
                 */
        }
+       musb_save_ctx(musb);
 
        return retval;
 }
@@ -1871,7 +1849,7 @@ EXPORT_SYMBOL(usb_gadget_unregister_driver);
 void musb_g_resume(struct musb *musb)
 {
        musb->is_suspended = 0;
-       switch (musb->xceiv.state) {
+       switch (musb->xceiv->state) {
        case OTG_STATE_B_IDLE:
                break;
        case OTG_STATE_B_WAIT_ACON:
@@ -1897,10 +1875,10 @@ void musb_g_suspend(struct musb *musb)
        devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
        DBG(3, "devctl %02x\n", devctl);
 
-       switch (musb->xceiv.state) {
+       switch (musb->xceiv->state) {
        case OTG_STATE_B_IDLE:
                if ((devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS)
-                       musb->xceiv.state = OTG_STATE_B_PERIPHERAL;
+                       musb->xceiv->state = OTG_STATE_B_PERIPHERAL;
                break;
        case OTG_STATE_B_PERIPHERAL:
                musb->is_suspended = 1;
@@ -1946,22 +1924,22 @@ void musb_g_disconnect(struct musb *musb)
                spin_lock(&musb->lock);
        }
 
-       switch (musb->xceiv.state) {
+       switch (musb->xceiv->state) {
        default:
 #ifdef CONFIG_USB_MUSB_OTG
                DBG(2, "Unhandled disconnect %s, setting a_idle\n",
                        otg_state_string(musb));
-               musb->xceiv.state = OTG_STATE_A_IDLE;
+               musb->xceiv->state = OTG_STATE_A_IDLE;
                break;
        case OTG_STATE_A_PERIPHERAL:
-               musb->xceiv.state = OTG_STATE_A_WAIT_VFALL;
+               musb->xceiv->state = OTG_STATE_A_WAIT_VFALL;
                break;
        case OTG_STATE_B_WAIT_ACON:
        case OTG_STATE_B_HOST:
 #endif
        case OTG_STATE_B_PERIPHERAL:
        case OTG_STATE_B_IDLE:
-               musb->xceiv.state = OTG_STATE_B_IDLE;
+               musb->xceiv->state = OTG_STATE_B_IDLE;
                break;
        case OTG_STATE_B_SRP_INIT:
                break;
@@ -2017,10 +1995,10 @@ __acquires(musb->lock)
         * or else after HNP, as A-Device
         */
        if (devctl & MUSB_DEVCTL_BDEVICE) {
-               musb->xceiv.state = OTG_STATE_B_PERIPHERAL;
+               musb->xceiv->state = OTG_STATE_B_PERIPHERAL;
                musb->g.is_a_peripheral = 0;
        } else if (is_otg_enabled(musb)) {
-               musb->xceiv.state = OTG_STATE_A_PERIPHERAL;
+               musb->xceiv->state = OTG_STATE_A_PERIPHERAL;
                musb->g.is_a_peripheral = 1;
        } else
                WARN_ON(1);