musb: adding support for am335x
drivers/usb/musb/cppi41_dma.c
index 38e725d06884b177bb919bcbe5310d4cf3c956c3..b553d49931b1a8a03e946db1cbb1db054ed6332b 100644 (file)
@@ -32,8 +32,8 @@
 /* Configuration */
 #define USB_CPPI41_DESC_SIZE_SHIFT 6
 #define USB_CPPI41_DESC_ALIGN  (1 << USB_CPPI41_DESC_SIZE_SHIFT)
-#define USB_CPPI41_CH_NUM_PD   64      /* 4K bulk data at full speed */
-#define USB_CPPI41_MAX_PD      (USB_CPPI41_CH_NUM_PD * USB_CPPI41_NUM_CH)
+#define USB_CPPI41_CH_NUM_PD   128     /* 4K bulk data at full speed */
+#define USB_CPPI41_MAX_PD      (USB_CPPI41_CH_NUM_PD * (USB_CPPI41_NUM_CH+1))
 
 #undef DEBUG_CPPI_TD
 #undef USBDRV_DEBUG
@@ -55,12 +55,15 @@ struct usb_pkt_desc;
 
 struct usb_pkt_desc {
        /* Hardware descriptor fields from this point */
-       struct cppi41_host_pkt_desc hw_desc;
+       struct cppi41_host_pkt_desc hw_desc;    /* 40 bytes */
        /* Protocol specific data */
-       dma_addr_t dma_addr;
-       struct usb_pkt_desc *next_pd_ptr;
+       dma_addr_t dma_addr;                    /* offs:44 byte */
+       struct usb_pkt_desc *next_pd_ptr;       /* offs:48 byte */
        u8 ch_num;
        u8 ep_num;
+       u8 eop;
+       u8 res1;                                /* offs:52 */
+       u8 res2[12];                            /* offs:64 */
 };
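The new eop, res1 and res2[12] fields pad each software descriptor out to exactly one 64-byte USB_CPPI41_DESC_ALIGN slot, which is what lets cppi41_controller_start() below size the pool as USB_CPPI41_MAX_PD * sizeof(struct usb_pkt_desc). A standalone sketch of that layout, assuming a 32-bit build where dma_addr_t and pointers are 4 bytes wide (the mirror struct is illustrative, not the driver's definition):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define USB_CPPI41_DESC_SIZE_SHIFT 6
#define USB_CPPI41_DESC_ALIGN      (1 << USB_CPPI41_DESC_SIZE_SHIFT)    /* 64 */

struct pkt_desc_mirror {
        uint8_t  hw_desc[40];                   /* cppi41_host_pkt_desc: 40 bytes */
        uint32_t dma_addr;                      /* ends at offset 44 (32-bit)     */
        uint32_t next_pd_ptr;                   /* ends at offset 48              */
        uint8_t  ch_num, ep_num, eop, res1;     /* ends at offset 52              */
        uint8_t  res2[12];                      /* pads the slot out to 64 bytes  */
};

int main(void)
{
        /* each descriptor occupies exactly one aligned 64-byte slot */
        assert(sizeof(struct pkt_desc_mirror) == USB_CPPI41_DESC_ALIGN);
        printf("descriptor slot: %zu bytes\n", sizeof(struct pkt_desc_mirror));
        return 0;
}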
 
 /**
@@ -95,6 +98,8 @@ struct cppi41_channel {
        u16 pkt_size;
        u8  transfer_mode;
        u8  zlp_queued;
+       u8  inf_mode;
+       u8  tx_complete;
 };
 
 /**
@@ -109,6 +114,7 @@ struct cppi41 {
 
        struct cppi41_channel tx_cppi_ch[USB_CPPI41_NUM_CH];
        struct cppi41_channel rx_cppi_ch[USB_CPPI41_NUM_CH];
+       struct work_struct      txdma_work;
 
        struct usb_pkt_desc *pd_pool_head; /* Free PD pool head */
        dma_addr_t pd_mem_phys;         /* PD memory physical address */
@@ -119,8 +125,17 @@ struct cppi41 {
        struct cppi41_queue_obj queue_obj; /* Teardown completion queue */
                                        /* object */
        u32 pkt_info;                   /* Tx PD Packet Information field */
+       struct usb_cppi41_info *cppi_info; /* cppi channel information */
+       u8 en_bd_intr;                  /* enable bd interrupt */
+       u32 automode_reg_offs;          /* USB_AUTOREQ_REG offset */
+       u32 teardown_reg_offs;          /* USB_TEARDOWN_REG offset */
+       u32 bd_size;
+       u8  inf_mode;
 };
 
+struct usb_cppi41_info usb_cppi41_info[2];
+EXPORT_SYMBOL(usb_cppi41_info);
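Turning usb_cppi41_info into an exported two-element array is what lets each MUSB instance (USB0/USB1 on these devices) carry its own queue-manager and DMA-channel numbers; the rest of the patch consistently replaces the old global references with cppi->cppi_info, which cppi41_dma_controller_create() points at usb_cppi41_info[musb->id]. A minimal standalone model of that per-instance lookup (the struct fields and queue numbers below are invented placeholders, not the real platform data):

#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for struct usb_cppi41_info */
struct cppi_info_model {
        uint8_t  q_mgr;
        uint16_t tx_comp_q[4];
        uint16_t rx_comp_q[4];
};

/* one entry per MUSB instance, indexed like usb_cppi41_info[musb->id] */
static const struct cppi_info_model cppi_info_model[2] = {
        { .q_mgr = 0, .tx_comp_q = { 1, 2, 3, 4 },  .rx_comp_q = { 5, 6, 7, 8 } },
        { .q_mgr = 0, .tx_comp_q = { 9, 10, 11, 12 }, .rx_comp_q = { 13, 14, 15, 16 } },
};

int main(void)
{
        for (int id = 0; id < 2; id++) {
                const struct cppi_info_model *info = &cppi_info_model[id];

                /* per-channel completion queue, as used in cppi41_next_tx_segment() */
                printf("musb%d: ch0 tx completion queue %u (qmgr %u)\n",
                       id, info->tx_comp_q[0], info->q_mgr);
        }
        return 0;
}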
+
 #ifdef DEBUG_CPPI_TD
 static void print_pd_list(struct usb_pkt_desc *pd_pool_head)
 {
@@ -169,8 +184,20 @@ static int __devinit cppi41_controller_start(struct dma_controller *controller)
        struct usb_pkt_desc *curr_pd;
        unsigned long pd_addr;
        int i;
+       struct usb_cppi41_info *cppi_info;
+       struct musb *musb;
 
        cppi = container_of(controller, struct cppi41, controller);
+       cppi_info = cppi->cppi_info;
+       musb = cppi->musb;
+
+       if (cpu_is_ti816x() || cpu_is_am33xx()) {
+               cppi->automode_reg_offs = TI81XX_USB_AUTOREQ_REG;
+               cppi->teardown_reg_offs = TI81XX_USB_TEARDOWN_REG;
+       } else {
+               cppi->automode_reg_offs = USB_AUTOREQ_REG;
+               cppi->teardown_reg_offs = USB_TEARDOWN_REG;
+       }
 
        /*
         * TODO: We may need to check USB_CPPI41_MAX_PD here since CPPI 4.1
@@ -183,20 +210,21 @@ static int __devinit cppi41_controller_start(struct dma_controller *controller)
         * dma_alloc_coherent()  will return a page aligned address, so our
         * alignment requirement will be honored.
         */
+       cppi->bd_size = USB_CPPI41_MAX_PD * sizeof(struct usb_pkt_desc);
        cppi->pd_mem = dma_alloc_coherent(cppi->musb->controller,
-                                         USB_CPPI41_MAX_PD *
-                                         USB_CPPI41_DESC_ALIGN,
+                                         cppi->bd_size,
                                          &cppi->pd_mem_phys,
                                          GFP_KERNEL | GFP_DMA);
        if (cppi->pd_mem == NULL) {
-               DBG(1, "ERROR: packet descriptor memory allocation failed\n");
+               dev_dbg(musb->controller, "ERROR: packet descriptor memory allocation failed\n");
                return 0;
        }
-       if (cppi41_mem_rgn_alloc(usb_cppi41_info.q_mgr, cppi->pd_mem_phys,
+
+       if (cppi41_mem_rgn_alloc(cppi_info->q_mgr, cppi->pd_mem_phys,
                                 USB_CPPI41_DESC_SIZE_SHIFT,
                                 get_count_order(USB_CPPI41_MAX_PD),
                                 &cppi->pd_mem_rgn)) {
-               DBG(1, "ERROR: queue manager memory region allocation "
+               dev_dbg(musb->controller, "ERROR: queue manager memory region allocation "
                    "failed\n");
                goto free_pds;
        }
@@ -204,14 +232,14 @@ static int __devinit cppi41_controller_start(struct dma_controller *controller)
        /* Allocate the teardown completion queue */
        if (cppi41_queue_alloc(CPPI41_UNASSIGNED_QUEUE,
                               0, &cppi->teardownQNum)) {
-               DBG(1, "ERROR: teardown completion queue allocation failed\n");
+               dev_dbg(musb->controller, "ERROR: teardown completion queue allocation failed\n");
                goto free_mem_rgn;
        }
-       DBG(4, "Allocated teardown completion queue %d in queue manager 0\n",
+       dev_dbg(musb->controller, "Allocated teardown completion queue %d in queue manager 0\n",
            cppi->teardownQNum);
 
        if (cppi41_queue_init(&cppi->queue_obj, 0, cppi->teardownQNum)) {
-               DBG(1, "ERROR: teardown completion queue initialization "
+               dev_dbg(musb->controller, "ERROR: teardown completion queue initialization "
                    "failed\n");
                goto free_queue;
        }
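cppi41_mem_rgn_alloc() above describes the descriptor pool to the queue manager in terms of the 64-byte slot size (USB_CPPI41_DESC_SIZE_SHIFT) and get_count_order(USB_CPPI41_MAX_PD), i.e. the descriptor count rounded up to the next power of two. A standalone sketch of that rounding; count_order() is a local stand-in for the kernel helper, and the channel count behind USB_CPPI41_MAX_PD is assumed to be 4 here:

#include <stdio.h>

/* ceil(log2(n)) for n > 1, modelling the kernel's get_count_order() */
static int count_order(unsigned int n)
{
        int order = 0;

        while ((1u << order) < n)
                order++;
        return order;
}

int main(void)
{
        unsigned int max_pd = 128 * (4 + 1);    /* USB_CPPI41_MAX_PD, assuming 4 channels */
        int order = count_order(max_pd);

        /* the queue manager region then covers 2^order slots of 64 bytes */
        printf("max_pd=%u -> order=%d -> region=%u bytes\n",
               max_pd, order, (1u << order) * 64);
        return 0;
}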
@@ -245,8 +273,8 @@ static int __devinit cppi41_controller_start(struct dma_controller *controller)
                 * Extract the CPPI 4.1 DMA Tx channel configuration and
                 * construct/store the Tx PD tag info field for later use...
                 */
-               tx_info = cppi41_dma_block[usb_cppi41_info.dma_block].tx_ch_info
-                         + usb_cppi41_info.ep_dma_ch[i];
+               tx_info = cppi41_dma_block[cppi_info->dma_block].tx_ch_info
+                         + cppi_info->ep_dma_ch[i];
                cppi_ch->src_queue = tx_info->tx_queue[0];
                cppi_ch->tag_info = (tx_info->port_num <<
                                     CPPI41_SRC_TAG_PORT_NUM_SHIFT) |
@@ -266,16 +294,13 @@ static int __devinit cppi41_controller_start(struct dma_controller *controller)
 
        /* Construct/store Tx PD packet info field for later use */
        cppi->pkt_info = (CPPI41_PKT_TYPE_USB << CPPI41_PKT_TYPE_SHIFT) |
-                        (CPPI41_RETURN_LINKED << CPPI41_RETURN_POLICY_SHIFT) |
-                        (usb_cppi41_info.q_mgr << CPPI41_RETURN_QMGR_SHIFT) |
-                        (usb_cppi41_info.tx_comp_q[0] <<
-                         CPPI41_RETURN_QNUM_SHIFT);
+                        (CPPI41_RETURN_LINKED << CPPI41_RETURN_POLICY_SHIFT);
 
        /* Do necessary configuration in hardware to get started */
        reg_base = cppi->musb->ctrl_base;
 
        /* Disable auto request mode */
-       musb_writel(reg_base, USB_AUTOREQ_REG, 0);
+       musb_writel(reg_base, cppi->automode_reg_offs, 0);
 
        /* Disable the CDC/RNDIS modes */
        musb_writel(reg_base, USB_TX_MODE_REG, 0);
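With this hunk the return-queue manager and queue number are no longer baked into cppi->pkt_info at controller start; cppi41_next_tx_segment() further down ORs them in per descriptor from cppi_info->tx_comp_q[ch_num], so each Tx channel completes onto its own queue. A sketch of how such a packet-info word is assembled from bitfields (the shift and type values below are placeholders, not the CPPI 4.1 register definitions):

#include <stdint.h>
#include <stdio.h>

/* placeholder shift/type values -- the real ones come from the CPPI 4.1 headers */
#define PKT_TYPE_SHIFT          26
#define RETURN_POLICY_SHIFT     15
#define RETURN_QMGR_SHIFT       12
#define RETURN_QNUM_SHIFT       0

#define PKT_TYPE_USB            5
#define RETURN_LINKED           0

int main(void)
{
        /* fixed part, built once in cppi41_controller_start() */
        uint32_t pkt_info = (PKT_TYPE_USB << PKT_TYPE_SHIFT) |
                            (RETURN_LINKED << RETURN_POLICY_SHIFT);

        /* per-descriptor part, OR'ed in when the Tx PD is filled */
        uint32_t q_mgr = 0, tx_comp_q = 93;     /* illustrative values only */
        uint32_t hw_pkt_info = pkt_info |
                               (q_mgr << RETURN_QMGR_SHIFT) |
                               (tx_comp_q << RETURN_QNUM_SHIFT);

        printf("pkt_info = 0x%08x\n", (unsigned)hw_pkt_info);
        return 0;
}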
@@ -285,15 +310,15 @@ static int __devinit cppi41_controller_start(struct dma_controller *controller)
 
  free_queue:
        if (cppi41_queue_free(0, cppi->teardownQNum))
-               DBG(1, "ERROR: failed to free teardown completion queue\n");
+               dev_dbg(musb->controller, "ERROR: failed to free teardown completion queue\n");
 
  free_mem_rgn:
-       if (cppi41_mem_rgn_free(usb_cppi41_info.q_mgr, cppi->pd_mem_rgn))
-               DBG(1, "ERROR: failed to free queue manager memory region\n");
+       if (cppi41_mem_rgn_free(cppi_info->q_mgr, cppi->pd_mem_rgn))
+               dev_dbg(musb->controller, "ERROR: failed to free queue manager memory region\n");
 
  free_pds:
        dma_free_coherent(cppi->musb->controller,
-                         USB_CPPI41_MAX_PD * USB_CPPI41_DESC_ALIGN,
+                         cppi->bd_size,
                          cppi->pd_mem, cppi->pd_mem_phys);
 
        return 0;
@@ -309,28 +334,36 @@ static int cppi41_controller_stop(struct dma_controller *controller)
 {
        struct cppi41 *cppi;
        void __iomem *reg_base;
+       struct usb_cppi41_info *cppi_info;
+       struct musb *musb;
 
        cppi = container_of(controller, struct cppi41, controller);
+       cppi_info = cppi->cppi_info;
+       musb = cppi->musb;
 
        /* Free the teardown completion queue */
-       if (cppi41_queue_free(usb_cppi41_info.q_mgr, cppi->teardownQNum))
-               DBG(1, "ERROR: failed to free teardown completion queue\n");
+       if (cppi41_queue_free(cppi_info->q_mgr, cppi->teardownQNum))
+               dev_dbg(musb->controller, "ERROR: failed to free teardown completion queue\n");
 
        /*
         * Free the packet descriptor region allocated
         * for all Tx/Rx channels.
         */
-       if (cppi41_mem_rgn_free(usb_cppi41_info.q_mgr, cppi->pd_mem_rgn))
-               DBG(1, "ERROR: failed to free queue manager memory region\n");
+       if (cppi41_mem_rgn_free(cppi_info->q_mgr, cppi->pd_mem_rgn))
+               dev_dbg(musb->controller, "ERROR: failed to free queue manager memory region\n");
 
-       dma_free_coherent(cppi->musb->controller,
-                         USB_CPPI41_MAX_PD * USB_CPPI41_DESC_ALIGN,
+       dma_free_coherent(cppi->musb->controller, cppi->bd_size,
                          cppi->pd_mem, cppi->pd_mem_phys);
 
+       cppi->pd_mem = 0;
+       cppi->pd_mem_phys = 0;
+       cppi->pd_pool_head = 0;
+       cppi->bd_size = 0;
+
        reg_base = cppi->musb->ctrl_base;
 
        /* Disable auto request mode */
-       musb_writel(reg_base, USB_AUTOREQ_REG, 0);
+       musb_writel(reg_base, cppi->automode_reg_offs, 0);
 
        /* Disable the CDC/RNDIS modes */
        musb_writel(reg_base, USB_TX_MODE_REG, 0);
@@ -356,14 +389,18 @@ static struct dma_channel *cppi41_channel_alloc(struct dma_controller
        struct cppi41 *cppi;
        struct cppi41_channel  *cppi_ch;
        u32 ch_num, ep_num = ep->epnum;
+       struct usb_cppi41_info *cppi_info;
+       struct musb *musb;
 
        cppi = container_of(controller, struct cppi41, controller);
+       cppi_info = cppi->cppi_info;
+       musb = cppi->musb;
 
        /* Remember, ep_num: 1 .. Max_EP, and CPPI ch_num: 0 .. Max_EP - 1 */
        ch_num = ep_num - 1;
 
        if (ep_num > USB_CPPI41_NUM_CH) {
-               DBG(1, "No %cx DMA channel for EP%d\n",
+               dev_dbg(musb->controller, "No %cx DMA channel for EP%d\n",
                    is_tx ? 'T' : 'R', ep_num);
                return NULL;
        }
@@ -374,9 +411,9 @@ static struct dma_channel *cppi41_channel_alloc(struct dma_controller
        if (is_tx) {
                /* Initialize the CPPI 4.1 Tx DMA channel */
                if (cppi41_tx_ch_init(&cppi_ch->dma_ch_obj,
-                                     usb_cppi41_info.dma_block,
-                                     usb_cppi41_info.ep_dma_ch[ch_num])) {
-                       DBG(1, "ERROR: cppi41_tx_ch_init failed for "
+                                     cppi_info->dma_block,
+                                     cppi_info->ep_dma_ch[ch_num])) {
+                       dev_dbg(musb->controller, "ERROR: cppi41_tx_ch_init failed for "
                            "channel %d\n", ch_num);
                        return NULL;
                }
@@ -388,32 +425,33 @@ static struct dma_channel *cppi41_channel_alloc(struct dma_controller
                                            0, cppi->teardownQNum);
        } else {
                struct cppi41_rx_ch_cfg rx_cfg;
-               u8 q_mgr = usb_cppi41_info.q_mgr;
+               u8 q_mgr = cppi_info->q_mgr;
                int i;
 
                /* Initialize the CPPI 4.1 Rx DMA channel */
                if (cppi41_rx_ch_init(&cppi_ch->dma_ch_obj,
-                                     usb_cppi41_info.dma_block,
-                                     usb_cppi41_info.ep_dma_ch[ch_num])) {
-                       DBG(1, "ERROR: cppi41_rx_ch_init failed\n");
+                                     cppi_info->dma_block,
+                                     cppi_info->ep_dma_ch[ch_num])) {
+                       dev_dbg(musb->controller, "ERROR: cppi41_rx_ch_init failed\n");
                        return NULL;
                }
 
                if (cppi41_queue_alloc(CPPI41_FREE_DESC_BUF_QUEUE |
                                       CPPI41_UNASSIGNED_QUEUE,
                                       q_mgr, &cppi_ch->src_queue.q_num)) {
-                       DBG(1, "ERROR: cppi41_queue_alloc failed for "
+                       dev_dbg(musb->controller, "ERROR: cppi41_queue_alloc failed for "
                            "free descriptor/buffer queue\n");
                        return NULL;
                }
-               DBG(4, "Allocated free descriptor/buffer queue %d in "
+               dev_dbg(musb->controller, "Allocated free descriptor/buffer queue %d in "
                    "queue manager %d\n", cppi_ch->src_queue.q_num, q_mgr);
 
                rx_cfg.default_desc_type = cppi41_rx_host_desc;
                rx_cfg.sop_offset = 0;
                rx_cfg.retry_starved = 1;
+               rx_cfg.rx_max_buf_cnt = 0;
                rx_cfg.rx_queue.q_mgr = cppi_ch->src_queue.q_mgr = q_mgr;
-               rx_cfg.rx_queue.q_num = usb_cppi41_info.rx_comp_q[0];
+               rx_cfg.rx_queue.q_num = cppi_info->rx_comp_q[ch_num];
                for (i = 0; i < 4; i++)
                        rx_cfg.cfg.host_pkt.fdb_queue[i] = cppi_ch->src_queue;
                cppi41_rx_ch_configure(&cppi_ch->dma_ch_obj, &rx_cfg);
@@ -422,12 +460,12 @@ static struct dma_channel *cppi41_channel_alloc(struct dma_controller
        /* Initialize the CPPI 4.1 DMA source queue */
        if (cppi41_queue_init(&cppi_ch->queue_obj, cppi_ch->src_queue.q_mgr,
                               cppi_ch->src_queue.q_num)) {
-               DBG(1, "ERROR: cppi41_queue_init failed for %s queue",
+               dev_dbg(musb->controller, "ERROR: cppi41_queue_init failed for %s queue",
                    is_tx ? "Tx" : "Rx free descriptor/buffer");
                if (is_tx == 0 &&
                    cppi41_queue_free(cppi_ch->src_queue.q_mgr,
                                      cppi_ch->src_queue.q_num))
-                       DBG(1, "ERROR: failed to free Rx descriptor/buffer "
+                       dev_dbg(musb->controller, "ERROR: failed to free Rx descriptor/buffer "
                            "queue\n");
                 return NULL;
        }
@@ -436,7 +474,7 @@ static struct dma_channel *cppi41_channel_alloc(struct dma_controller
        cppi41_dma_ch_enable(&cppi_ch->dma_ch_obj);
 
        if (cppi_ch->end_pt)
-               DBG(1, "Re-allocating DMA %cx channel %d (%p)\n",
+               dev_dbg(musb->controller, "Re-allocating DMA %cx channel %d (%p)\n",
                    is_tx ? 'T' : 'R', ch_num, cppi_ch);
 
        cppi_ch->end_pt = ep;
@@ -445,7 +483,7 @@ static struct dma_channel *cppi41_channel_alloc(struct dma_controller
        cppi_ch->channel.max_len = is_tx ?
                                CPPI41_TXDMA_MAXLEN : CPPI41_RXDMA_MAXLEN;
 
-       DBG(4, "Allocated DMA %cx channel %d for EP%d\n", is_tx ? 'T' : 'R',
+       dev_dbg(musb->controller, "Allocated DMA %cx channel %d for EP%d\n", is_tx ? 'T' : 'R',
            ch_num, ep_num);
 
        return &cppi_ch->channel;
@@ -461,8 +499,9 @@ static void cppi41_channel_release(struct dma_channel *channel)
 
        /* REVISIT: for paranoia, check state and abort if needed... */
        cppi_ch = container_of(channel, struct cppi41_channel, channel);
+
        if (cppi_ch->end_pt == NULL)
-               DBG(1, "Releasing idle DMA channel %p\n", cppi_ch);
+               printk(KERN_INFO "Releasing idle DMA channel %p\n", cppi_ch);
 
        /* But for now, not its IRQ */
        cppi_ch->end_pt = NULL;
@@ -474,7 +513,7 @@ static void cppi41_channel_release(struct dma_channel *channel)
        if (cppi_ch->transmit == 0 &&
            cppi41_queue_free(cppi_ch->src_queue.q_mgr,
                              cppi_ch->src_queue.q_num))
-               DBG(1, "ERROR: failed to free Rx descriptor/buffer queue\n");
+               printk(KERN_ERR "ERROR: failed to free Rx descriptor/buffer queue\n");
 }
 
 static void cppi41_mode_update(struct cppi41_channel *cppi_ch, u8 mode)
@@ -524,25 +563,30 @@ static void cppi41_mode_update(struct cppi41_channel *cppi_ch, u8 mode)
 static unsigned cppi41_next_tx_segment(struct cppi41_channel *tx_ch)
 {
        struct cppi41 *cppi = tx_ch->channel.private_data;
+       struct musb *musb = cppi->musb;
        struct usb_pkt_desc *curr_pd;
        u32 length = tx_ch->length - tx_ch->curr_offset;
        u32 pkt_size = tx_ch->pkt_size;
        unsigned num_pds, n;
+       struct usb_cppi41_info *cppi_info = cppi->cppi_info;
+       u16 q_mgr = cppi_info->q_mgr;
+       u16 tx_comp_q = cppi_info->tx_comp_q[tx_ch->ch_num];
+       u8 en_bd_intr = cppi->en_bd_intr;
 
        /*
         * Tx can use the generic RNDIS mode where we can probably fit this
         * transfer in one PD and one IRQ.  The only time we would NOT want
         * to use it is when the hardware constraints prevent it...
         */
-       if ((pkt_size & 0x3f) == 0 && length > pkt_size) {
-               num_pds  = 1;
-               pkt_size = length;
+       if ((pkt_size & 0x3f) == 0) {
+               num_pds  = length ? 1 : 0;
                cppi41_mode_update(tx_ch, USB_GENERIC_RNDIS_MODE);
        } else {
                num_pds  = (length + pkt_size - 1) / pkt_size;
                cppi41_mode_update(tx_ch, USB_TRANSPARENT_MODE);
        }
 
+       pkt_size = length;
        /*
         * If length of transmit buffer is 0 or a multiple of the endpoint size,
         * then send the zero length packet.
@@ -550,7 +594,7 @@ static unsigned cppi41_next_tx_segment(struct cppi41_channel *tx_ch)
        if (!length || (tx_ch->transfer_mode && length % pkt_size == 0))
                num_pds++;
 
-       DBG(4, "TX DMA%u, %s, maxpkt %u, %u PDs, addr %#x, len %u\n",
+       dev_dbg(musb->controller, "TX DMA%u, %s, maxpkt %u, %u PDs, addr %#x, len %u\n",
            tx_ch->ch_num, tx_ch->dma_mode ? "accelerated" : "transparent",
            pkt_size, num_pds, tx_ch->start_addr + tx_ch->curr_offset, length);
 
@@ -560,7 +604,7 @@ static unsigned cppi41_next_tx_segment(struct cppi41_channel *tx_ch)
                /* Get Tx host packet descriptor from the free pool */
                curr_pd = usb_get_free_pd(cppi);
                if (curr_pd == NULL) {
-                       DBG(1, "No Tx PDs\n");
+                       dev_dbg(musb->controller, "No Tx PDs\n");
                        break;
                }
 
@@ -572,10 +616,13 @@ static unsigned cppi41_next_tx_segment(struct cppi41_channel *tx_ch)
                                      CPPI41_DESC_TYPE_SHIFT) | pkt_size;
                hw_desc->tag_info = tx_ch->tag_info;
                hw_desc->pkt_info = cppi->pkt_info;
+               hw_desc->pkt_info |= ((q_mgr << CPPI41_RETURN_QMGR_SHIFT) |
+                               (tx_comp_q << CPPI41_RETURN_QNUM_SHIFT));
 
                hw_desc->buf_ptr = tx_ch->start_addr + tx_ch->curr_offset;
                hw_desc->buf_len = pkt_size;
                hw_desc->next_desc_ptr = 0;
+               hw_desc->orig_buf_len = pkt_size;
 
                curr_pd->ch_num = tx_ch->ch_num;
                curr_pd->ep_num = tx_ch->end_pt->epnum;
@@ -586,7 +633,10 @@ static unsigned cppi41_next_tx_segment(struct cppi41_channel *tx_ch)
                if (pkt_size == 0)
                        tx_ch->zlp_queued = 1;
 
-               DBG(5, "TX PD %p: buf %08x, len %08x, pkt info %08x\n", curr_pd,
+               if (en_bd_intr)
+                       hw_desc->orig_buf_len |= CPPI41_PKT_INTR_FLAG;
+
+               dev_dbg(musb->controller, "TX PD %p: buf %08x, len %08x, pkt info %08x\n", curr_pd,
                    hw_desc->buf_ptr, hw_desc->buf_len, hw_desc->pkt_info);
 
                cppi41_queue_push(&tx_ch->queue_obj, curr_pd->dma_addr,
@@ -603,13 +653,13 @@ static void cppi41_autoreq_update(struct cppi41_channel *rx_ch, u8 autoreq)
        if (is_host_active(cppi->musb) &&
            autoreq != rx_ch->autoreq) {
                void *__iomem reg_base = cppi->musb->ctrl_base;
-               u32 reg_val = musb_readl(reg_base, USB_AUTOREQ_REG);
+               u32 reg_val = musb_readl(reg_base, cppi->automode_reg_offs);
                u8 ep_num = rx_ch->ch_num + 1;
 
                reg_val &= ~USB_RX_AUTOREQ_MASK(ep_num);
                reg_val |= autoreq << USB_RX_AUTOREQ_SHIFT(ep_num);
 
-               musb_writel(reg_base, USB_AUTOREQ_REG, reg_val);
+               musb_writel(reg_base, cppi->automode_reg_offs, reg_val);
                rx_ch->autoreq = autoreq;
        }
 }
@@ -619,6 +669,10 @@ static void cppi41_set_ep_size(struct cppi41_channel *rx_ch, u32 pkt_size)
        struct cppi41 *cppi = rx_ch->channel.private_data;
        void *__iomem reg_base = cppi->musb->ctrl_base;
        u8 ep_num = rx_ch->ch_num + 1;
+       u32 res = pkt_size % 64;
+
+       /* the EP size register value must be a multiple of 64 */
+       pkt_size += res ? (64 - res) : res;
 
        musb_writel(reg_base, USB_GENERIC_RNDIS_EP_SIZE_REG(ep_num), pkt_size);
 }
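The added rounding makes the value written to USB_GENERIC_RNDIS_EP_SIZE_REG a multiple of 64, as the register requires; pkt_size += res ? (64 - res) : res is simply an open-coded round-up. A small standalone check that it matches the usual round-up idiom (plain C, not using the kernel's roundup() macro):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* round-up exactly as written in cppi41_set_ep_size() */
static uint32_t ep_size_round(uint32_t pkt_size)
{
        uint32_t res = pkt_size % 64;

        pkt_size += res ? (64 - res) : res;
        return pkt_size;
}

int main(void)
{
        for (uint32_t sz = 0; sz <= 1024; sz++)
                assert(ep_size_round(sz) == ((sz + 63) / 64) * 64);

        printf("e.g. %u -> %u, %u -> %u\n",
               512u, (unsigned)ep_size_round(512),
               513u, (unsigned)ep_size_round(513));
        return 0;
}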
@@ -691,42 +745,110 @@ static void cppi41_set_ep_size(struct cppi41_channel *rx_ch, u32 pkt_size)
 static unsigned cppi41_next_rx_segment(struct cppi41_channel *rx_ch)
 {
        struct cppi41 *cppi = rx_ch->channel.private_data;
+       struct musb *musb = cppi->musb;
        struct usb_pkt_desc *curr_pd;
        struct cppi41_host_pkt_desc *hw_desc;
        u32 length = rx_ch->length - rx_ch->curr_offset;
        u32 pkt_size = rx_ch->pkt_size;
+       u32 max_rx_transfer_size = 64 * 1024;
+       u32 i, n_bd, pkt_len;
+       struct usb_gadget_driver *gadget_driver;
+       u8 en_bd_intr = cppi->en_bd_intr, mode;
+
+       if (is_peripheral_active(cppi->musb)) {
+               /* TODO: temporary fix for CDC/RNDIS which needs to be in
+               /* TODO: temporary fix for CDC/RNDIS, which needs to be in
+                * GENERIC_RNDIS mode. Without this, the RNDIS gadget takes
+                * more than 2000 ms for 64-byte pings.
+               gadget_driver = cppi->musb->gadget_driver;
+
+               pkt_len = rx_ch->pkt_size;
+               mode = USB_GENERIC_RNDIS_MODE;
+               if (!strcmp(gadget_driver->driver.name, "g_file_storage")) {
+                       if (cppi->inf_mode && length > pkt_len) {
+                               pkt_len = 0;
+                               length = length - rx_ch->pkt_size;
+                               cppi41_rx_ch_set_maxbufcnt(&rx_ch->dma_ch_obj,
+                                       DMA_CH_RX_MAX_BUF_CNT_1);
+                               rx_ch->inf_mode = 1;
+                       } else {
+                               max_rx_transfer_size = rx_ch->pkt_size;
+                               mode = USB_TRANSPARENT_MODE;
+                       }
+               } else
+                       if (rx_ch->length < max_rx_transfer_size)
+                               pkt_len = rx_ch->length;
 
-       /*
-        * Rx can use the generic RNDIS mode where we can probably fit this
-        * transfer in one PD and one IRQ (or two with a short packet).
-        */
-       if ((pkt_size & 0x3f) == 0 && length >= 2 * pkt_size) {
-               cppi41_mode_update(rx_ch, USB_GENERIC_RNDIS_MODE);
-               cppi41_autoreq_update(rx_ch, USB_AUTOREQ_ALL_BUT_EOP);
-
-               if (likely(length < 0x10000))
-                       pkt_size = length - length % pkt_size;
-               else
-                       pkt_size = 0x10000;
-               cppi41_set_ep_size(rx_ch, pkt_size);
+               if (mode != USB_TRANSPARENT_MODE)
+                       cppi41_set_ep_size(rx_ch, pkt_len);
+               cppi41_mode_update(rx_ch, mode);
        } else {
-               cppi41_mode_update(rx_ch, USB_TRANSPARENT_MODE);
-               cppi41_autoreq_update(rx_ch, USB_NO_AUTOREQ);
+               /*
+                * Rx can use the generic RNDIS mode where we can
+                * probably fit this transfer in one PD and one IRQ
+                * (or two with a short packet).
+                */
+               if ((pkt_size & 0x3f) == 0) {
+                       cppi41_mode_update(rx_ch, USB_GENERIC_RNDIS_MODE);
+                       cppi41_autoreq_update(rx_ch, USB_AUTOREQ_ALL_BUT_EOP);
+
+                       pkt_size = (length > 0x10000) ? 0x10000 : length;
+                       cppi41_set_ep_size(rx_ch, pkt_size);
+               } else {
+                       cppi41_mode_update(rx_ch, USB_TRANSPARENT_MODE);
+                       cppi41_autoreq_update(rx_ch, USB_NO_AUTOREQ);
+                       max_rx_transfer_size = rx_ch->pkt_size;
+               }
        }
 
-       DBG(4, "RX DMA%u, %s, maxpkt %u, addr %#x, rec'd %u/%u\n",
+       dev_dbg(musb->controller, "RX DMA%u, %s, maxpkt %u, addr %#x, rec'd %u/%u\n",
            rx_ch->ch_num, rx_ch->dma_mode ? "accelerated" : "transparent",
            pkt_size, rx_ch->start_addr + rx_ch->curr_offset,
            rx_ch->curr_offset, rx_ch->length);
 
-       /* Get Rx packet descriptor from the free pool */
-       curr_pd = usb_get_free_pd(cppi);
-       if (curr_pd == NULL) {
-               /* Shouldn't ever happen! */
-               DBG(4, "No Rx PDs\n");
-               return 0;
+       /* calculate number of bd required */
+       n_bd = (length + max_rx_transfer_size - 1)/max_rx_transfer_size;
+
+       for (i = 0; i < n_bd ; ++i) {
+               /* Get Rx packet descriptor from the free pool */
+               curr_pd = usb_get_free_pd(cppi);
+               if (curr_pd == NULL) {
+                       /* Shouldn't ever happen! */
+                       dev_dbg(musb->controller, "No Rx PDs\n");
+                       goto sched;
+               }
+
+               pkt_len =
+               (length > max_rx_transfer_size) ? max_rx_transfer_size : length;
+
+               hw_desc = &curr_pd->hw_desc;
+               hw_desc->desc_info = (CPPI41_DESC_TYPE_HOST <<
+                                     CPPI41_DESC_TYPE_SHIFT);
+               hw_desc->orig_buf_ptr = rx_ch->start_addr + rx_ch->curr_offset;
+               hw_desc->orig_buf_len = pkt_len;
+
+               /* The buf_len field of the buffer descriptor is updated
+                * by the DMA once data reception completes.
+                */
+               hw_desc->buf_len = 0;
+
+               curr_pd->ch_num = rx_ch->ch_num;
+               curr_pd->ep_num = rx_ch->end_pt->epnum;
+
+               curr_pd->eop = (length -= pkt_len) ? 0 : 1;
+               rx_ch->curr_offset += pkt_len;
+
+               if (en_bd_intr)
+                       hw_desc->orig_buf_len |= CPPI41_PKT_INTR_FLAG;
+               /*
+                * Push the free Rx packet descriptor
+                * to the free descriptor/buffer queue.
+                */
+               cppi41_queue_push(&rx_ch->queue_obj, curr_pd->dma_addr,
+                       USB_CPPI41_DESC_ALIGN, 0);
        }
 
+sched:
        /*
         * HCD arranged ReqPkt for the first packet.
         * We arrange it for all but the last one.
@@ -739,25 +861,9 @@ static unsigned cppi41_next_rx_segment(struct cppi41_channel *rx_ch)
                musb_writew(epio, MUSB_RXCSR, csr);
        }
 
-       if (length < pkt_size)
-               pkt_size = length;
-
-       hw_desc = &curr_pd->hw_desc;
-       hw_desc->orig_buf_ptr = rx_ch->start_addr + rx_ch->curr_offset;
-       hw_desc->orig_buf_len = pkt_size;
-
-       curr_pd->ch_num = rx_ch->ch_num;
-       curr_pd->ep_num = rx_ch->end_pt->epnum;
-
-       rx_ch->curr_offset += pkt_size;
-
-       /*
-        * Push the free Rx packet descriptor
-        * to the free descriptor/buffer queue.
-        */
-       cppi41_queue_push(&rx_ch->queue_obj, curr_pd->dma_addr,
-               USB_CPPI41_DESC_ALIGN, 0);
-
+       /* enable the Rx DMA scheduler if not already enabled */
+       if (is_peripheral_active(cppi->musb) && (n_bd > 0))
+               cppi41_schedtbl_add_dma_ch(0, 0, rx_ch->ch_num, 0);
        return 1;
 }
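cppi41_next_rx_segment() now queues ceil(length / max_rx_transfer_size) descriptors up front instead of one, and tags the last one through curr_pd->eop so usb_process_rx_queue() can tell when the whole transfer has completed. A standalone sketch of that split-and-mark loop (the 64 KiB cap mirrors max_rx_transfer_size; the struct is a stand-in, not the driver's PD):

#include <stdint.h>
#include <stdio.h>

#define MAX_RX_TRANSFER_SIZE    (64 * 1024)

struct rx_bd_model {
        uint32_t buf_len;
        uint8_t  eop;           /* set on the last descriptor of the transfer */
};

int main(void)
{
        uint32_t length = 200 * 1024 + 100;     /* example transfer length */
        uint32_t n_bd = (length + MAX_RX_TRANSFER_SIZE - 1) / MAX_RX_TRANSFER_SIZE;
        struct rx_bd_model bd[8];

        for (uint32_t i = 0; i < n_bd; i++) {
                uint32_t pkt_len = (length > MAX_RX_TRANSFER_SIZE) ?
                                        MAX_RX_TRANSFER_SIZE : length;

                bd[i].buf_len = pkt_len;
                length -= pkt_len;
                bd[i].eop = length ? 0 : 1;     /* mirrors curr_pd->eop */

                printf("BD%u: %u bytes, eop=%u\n", (unsigned)i,
                       (unsigned)bd[i].buf_len, (unsigned)bd[i].eop);
        }
        return 0;
}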
 
@@ -794,7 +900,7 @@ static int cppi41_channel_program(struct dma_channel *channel,      u16 maxpacket,
                        cppi_ch->transmit ? 'T' : 'R', cppi_ch->ch_num);
                break;
        case MUSB_DMA_STATUS_UNKNOWN:
-               DBG(1, "%cx DMA%d not allocated!\n",
+               WARNING("%cx DMA%d not allocated!\n",
                    cppi_ch->transmit ? 'T' : 'R', cppi_ch->ch_num);
                return 0;
        case MUSB_DMA_STATUS_FREE:
@@ -835,22 +941,25 @@ static int usb_check_teardown(struct cppi41_channel *cppi_ch,
                              unsigned long pd_addr)
 {
        u32 info;
+       struct cppi41 *cppi = cppi_ch->channel.private_data;
+       struct usb_cppi41_info *cppi_info = cppi->cppi_info;
+       struct musb *musb = cppi->musb;
 
        if (cppi41_get_teardown_info(pd_addr, &info)) {
-               DBG(1, "ERROR: not a teardown descriptor\n");
+               dev_dbg(musb->controller, "ERROR: not a teardown descriptor\n");
                return 0;
        }
 
        if ((info & CPPI41_TEARDOWN_TX_RX_MASK) ==
            (!cppi_ch->transmit << CPPI41_TEARDOWN_TX_RX_SHIFT) &&
            (info & CPPI41_TEARDOWN_DMA_NUM_MASK) ==
-           (usb_cppi41_info.dma_block << CPPI41_TEARDOWN_DMA_NUM_SHIFT) &&
+           (cppi_info->dma_block << CPPI41_TEARDOWN_DMA_NUM_SHIFT) &&
            (info & CPPI41_TEARDOWN_CHAN_NUM_MASK) ==
-           (usb_cppi41_info.ep_dma_ch[cppi_ch->ch_num] <<
+           (cppi_info->ep_dma_ch[cppi_ch->ch_num] <<
             CPPI41_TEARDOWN_CHAN_NUM_SHIFT))
                return 1;
 
-       DBG(1, "ERROR: unexpected values in teardown descriptor\n");
+       dev_dbg(musb->controller, "ERROR: unexpected values in teardown descriptor\n");
        return 0;
 }
 
@@ -863,20 +972,67 @@ static int usb_check_teardown(struct cppi41_channel *cppi_ch,
 static void usb_tx_ch_teardown(struct cppi41_channel *tx_ch)
 {
        struct cppi41 *cppi = tx_ch->channel.private_data;
+       struct musb *musb = cppi->musb;
+       void __iomem *reg_base = musb->ctrl_base;
+       u32 td_reg, timeout = 0xfffff;
+       u8 ep_num = tx_ch->ch_num + 1;
        unsigned long pd_addr;
+       struct cppi41_queue_obj tx_queue_obj;
+       struct usb_cppi41_info *cppi_info;
 
        /* Initiate teardown for Tx DMA channel */
        cppi41_dma_ch_teardown(&tx_ch->dma_ch_obj);
 
+       /* Wait for a descriptor to be queued and pop it... */
        do {
-               /* Wait for a descriptor to be queued and pop it... */
-               do {
-                       pd_addr = cppi41_queue_pop(&cppi->queue_obj);
-               } while (!pd_addr);
+               td_reg  = musb_readl(reg_base, cppi->teardown_reg_offs);
+               td_reg |= USB_TX_TDOWN_MASK(ep_num);
+               musb_writel(reg_base, cppi->teardown_reg_offs, td_reg);
+
+               pd_addr = cppi41_queue_pop(&cppi->queue_obj);
+       } while (!pd_addr && timeout--);
 
-               dprintk("Descriptor (%08lx) popped from teardown completion "
+       if (pd_addr) {
+
+               dev_dbg(musb->controller, "Descriptor (%08lx) popped from teardown completion "
                        "queue\n", pd_addr);
-       } while (!usb_check_teardown(tx_ch, pd_addr));
+
+               if (usb_check_teardown(tx_ch, pd_addr)) {
+                       dev_dbg(musb->controller, "Teardown Desc (%lx) rcvd\n", pd_addr);
+               } else
+                       ERR("Invalid PD (%08lx) popped from teardown"
+                               " completion queue\n", pd_addr);
+       } else {
+               /* pd_addr == 0 here means the bounded poll timed out */
+               ERR("Teardown descriptor not received\n");
+       }
+
+       /* Read the Tx completion queue and reclaim any
+        * completed BDs left on it.
+        */
+       cppi_info = cppi->cppi_info;
+       if (cppi41_queue_init(&tx_queue_obj, cppi_info->q_mgr,
+                             cppi_info->tx_comp_q[tx_ch->ch_num])) {
+               ERR("ERROR: cppi41_queue_init failed for "
+                   "Tx completion queue");
+               return;
+       }
+
+       while ((pd_addr = cppi41_queue_pop(&tx_queue_obj)) != 0) {
+               struct usb_pkt_desc *curr_pd;
+
+               curr_pd = usb_get_pd_ptr(cppi, pd_addr);
+               if (curr_pd == NULL) {
+                       ERR("Invalid PD popped from Tx completion queue\n");
+                       continue;
+               }
+
+               dev_dbg(musb->controller, "Tx-PD(%p) popped from completion queue\n", curr_pd);
+               dev_dbg(musb->controller, "ch(%d)epnum(%d)len(%d)\n", curr_pd->ch_num,
+                       curr_pd->ep_num, curr_pd->hw_desc.buf_len);
+
+               usb_put_free_pd(cppi, curr_pd);
+       }
 }
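The Tx teardown no longer spins forever waiting for the teardown descriptor: it re-asserts the endpoint's Tx teardown bit on each iteration, bounds the poll with a counter, and afterwards drains the Tx completion queue of any descriptors that completed while the channel was being torn down. A standalone model of that bounded-poll-then-drain shape (the queue below is a trivial stand-in, not the CPPI queue-manager API):

#include <stdint.h>
#include <stdio.h>

/* trivial stand-in for a hardware completion queue */
static unsigned long fake_queue[4] = { 0, 0, 0x1000, 0x2000 };
static unsigned int fake_head;

static unsigned long queue_pop(void)
{
        return fake_head < 4 ? fake_queue[fake_head++] : 0;
}

int main(void)
{
        uint32_t timeout = 0xfffff;
        unsigned long pd_addr;

        /* bounded poll for the teardown descriptor */
        do {
                /* (the driver also re-writes the Tx teardown bit here) */
                pd_addr = queue_pop();
        } while (!pd_addr && timeout--);

        if (pd_addr)
                printf("teardown descriptor popped: %#lx\n", pd_addr);
        else
                printf("teardown descriptor not received\n");

        /* drain whatever is left on the completion queue */
        while ((pd_addr = queue_pop()) != 0)
                printf("stale completion descriptor: %#lx\n", pd_addr);

        return 0;
}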
 
 /*
@@ -888,22 +1044,31 @@ static void usb_tx_ch_teardown(struct cppi41_channel *tx_ch)
 static void usb_rx_ch_teardown(struct cppi41_channel *rx_ch)
 {
        struct cppi41 *cppi = rx_ch->channel.private_data;
+       struct musb *musb = cppi->musb;
+       struct usb_cppi41_info *cppi_info = cppi->cppi_info;
+       u32 timeout = 0xfffff, pd_addr;
+       struct cppi41_queue_obj rx_queue_obj;
 
        cppi41_dma_ch_default_queue(&rx_ch->dma_ch_obj, 0, cppi->teardownQNum);
 
        /* Initiate teardown for Rx DMA channel */
        cppi41_dma_ch_teardown(&rx_ch->dma_ch_obj);
 
-       while (1) {
+       do {
                struct usb_pkt_desc *curr_pd;
                unsigned long pd_addr;
 
                /* Wait for a descriptor to be queued and pop it... */
                do {
                        pd_addr = cppi41_queue_pop(&cppi->queue_obj);
-               } while (!pd_addr);
+               } while (!pd_addr && timeout--);
+
+               if (timeout <= 0 || !pd_addr) {
+                       ERR("teardown Desc not found\n");
+                       break;
+               }
 
-               dprintk("Descriptor (%08lx) popped from teardown completion "
+               dev_dbg(musb->controller, "Descriptor (%08lx) popped from teardown completion "
                        "queue\n", pd_addr);
 
                /*
@@ -933,11 +1098,37 @@ static void usb_rx_ch_teardown(struct cppi41_channel *rx_ch)
                 * this is protected by critical section.
                 */
                usb_put_free_pd(cppi, curr_pd);
+       } while (0);
+
+       /* Read the Rx completion queue and reclaim any
+        * completed BDs left on it.
+        */
+       if (cppi41_queue_init(&rx_queue_obj, cppi_info->q_mgr,
+                             cppi_info->rx_comp_q[rx_ch->ch_num])) {
+               ERR("ERROR: cppi41_queue_init failed for "
+                   "Rx completion queue");
+               return;
+       }
+
+       while ((pd_addr = cppi41_queue_pop(&rx_queue_obj)) != 0) {
+               struct usb_pkt_desc *curr_pd;
+
+               curr_pd = usb_get_pd_ptr(cppi, pd_addr);
+               if (curr_pd == NULL) {
+                       ERR("Invalid PD popped from Rx completion queue\n");
+                       continue;
+               }
+
+               dev_dbg(musb->controller, "Rx-PD(%p) popped from completion queue\n", curr_pd);
+               dev_dbg(musb->controller, "ch(%d)epnum(%d)len(%d)\n", curr_pd->ch_num,
+                       curr_pd->ep_num, curr_pd->hw_desc.buf_len);
+
+               usb_put_free_pd(cppi, curr_pd);
        }
 
        /* Now restore the default Rx completion queue... */
-       cppi41_dma_ch_default_queue(&rx_ch->dma_ch_obj, usb_cppi41_info.q_mgr,
-                                   usb_cppi41_info.rx_comp_q[0]);
+       cppi41_dma_ch_default_queue(&rx_ch->dma_ch_obj, cppi_info->q_mgr,
+                                   cppi_info->rx_comp_q[rx_ch->ch_num]);
 }
 
 /*
@@ -957,6 +1148,8 @@ static int cppi41_channel_abort(struct dma_channel *channel)
 
        cppi_ch = container_of(channel, struct cppi41_channel, channel);
        ch_num = cppi_ch->ch_num;
+       cppi = cppi_ch->channel.private_data;
+       musb = cppi->musb;
 
        switch (channel->status) {
        case MUSB_DMA_STATUS_BUS_ABORT:
@@ -968,15 +1161,13 @@ static int cppi41_channel_abort(struct dma_channel *channel)
                        __func__, channel->status);
                break;
        case MUSB_DMA_STATUS_UNKNOWN:
-               DBG(1, "%cx DMA%d not allocated\n",
+               dev_dbg(musb->controller, "%cx DMA%d not allocated\n",
                    cppi_ch->transmit ? 'T' : 'R', ch_num);
                /* FALLTHROUGH */
        case MUSB_DMA_STATUS_FREE:
                return 0;
        }
 
-       cppi = cppi_ch->channel.private_data;
-       musb = cppi->musb;
        reg_base = musb->ctrl_base;
        epio = cppi_ch->end_pt->regs;
        ep_num = ch_num + 1;
@@ -993,14 +1184,16 @@ static int cppi41_channel_abort(struct dma_channel *channel)
                usb_tx_ch_teardown(cppi_ch);
 
                /* Issue CPPI FIFO teardown for Tx channel */
-               td_reg  = musb_readl(reg_base, USB_TEARDOWN_REG);
+               td_reg  = musb_readl(reg_base, cppi->teardown_reg_offs);
                td_reg |= USB_TX_TDOWN_MASK(ep_num);
-               musb_writel(reg_base, USB_TEARDOWN_REG, td_reg);
+               musb_writel(reg_base, cppi->teardown_reg_offs, td_reg);
 
                /* Flush FIFO of the endpoint */
                csr  = musb_readw(epio, MUSB_TXCSR);
                csr |= MUSB_TXCSR_FLUSHFIFO | MUSB_TXCSR_H_WZC_BITS;
                musb_writew(epio, MUSB_TXCSR, csr);
+               musb_writew(epio, MUSB_TXCSR, csr);
+               cppi_ch->tx_complete = 0;
        } else { /* Rx */
                dprintk("Rx channel teardown, cppi_ch = %p\n", cppi_ch);
 
@@ -1008,11 +1201,12 @@ static int cppi41_channel_abort(struct dma_channel *channel)
                csr  = musb_readw(epio, MUSB_RXCSR);
                csr |= MUSB_RXCSR_FLUSHFIFO | MUSB_RXCSR_H_WZC_BITS;
                musb_writew(epio, MUSB_RXCSR, csr);
+               musb_writew(epio, MUSB_RXCSR, csr);
 
                /* Issue CPPI FIFO teardown for Rx channel */
-               td_reg  = musb_readl(reg_base, USB_TEARDOWN_REG);
+               td_reg  = musb_readl(reg_base, cppi->teardown_reg_offs);
                td_reg |= USB_RX_TDOWN_MASK(ep_num);
-               musb_writel(reg_base, USB_TEARDOWN_REG, td_reg);
+               musb_writel(reg_base, cppi->teardown_reg_offs, td_reg);
 
                /* Tear down Rx DMA channel */
                usb_rx_ch_teardown(cppi_ch);
@@ -1078,6 +1272,63 @@ static int cppi41_channel_abort(struct dma_channel *channel)
        return 0;
 }
 
+void txdma_completion_work(struct work_struct *data)
+{
+       struct cppi41 *cppi = container_of(data, struct cppi41, txdma_work);
+       struct cppi41_channel *tx_ch;
+       struct musb *musb = cppi->musb;
+       unsigned index;
+       u8 resched = 0;
+       unsigned long flags;
+
+       while (1) {
+               for (index = 0; index < USB_CPPI41_NUM_CH; index++) {
+                       void __iomem *epio;
+                       u16 csr;
+
+                       tx_ch = &cppi->tx_cppi_ch[index];
+                       spin_lock_irqsave(&musb->lock, flags);
+                       if (tx_ch->tx_complete) {
+                               /* Sometimes a EP can unregister from a DMA
+                                * channel while the data is still in the FIFO.
+                                * Probable reason a proper abort was not
+                                * called before taking such a step.
+                                * Protect against such cases.
+                                */
+                               if (!tx_ch->end_pt) {
+                                       tx_ch->tx_complete = 0;
+                                       spin_unlock_irqrestore(&musb->lock,
+                                                               flags);
+                                       continue;
+                               }
+
+                               epio = tx_ch->end_pt->regs;
+                               csr = musb_readw(epio, MUSB_TXCSR);
+
+                               if (csr & (MUSB_TXCSR_TXPKTRDY |
+                                       MUSB_TXCSR_FIFONOTEMPTY)) {
+                                       resched = 1;
+                               } else {
+                                       tx_ch->channel.status =
+                                               MUSB_DMA_STATUS_FREE;
+                                       tx_ch->tx_complete = 0;
+                                       musb_dma_completion(musb, index+1, 1);
+                               }
+                       }
+                       spin_unlock_irqrestore(&musb->lock, flags);
+
+                       if (!resched)
+                               cond_resched();
+               }
+
+               if (resched) {
+                       resched = 0;
+                       cond_resched();
+               } else {
+                       return;
+               }
+       }
+
+}
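txdma_completion_work() replaces the fixed udelay(20) that used to sit between the Tx DMA completion interrupt and musb_dma_completion(): completion is now reported only once MUSB_TXCSR shows the FIFO has actually drained (neither TXPKTRDY nor FIFONOTEMPTY set), and the work item effectively retries otherwise. A much-simplified standalone model of that "poll a status bit, defer completion until it clears" pattern (the flag values and the csr read are simulated, not MUSB register accesses):

#include <stdint.h>
#include <stdio.h>

#define TXCSR_TXPKTRDY          (1 << 0)
#define TXCSR_FIFONOTEMPTY      (1 << 1)

/* simulated TXCSR: pretend the FIFO drains after a few polls */
static uint16_t read_txcsr(void)
{
        static int polls;

        return (++polls < 3) ? TXCSR_FIFONOTEMPTY : 0;
}

int main(void)
{
        int tx_complete = 1;    /* set from the DMA completion path */

        while (tx_complete) {
                uint16_t csr = read_txcsr();

                if (csr & (TXCSR_TXPKTRDY | TXCSR_FIFONOTEMPTY)) {
                        /* data still in the FIFO: try again later
                         * (the driver reschedules its work item here) */
                        printf("FIFO busy, rescheduling\n");
                } else {
                        tx_complete = 0;
                        printf("FIFO drained, reporting DMA completion\n");
                }
        }
        return 0;
}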
+
 /**
  * cppi41_dma_controller_create -
  * instantiate an object representing DMA controller.
@@ -1099,6 +1350,15 @@ cppi41_dma_controller_create(struct musb  *musb, void __iomem *mregs)
        cppi->controller.channel_release = cppi41_channel_release;
        cppi->controller.channel_program = cppi41_channel_program;
        cppi->controller.channel_abort = cppi41_channel_abort;
+       cppi->cppi_info = (struct usb_cppi41_info *)&usb_cppi41_info[musb->id];
+       cppi->en_bd_intr = cppi->cppi_info->bd_intr_ctrl;
+       INIT_WORK(&cppi->txdma_work, txdma_completion_work);
+
+       /* enable infinite mode only for ti81xx silicon rev2 */
+       if (cpu_is_am33xx() || cpu_is_ti816x()) {
+               dev_dbg(musb->controller, "cppi41dma supports infinite mode\n");
+               cppi->inf_mode = 1;
+       }
 
        return &cppi->controller;
 }
@@ -1124,10 +1384,12 @@ static void usb_process_tx_queue(struct cppi41 *cppi, unsigned index)
 {
        struct cppi41_queue_obj tx_queue_obj;
        unsigned long pd_addr;
+       struct usb_cppi41_info *cppi_info = cppi->cppi_info;
+       struct musb *musb = cppi->musb;
 
-       if (cppi41_queue_init(&tx_queue_obj, usb_cppi41_info.q_mgr,
-                             usb_cppi41_info.tx_comp_q[index])) {
-               DBG(1, "ERROR: cppi41_queue_init failed for "
+       if (cppi41_queue_init(&tx_queue_obj, cppi_info->q_mgr,
+                             cppi_info->tx_comp_q[index])) {
+               dev_dbg(musb->controller, "ERROR: cppi41_queue_init failed for "
                    "Tx completion queue");
                return;
        }
@@ -1162,8 +1424,6 @@ static void usb_process_tx_queue(struct cppi41 *cppi, unsigned index)
                    (tx_ch->transfer_mode && !tx_ch->zlp_queued))
                        cppi41_next_tx_segment(tx_ch);
                else if (tx_ch->channel.actual_len >= tx_ch->length) {
-                       tx_ch->channel.status = MUSB_DMA_STATUS_FREE;
-
                        /*
                         * We get Tx DMA completion interrupt even when
                         * data is still in FIFO and not moved out to
@@ -1172,9 +1432,8 @@ static void usb_process_tx_queue(struct cppi41 *cppi, unsigned index)
                         * USB functionality. So far, we have observed
                         * failure with iperf.
                         */
-                       udelay(20);
-                       /* Tx completion routine callback */
-                       musb_dma_completion(cppi->musb, ep_num, 1);
+                       tx_ch->tx_complete = 1;
+                       schedule_work(&cppi->txdma_work);
                }
        }
 }
@@ -1183,10 +1442,13 @@ static void usb_process_rx_queue(struct cppi41 *cppi, unsigned index)
 {
        struct cppi41_queue_obj rx_queue_obj;
        unsigned long pd_addr;
+       struct usb_cppi41_info *cppi_info = cppi->cppi_info;
+       struct musb *musb = cppi->musb;
+       u8 en_bd_intr = cppi->en_bd_intr;
 
-       if (cppi41_queue_init(&rx_queue_obj, usb_cppi41_info.q_mgr,
-                             usb_cppi41_info.rx_comp_q[index])) {
-               DBG(1, "ERROR: cppi41_queue_init failed for Rx queue\n");
+       if (cppi41_queue_init(&rx_queue_obj, cppi_info->q_mgr,
+                             cppi_info->rx_comp_q[index])) {
+               dev_dbg(musb->controller, "ERROR: cppi41_queue_init failed for Rx queue\n");
                return;
        }
 
@@ -1194,7 +1456,7 @@ static void usb_process_rx_queue(struct cppi41 *cppi, unsigned index)
                struct usb_pkt_desc *curr_pd;
                struct cppi41_channel *rx_ch;
                u8 ch_num, ep_num;
-               u32 length;
+               u32 length = 0, orig_buf_len, timeout = 50;
 
                curr_pd = usb_get_pd_ptr(cppi, pd_addr);
                if (curr_pd == NULL) {
@@ -1202,28 +1464,92 @@ static void usb_process_rx_queue(struct cppi41 *cppi, unsigned index)
                        continue;
                }
 
+               /* This delay is required to overcome a DMA race condition:
+                * software can read the buffer descriptor before the DMA has
+                * updated it, because the DMA's descriptor writes may still
+                * be pending in the interconnect bridge.
+                */
+               while (timeout--) {
+                       length = curr_pd->hw_desc.desc_info &
+                                       CPPI41_PKT_LEN_MASK;
+                       if (length != 0)
+                               break;
+                       udelay(1);
+               }
+
+               if (length == 0)
+                       ERR("Race condition: Rx BD read before being updated by DMA\n");
+
                /* Extract the data from received packet descriptor */
                ch_num = curr_pd->ch_num;
                ep_num = curr_pd->ep_num;
-               length = curr_pd->hw_desc.buf_len;
+
+               dev_dbg(musb->controller, "Rx complete: dma channel(%d) ep%d len %d timeout %d\n",
+                       ch_num, ep_num, length, (50-timeout));
 
                rx_ch = &cppi->rx_cppi_ch[ch_num];
                rx_ch->channel.actual_len += length;
 
+               if (curr_pd->eop) {
+                       curr_pd->eop = 0;
+                       /* disable the Rx DMA scheduler */
+                       if (is_peripheral_active(cppi->musb) && !cppi->inf_mode)
+                               cppi41_schedtbl_remove_dma_ch(0, 0, ch_num, 0);
+               }
+
                /*
                 * Return Rx PD to the software list --
                 * this is protected by critical section
                 */
                usb_put_free_pd(cppi, curr_pd);
 
+               orig_buf_len = curr_pd->hw_desc.orig_buf_len;
+               if (en_bd_intr)
+                       orig_buf_len &= ~CPPI41_PKT_INTR_FLAG;
+
                if (unlikely(rx_ch->channel.actual_len >= rx_ch->length ||
-                            length < curr_pd->hw_desc.orig_buf_len)) {
-                       rx_ch->channel.status = MUSB_DMA_STATUS_FREE;
+                            length < orig_buf_len)) {
 
-                       /* Rx completion routine callback */
-                       musb_dma_completion(cppi->musb, ep_num, 0);
-               } else
-                       cppi41_next_rx_segment(rx_ch);
+#if defined(CONFIG_SOC_OMAPTI81XX) || defined(CONFIG_SOC_OMAPAM33XX)
+                       struct musb_hw_ep *ep;
+                       u8 isoc, next_seg = 0;
+
+                       /* Workaround for early rx completion of
+                        * cppi41 dma in Generic RNDIS mode for ti81xx
+                        */
+                       if (is_host_enabled(cppi->musb)) {
+                               u32 pkt_size = rx_ch->pkt_size;
+                               ep = cppi->musb->endpoints + ep_num;
+                               isoc = musb_readb(ep->regs, MUSB_RXTYPE);
+                               isoc = (isoc >> 4) & 0x1;
+
+                               if (!isoc
+                               && (rx_ch->dma_mode == USB_GENERIC_RNDIS_MODE)
+                               && (rx_ch->channel.actual_len < rx_ch->length)
+                               && !(rx_ch->channel.actual_len % pkt_size))
+                                       next_seg = 1;
+                       }
+                       if (next_seg) {
+                               rx_ch->curr_offset = rx_ch->channel.actual_len;
+                               cppi41_next_rx_segment(rx_ch);
+                       } else
+#endif
+                       {
+                               rx_ch->channel.status = MUSB_DMA_STATUS_FREE;
+
+                               if (rx_ch->inf_mode) {
+                                       cppi41_rx_ch_set_maxbufcnt(
+                                       &rx_ch->dma_ch_obj, 0);
+                                       rx_ch->inf_mode = 0;
+                               }
+                               /* Rx completion routine callback */
+                               musb_dma_completion(cppi->musb, ep_num, 0);
+                       }
+               } else {
+                       if (is_peripheral_active(cppi->musb) &&
+                               ((rx_ch->length - rx_ch->curr_offset) > 0))
+                               cppi41_next_rx_segment(rx_ch);
+               }
        }
 }
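The 50-iteration poll added at the top of the Rx completion loop covers a write-ordering window: the packet-length field of the descriptor can still be sitting in the interconnect bridge when the completion interrupt fires, so the driver re-reads it (with a 1 us delay per try) until it goes non-zero. A standalone sketch of that bounded re-read, with the descriptor update simulated instead of coming from the DMA (the length mask width is an assumption):

#include <stdint.h>
#include <stdio.h>

#define CPPI41_PKT_LEN_MASK     0x3fffff        /* assumed width of the length field */

/* simulate a descriptor whose length becomes visible a few reads after the IRQ */
static uint32_t read_desc_info(void)
{
        static int reads;

        return (++reads < 4) ? 0 : 512;         /* eventually the DMA write lands */
}

int main(void)
{
        uint32_t length = 0;
        int timeout = 50;

        while (timeout--) {
                length = read_desc_info() & CPPI41_PKT_LEN_MASK;
                if (length != 0)
                        break;
                /* the driver does udelay(1) here between re-reads */
        }

        if (length == 0)
                printf("race: descriptor read before the DMA updated it\n");
        else
                printf("packet length %u after %d re-reads\n",
                       (unsigned)length, 50 - timeout);
        return 0;
}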
 
@@ -1250,6 +1576,7 @@ void cppi41_completion(struct musb *musb, u32 rx, u32 tx)
                if (tx & 1)
                        usb_process_tx_queue(cppi, index);
 }
+EXPORT_SYMBOL(cppi41_completion);
 
 MODULE_DESCRIPTION("CPPI4.1 dma controller driver for musb");
 MODULE_LICENSE("GPL v2");