--- zzzz-none-000/linux-3.10.107/drivers/net/ethernet/freescale/gianfar.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/drivers/net/ethernet/freescale/gianfar.c	2021-02-04 17:41:59.000000000 +0000
@@ -9,7 +9,7 @@
  * Maintainer: Kumar Gala
  * Modifier: Sandeep Gopalpet <sandeep.kumar@freescale.com>
  *
- * Copyright 2002-2009, 2011 Freescale Semiconductor, Inc.
+ * Copyright 2002-2009, 2011-2013 Freescale Semiconductor, Inc.
  * Copyright 2007 MontaVista Software, Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
@@ -70,7 +70,6 @@
 #include <linux/unistd.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
-#include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -78,6 +77,8 @@
 #include <linux/if_vlan.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 #include <linux/ip.h>
@@ -87,7 +88,10 @@
 #include <linux/net_tstamp.h>
 
 #include <asm/io.h>
+#ifdef CONFIG_PPC
 #include <asm/reg.h>
+#include <asm/mpc85xx.h>
+#endif
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 #include <linux/module.h>
@@ -98,28 +102,29 @@
 #include <linux/phy_fixed.h>
 #include <linux/of.h>
 #include <linux/of_net.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 
 #include "gianfar.h"
 
-#define TX_TIMEOUT      (1*HZ)
+#define TX_TIMEOUT      (5*HZ)
 
-const char gfar_driver_version[] = "1.3";
+const char gfar_driver_version[] = "2.0";
 
 static int gfar_enet_open(struct net_device *dev);
 static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static void gfar_reset_task(struct work_struct *work);
 static void gfar_timeout(struct net_device *dev);
 static int gfar_close(struct net_device *dev);
-struct sk_buff *gfar_new_skb(struct net_device *dev);
-static void gfar_new_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp,
-			   struct sk_buff *skb);
+static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue,
+				int alloc_cnt);
 static int gfar_set_mac_address(struct net_device *dev);
 static int gfar_change_mtu(struct net_device *dev, int new_mtu);
 static irqreturn_t gfar_error(int irq, void *dev_id);
 static irqreturn_t gfar_transmit(int irq, void *dev_id);
 static irqreturn_t gfar_interrupt(int irq, void *dev_id);
 static void adjust_link(struct net_device *dev);
-static void init_registers(struct net_device *dev);
+static noinline void gfar_update_link_state(struct gfar_private *priv);
 static int init_phy(struct net_device *dev);
 static int gfar_probe(struct platform_device *ofdev);
 static int gfar_remove(struct platform_device *ofdev);
@@ -127,17 +132,17 @@
 static void gfar_set_multi(struct net_device *dev);
 static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr);
 static void gfar_configure_serdes(struct net_device *dev);
-static int gfar_poll(struct napi_struct *napi, int budget);
+static int gfar_poll_rx(struct napi_struct *napi, int budget);
+static int gfar_poll_tx(struct napi_struct *napi, int budget);
+static int gfar_poll_rx_sq(struct napi_struct *napi, int budget);
+static int gfar_poll_tx_sq(struct napi_struct *napi, int budget);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void gfar_netpoll(struct net_device *dev);
 #endif
 int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit);
 static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue);
-static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
-			       int amount_pull, struct napi_struct *napi);
-void gfar_halt(struct net_device *dev);
-static void gfar_halt_nodisable(struct net_device *dev);
-void gfar_start(struct net_device *dev);
+static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb);
+static void gfar_halt_nodisable(struct gfar_private *priv);
 static void gfar_clear_exact_match(struct net_device *dev);
 static void gfar_set_mac_for_addr(struct net_device *dev, int num,
 				  const u8 *addr);
@@ -152,24 +157,25 @@
 {
 	u32 lstatus;
 
-	bdp->bufPtr = buf;
+	bdp->bufPtr = cpu_to_be32(buf);
 
 	lstatus = BD_LFLAG(RXBD_EMPTY | RXBD_INTERRUPT);
 	if (bdp == rx_queue->rx_bd_base + rx_queue->rx_ring_size - 1)
 		lstatus |= BD_LFLAG(RXBD_WRAP);
 
-	eieio();
+	gfar_wmb();
 
-	bdp->lstatus = lstatus;
+	bdp->lstatus = cpu_to_be32(lstatus);
 }
 
-static int gfar_init_bds(struct net_device *ndev)
+static void gfar_init_bds(struct net_device *ndev)
 {
 	struct gfar_private *priv = netdev_priv(ndev);
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 	struct gfar_priv_tx_q *tx_queue = NULL;
 	struct gfar_priv_rx_q *rx_queue = NULL;
 	struct txbd8 *txbdp;
-	struct rxbd8 *rxbdp;
+	u32 __iomem *rfbptr;
 	int i, j;
 
 	for (i = 0; i < priv->num_tx_queues; i++) {
@@ -191,45 +197,33 @@
 
 		/* Set the last descriptor in the ring to indicate wrap */
 		txbdp--;
-		txbdp->status |= TXBD_WRAP;
+		txbdp->status = cpu_to_be16(be16_to_cpu(txbdp->status) |
+					    TXBD_WRAP);
 	}
 
+	rfbptr = &regs->rfbptr0;
 	for (i = 0; i < priv->num_rx_queues; i++) {
 		rx_queue = priv->rx_queue[i];
-		rx_queue->cur_rx = rx_queue->rx_bd_base;
-		rx_queue->skb_currx = 0;
-		rxbdp = rx_queue->rx_bd_base;
-
-		for (j = 0; j < rx_queue->rx_ring_size; j++) {
-			struct sk_buff *skb = rx_queue->rx_skbuff[j];
-
-			if (skb) {
-				gfar_init_rxbdp(rx_queue, rxbdp,
-						rxbdp->bufPtr);
-			} else {
-				skb = gfar_new_skb(ndev);
-				if (!skb) {
-					netdev_err(ndev, "Can't allocate RX buffers\n");
-					return -ENOMEM;
-				}
-				rx_queue->rx_skbuff[j] = skb;
 
-				gfar_new_rxbdp(rx_queue, rxbdp, skb);
-			}
+		rx_queue->next_to_clean = 0;
+		rx_queue->next_to_use = 0;
+		rx_queue->next_to_alloc = 0;
 
-			rxbdp++;
-		}
+		/* make sure next_to_clean != next_to_use after this
+		 * by leaving at least 1 unused descriptor
+		 */
+		gfar_alloc_rx_buffs(rx_queue, gfar_rxbd_unused(rx_queue));
 
+		rx_queue->rfbptr = rfbptr;
+		rfbptr += 2;
 	}
-
-	return 0;
 }
 
 static int gfar_alloc_skb_resources(struct net_device *ndev)
 {
 	void *vaddr;
 	dma_addr_t addr;
-	int i, j, k;
+	int i, j;
 	struct gfar_private *priv = netdev_priv(ndev);
 	struct device *dev = priv->dev;
 	struct gfar_priv_tx_q *tx_queue = NULL;
@@ -268,7 +262,8 @@
 		rx_queue = priv->rx_queue[i];
 		rx_queue->rx_bd_base = vaddr;
 		rx_queue->rx_bd_dma_base = addr;
-		rx_queue->dev = ndev;
+		rx_queue->ndev = ndev;
+		rx_queue->dev = dev;
 		addr  += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
 		vaddr += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
 	}
@@ -283,25 +278,20 @@
 		if (!tx_queue->tx_skbuff)
 			goto cleanup;
 
-		for (k = 0; k < tx_queue->tx_ring_size; k++)
-			tx_queue->tx_skbuff[k] = NULL;
+		for (j = 0; j < tx_queue->tx_ring_size; j++)
+			tx_queue->tx_skbuff[j] = NULL;
 	}
 
 	for (i = 0; i < priv->num_rx_queues; i++) {
 		rx_queue = priv->rx_queue[i];
-		rx_queue->rx_skbuff =
-			kmalloc_array(rx_queue->rx_ring_size,
-				      sizeof(*rx_queue->rx_skbuff),
-				      GFP_KERNEL);
-		if (!rx_queue->rx_skbuff)
+		rx_queue->rx_buff = kcalloc(rx_queue->rx_ring_size,
+					    sizeof(*rx_queue->rx_buff),
+					    GFP_KERNEL);
+		if (!rx_queue->rx_buff)
 			goto cleanup;
-
-		for (j = 0; j < rx_queue->rx_ring_size; j++)
-			rx_queue->rx_skbuff[j] = NULL;
 	}
 
-	if (gfar_init_bds(ndev))
-		goto cleanup;
+	gfar_init_bds(ndev);
 
 	return 0;
 
@@ -329,72 +319,85 @@
 	}
 }
 
-static void gfar_init_mac(struct net_device *ndev)
+static void gfar_init_rqprm(struct gfar_private *priv)
 {
-	struct gfar_private *priv = netdev_priv(ndev);
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 rctrl = 0;
-	u32 tctrl = 0;
-	u32 attrs = 0;
-
-	/* write the tx/rx base registers */
-	gfar_init_tx_rx_base(priv);
+	u32 __iomem *baddr;
+	int i;
 
-	/* Configure the coalescing support */
-	gfar_configure_coalescing_all(priv);
+	baddr = &regs->rqprm0;
+	for (i = 0; i < priv->num_rx_queues; i++) {
+		gfar_write(baddr, priv->rx_queue[i]->rx_ring_size |
+			   (DEFAULT_RX_LFC_THR << FBTHR_SHIFT));
+		baddr++;
+	}
+}
 
+/* Recompute priv->uses_rxfcb from the features and rx timestamp/filer state */
+static void gfar_rx_offload_en(struct gfar_private *priv)
+{
+	const struct net_device *ndev = priv->ndev;
+
+	/* must be set whenever rx hw offload (TOE) functions are in use */
+	priv->uses_rxfcb = 0;
+	if (priv->hwts_rx_en || priv->rx_filer_enable ||
+	    (ndev->features & (NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX)))
+		priv->uses_rxfcb = 1;
+}
+
+static void gfar_mac_rx_config(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 rctrl = 0;
+
 	if (priv->rx_filer_enable) {
-		rctrl |= RCTRL_FILREN;
+		rctrl |= RCTRL_FILREN | RCTRL_PRSDEP_INIT;
 		/* Program the RIR0 reg with the required distribution */
-		gfar_write(&regs->rir0, DEFAULT_RIR0);
+		if (priv->poll_mode == GFAR_SQ_POLLING)
+			gfar_write(&regs->rir0, DEFAULT_2RXQ_RIR0);
+		else /* GFAR_MQ_POLLING */
+			gfar_write(&regs->rir0, DEFAULT_8RXQ_RIR0);
 	}
 
 	/* Restore PROMISC mode */
-	if (ndev->flags & IFF_PROMISC)
+	if (priv->ndev->flags & IFF_PROMISC)
 		rctrl |= RCTRL_PROM;
 
-	if (ndev->features & NETIF_F_RXCSUM) {
+	if (priv->ndev->features & NETIF_F_RXCSUM)
 		rctrl |= RCTRL_CHECKSUMMING;
-		priv->uses_rxfcb = 1;
-	}
 
-	if (priv->extended_hash) {
-		rctrl |= RCTRL_EXTHASH;
-
-		gfar_clear_exact_match(ndev);
-		rctrl |= RCTRL_EMEN;
-	}
+	if (priv->extended_hash)
+		rctrl |= RCTRL_EXTHASH | RCTRL_EMEN;
 
 	if (priv->padding) {
 		rctrl &= ~RCTRL_PAL_MASK;
 		rctrl |= RCTRL_PADDING(priv->padding);
 	}
 
-	/* Insert receive time stamps into padding alignment bytes */
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) {
-		rctrl &= ~RCTRL_PAL_MASK;
-		rctrl |= RCTRL_PADDING(8);
-		priv->padding = 8;
-	}
-
 	/* Enable HW time stamping if requested from user space */
-	if (priv->hwts_rx_en) {
+	if (priv->hwts_rx_en)
 		rctrl |= RCTRL_PRSDEP_INIT | RCTRL_TS_ENABLE;
-		priv->uses_rxfcb = 1;
-	}
 
-	if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+	if (priv->ndev->features & NETIF_F_HW_VLAN_CTAG_RX)
 		rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT;
-		priv->uses_rxfcb = 1;
-	}
+
+	/* Clear the LFC bit */
+	gfar_write(&regs->rctrl, rctrl);
+	/* Init flow control threshold values */
+	gfar_init_rqprm(priv);
+	gfar_write(&regs->ptv, DEFAULT_LFC_PTVVAL);
+	rctrl |= RCTRL_LFC;
 
 	/* Init rctrl based on our settings */
 	gfar_write(&regs->rctrl, rctrl);
+}
+
+static void gfar_mac_tx_config(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 tctrl = 0;
 
-	if (ndev->features & NETIF_F_IP_CSUM)
+	if (priv->ndev->features & NETIF_F_IP_CSUM)
 		tctrl |= TCTRL_INIT_CSUM;
 
 	if (priv->prio_sched_en)
@@ -405,30 +408,51 @@
 		gfar_write(&regs->tr47wt, DEFAULT_WRRS_WEIGHT);
 	}
 
-	gfar_write(&regs->tctrl, tctrl);
+	if (priv->ndev->features & NETIF_F_HW_VLAN_CTAG_TX)
+		tctrl |= TCTRL_VLINS;
 
-	/* Set the extraction length and index */
-	attrs = ATTRELI_EL(priv->rx_stash_size) |
-		ATTRELI_EI(priv->rx_stash_index);
+	gfar_write(&regs->tctrl, tctrl);
+}
 
-	gfar_write(&regs->attreli, attrs);
+static void gfar_configure_coalescing(struct gfar_private *priv,
+			       unsigned long tx_mask, unsigned long rx_mask)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 __iomem *baddr;
 
-	/* Start with defaults, and add stashing or locking
-	 * depending on the approprate variables
-	 */
-	attrs = ATTR_INIT_SETTINGS;
+	if (priv->mode == MQ_MG_MODE) {
+		int i = 0;
 
-	if (priv->bd_stash_en)
-		attrs |= ATTR_BDSTASH;
+		baddr = &regs->txic0;
+		for_each_set_bit(i, &tx_mask, priv->num_tx_queues) {
+			gfar_write(baddr + i, 0);
+			if (likely(priv->tx_queue[i]->txcoalescing))
+				gfar_write(baddr + i, priv->tx_queue[i]->txic);
+		}
 
-	if (priv->rx_stash_size != 0)
-		attrs |= ATTR_BUFSTASH;
+		baddr = &regs->rxic0;
+		for_each_set_bit(i, &rx_mask, priv->num_rx_queues) {
+			gfar_write(baddr + i, 0);
+			if (likely(priv->rx_queue[i]->rxcoalescing))
+				gfar_write(baddr + i, priv->rx_queue[i]->rxic);
+		}
+	} else {
+		/* Backward compatible case -- even if we enable
+		 * multiple queues, there's only single reg to program
+		 */
+		gfar_write(&regs->txic, 0);
+		if (likely(priv->tx_queue[0]->txcoalescing))
+			gfar_write(&regs->txic, priv->tx_queue[0]->txic);
 
-	gfar_write(&regs->attr, attrs);
+		gfar_write(&regs->rxic, 0);
+		if (unlikely(priv->rx_queue[0]->rxcoalescing))
+			gfar_write(&regs->rxic, priv->rx_queue[0]->rxic);
+	}
+}
 
-	gfar_write(&regs->fifo_tx_thr, priv->fifo_threshold);
-	gfar_write(&regs->fifo_tx_starve, priv->fifo_starve);
-	gfar_write(&regs->fifo_tx_starve_shutoff, priv->fifo_starve_off);
+void gfar_configure_coalescing_all(struct gfar_private *priv)
+{
+	gfar_configure_coalescing(priv, 0xFF, 0xFF);
 }
 
 static struct net_device_stats *gfar_get_stats(struct net_device *dev)
@@ -459,6 +483,15 @@
 	return &dev->stats;
 }
 
+/* Set a new MAC address; hand it on only if eth_mac_addr() accepted it */
+static int gfar_set_mac_addr(struct net_device *dev, void *p)
+{
+	int ret = eth_mac_addr(dev, p);	/* updates dev->dev_addr on success */
+	if (!ret)
+		gfar_set_mac_for_addr(dev, 0, dev->dev_addr);
+	return ret;	/* propagate eth_mac_addr() errors to the caller */
+}
+
 static const struct net_device_ops gfar_netdev_ops = {
 	.ndo_open = gfar_enet_open,
 	.ndo_start_xmit = gfar_start_xmit,
@@ -469,46 +502,71 @@
 	.ndo_tx_timeout = gfar_timeout,
 	.ndo_do_ioctl = gfar_ioctl,
 	.ndo_get_stats = gfar_get_stats,
-	.ndo_set_mac_address = eth_mac_addr,
+	.ndo_set_mac_address = gfar_set_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = gfar_netpoll,
 #endif
 };
 
-void lock_rx_qs(struct gfar_private *priv)
+static void gfar_ints_disable(struct gfar_private *priv)
 {
 	int i;
+	for (i = 0; i < priv->num_grps; i++) {
+		struct gfar __iomem *regs = priv->gfargrp[i].regs;
+		/* Clear IEVENT */
+		gfar_write(&regs->ievent, IEVENT_INIT_CLEAR);
 
-	for (i = 0; i < priv->num_rx_queues; i++)
-		spin_lock(&priv->rx_queue[i]->rxlock);
+		/* Initialize IMASK */
+		gfar_write(&regs->imask, IMASK_INIT_CLEAR);
+	}
 }
 
-void lock_tx_qs(struct gfar_private *priv)
+static void gfar_ints_enable(struct gfar_private *priv)
 {
 	int i;
-
-	for (i = 0; i < priv->num_tx_queues; i++)
-		spin_lock(&priv->tx_queue[i]->txlock);
+	for (i = 0; i < priv->num_grps; i++) {
+		struct gfar __iomem *regs = priv->gfargrp[i].regs;
+		/* Unmask the interrupts we look for */
+		gfar_write(&regs->imask, IMASK_DEFAULT);
+	}
 }
 
-void unlock_rx_qs(struct gfar_private *priv)
+static int gfar_alloc_tx_queues(struct gfar_private *priv)
 {
 	int i;
 
-	for (i = 0; i < priv->num_rx_queues; i++)
-		spin_unlock(&priv->rx_queue[i]->rxlock);
+	for (i = 0; i < priv->num_tx_queues; i++) {
+		priv->tx_queue[i] = kzalloc(sizeof(struct gfar_priv_tx_q),
+					    GFP_KERNEL);
+		if (!priv->tx_queue[i])
+			return -ENOMEM;
+
+		priv->tx_queue[i]->tx_skbuff = NULL;
+		priv->tx_queue[i]->qindex = i;
+		priv->tx_queue[i]->dev = priv->ndev;
+		spin_lock_init(&(priv->tx_queue[i]->txlock));
+	}
+	return 0;
 }
 
-void unlock_tx_qs(struct gfar_private *priv)
+static int gfar_alloc_rx_queues(struct gfar_private *priv)
 {
 	int i;
 
-	for (i = 0; i < priv->num_tx_queues; i++)
-		spin_unlock(&priv->tx_queue[i]->txlock);
+	for (i = 0; i < priv->num_rx_queues; i++) {
+		priv->rx_queue[i] = kzalloc(sizeof(struct gfar_priv_rx_q),
+					    GFP_KERNEL);
+		if (!priv->rx_queue[i])
+			return -ENOMEM;
+
+		priv->rx_queue[i]->qindex = i;
+		priv->rx_queue[i]->ndev = priv->ndev;
+	}
+	return 0;
 }
 
-static void free_tx_pointers(struct gfar_private *priv)
+static void gfar_free_tx_queues(struct gfar_private *priv)
 {
 	int i;
 
@@ -516,7 +574,7 @@
 		kfree(priv->tx_queue[i]);
 }
 
-static void free_rx_pointers(struct gfar_private *priv)
+static void gfar_free_rx_queues(struct gfar_private *priv)
 {
 	int i;
 
@@ -550,23 +608,26 @@
 {
 	int i;
 
-	for (i = 0; i < priv->num_grps; i++)
-		napi_disable(&priv->gfargrp[i].napi);
+	for (i = 0; i < priv->num_grps; i++) {
+		napi_disable(&priv->gfargrp[i].napi_rx);
+		napi_disable(&priv->gfargrp[i].napi_tx);
+	}
 }
 
 static void enable_napi(struct gfar_private *priv)
 {
 	int i;
 
-	for (i = 0; i < priv->num_grps; i++)
-		napi_enable(&priv->gfargrp[i].napi);
+	for (i = 0; i < priv->num_grps; i++) {
+		napi_enable(&priv->gfargrp[i].napi_rx);
+		napi_enable(&priv->gfargrp[i].napi_tx);
+	}
 }
 
 static int gfar_parse_group(struct device_node *np,
 			    struct gfar_private *priv, const char *model)
 {
 	struct gfar_priv_grp *grp = &priv->gfargrp[priv->num_grps];
-	u32 *queue_mask;
 	int i;
 
 	for (i = 0; i < GFAR_NUM_IRQS; i++) {
@@ -586,31 +647,87 @@
 	if (model && strcasecmp(model, "FEC")) {
 		gfar_irq(grp, RX)->irq = irq_of_parse_and_map(np, 1);
 		gfar_irq(grp, ER)->irq = irq_of_parse_and_map(np, 2);
-		if (gfar_irq(grp, TX)->irq == NO_IRQ ||
-		    gfar_irq(grp, RX)->irq == NO_IRQ ||
-		    gfar_irq(grp, ER)->irq == NO_IRQ)
+		if (!gfar_irq(grp, TX)->irq ||
+		    !gfar_irq(grp, RX)->irq ||
+		    !gfar_irq(grp, ER)->irq)
 			return -EINVAL;
 	}
 
-	grp->grp_id = priv->num_grps;
 	grp->priv = priv;
 	spin_lock_init(&grp->grplock);
 	if (priv->mode == MQ_MG_MODE) {
-		queue_mask = (u32 *)of_get_property(np, "fsl,rx-bit-map", NULL);
-		grp->rx_bit_map = queue_mask ?
-			*queue_mask : (DEFAULT_MAPPING >> priv->num_grps);
-		queue_mask = (u32 *)of_get_property(np, "fsl,tx-bit-map", NULL);
-		grp->tx_bit_map = queue_mask ?
-			*queue_mask : (DEFAULT_MAPPING >> priv->num_grps);
+		u32 rxq_mask, txq_mask;
+		int ret;
+
+		grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
+		grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
+
+		ret = of_property_read_u32(np, "fsl,rx-bit-map", &rxq_mask);
+		if (!ret) {
+			grp->rx_bit_map = rxq_mask ?
+			rxq_mask : (DEFAULT_MAPPING >> priv->num_grps);
+		}
+
+		ret = of_property_read_u32(np, "fsl,tx-bit-map", &txq_mask);
+		if (!ret) {
+			grp->tx_bit_map = txq_mask ?
+			txq_mask : (DEFAULT_MAPPING >> priv->num_grps);
+		}
+
+		if (priv->poll_mode == GFAR_SQ_POLLING) {
+			/* One Q per interrupt group: Q0 to G0, Q1 to G1 */
+			grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
+			grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
+		}
 	} else {
 		grp->rx_bit_map = 0xFF;
 		grp->tx_bit_map = 0xFF;
 	}
+
+	/* bit_map's MSB is q0 (from q0 to q7), but for_each_set_bit parses
+	 * right to left, so we need to reverse the 8 bits to get the q index
+	 */
+	grp->rx_bit_map = bitrev8(grp->rx_bit_map);
+	grp->tx_bit_map = bitrev8(grp->tx_bit_map);
+
+	/* Calculate RSTAT, TSTAT, RQUEUE and TQUEUE values,
+	 * also assign queues to groups
+	 */
+	for_each_set_bit(i, &grp->rx_bit_map, priv->num_rx_queues) {
+		if (!grp->rx_queue)
+			grp->rx_queue = priv->rx_queue[i];
+		grp->num_rx_queues++;
+		grp->rstat |= (RSTAT_CLEAR_RHALT >> i);
+		priv->rqueue |= ((RQUEUE_EN0 | RQUEUE_EX0) >> i);
+		priv->rx_queue[i]->grp = grp;
+	}
+
+	for_each_set_bit(i, &grp->tx_bit_map, priv->num_tx_queues) {
+		if (!grp->tx_queue)
+			grp->tx_queue = priv->tx_queue[i];
+		grp->num_tx_queues++;
+		grp->tstat |= (TSTAT_CLEAR_THALT >> i);
+		priv->tqueue |= (TQUEUE_EN0 >> i);
+		priv->tx_queue[i]->grp = grp;
+	}
+
 	priv->num_grps++;
 
 	return 0;
 }
 
+/* Count the available child nodes of @np that are named "queue-group" */
+static int gfar_of_group_count(struct device_node *np)
+{
+	struct device_node *grp_node;
+	int groups = 0;
+
+	for_each_available_child_of_node(np, grp_node)
+		groups += !of_node_cmp(grp_node->name, "queue-group");
+
+	return groups;
+}
+
 static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 {
 	const char *model;
@@ -621,18 +738,54 @@
 	struct gfar_private *priv = NULL;
 	struct device_node *np = ofdev->dev.of_node;
 	struct device_node *child = NULL;
-	const u32 *stash;
-	const u32 *stash_len;
-	const u32 *stash_idx;
+	struct property *stash;
+	u32 stash_len = 0;
+	u32 stash_idx = 0;
 	unsigned int num_tx_qs, num_rx_qs;
-	u32 *tx_queues, *rx_queues;
+	unsigned short mode, poll_mode;
 
-	if (!np || !of_device_is_available(np))
+	if (!np)
 		return -ENODEV;
 
-	/* parse the num of tx and rx queues */
-	tx_queues = (u32 *)of_get_property(np, "fsl,num_tx_queues", NULL);
-	num_tx_qs = tx_queues ? *tx_queues : 1;
+	if (of_device_is_compatible(np, "fsl,etsec2")) {
+		mode = MQ_MG_MODE;
+		poll_mode = GFAR_SQ_POLLING;
+	} else {
+		mode = SQ_SG_MODE;
+		poll_mode = GFAR_SQ_POLLING;
+	}
+
+	if (mode == SQ_SG_MODE) {
+		num_tx_qs = 1;
+		num_rx_qs = 1;
+	} else { /* MQ_MG_MODE */
+		/* get the actual number of supported groups */
+		unsigned int num_grps = gfar_of_group_count(np);
+
+		if (num_grps == 0 || num_grps > MAXGROUPS) {
+			dev_err(&ofdev->dev, "Invalid # of int groups(%d)\n",
+				num_grps);
+			pr_err("Cannot do alloc_etherdev, aborting\n");
+			return -EINVAL;
+		}
+
+		if (poll_mode == GFAR_SQ_POLLING) {
+			num_tx_qs = num_grps; /* one txq per int group */
+			num_rx_qs = num_grps; /* one rxq per int group */
+		} else { /* GFAR_MQ_POLLING */
+			u32 tx_queues, rx_queues;
+			int ret;
+
+			/* parse the num of HW tx and rx queues */
+			ret = of_property_read_u32(np, "fsl,num_tx_queues",
+						   &tx_queues);
+			num_tx_qs = ret ? 1 : tx_queues;
+
+			ret = of_property_read_u32(np, "fsl,num_rx_queues",
+						   &rx_queues);
+			num_rx_qs = ret ? 1 : rx_queues;
+		}
+	}
 
 	if (num_tx_qs > MAX_TX_QS) {
 		pr_err("num_tx_qs(=%d) greater than MAX_TX_QS(=%d)\n",
@@ -641,9 +794,6 @@
 		return -EINVAL;
 	}
 
-	rx_queues = (u32 *)of_get_property(np, "fsl,num_rx_queues", NULL);
-	num_rx_qs = rx_queues ? *rx_queues : 1;
-
 	if (num_rx_qs > MAX_RX_QS) {
 		pr_err("num_rx_qs(=%d) greater than MAX_RX_QS(=%d)\n",
 		       num_rx_qs, MAX_RX_QS);
@@ -659,84 +809,67 @@
 	priv = netdev_priv(dev);
 	priv->ndev = dev;
 
+	priv->mode = mode;
+	priv->poll_mode = poll_mode;
+
 	priv->num_tx_queues = num_tx_qs;
 	netif_set_real_num_rx_queues(dev, num_rx_qs);
 	priv->num_rx_queues = num_rx_qs;
-	priv->num_grps = 0x0;
+
+	err = gfar_alloc_tx_queues(priv);
+	if (err)
+		goto tx_alloc_failed;
+
+	err = gfar_alloc_rx_queues(priv);
+	if (err)
+		goto rx_alloc_failed;
+
+	err = of_property_read_string(np, "model", &model);
+	if (err) {
+		pr_err("Device model property missing, aborting\n");
+		goto rx_alloc_failed;
+	}
 
 	/* Init Rx queue filer rule set linked list */
 	INIT_LIST_HEAD(&priv->rx_list.list);
 	priv->rx_list.count = 0;
 	mutex_init(&priv->rx_queue_access);
 
-	model = of_get_property(np, "model", NULL);
-
 	for (i = 0; i < MAXGROUPS; i++)
 		priv->gfargrp[i].regs = NULL;
 
 	/* Parse and initialize group specific information */
-	if (of_device_is_compatible(np, "fsl,etsec2")) {
-		priv->mode = MQ_MG_MODE;
-		for_each_child_of_node(np, child) {
+	if (priv->mode == MQ_MG_MODE) {
+		for_each_available_child_of_node(np, child) {
+			if (of_node_cmp(child->name, "queue-group"))
+				continue;
+
 			err = gfar_parse_group(child, priv, model);
 			if (err)
 				goto err_grp_init;
 		}
-	} else {
-		priv->mode = SQ_SG_MODE;
+	} else { /* SQ_SG_MODE */
 		err = gfar_parse_group(np, priv, model);
 		if (err)
 			goto err_grp_init;
 	}
 
-	for (i = 0; i < priv->num_tx_queues; i++)
-		priv->tx_queue[i] = NULL;
-	for (i = 0; i < priv->num_rx_queues; i++)
-		priv->rx_queue[i] = NULL;
-
-	for (i = 0; i < priv->num_tx_queues; i++) {
-		priv->tx_queue[i] = kzalloc(sizeof(struct gfar_priv_tx_q),
-					    GFP_KERNEL);
-		if (!priv->tx_queue[i]) {
-			err = -ENOMEM;
-			goto tx_alloc_failed;
-		}
-		priv->tx_queue[i]->tx_skbuff = NULL;
-		priv->tx_queue[i]->qindex = i;
-		priv->tx_queue[i]->dev = dev;
-		spin_lock_init(&(priv->tx_queue[i]->txlock));
-	}
-
-	for (i = 0; i < priv->num_rx_queues; i++) {
-		priv->rx_queue[i] = kzalloc(sizeof(struct gfar_priv_rx_q),
-					    GFP_KERNEL);
-		if (!priv->rx_queue[i]) {
-			err = -ENOMEM;
-			goto rx_alloc_failed;
-		}
-		priv->rx_queue[i]->rx_skbuff = NULL;
-		priv->rx_queue[i]->qindex = i;
-		priv->rx_queue[i]->dev = dev;
-		spin_lock_init(&(priv->rx_queue[i]->rxlock));
-	}
-
-
-	stash = of_get_property(np, "bd-stash", NULL);
+	stash = of_find_property(np, "bd-stash", NULL);
 
 	if (stash) {
 		priv->device_flags |= FSL_GIANFAR_DEV_HAS_BD_STASHING;
 		priv->bd_stash_en = 1;
 	}
 
-	stash_len = of_get_property(np, "rx-stash-len", NULL);
+	err = of_property_read_u32(np, "rx-stash-len", &stash_len);
 
-	if (stash_len)
-		priv->rx_stash_size = *stash_len;
+	if (err == 0)
+		priv->rx_stash_size = stash_len;
 
-	stash_idx = of_get_property(np, "rx-stash-idx", NULL);
+	err = of_property_read_u32(np, "rx-stash-idx", &stash_idx);
 
-	if (stash_idx)
-		priv->rx_stash_index = *stash_idx;
+	if (err == 0)
+		priv->rx_stash_index = stash_idx;
 
 	if (stash_len || stash_idx)
 		priv->device_flags |= FSL_GIANFAR_DEV_HAS_BUF_STASHING;
@@ -747,53 +880,66 @@
 		memcpy(dev->dev_addr, mac_addr, ETH_ALEN);
 
 	if (model && !strcasecmp(model, "TSEC"))
-		priv->device_flags = FSL_GIANFAR_DEV_HAS_GIGABIT |
+		priv->device_flags |= FSL_GIANFAR_DEV_HAS_GIGABIT |
 				     FSL_GIANFAR_DEV_HAS_COALESCE |
 				     FSL_GIANFAR_DEV_HAS_RMON |
 				     FSL_GIANFAR_DEV_HAS_MULTI_INTR;
 
 	if (model && !strcasecmp(model, "eTSEC"))
-		priv->device_flags = FSL_GIANFAR_DEV_HAS_GIGABIT |
+		priv->device_flags |= FSL_GIANFAR_DEV_HAS_GIGABIT |
 				     FSL_GIANFAR_DEV_HAS_COALESCE |
 				     FSL_GIANFAR_DEV_HAS_RMON |
 				     FSL_GIANFAR_DEV_HAS_MULTI_INTR |
-				     FSL_GIANFAR_DEV_HAS_PADDING |
 				     FSL_GIANFAR_DEV_HAS_CSUM |
 				     FSL_GIANFAR_DEV_HAS_VLAN |
 				     FSL_GIANFAR_DEV_HAS_MAGIC_PACKET |
 				     FSL_GIANFAR_DEV_HAS_EXTENDED_HASH |
-				     FSL_GIANFAR_DEV_HAS_TIMER;
+				     FSL_GIANFAR_DEV_HAS_TIMER |
+				     FSL_GIANFAR_DEV_HAS_RX_FILER;
 
-	ctype = of_get_property(np, "phy-connection-type", NULL);
+	err = of_property_read_string(np, "phy-connection-type", &ctype);
 
 	/* We only care about rgmii-id.  The rest are autodetected */
-	if (ctype && !strcmp(ctype, "rgmii-id"))
+	if (err == 0 && !strcmp(ctype, "rgmii-id"))
 		priv->interface = PHY_INTERFACE_MODE_RGMII_ID;
 	else
 		priv->interface = PHY_INTERFACE_MODE_MII;
 
-	if (of_get_property(np, "fsl,magic-packet", NULL))
+	if (of_find_property(np, "fsl,magic-packet", NULL))
 		priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET;
 
+	if (of_get_property(np, "fsl,wake-on-filer", NULL))
+		priv->device_flags |= FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER;
+
 	priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
 
+	/* In the case of a fixed PHY, the DT node associated
+	 * to the PHY is the Ethernet MAC DT node.
+	 */
+	if (!priv->phy_node && of_phy_is_fixed_link(np)) {
+		err = of_phy_register_fixed_link(np);
+		if (err)
+			goto err_grp_init;
+
+		priv->phy_node = of_node_get(np);
+	}
+
 	/* Find the TBI PHY.  If it's not there, we don't support SGMII */
 	priv->tbi_node = of_parse_phandle(np, "tbi-handle", 0);
 
 	return 0;
 
-rx_alloc_failed:
-	free_rx_pointers(priv);
-tx_alloc_failed:
-	free_tx_pointers(priv);
 err_grp_init:
 	unmap_group_regs(priv);
+rx_alloc_failed:
+	gfar_free_rx_queues(priv);
+tx_alloc_failed:
+	gfar_free_tx_queues(priv);
 	free_gfar_dev(priv);
 	return err;
 }
 
-static int gfar_hwtstamp_ioctl(struct net_device *netdev,
-			       struct ifreq *ifr, int cmd)
+static int gfar_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
 {
 	struct hwtstamp_config config;
 	struct gfar_private *priv = netdev_priv(netdev);
@@ -821,18 +967,16 @@
 	switch (config.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
 		if (priv->hwts_rx_en) {
-			stop_gfar(netdev);
 			priv->hwts_rx_en = 0;
-			startup_gfar(netdev);
+			reset_gfar(netdev);
 		}
 		break;
 	default:
 		if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER))
 			return -ERANGE;
 		if (!priv->hwts_rx_en) {
-			stop_gfar(netdev);
 			priv->hwts_rx_en = 1;
-			startup_gfar(netdev);
+			reset_gfar(netdev);
 		}
 		config.rx_filter = HWTSTAMP_FILTER_ALL;
 		break;
@@ -842,7 +986,20 @@
 		-EFAULT : 0;
 }
 
-/* Ioctl MII Interface */
+/* Copy the current hw timestamping settings out through ifr->ifr_data */
+static int gfar_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct gfar_private *priv = netdev_priv(netdev);
+	struct hwtstamp_config cfg = {
+		.flags = 0,
+		.tx_type = priv->hwts_tx_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF,
+		.rx_filter = priv->hwts_rx_en ? HWTSTAMP_FILTER_ALL :
+						HWTSTAMP_FILTER_NONE,
+	};
+
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
 static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	struct gfar_private *priv = netdev_priv(dev);
@@ -851,7 +1008,9 @@
 		return -EINVAL;
 
 	if (cmd == SIOCSHWTSTAMP)
-		return gfar_hwtstamp_ioctl(dev, rq, cmd);
+		return gfar_hwtstamp_set(dev, rq);
+	if (cmd == SIOCGHWTSTAMP)
+		return gfar_hwtstamp_get(dev, rq);
 
 	if (!priv->phydev)
 		return -ENODEV;
@@ -859,19 +1018,6 @@
 	return phy_mii_ioctl(priv->phydev, rq, cmd);
 }
 
-static unsigned int reverse_bitmap(unsigned int bit_map, unsigned int max_qs)
-{
-	unsigned int new_bit_map = 0x0;
-	int mask = 0x1 << (max_qs - 1), i;
-
-	for (i = 0; i < max_qs; i++) {
-		if (bit_map & mask)
-			new_bit_map = new_bit_map + (1 << i);
-		mask = mask >> 0x1;
-	}
-	return new_bit_map;
-}
-
 static u32 cluster_entry_per_class(struct gfar_private *priv, u32 rqfar,
 				   u32 class)
 {
@@ -939,9 +1085,9 @@
 	}
 }
 
-static void gfar_detect_errata(struct gfar_private *priv)
+#ifdef CONFIG_PPC
+static void __gfar_detect_errata_83xx(struct gfar_private *priv)
 {
-	struct device *dev = &priv->ofdev->dev;
 	unsigned int pvr = mfspr(SPRN_PVR);
 	unsigned int svr = mfspr(SPRN_SVR);
 	unsigned int mod = (svr >> 16) & 0xfff6; /* w/o E suffix */
@@ -957,103 +1103,174 @@
 	    (pvr == 0x80861010 && (mod & 0xfff9) == 0x80c0))
 		priv->errata |= GFAR_ERRATA_76;
 
-	/* MPC8313 and MPC837x all rev */
-	if ((pvr == 0x80850010 && mod == 0x80b0) ||
-	    (pvr == 0x80861010 && (mod & 0xfff9) == 0x80c0))
-		priv->errata |= GFAR_ERRATA_A002;
+	/* MPC8313 Rev < 2.0 */
+	if (pvr == 0x80850010 && mod == 0x80b0 && rev < 0x0020)
+		priv->errata |= GFAR_ERRATA_12;
+}
 
-	/* MPC8313 Rev < 2.0, MPC8548 rev 2.0 */
-	if ((pvr == 0x80850010 && mod == 0x80b0 && rev < 0x0020) ||
-	    (pvr == 0x80210020 && mod == 0x8030 && rev == 0x0020))
+static void __gfar_detect_errata_85xx(struct gfar_private *priv)
+{
+	unsigned int svr = mfspr(SPRN_SVR);
+
+	if ((SVR_SOC_VER(svr) == SVR_8548) && (SVR_REV(svr) == 0x20))
 		priv->errata |= GFAR_ERRATA_12;
+	if (((SVR_SOC_VER(svr) == SVR_P2020) && (SVR_REV(svr) < 0x20)) ||
+	    ((SVR_SOC_VER(svr) == SVR_P2010) && (SVR_REV(svr) < 0x20)))
+		priv->errata |= GFAR_ERRATA_76; /* aka eTSEC 20 */
+}
+#endif
+
+static void gfar_detect_errata(struct gfar_private *priv)
+{
+	struct device *dev = &priv->ofdev->dev;
+
+	/* no plans to fix */
+	priv->errata |= GFAR_ERRATA_A002;
+
+#ifdef CONFIG_PPC
+	if (pvr_version_is(PVR_VER_E500V1) || pvr_version_is(PVR_VER_E500V2))
+		__gfar_detect_errata_85xx(priv);
+	else /* non-mpc85xx parts, i.e. e300 core based */
+		__gfar_detect_errata_83xx(priv);
+#endif
 
 	if (priv->errata)
 		dev_info(dev, "enabled errata workarounds, flags: 0x%x\n",
 			 priv->errata);
 }
 
-/* Set up the ethernet device structure, private data,
- * and anything else we need before we start
- */
-static int gfar_probe(struct platform_device *ofdev)
+void gfar_mac_reset(struct gfar_private *priv)
 {
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 	u32 tempval;
-	struct net_device *dev = NULL;
-	struct gfar_private *priv = NULL;
-	struct gfar __iomem *regs = NULL;
-	int err = 0, i, grp_idx = 0;
-	u32 rstat = 0, tstat = 0, rqueue = 0, tqueue = 0;
-	u32 isrg = 0;
-	u32 __iomem *baddr;
-
-	err = gfar_of_init(ofdev, &dev);
-
-	if (err)
-		return err;
-
-	priv = netdev_priv(dev);
-	priv->ndev = dev;
-	priv->ofdev = ofdev;
-	priv->dev = &ofdev->dev;
-	SET_NETDEV_DEV(dev, &ofdev->dev);
 
-	spin_lock_init(&priv->bflock);
-	INIT_WORK(&priv->reset_task, gfar_reset_task);
-
-	dev_set_drvdata(&ofdev->dev, priv);
-	regs = priv->gfargrp[0].regs;
+	/* Reset MAC layer */
+	gfar_write(&regs->maccfg1, MACCFG1_SOFT_RESET);
 
-	gfar_detect_errata(priv);
+	/* We need to delay at least 3 TX clocks */
+	udelay(3);
 
-	/* Stop the DMA engine now, in case it was running before
-	 * (The firmware could have used it, and left it running).
+	/* the soft reset bit is not self-resetting, so we need to
+	 * clear it before resuming normal operation
 	 */
-	gfar_halt(dev);
+	gfar_write(&regs->maccfg1, 0);
 
-	/* Reset MAC layer */
-	gfar_write(&regs->maccfg1, MACCFG1_SOFT_RESET);
+	udelay(3);
 
-	/* We need to delay at least 3 TX clocks */
-	udelay(2);
+	gfar_rx_offload_en(priv);
 
-	tempval = (MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
-	gfar_write(&regs->maccfg1, tempval);
+	/* Initialize the max receive frame/buffer lengths */
+	gfar_write(&regs->maxfrm, GFAR_JUMBO_FRAME_SIZE);
+	gfar_write(&regs->mrblr, GFAR_RXB_SIZE);
+
+	/* Initialize the Minimum Frame Length Register */
+	gfar_write(&regs->minflr, MINFLR_INIT_SETTINGS);
 
 	/* Initialize MACCFG2. */
 	tempval = MACCFG2_INIT_SETTINGS;
+
+	/* eTSEC74 erratum: Rx frames of length MAXFRM or MAXFRM-1
+	 * are marked as truncated.  Avoid this by MACCFG2[Huge Frame]=1,
+	 * and by checking RxBD[LG] and discarding larger than MAXFRM.
+	 */
 	if (gfar_has_errata(priv, GFAR_ERRATA_74))
 		tempval |= MACCFG2_HUGEFRAME | MACCFG2_LENGTHCHECK;
+
 	gfar_write(&regs->maccfg2, tempval);
 
+	/* Clear mac addr hash registers */
+	gfar_write(&regs->igaddr0, 0);
+	gfar_write(&regs->igaddr1, 0);
+	gfar_write(&regs->igaddr2, 0);
+	gfar_write(&regs->igaddr3, 0);
+	gfar_write(&regs->igaddr4, 0);
+	gfar_write(&regs->igaddr5, 0);
+	gfar_write(&regs->igaddr6, 0);
+	gfar_write(&regs->igaddr7, 0);
+
+	gfar_write(&regs->gaddr0, 0);
+	gfar_write(&regs->gaddr1, 0);
+	gfar_write(&regs->gaddr2, 0);
+	gfar_write(&regs->gaddr3, 0);
+	gfar_write(&regs->gaddr4, 0);
+	gfar_write(&regs->gaddr5, 0);
+	gfar_write(&regs->gaddr6, 0);
+	gfar_write(&regs->gaddr7, 0);
+
+	if (priv->extended_hash)
+		gfar_clear_exact_match(priv->ndev);
+
+	gfar_mac_rx_config(priv);
+
+	gfar_mac_tx_config(priv);
+
+	gfar_set_mac_address(priv->ndev);
+
+	gfar_set_multi(priv->ndev);
+
+	/* clear ievent and imask before configuring coalescing */
+	gfar_ints_disable(priv);
+
+	/* Configure the coalescing support */
+	gfar_configure_coalescing_all(priv);
+}
+
+static void gfar_hw_init(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 attrs;
+
+	/* Stop the DMA engine now, in case it was running before
+	 * (The firmware could have used it, and left it running).
+	 */
+	gfar_halt(priv);
+
+	gfar_mac_reset(priv);
+
+	/* Zero out the rmon mib registers if it has them */
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
+		memset_io(&(regs->rmon), 0, sizeof(struct rmon_mib));
+
+		/* Mask off the CAM interrupts */
+		gfar_write(&regs->rmon.cam1, 0xffffffff);
+		gfar_write(&regs->rmon.cam2, 0xffffffff);
+	}
+
 	/* Initialize ECNTRL */
 	gfar_write(&regs->ecntrl, ECNTRL_INIT_SETTINGS);
 
-	/* Set the dev->base_addr to the gfar reg region */
-	dev->base_addr = (unsigned long) regs;
+	/* Set the extraction length and index */
+	attrs = ATTRELI_EL(priv->rx_stash_size) |
+		ATTRELI_EI(priv->rx_stash_index);
 
-	/* Fill in the dev structure */
-	dev->watchdog_timeo = TX_TIMEOUT;
-	dev->mtu = 1500;
-	dev->netdev_ops = &gfar_netdev_ops;
-	dev->ethtool_ops = &gfar_ethtool_ops;
+	gfar_write(&regs->attreli, attrs);
 
-	/* Register for napi ...We are registering NAPI for each grp */
-	for (i = 0; i < priv->num_grps; i++)
-		netif_napi_add(dev, &priv->gfargrp[i].napi, gfar_poll,
-			       GFAR_DEV_WEIGHT);
+	/* Start with defaults, and add stashing
+	 * depending on driver parameters
+	 */
+	attrs = ATTR_INIT_SETTINGS;
 
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) {
-		dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
-				   NETIF_F_RXCSUM;
-		dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG |
-				 NETIF_F_RXCSUM | NETIF_F_HIGHDMA;
-	}
+	if (priv->bd_stash_en)
+		attrs |= ATTR_BDSTASH;
 
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) {
-		dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX |
-				    NETIF_F_HW_VLAN_CTAG_RX;
-		dev->features |= NETIF_F_HW_VLAN_CTAG_RX;
-	}
+	if (priv->rx_stash_size != 0)
+		attrs |= ATTR_BUFSTASH;
+
+	gfar_write(&regs->attr, attrs);
+
+	/* FIFO configs */
+	gfar_write(&regs->fifo_tx_thr, DEFAULT_FIFO_TX_THR);
+	gfar_write(&regs->fifo_tx_starve, DEFAULT_FIFO_TX_STARVE);
+	gfar_write(&regs->fifo_tx_starve_shutoff, DEFAULT_FIFO_TX_STARVE_OFF);
+
+	/* Program the interrupt steering regs, only for MG devices */
+	if (priv->num_grps > 1)
+		gfar_write_isrg(priv);
+}
+
+static void gfar_init_addr_hash_table(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 
 	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) {
 		priv->extended_hash = 1;
@@ -1089,70 +1306,82 @@
 		priv->hash_regs[6] = &regs->gaddr6;
 		priv->hash_regs[7] = &regs->gaddr7;
 	}
+}
 
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_PADDING)
-		priv->padding = DEFAULT_PADDING;
-	else
-		priv->padding = 0;
+/* Set up the ethernet device structure, private data,
+ * and anything else we need before we start
+ */
+static int gfar_probe(struct platform_device *ofdev)
+{
+	struct net_device *dev = NULL;
+	struct gfar_private *priv = NULL;
+	int err = 0, i;
 
-	if (dev->features & NETIF_F_IP_CSUM ||
-	    priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)
-		dev->needed_headroom = GMAC_FCB_LEN;
+	err = gfar_of_init(ofdev, &dev);
 
-	/* Program the isrg regs only if number of grps > 1 */
-	if (priv->num_grps > 1) {
-		baddr = &regs->isrg0;
-		for (i = 0; i < priv->num_grps; i++) {
-			isrg |= (priv->gfargrp[i].rx_bit_map << ISRG_SHIFT_RX);
-			isrg |= (priv->gfargrp[i].tx_bit_map << ISRG_SHIFT_TX);
-			gfar_write(baddr, isrg);
-			baddr++;
-			isrg = 0x0;
+	if (err)
+		return err;
+
+	priv = netdev_priv(dev);
+	priv->ndev = dev;
+	priv->ofdev = ofdev;
+	priv->dev = &ofdev->dev;
+	SET_NETDEV_DEV(dev, &ofdev->dev);
+
+	INIT_WORK(&priv->reset_task, gfar_reset_task);
+
+	platform_set_drvdata(ofdev, priv);
+
+	gfar_detect_errata(priv);
+
+	/* Set the dev->base_addr to the gfar reg region */
+	dev->base_addr = (unsigned long) priv->gfargrp[0].regs;
+
+	/* Fill in the dev structure */
+	dev->watchdog_timeo = TX_TIMEOUT;
+	dev->mtu = 1500;
+	dev->netdev_ops = &gfar_netdev_ops;
+	dev->ethtool_ops = &gfar_ethtool_ops;
+
+	/* Register NAPI: one Rx and one Tx polling context for each grp */
+	for (i = 0; i < priv->num_grps; i++) {
+		if (priv->poll_mode == GFAR_SQ_POLLING) {
+			netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
+				       gfar_poll_rx_sq, GFAR_DEV_WEIGHT);
+			netif_napi_add(dev, &priv->gfargrp[i].napi_tx,
+				       gfar_poll_tx_sq, 2);
+		} else {
+			netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
+				       gfar_poll_rx, GFAR_DEV_WEIGHT);
+			netif_napi_add(dev, &priv->gfargrp[i].napi_tx,
+				       gfar_poll_tx, 2);
 		}
 	}
 
-	/* Need to reverse the bit maps as  bit_map's MSB is q0
-	 * but, for_each_set_bit parses from right to left, which
-	 * basically reverses the queue numbers
-	 */
-	for (i = 0; i< priv->num_grps; i++) {
-		priv->gfargrp[i].tx_bit_map =
-			reverse_bitmap(priv->gfargrp[i].tx_bit_map, MAX_TX_QS);
-		priv->gfargrp[i].rx_bit_map =
-			reverse_bitmap(priv->gfargrp[i].rx_bit_map, MAX_RX_QS);
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) {
+		dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
+				   NETIF_F_RXCSUM;
+		dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG |
+				 NETIF_F_RXCSUM | NETIF_F_HIGHDMA;
+	}
+
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) {
+		dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX |
+				    NETIF_F_HW_VLAN_CTAG_RX;
+		dev->features |= NETIF_F_HW_VLAN_CTAG_RX;
 	}
 
-	/* Calculate RSTAT, TSTAT, RQUEUE and TQUEUE values,
-	 * also assign queues to groups
-	 */
-	for (grp_idx = 0; grp_idx < priv->num_grps; grp_idx++) {
-		priv->gfargrp[grp_idx].num_rx_queues = 0x0;
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
-		for_each_set_bit(i, &priv->gfargrp[grp_idx].rx_bit_map,
-				 priv->num_rx_queues) {
-			priv->gfargrp[grp_idx].num_rx_queues++;
-			priv->rx_queue[i]->grp = &priv->gfargrp[grp_idx];
-			rstat = rstat | (RSTAT_CLEAR_RHALT >> i);
-			rqueue = rqueue | ((RQUEUE_EN0 | RQUEUE_EX0) >> i);
-		}
-		priv->gfargrp[grp_idx].num_tx_queues = 0x0;
-
-		for_each_set_bit(i, &priv->gfargrp[grp_idx].tx_bit_map,
-				 priv->num_tx_queues) {
-			priv->gfargrp[grp_idx].num_tx_queues++;
-			priv->tx_queue[i]->grp = &priv->gfargrp[grp_idx];
-			tstat = tstat | (TSTAT_CLEAR_THALT >> i);
-			tqueue = tqueue | (TQUEUE_EN0 >> i);
-		}
-		priv->gfargrp[grp_idx].rstat = rstat;
-		priv->gfargrp[grp_idx].tstat = tstat;
-		rstat = tstat =0;
-	}
+	gfar_init_addr_hash_table(priv);
 
-	gfar_write(&regs->rqueue, rqueue);
-	gfar_write(&regs->tqueue, tqueue);
+	/* Insert receive time stamps into padding alignment bytes */
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)
+		priv->padding = 8;
 
-	priv->rx_buffer_size = DEFAULT_RX_BUFFER_SIZE;
+	if (dev->features & NETIF_F_IP_CSUM ||
+	    priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER)
+		dev->needed_headroom = GMAC_FCB_LEN;
 
 	/* Initializing some of the rx/tx queue level parameters */
 	for (i = 0; i < priv->num_tx_queues; i++) {
@@ -1168,14 +1397,19 @@
 		priv->rx_queue[i]->rxic = DEFAULT_RXIC;
 	}
 
-	/* always enable rx filer */
-	priv->rx_filer_enable = 1;
+	/* Always enable rx filer if available */
+	priv->rx_filer_enable =
+	    (priv->device_flags & FSL_GIANFAR_DEV_HAS_RX_FILER) ? 1 : 0;
 	/* Enable most messages by default */
 	priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1;
 	/* use pritority h/w tx queue scheduling for single queue devices */
 	if (priv->num_tx_queues == 1)
 		priv->prio_sched_en = 1;
 
+	set_bit(GFAR_DOWN, &priv->state);
+
+	gfar_hw_init(priv);
+
 	/* Carrier starts down, phylib will bring it up */
 	netif_carrier_off(dev);
 
@@ -1186,9 +1420,14 @@
 		goto register_fail;
 	}
 
-	device_init_wakeup(&dev->dev,
-			   priv->device_flags &
-			   FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET)
+		priv->wol_supported |= GFAR_WOL_MAGIC;
+
+	if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER) &&
+	    priv->rx_filer_enable)
+		priv->wol_supported |= GFAR_WOL_FILER_UCAST;
+
+	device_set_wakeup_capable(&ofdev->dev, priv->wol_supported);
 
 	/* fill out IRQ number and name fields */
 	for (i = 0; i < priv->num_grps; i++) {
@@ -1207,9 +1446,6 @@
 	/* Initialize the filer table */
 	gfar_init_filer_table(priv);
 
-	/* Create all the sysfs files */
-	gfar_init_sysfs(dev);
-
 	/* Print out the device info */
 	netdev_info(dev, "mac: %pM\n", dev->dev_addr);
 
@@ -1228,29 +1464,25 @@
 
 register_fail:
 	unmap_group_regs(priv);
-	free_tx_pointers(priv);
-	free_rx_pointers(priv);
-	if (priv->phy_node)
-		of_node_put(priv->phy_node);
-	if (priv->tbi_node)
-		of_node_put(priv->tbi_node);
+	gfar_free_rx_queues(priv);
+	gfar_free_tx_queues(priv);
+	of_node_put(priv->phy_node);
+	of_node_put(priv->tbi_node);
 	free_gfar_dev(priv);
 	return err;
 }
 
 static int gfar_remove(struct platform_device *ofdev)
 {
-	struct gfar_private *priv = dev_get_drvdata(&ofdev->dev);
-
-	if (priv->phy_node)
-		of_node_put(priv->phy_node);
-	if (priv->tbi_node)
-		of_node_put(priv->tbi_node);
+	struct gfar_private *priv = platform_get_drvdata(ofdev);
 
-	dev_set_drvdata(&ofdev->dev, NULL);
+	of_node_put(priv->phy_node);
+	of_node_put(priv->tbi_node);
 
 	unregister_netdev(priv->ndev);
 	unmap_group_regs(priv);
+	gfar_free_rx_queues(priv);
+	gfar_free_tx_queues(priv);
 	free_gfar_dev(priv);
 
 	return 0;
@@ -1258,55 +1490,153 @@
 
 #ifdef CONFIG_PM
 
+static void __gfar_filer_disable(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 temp;
+
+	temp = gfar_read(&regs->rctrl);
+	temp &= ~(RCTRL_FILREN | RCTRL_PRSDEP_INIT);
+	gfar_write(&regs->rctrl, temp);
+}
+
+static void __gfar_filer_enable(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 temp;
+
+	temp = gfar_read(&regs->rctrl);
+	temp |= RCTRL_FILREN | RCTRL_PRSDEP_INIT;
+	gfar_write(&regs->rctrl, temp);
+}
+
+/* Filer rules implementing wol capabilities */
+static void gfar_filer_config_wol(struct gfar_private *priv)
+{
+	unsigned int i;
+	u32 rqfcr;
+
+	__gfar_filer_disable(priv);
+
+	/* clear the filer table, reject any packet by default */
+	rqfcr = RQFCR_RJE | RQFCR_CMP_MATCH;
+	for (i = 0; i <= MAX_FILER_IDX; i++)
+		gfar_write_filer(priv, i, rqfcr, 0);
+
+	i = 0;
+	if (priv->wol_opts & GFAR_WOL_FILER_UCAST) {
+		/* unicast packet, accept it */
+		struct net_device *ndev = priv->ndev;
+		/* get the default rx queue index */
+		u8 qindex = (u8)priv->gfargrp[0].rx_queue->qindex;
+		u32 dest_mac_addr = (ndev->dev_addr[0] << 16) |
+				    (ndev->dev_addr[1] << 8) |
+				     ndev->dev_addr[2];
+
+		rqfcr = (qindex << 10) | RQFCR_AND |
+			RQFCR_CMP_EXACT | RQFCR_PID_DAH;
+
+		gfar_write_filer(priv, i++, rqfcr, dest_mac_addr);
+
+		dest_mac_addr = (ndev->dev_addr[3] << 16) |
+				(ndev->dev_addr[4] << 8) |
+				 ndev->dev_addr[5];
+		rqfcr = (qindex << 10) | RQFCR_GPI |
+			RQFCR_CMP_EXACT | RQFCR_PID_DAL;
+		gfar_write_filer(priv, i++, rqfcr, dest_mac_addr);
+	}
+
+	__gfar_filer_enable(priv);
+}
+
+static void gfar_filer_restore_table(struct gfar_private *priv)
+{
+	u32 rqfcr, rqfpr;
+	unsigned int i;
+
+	__gfar_filer_disable(priv);
+
+	for (i = 0; i <= MAX_FILER_IDX; i++) {
+		rqfcr = priv->ftp_rqfcr[i];
+		rqfpr = priv->ftp_rqfpr[i];
+		gfar_write_filer(priv, i, rqfcr, rqfpr);
+	}
+
+	__gfar_filer_enable(priv);
+}
+
+/* gfar_start() for Rx only and with the FGPI filer interrupt enabled */
+static void gfar_start_wol_filer(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	u32 tempval;
+	int i = 0;
+
+	/* Enable Rx hw queues */
+	gfar_write(&regs->rqueue, priv->rqueue);
+
+	/* Initialize DMACTRL to have WWR and WOP */
+	tempval = gfar_read(&regs->dmactrl);
+	tempval |= DMACTRL_INIT_SETTINGS;
+	gfar_write(&regs->dmactrl, tempval);
+
+	/* Make sure we aren't stopped */
+	tempval = gfar_read(&regs->dmactrl);
+	tempval &= ~DMACTRL_GRS;
+	gfar_write(&regs->dmactrl, tempval);
+
+	for (i = 0; i < priv->num_grps; i++) {
+		regs = priv->gfargrp[i].regs;
+		/* Clear RHLT, so that the DMA starts polling now */
+		gfar_write(&regs->rstat, priv->gfargrp[i].rstat);
+		/* enable the Filer General Purpose Interrupt */
+		gfar_write(&regs->imask, IMASK_FGPI);
+	}
+
+	/* Enable Rx DMA */
+	tempval = gfar_read(&regs->maccfg1);
+	tempval |= MACCFG1_RX_EN;
+	gfar_write(&regs->maccfg1, tempval);
+}
+
 static int gfar_suspend(struct device *dev)
 {
 	struct gfar_private *priv = dev_get_drvdata(dev);
 	struct net_device *ndev = priv->ndev;
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	unsigned long flags;
 	u32 tempval;
+	u16 wol = priv->wol_opts;
 
-	int magic_packet = priv->wol_en &&
-			   (priv->device_flags &
-			    FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
+	if (!netif_running(ndev))
+		return 0;
 
+	disable_napi(priv);
+	netif_tx_lock(ndev);
 	netif_device_detach(ndev);
+	netif_tx_unlock(ndev);
 
-	if (netif_running(ndev)) {
+	gfar_halt(priv);
 
-		local_irq_save(flags);
-		lock_tx_qs(priv);
-		lock_rx_qs(priv);
+	if (wol & GFAR_WOL_MAGIC) {
+		/* Enable interrupt on Magic Packet */
+		gfar_write(&regs->imask, IMASK_MAG);
 
-		gfar_halt_nodisable(ndev);
+		/* Enable Magic Packet mode */
+		tempval = gfar_read(&regs->maccfg2);
+		tempval |= MACCFG2_MPEN;
+		gfar_write(&regs->maccfg2, tempval);
 
-		/* Disable Tx, and Rx if wake-on-LAN is disabled. */
+		/* re-enable the Rx block */
 		tempval = gfar_read(&regs->maccfg1);
-
-		tempval &= ~MACCFG1_TX_EN;
-
-		if (!magic_packet)
-			tempval &= ~MACCFG1_RX_EN;
-
+		tempval |= MACCFG1_RX_EN;
 		gfar_write(&regs->maccfg1, tempval);
 
-		unlock_rx_qs(priv);
-		unlock_tx_qs(priv);
-		local_irq_restore(flags);
-
-		disable_napi(priv);
-
-		if (magic_packet) {
-			/* Enable interrupt on Magic Packet */
-			gfar_write(&regs->imask, IMASK_MAG);
-
-			/* Enable Magic Packet mode */
-			tempval = gfar_read(&regs->maccfg2);
-			tempval |= MACCFG2_MPEN;
-			gfar_write(&regs->maccfg2, tempval);
-		} else {
-			phy_stop(priv->phydev);
-		}
+	} else if (wol & GFAR_WOL_FILER_UCAST) {
+		gfar_filer_config_wol(priv);
+		gfar_start_wol_filer(priv);
+
+	} else {
+		phy_stop(priv->phydev);
 	}
 
 	return 0;
@@ -1317,39 +1647,30 @@
 	struct gfar_private *priv = dev_get_drvdata(dev);
 	struct net_device *ndev = priv->ndev;
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	unsigned long flags;
 	u32 tempval;
-	int magic_packet = priv->wol_en &&
-			   (priv->device_flags &
-			    FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
+	u16 wol = priv->wol_opts;
 
-	if (!netif_running(ndev)) {
-		netif_device_attach(ndev);
+	if (!netif_running(ndev))
 		return 0;
-	}
 
-	if (!magic_packet && priv->phydev)
-		phy_start(priv->phydev);
-
-	/* Disable Magic Packet mode, in case something
-	 * else woke us up.
-	 */
-	local_irq_save(flags);
-	lock_tx_qs(priv);
-	lock_rx_qs(priv);
+	if (wol & GFAR_WOL_MAGIC) {
+		/* Disable Magic Packet mode */
+		tempval = gfar_read(&regs->maccfg2);
+		tempval &= ~MACCFG2_MPEN;
+		gfar_write(&regs->maccfg2, tempval);
 
-	tempval = gfar_read(&regs->maccfg2);
-	tempval &= ~MACCFG2_MPEN;
-	gfar_write(&regs->maccfg2, tempval);
+	} else if (wol & GFAR_WOL_FILER_UCAST) {
+		/* need to stop rx only, tx is already down */
+		gfar_halt(priv);
+		gfar_filer_restore_table(priv);
 
-	gfar_start(ndev);
+	} else {
+		phy_start(priv->phydev);
+	}
 
-	unlock_rx_qs(priv);
-	unlock_tx_qs(priv);
-	local_irq_restore(flags);
+	gfar_start(priv);
 
 	netif_device_attach(ndev);
-
 	enable_napi(priv);
 
 	return 0;
@@ -1366,15 +1687,13 @@
 		return 0;
 	}
 
-	if (gfar_init_bds(ndev)) {
-		free_skb_resources(priv);
-		return -ENOMEM;
-	}
+	gfar_init_bds(ndev);
 
-	init_registers(ndev);
-	gfar_set_mac_address(ndev);
-	gfar_init_mac(ndev);
-	gfar_start(ndev);
+	gfar_mac_reset(priv);
+
+	gfar_init_tx_rx_base(priv);
+
+	gfar_start(priv);
 
 	priv->oldlink = 0;
 	priv->oldspeed = 0;
@@ -1458,7 +1777,7 @@
 	struct gfar_private *priv = netdev_priv(dev);
 	uint gigabit_support =
 		priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
-		SUPPORTED_1000baseT_Full : 0;
+		GFAR_SUPPORTED_GBIT : 0;
 	phy_interface_t interface;
 
 	priv->oldlink = 0;
@@ -1469,9 +1788,6 @@
 
 	priv->phydev = of_phy_connect(dev, priv->phy_node, &adjust_link, 0,
 				      interface);
-	if (!priv->phydev)
-		priv->phydev = of_phy_connect_fixed_link(dev, &adjust_link,
-							 interface);
 	if (!priv->phydev) {
 		dev_err(&dev->dev, "could not attach to PHY\n");
 		return -ENODEV;
@@ -1484,6 +1800,9 @@
 	priv->phydev->supported &= (GFAR_SUPPORTED | gigabit_support);
 	priv->phydev->advertising = priv->phydev->supported;
 
+	/* Add support for flow control, but don't advertise it by default */
+	priv->phydev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+
 	return 0;
 }
 
@@ -1517,8 +1836,10 @@
 	 * everything for us?  Resetting it takes the link down and requires
 	 * several seconds for it to come back.
 	 */
-	if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS)
+	if (phy_read(tbiphy, MII_BMSR) & BMSR_LSTATUS) {
+		put_device(&tbiphy->dev);
 		return;
+	}
 
 	/* Single clk mode, mii mode off(for serdes communication) */
 	phy_write(tbiphy, MII_TBICON, TBICON_CLK_SELECT);
@@ -1530,57 +1851,8 @@
 	phy_write(tbiphy, MII_BMCR,
 		  BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX |
 		  BMCR_SPEED1000);
-}
-
-static void init_registers(struct net_device *dev)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar __iomem *regs = NULL;
-	int i;
-
-	for (i = 0; i < priv->num_grps; i++) {
-		regs = priv->gfargrp[i].regs;
-		/* Clear IEVENT */
-		gfar_write(&regs->ievent, IEVENT_INIT_CLEAR);
-
-		/* Initialize IMASK */
-		gfar_write(&regs->imask, IMASK_INIT_CLEAR);
-	}
-
-	regs = priv->gfargrp[0].regs;
-	/* Init hash registers to zero */
-	gfar_write(&regs->igaddr0, 0);
-	gfar_write(&regs->igaddr1, 0);
-	gfar_write(&regs->igaddr2, 0);
-	gfar_write(&regs->igaddr3, 0);
-	gfar_write(&regs->igaddr4, 0);
-	gfar_write(&regs->igaddr5, 0);
-	gfar_write(&regs->igaddr6, 0);
-	gfar_write(&regs->igaddr7, 0);
-
-	gfar_write(&regs->gaddr0, 0);
-	gfar_write(&regs->gaddr1, 0);
-	gfar_write(&regs->gaddr2, 0);
-	gfar_write(&regs->gaddr3, 0);
-	gfar_write(&regs->gaddr4, 0);
-	gfar_write(&regs->gaddr5, 0);
-	gfar_write(&regs->gaddr6, 0);
-	gfar_write(&regs->gaddr7, 0);
-
-	/* Zero out the rmon mib registers if it has them */
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
-		memset_io(&(regs->rmon), 0, sizeof (struct rmon_mib));
 
-		/* Mask off the CAM interrupts */
-		gfar_write(&regs->rmon.cam1, 0xffffffff);
-		gfar_write(&regs->rmon.cam2, 0xffffffff);
-	}
-
-	/* Initialize the max receive buffer length */
-	gfar_write(&regs->mrblr, priv->rx_buffer_size);
-
-	/* Initialize the Minimum Frame Length Register */
-	gfar_write(&regs->minflr, MINFLR_INIT_SETTINGS);
+	put_device(&tbiphy->dev);
 }
 
 static int __gfar_is_rx_idle(struct gfar_private *priv)
@@ -1590,7 +1862,7 @@
 	/* Normaly TSEC should not hang on GRS commands, so we should
 	 * actually wait for IEVENT_GRSC flag.
 	 */
-	if (likely(!gfar_has_errata(priv, GFAR_ERRATA_A002)))
+	if (!gfar_has_errata(priv, GFAR_ERRATA_A002))
 		return 0;
 
 	/* Read the eTSEC register at offset 0xD1C. If bits 7-14 are
@@ -1606,93 +1878,74 @@
 }
 
 /* Halt the receive and transmit queues */
-static void gfar_halt_nodisable(struct net_device *dev)
+static void gfar_halt_nodisable(struct gfar_private *priv)
 {
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar __iomem *regs = NULL;
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 	u32 tempval;
-	int i;
+	unsigned int timeout;
+	int stopped;
 
-	for (i = 0; i < priv->num_grps; i++) {
-		regs = priv->gfargrp[i].regs;
-		/* Mask all interrupts */
-		gfar_write(&regs->imask, IMASK_INIT_CLEAR);
+	gfar_ints_disable(priv);
 
-		/* Clear all interrupts */
-		gfar_write(&regs->ievent, IEVENT_INIT_CLEAR);
-	}
+	if (gfar_is_dma_stopped(priv))
+		return;
 
-	regs = priv->gfargrp[0].regs;
 	/* Stop the DMA, and wait for it to stop */
 	tempval = gfar_read(&regs->dmactrl);
-	if ((tempval & (DMACTRL_GRS | DMACTRL_GTS)) !=
-	    (DMACTRL_GRS | DMACTRL_GTS)) {
-		int ret;
-
-		tempval |= (DMACTRL_GRS | DMACTRL_GTS);
-		gfar_write(&regs->dmactrl, tempval);
+	tempval |= (DMACTRL_GRS | DMACTRL_GTS);
+	gfar_write(&regs->dmactrl, tempval);
 
-		do {
-			ret = spin_event_timeout(((gfar_read(&regs->ievent) &
-				 (IEVENT_GRSC | IEVENT_GTSC)) ==
-				 (IEVENT_GRSC | IEVENT_GTSC)), 1000000, 0);
-			if (!ret && !(gfar_read(&regs->ievent) & IEVENT_GRSC))
-				ret = __gfar_is_rx_idle(priv);
-		} while (!ret);
+retry:
+	timeout = 1000;
+	while (!(stopped = gfar_is_dma_stopped(priv)) && timeout) {
+		cpu_relax();
+		timeout--;
 	}
+
+	if (!timeout)
+		stopped = gfar_is_dma_stopped(priv);
+
+	if (!stopped && !gfar_is_rx_dma_stopped(priv) &&
+	    !__gfar_is_rx_idle(priv))
+		goto retry;
 }
 
 /* Halt the receive and transmit queues */
-void gfar_halt(struct net_device *dev)
+void gfar_halt(struct gfar_private *priv)
 {
-	struct gfar_private *priv = netdev_priv(dev);
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 	u32 tempval;
 
-	gfar_halt_nodisable(dev);
+	/* Disable the Rx/Tx hw queues */
+	gfar_write(&regs->rqueue, 0);
+	gfar_write(&regs->tqueue, 0);
+
+	mdelay(10);
+
+	gfar_halt_nodisable(priv);
 
-	/* Disable Rx and Tx */
+	/* Disable Rx/Tx DMA */
 	tempval = gfar_read(&regs->maccfg1);
 	tempval &= ~(MACCFG1_RX_EN | MACCFG1_TX_EN);
 	gfar_write(&regs->maccfg1, tempval);
 }
 
-static void free_grp_irqs(struct gfar_priv_grp *grp)
-{
-	free_irq(gfar_irq(grp, TX)->irq, grp);
-	free_irq(gfar_irq(grp, RX)->irq, grp);
-	free_irq(gfar_irq(grp, ER)->irq, grp);
-}
-
 void stop_gfar(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	unsigned long flags;
-	int i;
 
-	phy_stop(priv->phydev);
+	netif_tx_stop_all_queues(dev);
 
+	smp_mb__before_atomic();
+	set_bit(GFAR_DOWN, &priv->state);
+	smp_mb__after_atomic();
 
-	/* Lock it down */
-	local_irq_save(flags);
-	lock_tx_qs(priv);
-	lock_rx_qs(priv);
-
-	gfar_halt(dev);
-
-	unlock_rx_qs(priv);
-	unlock_tx_qs(priv);
-	local_irq_restore(flags);
+	disable_napi(priv);
 
-	/* Free the IRQs */
-	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
-		for (i = 0; i < priv->num_grps; i++)
-			free_grp_irqs(&priv->gfargrp[i]);
-	} else {
-		for (i = 0; i < priv->num_grps; i++)
-			free_irq(gfar_irq(&priv->gfargrp[i], TX)->irq,
-				 &priv->gfargrp[i]);
-	}
+	/* disable ints and gracefully shut down Rx/Tx DMA */
+	gfar_halt(priv);
+
+	phy_stop(priv->phydev);
 
 	free_skb_resources(priv);
 }
@@ -1709,14 +1962,15 @@
 		if (!tx_queue->tx_skbuff[i])
 			continue;
 
-		dma_unmap_single(priv->dev, txbdp->bufPtr,
-				 txbdp->length, DMA_TO_DEVICE);
+		dma_unmap_single(priv->dev, be32_to_cpu(txbdp->bufPtr),
+				 be16_to_cpu(txbdp->length), DMA_TO_DEVICE);
 		txbdp->lstatus = 0;
 		for (j = 0; j < skb_shinfo(tx_queue->tx_skbuff[i])->nr_frags;
 		     j++) {
 			txbdp++;
-			dma_unmap_page(priv->dev, txbdp->bufPtr,
-				       txbdp->length, DMA_TO_DEVICE);
+			dma_unmap_page(priv->dev, be32_to_cpu(txbdp->bufPtr),
+				       be16_to_cpu(txbdp->length),
+				       DMA_TO_DEVICE);
 		}
 		txbdp++;
 		dev_kfree_skb_any(tx_queue->tx_skbuff[i]);
@@ -1728,26 +1982,32 @@
 
 static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue)
 {
-	struct rxbd8 *rxbdp;
-	struct gfar_private *priv = netdev_priv(rx_queue->dev);
 	int i;
 
-	rxbdp = rx_queue->rx_bd_base;
+	struct rxbd8 *rxbdp = rx_queue->rx_bd_base;
+
+	if (rx_queue->skb)
+		dev_kfree_skb(rx_queue->skb);
 
 	for (i = 0; i < rx_queue->rx_ring_size; i++) {
-		if (rx_queue->rx_skbuff[i]) {
-			dma_unmap_single(priv->dev, rxbdp->bufPtr,
-					 priv->rx_buffer_size,
-					 DMA_FROM_DEVICE);
-			dev_kfree_skb_any(rx_queue->rx_skbuff[i]);
-			rx_queue->rx_skbuff[i] = NULL;
-		}
+		struct	gfar_rx_buff *rxb = &rx_queue->rx_buff[i];
+
 		rxbdp->lstatus = 0;
 		rxbdp->bufPtr = 0;
 		rxbdp++;
+
+		if (!rxb->page)
+			continue;
+
+		dma_unmap_single(rx_queue->dev, rxb->dma,
+				 PAGE_SIZE, DMA_FROM_DEVICE);
+		__free_page(rxb->page);
+
+		rxb->page = NULL;
 	}
-	kfree(rx_queue->rx_skbuff);
-	rx_queue->rx_skbuff = NULL;
+
+	kfree(rx_queue->rx_buff);
+	rx_queue->rx_buff = NULL;
 }
 
 /* If there are any tx skbs or rx skbs still around, free them.
@@ -1772,7 +2032,7 @@
 
 	for (i = 0; i < priv->num_rx_queues; i++) {
 		rx_queue = priv->rx_queue[i];
-		if (rx_queue->rx_skbuff)
+		if (rx_queue->rx_buff)
 			free_skb_rx_queue(rx_queue);
 	}
 
@@ -1783,17 +2043,15 @@
 			  priv->tx_queue[0]->tx_bd_dma_base);
 }
 
-void gfar_start(struct net_device *dev)
+void gfar_start(struct gfar_private *priv)
 {
-	struct gfar_private *priv = netdev_priv(dev);
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 	u32 tempval;
 	int i = 0;
 
-	/* Enable Rx and Tx in MACCFG1 */
-	tempval = gfar_read(&regs->maccfg1);
-	tempval |= (MACCFG1_RX_EN | MACCFG1_TX_EN);
-	gfar_write(&regs->maccfg1, tempval);
+	/* Enable Rx/Tx hw queues */
+	gfar_write(&regs->rqueue, priv->rqueue);
+	gfar_write(&regs->tqueue, priv->tqueue);
 
 	/* Initialize DMACTRL to have WWR and WOP */
 	tempval = gfar_read(&regs->dmactrl);
@@ -1810,52 +2068,23 @@
 		/* Clear THLT/RHLT, so that the DMA starts polling now */
 		gfar_write(&regs->tstat, priv->gfargrp[i].tstat);
 		gfar_write(&regs->rstat, priv->gfargrp[i].rstat);
-		/* Unmask the interrupts we look for */
-		gfar_write(&regs->imask, IMASK_DEFAULT);
 	}
 
-	dev->trans_start = jiffies; /* prevent tx timeout */
-}
-
-static void gfar_configure_coalescing(struct gfar_private *priv,
-			       unsigned long tx_mask, unsigned long rx_mask)
-{
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 __iomem *baddr;
-
-	if (priv->mode == MQ_MG_MODE) {
-		int i = 0;
+	/* Enable Rx/Tx DMA */
+	tempval = gfar_read(&regs->maccfg1);
+	tempval |= (MACCFG1_RX_EN | MACCFG1_TX_EN);
+	gfar_write(&regs->maccfg1, tempval);
 
-		baddr = &regs->txic0;
-		for_each_set_bit(i, &tx_mask, priv->num_tx_queues) {
-			gfar_write(baddr + i, 0);
-			if (likely(priv->tx_queue[i]->txcoalescing))
-				gfar_write(baddr + i, priv->tx_queue[i]->txic);
-		}
+	gfar_ints_enable(priv);
 
-		baddr = &regs->rxic0;
-		for_each_set_bit(i, &rx_mask, priv->num_rx_queues) {
-			gfar_write(baddr + i, 0);
-			if (likely(priv->rx_queue[i]->rxcoalescing))
-				gfar_write(baddr + i, priv->rx_queue[i]->rxic);
-		}
-	} else {
-		/* Backward compatible case -- even if we enable
-		 * multiple queues, there's only single reg to program
-		 */
-		gfar_write(&regs->txic, 0);
-		if (likely(priv->tx_queue[0]->txcoalescing))
-			gfar_write(&regs->txic, priv->tx_queue[0]->txic);
-
-		gfar_write(&regs->rxic, 0);
-		if (unlikely(priv->rx_queue[0]->rxcoalescing))
-			gfar_write(&regs->rxic, priv->rx_queue[0]->rxic);
-	}
+	priv->ndev->trans_start = jiffies; /* prevent tx timeout */
 }
 
-void gfar_configure_coalescing_all(struct gfar_private *priv)
+static void free_grp_irqs(struct gfar_priv_grp *grp)
 {
-	gfar_configure_coalescing(priv, 0xFF, 0xFF);
+	free_irq(gfar_irq(grp, TX)->irq, grp);
+	free_irq(gfar_irq(grp, RX)->irq, grp);
+	free_irq(gfar_irq(grp, ER)->irq, grp);
 }
 
 static int register_grp_irqs(struct gfar_priv_grp *grp)
@@ -1879,6 +2108,8 @@
 
 			goto err_irq_fail;
 		}
+		enable_irq_wake(gfar_irq(grp, ER)->irq);
+
 		err = request_irq(gfar_irq(grp, TX)->irq, gfar_transmit, 0,
 				  gfar_irq(grp, TX)->name, grp);
 		if (err < 0) {
@@ -1893,6 +2124,8 @@
 				  gfar_irq(grp, RX)->irq);
 			goto rx_irq_fail;
 		}
+		enable_irq_wake(gfar_irq(grp, RX)->irq);
+
 	} else {
 		err = request_irq(gfar_irq(grp, TX)->irq, gfar_interrupt, 0,
 				  gfar_irq(grp, TX)->name, grp);
@@ -1901,6 +2134,7 @@
 				  gfar_irq(grp, TX)->irq);
 			goto err_irq_fail;
 		}
+		enable_irq_wake(gfar_irq(grp, TX)->irq);
 	}
 
 	return 0;
@@ -1914,46 +2148,70 @@
 
 }
 
-/* Bring the controller up and running */
-int startup_gfar(struct net_device *ndev)
+static void gfar_free_irq(struct gfar_private *priv)
 {
-	struct gfar_private *priv = netdev_priv(ndev);
-	struct gfar __iomem *regs = NULL;
-	int err, i, j;
+	int i;
 
-	for (i = 0; i < priv->num_grps; i++) {
-		regs= priv->gfargrp[i].regs;
-		gfar_write(&regs->imask, IMASK_INIT_CLEAR);
+	/* Free the IRQs */
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+		for (i = 0; i < priv->num_grps; i++)
+			free_grp_irqs(&priv->gfargrp[i]);
+	} else {
+		for (i = 0; i < priv->num_grps; i++)
+			free_irq(gfar_irq(&priv->gfargrp[i], TX)->irq,
+				 &priv->gfargrp[i]);
 	}
+}
 
-	regs= priv->gfargrp[0].regs;
-	err = gfar_alloc_skb_resources(ndev);
-	if (err)
-		return err;
-
-	gfar_init_mac(ndev);
+static int gfar_request_irq(struct gfar_private *priv)
+{
+	int err, i, j;
 
 	for (i = 0; i < priv->num_grps; i++) {
 		err = register_grp_irqs(&priv->gfargrp[i]);
 		if (err) {
 			for (j = 0; j < i; j++)
 				free_grp_irqs(&priv->gfargrp[j]);
-			goto irq_fail;
+			return err;
 		}
 	}
 
-	/* Start the controller */
-	gfar_start(ndev);
+	return 0;
+}
+
+/* Bring the controller up and running */
+int startup_gfar(struct net_device *ndev)
+{
+	struct gfar_private *priv = netdev_priv(ndev);
+	int err;
+
+	gfar_mac_reset(priv);
+
+	err = gfar_alloc_skb_resources(ndev);
+	if (err)
+		return err;
+
+	gfar_init_tx_rx_base(priv);
+
+	smp_mb__before_atomic();
+	clear_bit(GFAR_DOWN, &priv->state);
+	smp_mb__after_atomic();
+
+	/* Start Rx/Tx DMA and enable the interrupts */
+	gfar_start(priv);
+
+	/* force link state update after mac reset */
+	priv->oldlink = 0;
+	priv->oldspeed = 0;
+	priv->oldduplex = -1;
 
 	phy_start(priv->phydev);
 
-	gfar_configure_coalescing_all(priv);
+	enable_napi(priv);
 
-	return 0;
+	netif_tx_wake_all_queues(ndev);
 
-irq_fail:
-	free_skb_resources(priv);
-	return err;
+	return 0;
 }
 
 /* Called when something needs to use the ethernet device
@@ -1964,29 +2222,17 @@
 	struct gfar_private *priv = netdev_priv(dev);
 	int err;
 
-	enable_napi(priv);
-
-	/* Initialize a bunch of registers */
-	init_registers(dev);
-
-	gfar_set_mac_address(dev);
-
 	err = init_phy(dev);
+	if (err)
+		return err;
 
-	if (err) {
-		disable_napi(priv);
+	err = gfar_request_irq(priv);
+	if (err)
 		return err;
-	}
 
 	err = startup_gfar(dev);
-	if (err) {
-		disable_napi(priv);
+	if (err)
 		return err;
-	}
-
-	netif_tx_start_all_queues(dev);
-
-	device_set_wakeup_enable(&dev->dev, priv->wol_en);
 
 	return err;
 }
@@ -2014,16 +2260,16 @@
 	 */
 	if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
 		flags |= TXFCB_UDP;
-		fcb->phcs = udp_hdr(skb)->check;
+		fcb->phcs = (__force __be16)(udp_hdr(skb)->check);
 	} else
-		fcb->phcs = tcp_hdr(skb)->check;
+		fcb->phcs = (__force __be16)(tcp_hdr(skb)->check);
 
 	/* l3os is the distance between the start of the
 	 * frame (skb->data) and the start of the IP hdr.
 	 * l4os is the distance between the start of the
 	 * l3 hdr and the l4 hdr
 	 */
-	fcb->l3os = (u16)(skb_network_offset(skb) - fcb_length);
+	fcb->l3os = (u8)(skb_network_offset(skb) - fcb_length);
 	fcb->l4os = skb_network_header_len(skb);
 
 	fcb->flags = flags;
@@ -2032,7 +2278,7 @@
 void inline gfar_tx_vlan(struct sk_buff *skb, struct txfcb *fcb)
 {
 	fcb->flags |= TXFCB_VLN;
-	fcb->vlctl = vlan_tx_tag_get(skb);
+	fcb->vlctl = cpu_to_be16(skb_vlan_tag_get(skb));
 }
 
 static inline struct txbd8 *skip_txbd(struct txbd8 *bdp, int stride,
@@ -2049,6 +2295,24 @@
 	return skip_txbd(bdp, 1, base, ring_size);
 }
 
+/* eTSEC12: csum generation not supported for some fcb offsets */
+static inline bool gfar_csum_errata_12(struct gfar_private *priv,
+				       unsigned long fcb_addr)
+{
+	return (gfar_has_errata(priv, GFAR_ERRATA_12) &&
+	       (fcb_addr % 0x20) > 0x18);
+}
+
+/* eTSEC76: csum generation for frames larger than 2500 bytes may
+ * cause excess delays before start of transmission
+ */
+static inline bool gfar_csum_errata_76(struct gfar_private *priv,
+				       unsigned int len)
+{
+	return (gfar_has_errata(priv, GFAR_ERRATA_76) &&
+	       (len > 2500));
+}
+
 /* This is called by the kernel when a frame is ready for transmission.
  * It is pointed to by the dev->hard_start_xmit function pointer
  */
@@ -2061,23 +2325,10 @@
 	struct txfcb *fcb = NULL;
 	struct txbd8 *txbdp, *txbdp_start, *base, *txbdp_tstamp = NULL;
 	u32 lstatus;
-	int i, rq = 0, do_tstamp = 0;
+	int i, rq = 0;
+	int do_tstamp, do_csum, do_vlan;
 	u32 bufaddr;
-	unsigned long flags;
-	unsigned int nr_frags, nr_txbds, length, fcb_length = GMAC_FCB_LEN;
-
-	/* TOE=1 frames larger than 2500 bytes may see excess delays
-	 * before start of transmission.
-	 */
-	if (unlikely(gfar_has_errata(priv, GFAR_ERRATA_76) &&
-		     skb->ip_summed == CHECKSUM_PARTIAL &&
-		     skb->len > 2500)) {
-		int ret;
-
-		ret = skb_checksum_help(skb);
-		if (ret)
-			return ret;
-	}
+	unsigned int nr_frags, nr_txbds, bytes_sent, fcb_len = 0;
 
 	rq = skb->queue_mapping;
 	tx_queue = priv->tx_queue[rq];
@@ -2085,30 +2336,32 @@
 	base = tx_queue->tx_bd_base;
 	regs = tx_queue->grp->regs;
 
+	do_csum = (CHECKSUM_PARTIAL == skb->ip_summed);
+	do_vlan = skb_vlan_tag_present(skb);
+	do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+		    priv->hwts_tx_en;
+
+	if (do_csum || do_vlan)
+		fcb_len = GMAC_FCB_LEN;
+
 	/* check if time stamp should be generated */
-	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-		     priv->hwts_tx_en)) {
-		do_tstamp = 1;
-		fcb_length = GMAC_FCB_LEN + GMAC_TXPAL_LEN;
-	}
+	if (unlikely(do_tstamp))
+		fcb_len = GMAC_FCB_LEN + GMAC_TXPAL_LEN;
 
 	/* make space for additional header when fcb is needed */
-	if (((skb->ip_summed == CHECKSUM_PARTIAL) ||
-	     vlan_tx_tag_present(skb) ||
-	     unlikely(do_tstamp)) &&
-	    (skb_headroom(skb) < fcb_length)) {
+	if (fcb_len && unlikely(skb_headroom(skb) < fcb_len)) {
 		struct sk_buff *skb_new;
 
-		skb_new = skb_realloc_headroom(skb, fcb_length);
+		skb_new = skb_realloc_headroom(skb, fcb_len);
 		if (!skb_new) {
 			dev->stats.tx_errors++;
-			kfree_skb(skb);
+			dev_kfree_skb_any(skb);
 			return NETDEV_TX_OK;
 		}
 
 		if (skb->sk)
 			skb_set_owner_w(skb_new, skb->sk);
-		consume_skb(skb);
+		dev_consume_skb_any(skb);
 		skb = skb_new;
 	}
 
@@ -2130,11 +2383,14 @@
 	}
 
 	/* Update transmit stats */
-	tx_queue->stats.tx_bytes += skb->len;
+	bytes_sent = skb->len;
+	tx_queue->stats.tx_bytes += bytes_sent;
+	/* keep Tx bytes on wire for BQL accounting */
+	GFAR_CB(skb)->bytes_sent = bytes_sent;
 	tx_queue->stats.tx_packets++;
 
 	txbdp = txbdp_start = tx_queue->cur_tx;
-	lstatus = txbdp->lstatus;
+	lstatus = be32_to_cpu(txbdp->lstatus);
 
 	/* Time stamp insertion requires one additional TxBD */
 	if (unlikely(do_tstamp))
@@ -2142,20 +2398,24 @@
 						 tx_queue->tx_ring_size);
 
 	if (nr_frags == 0) {
-		if (unlikely(do_tstamp))
-			txbdp_tstamp->lstatus |= BD_LFLAG(TXBD_LAST |
-							  TXBD_INTERRUPT);
-		else
+		if (unlikely(do_tstamp)) {
+			u32 lstatus_ts = be32_to_cpu(txbdp_tstamp->lstatus);
+
+			lstatus_ts |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+			txbdp_tstamp->lstatus = cpu_to_be32(lstatus_ts);
+		} else {
 			lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+		}
 	} else {
 		/* Place the fragment addresses and lengths into the TxBDs */
 		for (i = 0; i < nr_frags; i++) {
+			unsigned int frag_len;
 			/* Point at the next BD, wrapping as needed */
 			txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
 
-			length = skb_shinfo(skb)->frags[i].size;
+			frag_len = skb_shinfo(skb)->frags[i].size;
 
-			lstatus = txbdp->lstatus | length |
+			lstatus = be32_to_cpu(txbdp->lstatus) | frag_len |
 				  BD_LFLAG(TXBD_READY);
 
 			/* Handle the last BD specially */
@@ -2165,15 +2425,17 @@
 			bufaddr = skb_frag_dma_map(priv->dev,
 						   &skb_shinfo(skb)->frags[i],
 						   0,
-						   length,
+						   frag_len,
 						   DMA_TO_DEVICE);
+			if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
+				goto dma_map_err;
 
 			/* set the TxBD length and buffer pointer */
-			txbdp->bufPtr = bufaddr;
-			txbdp->lstatus = lstatus;
+			txbdp->bufPtr = cpu_to_be32(bufaddr);
+			txbdp->lstatus = cpu_to_be32(lstatus);
 		}
 
-		lstatus = txbdp_start->lstatus;
+		lstatus = be32_to_cpu(txbdp_start->lstatus);
 	}
 
 	/* Add TxPAL between FCB and frame if required */
@@ -2182,40 +2444,46 @@
 		memset(skb->data, 0, GMAC_TXPAL_LEN);
 	}
 
-	/* Set up checksumming */
-	if (CHECKSUM_PARTIAL == skb->ip_summed) {
+	/* Add TxFCB if required */
+	if (fcb_len) {
 		fcb = gfar_add_fcb(skb);
-		/* as specified by errata */
-		if (unlikely(gfar_has_errata(priv, GFAR_ERRATA_12) &&
-			     ((unsigned long)fcb % 0x20) > 0x18)) {
+		lstatus |= BD_LFLAG(TXBD_TOE);
+	}
+
+	/* Set up checksumming */
+	if (do_csum) {
+		gfar_tx_checksum(skb, fcb, fcb_len);
+
+		if (unlikely(gfar_csum_errata_12(priv, (unsigned long)fcb)) ||
+		    unlikely(gfar_csum_errata_76(priv, skb->len))) {
 			__skb_pull(skb, GMAC_FCB_LEN);
 			skb_checksum_help(skb);
-		} else {
-			lstatus |= BD_LFLAG(TXBD_TOE);
-			gfar_tx_checksum(skb, fcb, fcb_length);
+			if (do_vlan || do_tstamp) {
+				/* put back a new fcb for vlan/tstamp TOE */
+				fcb = gfar_add_fcb(skb);
+			} else {
+				/* Tx TOE not used */
+				lstatus &= ~(BD_LFLAG(TXBD_TOE));
+				fcb = NULL;
+			}
 		}
 	}
 
-	if (vlan_tx_tag_present(skb)) {
-		if (unlikely(NULL == fcb)) {
-			fcb = gfar_add_fcb(skb);
-			lstatus |= BD_LFLAG(TXBD_TOE);
-		}
-
+	if (do_vlan)
 		gfar_tx_vlan(skb, fcb);
-	}
 
 	/* Setup tx hardware time stamping if requested */
 	if (unlikely(do_tstamp)) {
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-		if (fcb == NULL)
-			fcb = gfar_add_fcb(skb);
 		fcb->ptp = 1;
-		lstatus |= BD_LFLAG(TXBD_TOE);
 	}
 
-	txbdp_start->bufPtr = dma_map_single(priv->dev, skb->data,
-					     skb_headlen(skb), DMA_TO_DEVICE);
+	bufaddr = dma_map_single(priv->dev, skb->data, skb_headlen(skb),
+				 DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(priv->dev, bufaddr)))
+		goto dma_map_err;
+
+	txbdp_start->bufPtr = cpu_to_be32(bufaddr);
 
 	/* If time stamping is requested one additional TxBD must be set up. The
 	 * first TxBD points to the FCB and must have a data length of
@@ -2223,41 +2491,27 @@
 	 * the full frame length.
 	 */
 	if (unlikely(do_tstamp)) {
-		txbdp_tstamp->bufPtr = txbdp_start->bufPtr + fcb_length;
-		txbdp_tstamp->lstatus |= BD_LFLAG(TXBD_READY) |
-					 (skb_headlen(skb) - fcb_length);
+		u32 lstatus_ts = be32_to_cpu(txbdp_tstamp->lstatus);
+
+		bufaddr = be32_to_cpu(txbdp_start->bufPtr);
+		bufaddr += fcb_len;
+		lstatus_ts |= BD_LFLAG(TXBD_READY) |
+			      (skb_headlen(skb) - fcb_len);
+
+		txbdp_tstamp->bufPtr = cpu_to_be32(bufaddr);
+		txbdp_tstamp->lstatus = cpu_to_be32(lstatus_ts);
 		lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | GMAC_FCB_LEN;
 	} else {
 		lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | skb_headlen(skb);
 	}
 
-	netdev_tx_sent_queue(txq, skb->len);
+	netdev_tx_sent_queue(txq, bytes_sent);
 
-	/* We can work in parallel with gfar_clean_tx_ring(), except
-	 * when modifying num_txbdfree. Note that we didn't grab the lock
-	 * when we were reading the num_txbdfree and checking for available
-	 * space, that's because outside of this function it can only grow,
-	 * and once we've got needed space, it cannot suddenly disappear.
-	 *
-	 * The lock also protects us from gfar_error(), which can modify
-	 * regs->tstat and thus retrigger the transfers, which is why we
-	 * also must grab the lock before setting ready bit for the first
-	 * to be transmitted BD.
-	 */
-	spin_lock_irqsave(&tx_queue->txlock, flags);
-
-	/* The powerpc-specific eieio() is used, as wmb() has too strong
-	 * semantics (it requires synchronization between cacheable and
-	 * uncacheable mappings, which eieio doesn't provide and which we
-	 * don't need), thus requiring a more expensive sync instruction.  At
-	 * some point, the set of architecture-independent barrier functions
-	 * should be expanded to include weaker barriers.
-	 */
-	eieio();
+	gfar_wmb();
 
-	txbdp_start->lstatus = lstatus;
+	txbdp_start->lstatus = cpu_to_be32(lstatus);
 
-	eieio(); /* force lstatus write before tx_skbuff */
+	gfar_wmb(); /* force lstatus write before tx_skbuff */
 
 	tx_queue->tx_skbuff[tx_queue->skb_curtx] = skb;
 
@@ -2269,8 +2523,15 @@
 
 	tx_queue->cur_tx = next_txbd(txbdp, base, tx_queue->tx_ring_size);
 
+	/* We can work in parallel with gfar_clean_tx_ring(), except
+	 * when modifying num_txbdfree. Note that we didn't grab the lock
+	 * when we were reading num_txbdfree and checking for available
+	 * space; that's because outside of this function it can only grow.
+	 */
+	spin_lock_bh(&tx_queue->txlock);
 	/* reduce TxBD free count */
 	tx_queue->num_txbdfree -= (nr_txbds);
+	spin_unlock_bh(&tx_queue->txlock);
 
 	/* If the next BD still needs to be cleaned up, then the bds
 	 * are full.  We need to tell the kernel to stop sending us stuff.
@@ -2284,9 +2545,26 @@
 	/* Tell the DMA to go go go */
 	gfar_write(&regs->tstat, TSTAT_CLEAR_THALT >> tx_queue->qindex);
 
-	/* Unlock priv */
-	spin_unlock_irqrestore(&tx_queue->txlock, flags);
+	return NETDEV_TX_OK;
+
+dma_map_err:
+	txbdp = next_txbd(txbdp_start, base, tx_queue->tx_ring_size);
+	if (do_tstamp)
+		txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
+	for (i = 0; i < nr_frags; i++) {
+		lstatus = be32_to_cpu(txbdp->lstatus);
+		if (!(lstatus & BD_LFLAG(TXBD_READY)))
+			break;
 
+		lstatus &= ~BD_LFLAG(TXBD_READY);
+		txbdp->lstatus = cpu_to_be32(lstatus);
+		bufaddr = be32_to_cpu(txbdp->bufPtr);
+		dma_unmap_page(priv->dev, bufaddr, be16_to_cpu(txbdp->length),
+			       DMA_TO_DEVICE);
+		txbdp = next_txbd(txbdp, base, tx_queue->tx_ring_size);
+	}
+	gfar_wmb();
+	dev_kfree_skb_any(skb);
 	return NETDEV_TX_OK;
 }
 
@@ -2295,8 +2573,6 @@
 {
 	struct gfar_private *priv = netdev_priv(dev);
 
-	disable_napi(priv);
-
 	cancel_work_sync(&priv->reset_task);
 	stop_gfar(dev);
 
@@ -2304,7 +2580,7 @@
 	phy_disconnect(priv->phydev);
 	priv->phydev = NULL;
 
-	netif_tx_stop_all_queues(dev);
+	gfar_free_irq(priv);
 
 	return 0;
 }
@@ -2317,123 +2593,43 @@
 	return 0;
 }
 
-/* Check if rx parser should be activated */
-void gfar_check_rx_parser_mode(struct gfar_private *priv)
-{
-	struct gfar __iomem *regs;
-	u32 tempval;
-
-	regs = priv->gfargrp[0].regs;
-
-	tempval = gfar_read(&regs->rctrl);
-	/* If parse is no longer required, then disable parser */
-	if (tempval & RCTRL_REQ_PARSER) {
-		tempval |= RCTRL_PRSDEP_INIT;
-		priv->uses_rxfcb = 1;
-	} else {
-		tempval &= ~RCTRL_PRSDEP_INIT;
-		priv->uses_rxfcb = 0;
-	}
-	gfar_write(&regs->rctrl, tempval);
-}
-
-/* Enables and disables VLAN insertion/extraction */
-void gfar_vlan_mode(struct net_device *dev, netdev_features_t features)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar __iomem *regs = NULL;
-	unsigned long flags;
-	u32 tempval;
-
-	regs = priv->gfargrp[0].regs;
-	local_irq_save(flags);
-	lock_rx_qs(priv);
-
-	if (features & NETIF_F_HW_VLAN_CTAG_TX) {
-		/* Enable VLAN tag insertion */
-		tempval = gfar_read(&regs->tctrl);
-		tempval |= TCTRL_VLINS;
-		gfar_write(&regs->tctrl, tempval);
-	} else {
-		/* Disable VLAN tag insertion */
-		tempval = gfar_read(&regs->tctrl);
-		tempval &= ~TCTRL_VLINS;
-		gfar_write(&regs->tctrl, tempval);
-	}
-
-	if (features & NETIF_F_HW_VLAN_CTAG_RX) {
-		/* Enable VLAN tag extraction */
-		tempval = gfar_read(&regs->rctrl);
-		tempval |= (RCTRL_VLEX | RCTRL_PRSDEP_INIT);
-		gfar_write(&regs->rctrl, tempval);
-		priv->uses_rxfcb = 1;
-	} else {
-		/* Disable VLAN tag extraction */
-		tempval = gfar_read(&regs->rctrl);
-		tempval &= ~RCTRL_VLEX;
-		gfar_write(&regs->rctrl, tempval);
-
-		gfar_check_rx_parser_mode(priv);
-	}
-
-	gfar_change_mtu(dev, dev->mtu);
-
-	unlock_rx_qs(priv);
-	local_irq_restore(flags);
-}
-
 static int gfar_change_mtu(struct net_device *dev, int new_mtu)
 {
-	int tempsize, tempval;
 	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	int oldsize = priv->rx_buffer_size;
 	int frame_size = new_mtu + ETH_HLEN;
 
-	if ((frame_size < 64) || (frame_size > JUMBO_FRAME_SIZE)) {
+	if ((frame_size < 64) || (frame_size > GFAR_JUMBO_FRAME_SIZE)) {
 		netif_err(priv, drv, dev, "Invalid MTU setting\n");
 		return -EINVAL;
 	}
 
-	if (priv->uses_rxfcb)
-		frame_size += GMAC_FCB_LEN;
-
-	frame_size += priv->padding;
+	while (test_and_set_bit_lock(GFAR_RESETTING, &priv->state))
+		cpu_relax();
 
-	tempsize = (frame_size & ~(INCREMENTAL_BUFFER_SIZE - 1)) +
-		   INCREMENTAL_BUFFER_SIZE;
-
-	/* Only stop and start the controller if it isn't already
-	 * stopped, and we changed something
-	 */
-	if ((oldsize != tempsize) && (dev->flags & IFF_UP))
+	if (dev->flags & IFF_UP)
 		stop_gfar(dev);
 
-	priv->rx_buffer_size = tempsize;
-
 	dev->mtu = new_mtu;
 
-	gfar_write(&regs->mrblr, priv->rx_buffer_size);
-	gfar_write(&regs->maxfrm, priv->rx_buffer_size);
+	if (dev->flags & IFF_UP)
+		startup_gfar(dev);
 
-	/* If the mtu is larger than the max size for standard
-	 * ethernet frames (ie, a jumbo frame), then set maccfg2
-	 * to allow huge frames, and to check the length
-	 */
-	tempval = gfar_read(&regs->maccfg2);
-
-	if (priv->rx_buffer_size > DEFAULT_RX_BUFFER_SIZE ||
-	    gfar_has_errata(priv, GFAR_ERRATA_74))
-		tempval |= (MACCFG2_HUGEFRAME | MACCFG2_LENGTHCHECK);
-	else
-		tempval &= ~(MACCFG2_HUGEFRAME | MACCFG2_LENGTHCHECK);
+	clear_bit_unlock(GFAR_RESETTING, &priv->state);
 
-	gfar_write(&regs->maccfg2, tempval);
+	return 0;
+}
 
-	if ((oldsize != tempsize) && (dev->flags & IFF_UP))
-		startup_gfar(dev);
+void reset_gfar(struct net_device *ndev)
+{
+	struct gfar_private *priv = netdev_priv(ndev);
 
-	return 0;
+	while (test_and_set_bit_lock(GFAR_RESETTING, &priv->state))
+		cpu_relax();
+
+	stop_gfar(ndev);
+	startup_gfar(ndev);
+
+	clear_bit_unlock(GFAR_RESETTING, &priv->state);
 }
 
 /* gfar_reset_task gets scheduled when a packet has not been
@@ -2445,16 +2641,7 @@
 {
 	struct gfar_private *priv = container_of(work, struct gfar_private,
 						 reset_task);
-	struct net_device *dev = priv->ndev;
-
-	if (dev->flags & IFF_UP) {
-		netif_tx_stop_all_queues(dev);
-		stop_gfar(dev);
-		startup_gfar(dev);
-		netif_tx_start_all_queues(dev);
-	}
-
-	netif_tx_schedule_all(dev);
+	reset_gfar(priv->ndev);
 }
 
 static void gfar_timeout(struct net_device *dev)
@@ -2465,15 +2652,6 @@
 	schedule_work(&priv->reset_task);
 }
 
-static void gfar_align_skb(struct sk_buff *skb)
-{
-	/* We need the data buffer to be aligned properly.  We will reserve
-	 * as many bytes as needed to align the data properly
-	 */
-	skb_reserve(skb, RXBUF_ALIGNMENT -
-		    (((unsigned long) skb->data) & (RXBUF_ALIGNMENT - 1)));
-}
-
 /* Interrupt Handler for Transmit complete */
 static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
 {
@@ -2499,7 +2677,6 @@
 	skb_dirtytx = tx_queue->skb_dirtytx;
 
 	while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) {
-		unsigned long flags;
 
 		frags = skb_shinfo(skb)->nr_frags;
 
@@ -2513,7 +2690,7 @@
 
 		lbdp = skip_txbd(bdp, nr_txbds - 1, base, tx_ring_size);
 
-		lstatus = lbdp->lstatus;
+		lstatus = be32_to_cpu(lbdp->lstatus);
 
 		/* Only clean completed frames */
 		if ((lstatus & BD_LFLAG(TXBD_READY)) &&
@@ -2522,36 +2699,39 @@
 
 		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
 			next = next_txbd(bdp, base, tx_ring_size);
-			buflen = next->length + GMAC_FCB_LEN + GMAC_TXPAL_LEN;
+			buflen = be16_to_cpu(next->length) +
+				 GMAC_FCB_LEN + GMAC_TXPAL_LEN;
 		} else
-			buflen = bdp->length;
+			buflen = be16_to_cpu(bdp->length);
 
-		dma_unmap_single(priv->dev, bdp->bufPtr,
+		dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr),
 				 buflen, DMA_TO_DEVICE);
 
 		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
 			struct skb_shared_hwtstamps shhwtstamps;
-			u64 *ns = (u64*) (((u32)skb->data + 0x10) & ~0x7);
+			u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) &
+					  ~0x7UL);
 
 			memset(&shhwtstamps, 0, sizeof(shhwtstamps));
 			shhwtstamps.hwtstamp = ns_to_ktime(*ns);
 			skb_pull(skb, GMAC_FCB_LEN + GMAC_TXPAL_LEN);
 			skb_tstamp_tx(skb, &shhwtstamps);
-			bdp->lstatus &= BD_LFLAG(TXBD_WRAP);
+			gfar_clear_txbd_status(bdp);
 			bdp = next;
 		}
 
-		bdp->lstatus &= BD_LFLAG(TXBD_WRAP);
+		gfar_clear_txbd_status(bdp);
 		bdp = next_txbd(bdp, base, tx_ring_size);
 
 		for (i = 0; i < frags; i++) {
-			dma_unmap_page(priv->dev, bdp->bufPtr,
-				       bdp->length, DMA_TO_DEVICE);
-			bdp->lstatus &= BD_LFLAG(TXBD_WRAP);
+			dma_unmap_page(priv->dev, be32_to_cpu(bdp->bufPtr),
+				       be16_to_cpu(bdp->length),
+				       DMA_TO_DEVICE);
+			gfar_clear_txbd_status(bdp);
 			bdp = next_txbd(bdp, base, tx_ring_size);
 		}
 
-		bytes_sent += skb->len;
+		bytes_sent += GFAR_CB(skb)->bytes_sent;
 
 		dev_kfree_skb_any(skb);
 
@@ -2561,14 +2741,16 @@
 			      TX_RING_MOD_MASK(tx_ring_size);
 
 		howmany++;
-		spin_lock_irqsave(&tx_queue->txlock, flags);
+		spin_lock(&tx_queue->txlock);
 		tx_queue->num_txbdfree += nr_txbds;
-		spin_unlock_irqrestore(&tx_queue->txlock, flags);
+		spin_unlock(&tx_queue->txlock);
 	}
 
 	/* If we freed a buffer, we can restart transmission, if necessary */
-	if (netif_tx_queue_stopped(txq) && tx_queue->num_txbdfree)
-		netif_wake_subqueue(dev, tqi);
+	if (tx_queue->num_txbdfree &&
+	    netif_tx_queue_stopped(txq) &&
+	    !(test_bit(GFAR_DOWN, &priv->state)))
+		netif_wake_subqueue(priv->ndev, tqi);
 
 	/* Update dirty indicators */
 	tx_queue->skb_dirtytx = skb_dirtytx;
@@ -2577,70 +2759,85 @@
 	netdev_tx_completed_queue(txq, howmany, bytes_sent);
 }
 
-static void gfar_schedule_cleanup(struct gfar_priv_grp *gfargrp)
+static bool gfar_new_page(struct gfar_priv_rx_q *rxq, struct gfar_rx_buff *rxb)
 {
-	unsigned long flags;
+	struct page *page;
+	dma_addr_t addr;
 
-	spin_lock_irqsave(&gfargrp->grplock, flags);
-	if (napi_schedule_prep(&gfargrp->napi)) {
-		gfar_write(&gfargrp->regs->imask, IMASK_RTX_DISABLED);
-		__napi_schedule(&gfargrp->napi);
-	} else {
-		/* Clear IEVENT, so interrupts aren't called again
-		 * because of the packets that have already arrived.
-		 */
-		gfar_write(&gfargrp->regs->ievent, IEVENT_RTX_MASK);
+	page = dev_alloc_page();
+	if (unlikely(!page))
+		return false;
+
+	addr = dma_map_page(rxq->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(rxq->dev, addr))) {
+		__free_page(page);
+
+		return false;
 	}
-	spin_unlock_irqrestore(&gfargrp->grplock, flags);
 
-}
+	rxb->dma = addr;
+	rxb->page = page;
+	rxb->page_offset = 0;
 
-/* Interrupt Handler for Transmit complete */
-static irqreturn_t gfar_transmit(int irq, void *grp_id)
-{
-	gfar_schedule_cleanup((struct gfar_priv_grp *)grp_id);
-	return IRQ_HANDLED;
+	return true;
 }
 
-static void gfar_new_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp,
-			   struct sk_buff *skb)
+static void gfar_rx_alloc_err(struct gfar_priv_rx_q *rx_queue)
 {
-	struct net_device *dev = rx_queue->dev;
-	struct gfar_private *priv = netdev_priv(dev);
-	dma_addr_t buf;
+	struct gfar_private *priv = netdev_priv(rx_queue->ndev);
+	struct gfar_extra_stats *estats = &priv->extra_stats;
 
-	buf = dma_map_single(priv->dev, skb->data,
-			     priv->rx_buffer_size, DMA_FROM_DEVICE);
-	gfar_init_rxbdp(rx_queue, bdp, buf);
+	netdev_err(rx_queue->ndev, "Can't alloc RX buffers\n");
+	atomic64_inc(&estats->rx_alloc_err);
 }
 
-static struct sk_buff *gfar_alloc_skb(struct net_device *dev)
+static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue,
+				int alloc_cnt)
 {
-	struct gfar_private *priv = netdev_priv(dev);
-	struct sk_buff *skb;
+	struct rxbd8 *bdp;
+	struct gfar_rx_buff *rxb;
+	int i;
 
-	skb = netdev_alloc_skb(dev, priv->rx_buffer_size + RXBUF_ALIGNMENT);
-	if (!skb)
-		return NULL;
+	i = rx_queue->next_to_use;
+	bdp = &rx_queue->rx_bd_base[i];
+	rxb = &rx_queue->rx_buff[i];
+
+	while (alloc_cnt--) {
+		/* reuse the page if present, else allocate a new one */
+		if (unlikely(!rxb->page)) {
+			if (unlikely(!gfar_new_page(rx_queue, rxb))) {
+				gfar_rx_alloc_err(rx_queue);
+				break;
+			}
+		}
 
-	gfar_align_skb(skb);
+		/* Setup the new RxBD */
+		gfar_init_rxbdp(rx_queue, bdp,
+				rxb->dma + rxb->page_offset + RXBUF_ALIGNMENT);
 
-	return skb;
-}
+		/* Update to the next pointer */
+		bdp++;
+		rxb++;
 
-struct sk_buff *gfar_new_skb(struct net_device *dev)
-{
-	return gfar_alloc_skb(dev);
+		if (unlikely(++i == rx_queue->rx_ring_size)) {
+			i = 0;
+			bdp = rx_queue->rx_bd_base;
+			rxb = rx_queue->rx_buff;
+		}
+	}
+
+	rx_queue->next_to_use = i;
+	rx_queue->next_to_alloc = i;
 }
 
-static inline void count_errors(unsigned short status, struct net_device *dev)
+static void count_errors(u32 lstatus, struct net_device *ndev)
 {
-	struct gfar_private *priv = netdev_priv(dev);
-	struct net_device_stats *stats = &dev->stats;
+	struct gfar_private *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
 	struct gfar_extra_stats *estats = &priv->extra_stats;
 
 	/* If the packet was truncated, none of the other errors matter */
-	if (status & RXBD_TRUNCATED) {
+	if (lstatus & BD_LFLAG(RXBD_TRUNCATED)) {
 		stats->rx_length_errors++;
 
 		atomic64_inc(&estats->rx_trunc);
@@ -2648,52 +2845,186 @@
 		return;
 	}
 	/* Count the errors, if there were any */
-	if (status & (RXBD_LARGE | RXBD_SHORT)) {
+	if (lstatus & BD_LFLAG(RXBD_LARGE | RXBD_SHORT)) {
 		stats->rx_length_errors++;
 
-		if (status & RXBD_LARGE)
+		if (lstatus & BD_LFLAG(RXBD_LARGE))
 			atomic64_inc(&estats->rx_large);
 		else
 			atomic64_inc(&estats->rx_short);
 	}
-	if (status & RXBD_NONOCTET) {
+	if (lstatus & BD_LFLAG(RXBD_NONOCTET)) {
 		stats->rx_frame_errors++;
 		atomic64_inc(&estats->rx_nonoctet);
 	}
-	if (status & RXBD_CRCERR) {
+	if (lstatus & BD_LFLAG(RXBD_CRCERR)) {
 		atomic64_inc(&estats->rx_crcerr);
 		stats->rx_crc_errors++;
 	}
-	if (status & RXBD_OVERRUN) {
+	if (lstatus & BD_LFLAG(RXBD_OVERRUN)) {
 		atomic64_inc(&estats->rx_overrun);
-		stats->rx_crc_errors++;
+		stats->rx_over_errors++;
 	}
 }
 
 irqreturn_t gfar_receive(int irq, void *grp_id)
 {
-	gfar_schedule_cleanup((struct gfar_priv_grp *)grp_id);
+	struct gfar_priv_grp *grp = (struct gfar_priv_grp *)grp_id;
+	unsigned long flags;
+	u32 imask, ievent;
+
+	ievent = gfar_read(&grp->regs->ievent);
+
+	if (unlikely(ievent & IEVENT_FGPI)) {
+		gfar_write(&grp->regs->ievent, IEVENT_FGPI);
+		return IRQ_HANDLED;
+	}
+
+	if (likely(napi_schedule_prep(&grp->napi_rx))) {
+		spin_lock_irqsave(&grp->grplock, flags);
+		imask = gfar_read(&grp->regs->imask);
+		imask &= IMASK_RX_DISABLED;
+		gfar_write(&grp->regs->imask, imask);
+		spin_unlock_irqrestore(&grp->grplock, flags);
+		__napi_schedule(&grp->napi_rx);
+	} else {
+		/* Clear IEVENT, so interrupts aren't called again
+		 * because of the packets that have already arrived.
+		 */
+		gfar_write(&grp->regs->ievent, IEVENT_RX_MASK);
+	}
+
 	return IRQ_HANDLED;
 }
 
+/* Interrupt Handler for Transmit complete */
+static irqreturn_t gfar_transmit(int irq, void *grp_id)
+{
+	struct gfar_priv_grp *grp = (struct gfar_priv_grp *)grp_id;
+	unsigned long flags;
+	u32 imask;
+
+	if (likely(napi_schedule_prep(&grp->napi_tx))) {
+		spin_lock_irqsave(&grp->grplock, flags);
+		imask = gfar_read(&grp->regs->imask);
+		imask &= IMASK_TX_DISABLED;
+		gfar_write(&grp->regs->imask, imask);
+		spin_unlock_irqrestore(&grp->grplock, flags);
+		__napi_schedule(&grp->napi_tx);
+	} else {
+		/* Clear IEVENT, so interrupts aren't called again
+		 * because of the packets that have already been sent.
+		 */
+		gfar_write(&grp->regs->ievent, IEVENT_TX_MASK);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
+			     struct sk_buff *skb, bool first)
+{
+	unsigned int size = lstatus & BD_LENGTH_MASK;
+	struct page *page = rxb->page;
+
+	/* Remove the FCS from the packet length */
+	if (likely(lstatus & BD_LFLAG(RXBD_LAST)))
+		size -= ETH_FCS_LEN;
+
+	if (likely(first))
+		skb_put(skb, size);
+	else
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+				rxb->page_offset + RXBUF_ALIGNMENT,
+				size, GFAR_RXB_TRUESIZE);
+
+	/* reuse the page only if its reference count is 1 */
+	if (unlikely(page_count(page) != 1))
+		return false;
+
+	/* change offset to the other half */
+	rxb->page_offset ^= GFAR_RXB_TRUESIZE;
+
+	atomic_inc(&page->_count);
+
+	return true;
+}
+
+static void gfar_reuse_rx_page(struct gfar_priv_rx_q *rxq,
+			       struct gfar_rx_buff *old_rxb)
+{
+	struct gfar_rx_buff *new_rxb;
+	u16 nta = rxq->next_to_alloc;
+
+	new_rxb = &rxq->rx_buff[nta];
+
+	/* find next buf that can reuse a page */
+	nta++;
+	rxq->next_to_alloc = (nta < rxq->rx_ring_size) ? nta : 0;
+
+	/* copy page reference */
+	*new_rxb = *old_rxb;
+
+	/* sync for use by the device */
+	dma_sync_single_range_for_device(rxq->dev, old_rxb->dma,
+					 old_rxb->page_offset,
+					 GFAR_RXB_TRUESIZE, DMA_FROM_DEVICE);
+}
+
+static struct sk_buff *gfar_get_next_rxbuff(struct gfar_priv_rx_q *rx_queue,
+					    u32 lstatus, struct sk_buff *skb)
+{
+	struct gfar_rx_buff *rxb = &rx_queue->rx_buff[rx_queue->next_to_clean];
+	struct page *page = rxb->page;
+	bool first = false;
+
+	if (likely(!skb)) {
+		void *buff_addr = page_address(page) + rxb->page_offset;
+
+		skb = build_skb(buff_addr, GFAR_SKBFRAG_SIZE);
+		if (unlikely(!skb)) {
+			gfar_rx_alloc_err(rx_queue);
+			return NULL;
+		}
+		skb_reserve(skb, RXBUF_ALIGNMENT);
+		first = true;
+	}
+
+	dma_sync_single_range_for_cpu(rx_queue->dev, rxb->dma, rxb->page_offset,
+				      GFAR_RXB_TRUESIZE, DMA_FROM_DEVICE);
+
+	if (gfar_add_rx_frag(rxb, lstatus, skb, first)) {
+		/* reuse the free half of the page */
+		gfar_reuse_rx_page(rx_queue, rxb);
+	} else {
+		/* page cannot be reused, unmap it */
+		dma_unmap_page(rx_queue->dev, rxb->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+
+	/* clear rxb content */
+	rxb->page = NULL;
+
+	return skb;
+}
+
 static inline void gfar_rx_checksum(struct sk_buff *skb, struct rxfcb *fcb)
 {
 	/* If valid headers were found, and valid sums
 	 * were verified, then we tell the kernel that no
 	 * checksumming is necessary.  Otherwise, it is [FIXME]
 	 */
-	if ((fcb->flags & RXFCB_CSUM_MASK) == (RXFCB_CIP | RXFCB_CTU))
+	if ((be16_to_cpu(fcb->flags) & RXFCB_CSUM_MASK) ==
+	    (RXFCB_CIP | RXFCB_CTU))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	else
 		skb_checksum_none_assert(skb);
 }
 
-
 /* gfar_process_frame() -- handle one incoming packet if skb isn't NULL. */
-static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
-			       int amount_pull, struct napi_struct *napi)
+static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb)
 {
-	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar_private *priv = netdev_priv(ndev);
 	struct rxfcb *fcb = NULL;
 
 	/* fcb is at the beginning if exists */
@@ -2702,10 +3033,8 @@
 	/* Remove the FCB from the skb
 	 * Remove the padded bytes, if there are any
 	 */
-	if (amount_pull) {
-		skb_record_rx_queue(skb, fcb->rq);
-		skb_pull(skb, amount_pull);
-	}
+	if (priv->uses_rxfcb)
+		skb_pull(skb, GMAC_FCB_LEN);
 
 	/* Get receive timestamp from the skb */
 	if (priv->hwts_rx_en) {
@@ -2719,23 +3048,20 @@
 	if (priv->padding)
 		skb_pull(skb, priv->padding);
 
-	if (dev->features & NETIF_F_RXCSUM)
+	if (ndev->features & NETIF_F_RXCSUM)
 		gfar_rx_checksum(skb, fcb);
 
 	/* Tell the skb what kind of packet this is */
-	skb->protocol = eth_type_trans(skb, dev);
+	skb->protocol = eth_type_trans(skb, ndev);
 
 	/* There's need to check for NETIF_F_HW_VLAN_CTAG_RX here.
 	 * Even if vlan rx accel is disabled, on some chips
 	 * RXFCB_VLN is pseudo randomly set.
 	 */
-	if (dev->features & NETIF_F_HW_VLAN_CTAG_RX &&
-	    fcb->flags & RXFCB_VLN)
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), fcb->vlctl);
-
-	/* Send the packet up the stack */
-	napi_gro_receive(napi, skb);
-
+	if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX &&
+	    be16_to_cpu(fcb->flags) & RXFCB_VLN)
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+				       be16_to_cpu(fcb->vlctl));
 }
 
 /* gfar_clean_rx_ring() -- Processes each frame in the rx ring
@@ -2744,105 +3070,168 @@
  */
 int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
 {
-	struct net_device *dev = rx_queue->dev;
-	struct rxbd8 *bdp, *base;
-	struct sk_buff *skb;
-	int pkt_len;
-	int amount_pull;
-	int howmany = 0;
-	struct gfar_private *priv = netdev_priv(dev);
+	struct net_device *ndev = rx_queue->ndev;
+	struct gfar_private *priv = netdev_priv(ndev);
+	struct rxbd8 *bdp;
+	int i, howmany = 0;
+	struct sk_buff *skb = rx_queue->skb;
+	int cleaned_cnt = gfar_rxbd_unused(rx_queue);
+	unsigned int total_bytes = 0, total_pkts = 0;
 
 	/* Get the first full descriptor */
-	bdp = rx_queue->cur_rx;
-	base = rx_queue->rx_bd_base;
+	i = rx_queue->next_to_clean;
 
-	amount_pull = priv->uses_rxfcb ? GMAC_FCB_LEN : 0;
+	while (rx_work_limit--) {
+		u32 lstatus;
 
-	while (!((bdp->status & RXBD_EMPTY) || (--rx_work_limit < 0))) {
-		struct sk_buff *newskb;
+		if (cleaned_cnt >= GFAR_RX_BUFF_ALLOC) {
+			gfar_alloc_rx_buffs(rx_queue, cleaned_cnt);
+			cleaned_cnt = 0;
+		}
+
+		bdp = &rx_queue->rx_bd_base[i];
+		lstatus = be32_to_cpu(bdp->lstatus);
+		if (lstatus & BD_LFLAG(RXBD_EMPTY))
+			break;
 
+		/* order rx buffer descriptor reads */
 		rmb();
 
-		/* Add another skb for the future */
-		newskb = gfar_new_skb(dev);
+		/* fetch next to clean buffer from the ring */
+		skb = gfar_get_next_rxbuff(rx_queue, lstatus, skb);
+		if (unlikely(!skb))
+			break;
 
-		skb = rx_queue->rx_skbuff[rx_queue->skb_currx];
+		cleaned_cnt++;
+		howmany++;
 
-		dma_unmap_single(priv->dev, bdp->bufPtr,
-				 priv->rx_buffer_size, DMA_FROM_DEVICE);
+		if (unlikely(++i == rx_queue->rx_ring_size))
+			i = 0;
 
-		if (unlikely(!(bdp->status & RXBD_ERR) &&
-			     bdp->length > priv->rx_buffer_size))
-			bdp->status = RXBD_LARGE;
-
-		/* We drop the frame if we failed to allocate a new buffer */
-		if (unlikely(!newskb || !(bdp->status & RXBD_LAST) ||
-			     bdp->status & RXBD_ERR)) {
-			count_errors(bdp->status, dev);
-
-			if (unlikely(!newskb))
-				newskb = skb;
-			else if (skb)
-				dev_kfree_skb(skb);
-		} else {
-			/* Increment the number of packets */
-			rx_queue->stats.rx_packets++;
-			howmany++;
-
-			if (likely(skb)) {
-				pkt_len = bdp->length - ETH_FCS_LEN;
-				/* Remove the FCS from the packet length */
-				skb_put(skb, pkt_len);
-				rx_queue->stats.rx_bytes += pkt_len;
-				skb_record_rx_queue(skb, rx_queue->qindex);
-				gfar_process_frame(dev, skb, amount_pull,
-						   &rx_queue->grp->napi);
+		rx_queue->next_to_clean = i;
 
-			} else {
-				netif_warn(priv, rx_err, dev, "Missing skb!\n");
-				rx_queue->stats.rx_dropped++;
-				atomic64_inc(&priv->extra_stats.rx_skbmissing);
-			}
+		/* fetch next buffer if not the last in frame */
+		if (!(lstatus & BD_LFLAG(RXBD_LAST)))
+			continue;
 
+		if (unlikely(lstatus & BD_LFLAG(RXBD_ERR))) {
+			count_errors(lstatus, ndev);
+
+			/* discard faulty buffer */
+			dev_kfree_skb(skb);
+			skb = NULL;
+			rx_queue->stats.rx_dropped++;
+			continue;
 		}
 
-		rx_queue->rx_skbuff[rx_queue->skb_currx] = newskb;
+		/* Increment the number of packets */
+		total_pkts++;
+		total_bytes += skb->len;
 
-		/* Setup the new bdp */
-		gfar_new_rxbdp(rx_queue, bdp, newskb);
+		skb_record_rx_queue(skb, rx_queue->qindex);
 
-		/* Update to the next pointer */
-		bdp = next_bd(bdp, base, rx_queue->rx_ring_size);
+		gfar_process_frame(ndev, skb);
+
+		/* Send the packet up the stack */
+		napi_gro_receive(&rx_queue->grp->napi_rx, skb);
 
-		/* update to point at the next skb */
-		rx_queue->skb_currx = (rx_queue->skb_currx + 1) &
-				      RX_RING_MOD_MASK(rx_queue->rx_ring_size);
+		skb = NULL;
 	}
 
-	/* Update the current rxbd pointer to be the next one */
-	rx_queue->cur_rx = bdp;
+	/* Store incomplete frames for completion */
+	rx_queue->skb = skb;
+
+	rx_queue->stats.rx_packets += total_pkts;
+	rx_queue->stats.rx_bytes += total_bytes;
+
+	if (cleaned_cnt)
+		gfar_alloc_rx_buffs(rx_queue, cleaned_cnt);
+
+	/* Update Last Free RxBD pointer for LFC */
+	if (unlikely(priv->tx_actual_en)) {
+		u32 bdp_dma = gfar_rxbd_dma_lastfree(rx_queue);
+
+		gfar_write(rx_queue->rfbptr, bdp_dma);
+	}
 
 	return howmany;
 }
 
-static int gfar_poll(struct napi_struct *napi, int budget)
+static int gfar_poll_rx_sq(struct napi_struct *napi, int budget)
+{
+	struct gfar_priv_grp *gfargrp =
+		container_of(napi, struct gfar_priv_grp, napi_rx);
+	struct gfar __iomem *regs = gfargrp->regs;
+	struct gfar_priv_rx_q *rx_queue = gfargrp->rx_queue;
+	int work_done = 0;
+
+	/* Clear IEVENT, so interrupts aren't called again
+	 * because of the packets that have already arrived
+	 */
+	gfar_write(&regs->ievent, IEVENT_RX_MASK);
+
+	work_done = gfar_clean_rx_ring(rx_queue, budget);
+
+	if (work_done < budget) {
+		u32 imask;
+		napi_complete(napi);
+		/* Clear the halt bit in RSTAT */
+		gfar_write(&regs->rstat, gfargrp->rstat);
+
+		spin_lock_irq(&gfargrp->grplock);
+		imask = gfar_read(&regs->imask);
+		imask |= IMASK_RX_DEFAULT;
+		gfar_write(&regs->imask, imask);
+		spin_unlock_irq(&gfargrp->grplock);
+	}
+
+	return work_done;
+}
+
+static int gfar_poll_tx_sq(struct napi_struct *napi, int budget)
+{
+	struct gfar_priv_grp *gfargrp =
+		container_of(napi, struct gfar_priv_grp, napi_tx);
+	struct gfar __iomem *regs = gfargrp->regs;
+	struct gfar_priv_tx_q *tx_queue = gfargrp->tx_queue;
+	u32 imask;
+
+	/* Clear IEVENT, so interrupts aren't called again
+	 * because of the packets that have already arrived
+	 */
+	gfar_write(&regs->ievent, IEVENT_TX_MASK);
+
+	/* run Tx cleanup to completion */
+	if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx])
+		gfar_clean_tx_ring(tx_queue);
+
+	napi_complete(napi);
+
+	spin_lock_irq(&gfargrp->grplock);
+	imask = gfar_read(&regs->imask);
+	imask |= IMASK_TX_DEFAULT;
+	gfar_write(&regs->imask, imask);
+	spin_unlock_irq(&gfargrp->grplock);
+
+	return 0;
+}
+
+static int gfar_poll_rx(struct napi_struct *napi, int budget)
 {
 	struct gfar_priv_grp *gfargrp =
-		container_of(napi, struct gfar_priv_grp, napi);
+		container_of(napi, struct gfar_priv_grp, napi_rx);
 	struct gfar_private *priv = gfargrp->priv;
 	struct gfar __iomem *regs = gfargrp->regs;
-	struct gfar_priv_tx_q *tx_queue = NULL;
 	struct gfar_priv_rx_q *rx_queue = NULL;
 	int work_done = 0, work_done_per_q = 0;
 	int i, budget_per_q = 0;
-	int has_tx_work;
 	unsigned long rstat_rxf;
 	int num_act_queues;
 
 	/* Clear IEVENT, so interrupts aren't called again
 	 * because of the packets that have already arrived
 	 */
-	gfar_write(&regs->ievent, IEVENT_RTX_MASK);
+	gfar_write(&regs->ievent, IEVENT_RX_MASK);
 
 	rstat_rxf = gfar_read(&regs->rstat) & RSTAT_RXF_MASK;
 
@@ -2850,67 +3239,84 @@
 	if (num_act_queues)
 		budget_per_q = budget/num_act_queues;
 
-	while (1) {
-		has_tx_work = 0;
-		for_each_set_bit(i, &gfargrp->tx_bit_map, priv->num_tx_queues) {
-			tx_queue = priv->tx_queue[i];
-			/* run Tx cleanup to completion */
-			if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx]) {
-				gfar_clean_tx_ring(tx_queue);
-				has_tx_work = 1;
-			}
-		}
+	for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) {
+		/* skip queue if not active */
+		if (!(rstat_rxf & (RSTAT_CLEAR_RXF0 >> i)))
+			continue;
 
-		for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) {
-			/* skip queue if not active */
-			if (!(rstat_rxf & (RSTAT_CLEAR_RXF0 >> i)))
-				continue;
+		rx_queue = priv->rx_queue[i];
+		work_done_per_q =
+			gfar_clean_rx_ring(rx_queue, budget_per_q);
+		work_done += work_done_per_q;
+
+		/* finished processing this queue */
+		if (work_done_per_q < budget_per_q) {
+			/* clear active queue hw indication */
+			gfar_write(&regs->rstat,
+				   RSTAT_CLEAR_RXF0 >> i);
+			num_act_queues--;
 
-			rx_queue = priv->rx_queue[i];
-			work_done_per_q =
-				gfar_clean_rx_ring(rx_queue, budget_per_q);
-			work_done += work_done_per_q;
-
-			/* finished processing this queue */
-			if (work_done_per_q < budget_per_q) {
-				/* clear active queue hw indication */
-				gfar_write(&regs->rstat,
-					   RSTAT_CLEAR_RXF0 >> i);
-				rstat_rxf &= ~(RSTAT_CLEAR_RXF0 >> i);
-				num_act_queues--;
-
-				if (!num_act_queues)
-					break;
-				/* recompute budget per Rx queue */
-				budget_per_q =
-					(budget - work_done) / num_act_queues;
-			}
+			if (!num_act_queues)
+				break;
 		}
+	}
 
-		if (work_done >= budget)
-			break;
-
-		if (!num_act_queues && !has_tx_work) {
+	if (!num_act_queues) {
+		u32 imask;
+		napi_complete(napi);
+
+		/* Clear the halt bit in RSTAT */
+		gfar_write(&regs->rstat, gfargrp->rstat);
+
+		spin_lock_irq(&gfargrp->grplock);
+		imask = gfar_read(&regs->imask);
+		imask |= IMASK_RX_DEFAULT;
+		gfar_write(&regs->imask, imask);
+		spin_unlock_irq(&gfargrp->grplock);
+	}
 
-			napi_complete(napi);
+	return work_done;
+}
 
-			/* Clear the halt bit in RSTAT */
-			gfar_write(&regs->rstat, gfargrp->rstat);
+static int gfar_poll_tx(struct napi_struct *napi, int budget)
+{
+	struct gfar_priv_grp *gfargrp =
+		container_of(napi, struct gfar_priv_grp, napi_tx);
+	struct gfar_private *priv = gfargrp->priv;
+	struct gfar __iomem *regs = gfargrp->regs;
+	struct gfar_priv_tx_q *tx_queue = NULL;
+	int has_tx_work = 0;
+	int i;
 
-			gfar_write(&regs->imask, IMASK_DEFAULT);
+	/* Clear IEVENT, so interrupts aren't called again
+	 * because of the packets that have already arrived
+	 */
+	gfar_write(&regs->ievent, IEVENT_TX_MASK);
 
-			/* If we are coalescing interrupts, update the timer
-			 * Otherwise, clear it
-			 */
-			gfar_configure_coalescing(priv, gfargrp->rx_bit_map,
-						  gfargrp->tx_bit_map);
-			break;
+	for_each_set_bit(i, &gfargrp->tx_bit_map, priv->num_tx_queues) {
+		tx_queue = priv->tx_queue[i];
+		/* run Tx cleanup to completion */
+		if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx]) {
+			gfar_clean_tx_ring(tx_queue);
+			has_tx_work = 1;
 		}
 	}
 
-	return work_done;
+	if (!has_tx_work) {
+		u32 imask;
+		napi_complete(napi);
+
+		spin_lock_irq(&gfargrp->grplock);
+		imask = gfar_read(&regs->imask);
+		imask |= IMASK_TX_DEFAULT;
+		gfar_write(&regs->imask, imask);
+		spin_unlock_irq(&gfargrp->grplock);
+	}
+
+	return 0;
 }
 
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 /* Polling 'interrupt' - used by things like netconsole to send skbs
  * without having to re-enable interrupts. It's not called while
@@ -2978,81 +3384,12 @@
 static void adjust_link(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	unsigned long flags;
 	struct phy_device *phydev = priv->phydev;
-	int new_state = 0;
-
-	local_irq_save(flags);
-	lock_tx_qs(priv);
-
-	if (phydev->link) {
-		u32 tempval = gfar_read(&regs->maccfg2);
-		u32 ecntrl = gfar_read(&regs->ecntrl);
 
-		/* Now we make sure that we can be in full duplex mode.
-		 * If not, we operate in half-duplex mode.
-		 */
-		if (phydev->duplex != priv->oldduplex) {
-			new_state = 1;
-			if (!(phydev->duplex))
-				tempval &= ~(MACCFG2_FULL_DUPLEX);
-			else
-				tempval |= MACCFG2_FULL_DUPLEX;
-
-			priv->oldduplex = phydev->duplex;
-		}
-
-		if (phydev->speed != priv->oldspeed) {
-			new_state = 1;
-			switch (phydev->speed) {
-			case 1000:
-				tempval =
-				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII);
-
-				ecntrl &= ~(ECNTRL_R100);
-				break;
-			case 100:
-			case 10:
-				tempval =
-				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII);
-
-				/* Reduced mode distinguishes
-				 * between 10 and 100
-				 */
-				if (phydev->speed == SPEED_100)
-					ecntrl |= ECNTRL_R100;
-				else
-					ecntrl &= ~(ECNTRL_R100);
-				break;
-			default:
-				netif_warn(priv, link, dev,
-					   "Ack!  Speed (%d) is not 10/100/1000!\n",
-					   phydev->speed);
-				break;
-			}
-
-			priv->oldspeed = phydev->speed;
-		}
-
-		gfar_write(&regs->maccfg2, tempval);
-		gfar_write(&regs->ecntrl, ecntrl);
-
-		if (!priv->oldlink) {
-			new_state = 1;
-			priv->oldlink = 1;
-		}
-	} else if (priv->oldlink) {
-		new_state = 1;
-		priv->oldlink = 0;
-		priv->oldspeed = 0;
-		priv->oldduplex = -1;
-	}
-
-	if (new_state && netif_msg_link(priv))
-		phy_print_status(phydev);
-	unlock_tx_qs(priv);
-	local_irq_restore(flags);
+	if (unlikely(phydev->link != priv->oldlink ||
+		     (phydev->link && (phydev->duplex != priv->oldduplex ||
+				       phydev->speed != priv->oldspeed))))
+		gfar_update_link_state(priv);
 }
 
 /* Update the hash table based on the current list of multicast
@@ -3197,22 +3534,21 @@
 {
 	struct gfar_private *priv = netdev_priv(dev);
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	int idx;
-	char tmpbuf[ETH_ALEN];
 	u32 tempval;
 	u32 __iomem *macptr = &regs->macstnaddr1;
 
 	macptr += num*2;
 
-	/* Now copy it into the mac registers backwards, cuz
-	 * little endian is silly
+	/* For a station address of 0x12345678ABCD in transmission
+	 * order (BE), MACnADDR1 is set to 0xCDAB7856 and
+	 * MACnADDR2 is set to 0x34120000.
 	 */
-	for (idx = 0; idx < ETH_ALEN; idx++)
-		tmpbuf[ETH_ALEN - 1 - idx] = addr[idx];
+	tempval = (addr[5] << 24) | (addr[4] << 16) |
+		  (addr[3] << 8)  |  addr[2];
 
-	gfar_write(macptr, *((u32 *) (tmpbuf)));
+	gfar_write(macptr, tempval);
 
-	tempval = *((u32 *) (tmpbuf + 4));
+	tempval = (addr[1] << 24) | (addr[0] << 16);
 
 	gfar_write(macptr+1, tempval);
 }
@@ -3251,30 +3587,19 @@
 		if (events & IEVENT_CRL)
 			dev->stats.tx_aborted_errors++;
 		if (events & IEVENT_XFUN) {
-			unsigned long flags;
-
 			netif_dbg(priv, tx_err, dev,
 				  "TX FIFO underrun, packet dropped\n");
 			dev->stats.tx_dropped++;
 			atomic64_inc(&priv->extra_stats.tx_underrun);
 
-			local_irq_save(flags);
-			lock_tx_qs(priv);
-
-			/* Reactivate the Tx Queues */
-			gfar_write(&regs->tstat, gfargrp->tstat);
-
-			unlock_tx_qs(priv);
-			local_irq_restore(flags);
+			schedule_work(&priv->reset_task);
 		}
 		netif_dbg(priv, tx_err, dev, "Transmit Error\n");
 	}
 	if (events & IEVENT_BSY) {
-		dev->stats.rx_errors++;
+		dev->stats.rx_over_errors++;
 		atomic64_inc(&priv->extra_stats.rx_bsy);
 
-		gfar_receive(irq, grp_id);
-
 		netif_dbg(priv, rx_err, dev, "busy error (rstat: %x)\n",
 			  gfar_read(&regs->rstat));
 	}
@@ -3298,7 +3623,138 @@
 	return IRQ_HANDLED;
 }
 
-static struct of_device_id gfar_match[] =
+static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv)
+{
+	struct phy_device *phydev = priv->phydev;
+	u32 val = 0;
+
+	if (!phydev->duplex)
+		return val;
+
+	if (!priv->pause_aneg_en) {
+		if (priv->tx_pause_en)
+			val |= MACCFG1_TX_FLOW;
+		if (priv->rx_pause_en)
+			val |= MACCFG1_RX_FLOW;
+	} else {
+		u16 lcl_adv, rmt_adv;
+		u8 flowctrl;
+		/* get link partner capabilities */
+		rmt_adv = 0;
+		if (phydev->pause)
+			rmt_adv = LPA_PAUSE_CAP;
+		if (phydev->asym_pause)
+			rmt_adv |= LPA_PAUSE_ASYM;
+
+		lcl_adv = 0;
+		if (phydev->advertising & ADVERTISED_Pause)
+			lcl_adv |= ADVERTISE_PAUSE_CAP;
+		if (phydev->advertising & ADVERTISED_Asym_Pause)
+			lcl_adv |= ADVERTISE_PAUSE_ASYM;
+
+		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
+		if (flowctrl & FLOW_CTRL_TX)
+			val |= MACCFG1_TX_FLOW;
+		if (flowctrl & FLOW_CTRL_RX)
+			val |= MACCFG1_RX_FLOW;
+	}
+
+	return val;
+}
+
+static noinline void gfar_update_link_state(struct gfar_private *priv)
+{
+	struct gfar __iomem *regs = priv->gfargrp[0].regs;
+	struct phy_device *phydev = priv->phydev;
+	struct gfar_priv_rx_q *rx_queue = NULL;
+	int i;
+
+	if (unlikely(test_bit(GFAR_RESETTING, &priv->state)))
+		return;
+
+	if (phydev->link) {
+		u32 tempval1 = gfar_read(&regs->maccfg1);
+		u32 tempval = gfar_read(&regs->maccfg2);
+		u32 ecntrl = gfar_read(&regs->ecntrl);
+		u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
+
+		if (phydev->duplex != priv->oldduplex) {
+			if (!(phydev->duplex))
+				tempval &= ~(MACCFG2_FULL_DUPLEX);
+			else
+				tempval |= MACCFG2_FULL_DUPLEX;
+
+			priv->oldduplex = phydev->duplex;
+		}
+
+		if (phydev->speed != priv->oldspeed) {
+			switch (phydev->speed) {
+			case 1000:
+				tempval =
+				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII);
+
+				ecntrl &= ~(ECNTRL_R100);
+				break;
+			case 100:
+			case 10:
+				tempval =
+				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII);
+
+				/* Reduced mode distinguishes
+				 * between 10 and 100
+				 */
+				if (phydev->speed == SPEED_100)
+					ecntrl |= ECNTRL_R100;
+				else
+					ecntrl &= ~(ECNTRL_R100);
+				break;
+			default:
+				netif_warn(priv, link, priv->ndev,
+					   "Ack!  Speed (%d) is not 10/100/1000!\n",
+					   phydev->speed);
+				break;
+			}
+
+			priv->oldspeed = phydev->speed;
+		}
+
+		tempval1 &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
+		tempval1 |= gfar_get_flowctrl_cfg(priv);
+
+		/* Turn last free buffer recording on */
+		if ((tempval1 & MACCFG1_TX_FLOW) && !tx_flow_oldval) {
+			for (i = 0; i < priv->num_rx_queues; i++) {
+				u32 bdp_dma;
+
+				rx_queue = priv->rx_queue[i];
+				bdp_dma = gfar_rxbd_dma_lastfree(rx_queue);
+				gfar_write(rx_queue->rfbptr, bdp_dma);
+			}
+
+			priv->tx_actual_en = 1;
+		}
+
+		if (unlikely(!(tempval1 & MACCFG1_TX_FLOW) && tx_flow_oldval))
+			priv->tx_actual_en = 0;
+
+		gfar_write(&regs->maccfg1, tempval1);
+		gfar_write(&regs->maccfg2, tempval);
+		gfar_write(&regs->ecntrl, ecntrl);
+
+		if (!priv->oldlink)
+			priv->oldlink = 1;
+
+	} else if (priv->oldlink) {
+		priv->oldlink = 0;
+		priv->oldspeed = 0;
+		priv->oldduplex = -1;
+	}
+
+	if (netif_msg_link(priv))
+		phy_print_status(phydev);
+}
+
+static const struct of_device_id gfar_match[] =
 {
 	{
 		.type = "network",
@@ -3315,7 +3771,6 @@
 static struct platform_driver gfar_driver = {
 	.driver = {
 		.name = "fsl-gianfar",
-		.owner = THIS_MODULE,
 		.pm = GFAR_PM_OPS,
 		.of_match_table = gfar_match,
 	},