From 7be0cb35513b07bf74d93d052d57b12e2c654b43 Mon Sep 17 00:00:00 2001
From: Chen Minqiang
Date: Thu, 15 Mar 2018 05:04:37 +0800
Subject: [PATCH 2/2] essedma: refine txq to be adaptive to cpus and netdevs

- use 4 TX queues for each CPU if there is only 1 netdev
- use all 16 TX queues if there is only 1 netdev
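
The resulting layout can be sketched with a small standalone C program
(illustrative only, not driver code: cpus and num_gmac are assumed
stand-ins for num_online_cpus() and the probed GMAC count, and the two
constants mirror edma.h):

  #include <stdio.h>

  #define EDMA_MAX_TRANSMIT_QUEUE   16
  #define EDMA_CPU_CORES_SUPPORTED   4

  int main(void)
  {
  	int cpus = 4;      /* stand-in for num_online_cpus() */
  	int num_gmac = 2;  /* set to 1 to model the single-netdev case */
  	int netdev_group = (num_gmac == 1) ? 1 : 2;
  	int txq_per_core = EDMA_MAX_TRANSMIT_QUEUE / cpus;
  	int txq_per_core_netdev =
  		EDMA_MAX_TRANSMIT_QUEUE / netdev_group / cpus;

  	if (txq_per_core == 0)
  		txq_per_core = 1;
  	if (txq_per_core_netdev == 0)
  		txq_per_core_netdev = 1;

  	/* same arithmetic as the tx_start_offset[] setup below */
  	for (int i = 0; i < num_gmac; i++) {
  		for (int j = 0; j < cpus && j < EDMA_CPU_CORES_SUPPORTED; j++) {
  			int m = i % netdev_group;
  			int start = j * txq_per_core + m * txq_per_core_netdev;

  			printf("eth%d cpu%d: txq %2d..%2d\n", i, j,
  			       start, start + txq_per_core_netdev - 1);
  		}
  	}
  	return 0;
  }

With 4 CPUs and 2 netdevs this prints eth0 on q0-q1/q4-q5/q8-q9/q12-q13
and eth1 on the remaining queues; with num_gmac = 1, eth0 spans q0-q15,
i.e. 4 queues per CPU, matching the two points above.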

Signed-off-by: Chen Minqiang
---
 drivers/net/ethernet/qualcomm/essedma/edma.c     | 22 +++++--------
 drivers/net/ethernet/qualcomm/essedma/edma.h     |  5 +--
 drivers/net/ethernet/qualcomm/essedma/edma_axi.c | 40 ++++++++++++++----------
 3 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.c b/drivers/net/ethernet/qualcomm/essedma/edma.c
index 3f1da93..05f9ce9 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma.c
+++ b/drivers/net/ethernet/qualcomm/essedma/edma.c
@@ -22,14 +22,6 @@ extern struct net_device *edma_netdev[EDMA_MAX_PORTID_SUPPORTED];
 bool edma_stp_rstp;
 u16 edma_ath_eth_type;
 
-/* edma_skb_priority_offset()
- * get edma skb priority
- */
-static unsigned int edma_skb_priority_offset(struct sk_buff *skb)
-{
-	return (skb->priority >> 2) & 1;
-}
-
 /* edma_alloc_tx_ring()
  * Allocate Tx descriptors ring
  */
@@ -1042,13 +1034,14 @@ static inline u16 edma_tpd_available(struct edma_common_info *edma_cinfo,
 /* edma_tx_queue_get()
  * Get the starting number of the queue
  */
-static inline int edma_tx_queue_get(struct edma_adapter *adapter,
+static inline int edma_tx_queue_get(struct edma_common_info *edma_cinfo, struct edma_adapter *adapter,
 				    struct sk_buff *skb, int txq_id)
 {
 	/* skb->priority is used as an index to skb priority table
 	 * and based on packet priority, correspong queue is assigned.
+	 * FIXME: for now we simply use jiffies to balance across the queues
 	 */
-	return adapter->tx_start_offset[txq_id] + edma_skb_priority_offset(skb);
+	return adapter->tx_start_offset[txq_id] + (jiffies % edma_cinfo->num_txq_per_core_netdev);
 }
 
 /* edma_tx_update_hw_idx()
  */
@@ -1417,8 +1410,9 @@ netdev_tx_t edma_xmit(struct sk_buff *skb,
 	}
 
 	/* this will be one of the 4 TX queues exposed to linux kernel */
-	txq_id = skb_get_queue_mapping(skb);
-	queue_id = edma_tx_queue_get(adapter, skb, txq_id);
+	/* XXX: what if num_online_cpus() > EDMA_CPU_CORES_SUPPORTED? */
+	txq_id = smp_processor_id() % EDMA_CPU_CORES_SUPPORTED;
+	queue_id = edma_tx_queue_get(edma_cinfo, adapter, skb, txq_id);
 	etdr = edma_cinfo->tpd_ring[queue_id];
 
 	nq = netdev_get_tx_queue(net_dev, txq_id);
@@ -1899,8 +1893,8 @@ void edma_free_irqs(struct edma_adapter *adapter)
 	int i, j;
 	int k = ((edma_cinfo->num_rx_queues == 4) ? 1 : 2);
 
-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
-		for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + 4); j++)
+	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
+		for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + edma_cinfo->num_txq_per_core); j++)
 			free_irq(edma_cinfo->tx_irq[j], &edma_cinfo->edma_percpu_info[i]);
 
 		for (j = edma_cinfo->edma_percpu_info[i].rx_start; j < (edma_cinfo->edma_percpu_info[i].rx_start + k); j++)
diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.h b/drivers/net/ethernet/qualcomm/essedma/edma.h
index 29c8379..2ba43e0 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma.h
+++ b/drivers/net/ethernet/qualcomm/essedma/edma.h
@@ -325,6 +325,7 @@ struct edma_common_info {
 	u32 from_cpu; /* from CPU TPD field */
 	u32 num_rxq_per_core; /* Rx queues per core */
 	u32 num_txq_per_core; /* Tx queues per core */
+	u32 num_txq_per_core_netdev; /* Tx queues per core per netdev */
 	u16 tx_ring_count; /* Tx ring count */
 	u16 rx_ring_count; /* Rx ring*/
 	u16 rx_head_buffer_len; /* rx buffer length */
@@ -332,7 +333,7 @@ struct edma_common_info {
 	u32 page_mode; /* Jumbo frame supported flag */
 	u32 fraglist_mode; /* fraglist supported flag */
 	struct edma_hw hw; /* edma hw specific structure */
-	struct edma_per_cpu_queues_info edma_percpu_info[CONFIG_NR_CPUS]; /* per cpu information */
+	struct edma_per_cpu_queues_info edma_percpu_info[EDMA_CPU_CORES_SUPPORTED]; /* per cpu information */
 	spinlock_t stats_lock; /* protect edma stats area for updation */
 	bool is_single_phy;
@@ -401,7 +402,7 @@ struct edma_adapter {
 	u32 link_state; /* phy link state */
 	u32 phy_mdio_addr; /* PHY device address on MII interface */
 	u32 poll_required; /* check if link polling is required */
-	u32 tx_start_offset[CONFIG_NR_CPUS]; /* tx queue start */
+	u32 tx_start_offset[EDMA_CPU_CORES_SUPPORTED]; /* tx queue start */
 	u32 default_vlan_tag; /* vlan tag */
 	u32 dp_bitmap;
 	uint8_t phy_id[MII_BUS_ID_SIZE + 3];
diff --git a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
index d9f8b52..5824680 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
+++ b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
@@ -721,11 +721,7 @@ static int edma_axi_probe(struct platform_device *pdev)
 	int i, j, k, err = 0;
 	int portid_bmp;
 	int idx = 0, idx_mac = 0;
-
-	if (CONFIG_NR_CPUS != EDMA_CPU_CORES_SUPPORTED) {
-		dev_err(&pdev->dev, "Invalid CPU Cores\n");
-		return -EINVAL;
-	}
+	int netdev_group = 2;
 
 	if ((num_rxq != 4) && (num_rxq != 8)) {
 		dev_err(&pdev->dev, "Invalid RX queue, edma probe failed\n");
@@ -749,7 +745,7 @@ static int edma_axi_probe(struct platform_device *pdev)
 	/* Initialize the netdev array before allocation
 	 * to avoid double free
 	 */
-	for (i = 0 ; i < edma_cinfo->num_gmac ; i++)
+	for (i = 0 ; i < EDMA_MAX_PORTID_SUPPORTED; i++)
 		edma_netdev[i] = NULL;
 
 	for (i = 0 ; i < edma_cinfo->num_gmac ; i++) {
@@ -770,8 +766,11 @@ static int edma_axi_probe(struct platform_device *pdev)
 
 	/* Fill ring details */
 	edma_cinfo->num_tx_queues = EDMA_MAX_TRANSMIT_QUEUE;
-	edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / 4);
+	edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / num_online_cpus());
+	edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
 	edma_cinfo->tx_ring_count = EDMA_TX_RING_SIZE;
+	if (edma_cinfo->num_txq_per_core == 0)
+		edma_cinfo->num_txq_per_core = 1;
 
 	/* Update num rx queues based on module parameter */
 	edma_cinfo->num_rx_queues = num_rxq;
@@ -941,6 +940,13 @@ static int edma_axi_probe(struct platform_device *pdev)
 		idx_mac++;
 	}
 
+	if (edma_cinfo->num_gmac == 1) {
+		netdev_group = 1;
+		edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
+	}
+	if (edma_cinfo->num_txq_per_core_netdev == 0)
+		edma_cinfo->num_txq_per_core_netdev = 1;
+
 	/* Populate the adapter structure register the netdevice */
 	for (i = 0; i < edma_cinfo->num_gmac; i++) {
 		int k, m;
@@ -948,17 +954,16 @@ static int edma_axi_probe(struct platform_device *pdev)
 		adapter[i] = netdev_priv(edma_netdev[i]);
 		adapter[i]->netdev = edma_netdev[i];
 		adapter[i]->pdev = pdev;
-		for (j = 0; j < CONFIG_NR_CPUS; j++) {
-			m = i % 2;
-			adapter[i]->tx_start_offset[j] =
-				((j << EDMA_TX_CPU_START_SHIFT) + (m << 1));
+		for (j = 0; j < num_online_cpus() && j < EDMA_CPU_CORES_SUPPORTED; j++) {
+			m = i % netdev_group;
+			adapter[i]->tx_start_offset[j] = j * edma_cinfo->num_txq_per_core + m * edma_cinfo->num_txq_per_core_netdev;
 			/* Share the queues with available net-devices.
 			 * For instance , with 5 net-devices
 			 * eth0/eth2/eth4 will share q0,q1,q4,q5,q8,q9,q12,q13
 			 * and eth1/eth3 will get the remaining.
 			 */
 			for (k = adapter[i]->tx_start_offset[j]; k <
-					(adapter[i]->tx_start_offset[j] + 2); k++) {
+					(adapter[i]->tx_start_offset[j] + edma_cinfo->num_txq_per_core_netdev); k++) {
 				if (edma_fill_netdev(edma_cinfo, k, i, j)) {
 					pr_err("Netdev overflow Error\n");
 					goto err_register;
@@ -1111,9 +1116,12 @@ static int edma_axi_probe(struct platform_device *pdev)
 	/* populate per_core_info, do a napi_Add, request 16 TX irqs,
 	 * 8 RX irqs, do a napi enable
 	 */
-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
+	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
 		u8 rx_start;
 
+		tx_mask[i] = (0xFFFF >> (16 - edma_cinfo->num_txq_per_core)) << (i * edma_cinfo->num_txq_per_core);
+		tx_start[i] = i * edma_cinfo->num_txq_per_core;
+
 		edma_cinfo->edma_percpu_info[i].napi.state = 0;
 
 		netif_napi_add(edma_netdev[0],
@@ -1138,7 +1146,7 @@ static int edma_axi_probe(struct platform_device *pdev)
 
 		/* Request irq per core */
 		for (j = edma_cinfo->edma_percpu_info[i].tx_start;
-		     j < tx_start[i] + 4; j++) {
+		     j < tx_start[i] + edma_cinfo->num_txq_per_core; j++) {
 			sprintf(&edma_tx_irq[j][0], "edma_eth_tx%d", j);
 			err = request_irq(edma_cinfo->tx_irq[j],
 				       edma_interrupt,
@@ -1263,7 +1271,7 @@ err_configure:
 #endif
 err_rmap_add_fail:
 	edma_free_irqs(adapter[0]);
-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
+	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
 		napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
 		del_timer_sync(&edma_cinfo->edma_percpu_info[i].rx_realloc_timer);
@@ -1314,7 +1322,7 @@ static int edma_axi_remove(struct platform_device *pdev)
 		unregister_netdev(edma_netdev[i]);
 
 	edma_stop_rx_tx(hw);
-	for (i = 0; i < CONFIG_NR_CPUS; i++)
+	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++)
 		napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
 
 	edma_irq_disable(edma_cinfo);
-- 
2.7.4