From 7be0cb35513b07bf74d93d052d57b12e2c654b43 Mon Sep 17 00:00:00 2001
From: Chen Minqiang
Date: Thu, 15 Mar 2018 05:04:37 +0800
Subject: [PATCH 2/2] essedma: refine txq to be adaptive to cpus and netdevs

- use 4 TX queues for each CPU if there is only 1 netdev
- use all 16 TX queues if there is only 1 netdev
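
The resulting layout can be sketched with a small standalone C program
(illustrative only, not driver code: cpus and num_gmac are assumed
stand-ins for num_online_cpus() and the probed GMAC count, and the two
constants mirror edma.h):

  #include <stdio.h>

  #define EDMA_MAX_TRANSMIT_QUEUE   16
  #define EDMA_CPU_CORES_SUPPORTED   4

  int main(void)
  {
  	int cpus = 4;      /* stand-in for num_online_cpus() */
  	int num_gmac = 2;  /* set to 1 to model the single-netdev case */
  	int netdev_group = (num_gmac == 1) ? 1 : 2;
  	int txq_per_core = EDMA_MAX_TRANSMIT_QUEUE / cpus;
  	int txq_per_core_netdev =
  		EDMA_MAX_TRANSMIT_QUEUE / netdev_group / cpus;

  	if (txq_per_core == 0)
  		txq_per_core = 1;
  	if (txq_per_core_netdev == 0)
  		txq_per_core_netdev = 1;

  	/* same arithmetic as the tx_start_offset[] setup below */
  	for (int i = 0; i < num_gmac; i++) {
  		for (int j = 0; j < cpus && j < EDMA_CPU_CORES_SUPPORTED; j++) {
  			int m = i % netdev_group;
  			int start = j * txq_per_core + m * txq_per_core_netdev;

  			printf("eth%d cpu%d: txq %2d..%2d\n", i, j,
  			       start, start + txq_per_core_netdev - 1);
  		}
  	}
  	return 0;
  }

With 4 CPUs and 2 netdevs this prints eth0 on q0-q1/q4-q5/q8-q9/q12-q13
and eth1 on the remaining queues; with num_gmac = 1, eth0 spans q0-q15,
i.e. 4 queues per CPU, matching the two points above.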

Signed-off-by: Chen Minqiang
---
 drivers/net/ethernet/qualcomm/essedma/edma.c     | 22 +++++--------
 drivers/net/ethernet/qualcomm/essedma/edma.h     |  5 +--
 drivers/net/ethernet/qualcomm/essedma/edma_axi.c | 40 ++++++++++++++----------
 3 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.c b/drivers/net/ethernet/qualcomm/essedma/edma.c
index 3f1da93..05f9ce9 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma.c
+++ b/drivers/net/ethernet/qualcomm/essedma/edma.c
@@ -22,14 +22,6 @@ extern struct net_device *edma_netdev[EDMA_MAX_PORTID_SUPPORTED];
 bool edma_stp_rstp;
 u16 edma_ath_eth_type;
 
-/* edma_skb_priority_offset()
- * get edma skb priority
- */
-static unsigned int edma_skb_priority_offset(struct sk_buff *skb)
-{
-	return (skb->priority >> 2) & 1;
-}
-
 /* edma_alloc_tx_ring()
  * Allocate Tx descriptors ring
  */
@@ -1042,13 +1034,14 @@ static inline u16 edma_tpd_available(struct edma_common_info *edma_cinfo,
 /* edma_tx_queue_get()
  * Get the starting number of the queue
  */
-static inline int edma_tx_queue_get(struct edma_adapter *adapter,
+static inline int edma_tx_queue_get(struct edma_common_info *edma_cinfo, struct edma_adapter *adapter,
 				    struct sk_buff *skb, int txq_id)
 {
 	/* skb->priority is used as an index to skb priority table
 	 * and based on packet priority, correspong queue is assigned.
+	 * FIXME: for now we simply use jiffies to balance across the queues
 	 */
-	return adapter->tx_start_offset[txq_id] + edma_skb_priority_offset(skb);
+	return adapter->tx_start_offset[txq_id] + (jiffies % edma_cinfo->num_txq_per_core_netdev);
 }
 
 /* edma_tx_update_hw_idx()
  */
@@ -1417,8 +1410,9 @@ netdev_tx_t edma_xmit(struct sk_buff *skb,
 	}
 
 	/* this will be one of the 4 TX queues exposed to linux kernel */
-	txq_id = skb_get_queue_mapping(skb);
-	queue_id = edma_tx_queue_get(adapter, skb, txq_id);
+	/* XXX: what if num_online_cpus() > EDMA_CPU_CORES_SUPPORTED? */
+	txq_id = smp_processor_id() % EDMA_CPU_CORES_SUPPORTED;
+	queue_id = edma_tx_queue_get(edma_cinfo, adapter, skb, txq_id);
 	etdr = edma_cinfo->tpd_ring[queue_id];
 
 	nq = netdev_get_tx_queue(net_dev, txq_id);
@@ -1899,8 +1893,8 @@ void edma_free_irqs(struct edma_adapter *adapter)
 	int i, j;
 	int k = ((edma_cinfo->num_rx_queues == 4) ? 1 : 2);
 
-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
-		for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + 4); j++)
+	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
+		for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + edma_cinfo->num_txq_per_core); j++)
 			free_irq(edma_cinfo->tx_irq[j], &edma_cinfo->edma_percpu_info[i]);
 
 		for (j = edma_cinfo->edma_percpu_info[i].rx_start; j < (edma_cinfo->edma_percpu_info[i].rx_start + k); j++)
diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.h b/drivers/net/ethernet/qualcomm/essedma/edma.h
index 29c8379..2ba43e0 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma.h
+++ b/drivers/net/ethernet/qualcomm/essedma/edma.h
@@ -325,6 +325,7 @@ struct edma_common_info {
 	u32 from_cpu; /* from CPU TPD field */
 	u32 num_rxq_per_core; /* Rx queues per core */
 	u32 num_txq_per_core; /* Tx queues per core */
+	u32 num_txq_per_core_netdev; /* Tx queues per core per netdev */
 	u16 tx_ring_count; /* Tx ring count */
 	u16 rx_ring_count; /* Rx ring*/
 	u16 rx_head_buffer_len; /* rx buffer length */
@@ -332,7 +333,7 @@ struct edma_common_info {
 	u32 page_mode; /* Jumbo frame supported flag */
 	u32 fraglist_mode; /* fraglist supported flag */
 	struct edma_hw hw; /* edma hw specific structure */
-	struct edma_per_cpu_queues_info edma_percpu_info[CONFIG_NR_CPUS]; /* per cpu information */
+	struct edma_per_cpu_queues_info edma_percpu_info[EDMA_CPU_CORES_SUPPORTED]; /* per cpu information */
 	spinlock_t stats_lock; /* protect edma stats area for updation */
 	bool is_single_phy;
@@ -401,7 +402,7 @@ struct edma_adapter {
 	u32 link_state; /* phy link state */
 	u32 phy_mdio_addr; /* PHY device address on MII interface */
 	u32 poll_required; /* check if link polling is required */
-	u32 tx_start_offset[CONFIG_NR_CPUS]; /* tx queue start */
+	u32 tx_start_offset[EDMA_CPU_CORES_SUPPORTED]; /* tx queue start */
 	u32 default_vlan_tag; /* vlan tag */
 	u32 dp_bitmap;
 	uint8_t phy_id[MII_BUS_ID_SIZE + 3];
diff --git a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
index d9f8b52..5824680 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
+++ b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
@@ -721,11 +721,7 @@ static int edma_axi_probe(struct platform_device *pdev)
 	int i, j, k, err = 0;
 	int portid_bmp;
 	int idx = 0, idx_mac = 0;
-
-	if (CONFIG_NR_CPUS != EDMA_CPU_CORES_SUPPORTED) {
-		dev_err(&pdev->dev, "Invalid CPU Cores\n");
-		return -EINVAL;
-	}
+	int netdev_group = 2;
 
 	if ((num_rxq != 4) && (num_rxq != 8)) {
 		dev_err(&pdev->dev, "Invalid RX queue, edma probe failed\n");
@@ -749,7 +745,7 @@ static int edma_axi_probe(struct platform_device *pdev)
 	/* Initialize the netdev array before allocation
 	 * to avoid double free
 	 */
-	for (i = 0 ; i < edma_cinfo->num_gmac ; i++)
+	for (i = 0 ; i < EDMA_MAX_PORTID_SUPPORTED; i++)
 		edma_netdev[i] = NULL;
 
 	for (i = 0 ; i < edma_cinfo->num_gmac ; i++) {
@@ -770,8 +766,11 @@ static int edma_axi_probe(struct platform_device *pdev)
 
 	/* Fill ring details */
 	edma_cinfo->num_tx_queues = EDMA_MAX_TRANSMIT_QUEUE;
-	edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / 4);
+	edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / num_online_cpus());
+	edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
 	edma_cinfo->tx_ring_count = EDMA_TX_RING_SIZE;
+	if (edma_cinfo->num_txq_per_core == 0)
+		edma_cinfo->num_txq_per_core = 1;
 
 	/* Update num rx queues based on module parameter */
 	edma_cinfo->num_rx_queues = num_rxq;
@@ -941,6 +940,13 @@ static int edma_axi_probe(struct platform_device *pdev)
 		idx_mac++;
 	}
 
+	if (edma_cinfo->num_gmac == 1) {
+		netdev_group = 1;
+		edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
+	}
+	if (edma_cinfo->num_txq_per_core_netdev == 0)
+		edma_cinfo->num_txq_per_core_netdev = 1;
+
 	/* Populate the adapter structure register the netdevice */
 	for (i = 0; i < edma_cinfo->num_gmac; i++) {
 		int k, m;
@@ -948,17 +954,16 @@ static int edma_axi_probe(struct platform_device *pdev)
 		adapter[i] = netdev_priv(edma_netdev[i]);
 		adapter[i]->netdev = edma_netdev[i];
 		adapter[i]->pdev = pdev;
-		for (j = 0; j < CONFIG_NR_CPUS; j++) {
-			m = i % 2;
-			adapter[i]->tx_start_offset[j] =
-				((j << EDMA_TX_CPU_START_SHIFT) + (m << 1));
+		for (j = 0; j < num_online_cpus() && j < EDMA_CPU_CORES_SUPPORTED; j++) {
+			m = i % netdev_group;
+			adapter[i]->tx_start_offset[j] = j * edma_cinfo->num_txq_per_core + m * edma_cinfo->num_txq_per_core_netdev;
 			/* Share the queues with available net-devices.
 			 * For instance , with 5 net-devices
 			 * eth0/eth2/eth4 will share q0,q1,q4,q5,q8,q9,q12,q13
 			 * and eth1/eth3 will get the remaining.
 			 */
 			for (k = adapter[i]->tx_start_offset[j]; k <
-					(adapter[i]->tx_start_offset[j] + 2); k++) {
+					(adapter[i]->tx_start_offset[j] + edma_cinfo->num_txq_per_core_netdev); k++) {
 				if (edma_fill_netdev(edma_cinfo, k, i, j)) {
 					pr_err("Netdev overflow Error\n");
 					goto err_register;
@@ -1111,9 +1116,12 @@ static int edma_axi_probe(struct platform_device *pdev)
 	/* populate per_core_info, do a napi_Add, request 16 TX irqs,
 	 * 8 RX irqs, do a napi enable
 	 */
-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
+	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
 		u8 rx_start;
 
+		tx_mask[i] = (0xFFFF >> (16 - edma_cinfo->num_txq_per_core)) << (i * edma_cinfo->num_txq_per_core);
+		tx_start[i] = i * edma_cinfo->num_txq_per_core;
+
 		edma_cinfo->edma_percpu_info[i].napi.state = 0;
 
 		netif_napi_add(edma_netdev[0],
@@ -1138,7 +1146,7 @@ static int edma_axi_probe(struct platform_device *pdev)
 
 		/* Request irq per core */
 		for (j = edma_cinfo->edma_percpu_info[i].tx_start;
-		     j < tx_start[i] + 4; j++) {
+		     j < tx_start[i] + edma_cinfo->num_txq_per_core; j++) {
 			sprintf(&edma_tx_irq[j][0], "edma_eth_tx%d", j);
 			err = request_irq(edma_cinfo->tx_irq[j],
 				       edma_interrupt,
@@ -1263,7 +1271,7 @@ err_configure:
 #endif
 err_rmap_add_fail:
 	edma_free_irqs(adapter[0]);
-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
+	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
 		napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
 		del_timer_sync(&edma_cinfo->edma_percpu_info[i].rx_realloc_timer);
@@ -1314,7 +1322,7 @@ static int edma_axi_remove(struct platform_device *pdev)
 		unregister_netdev(edma_netdev[i]);
 
 	edma_stop_rx_tx(hw);
-	for (i = 0; i < CONFIG_NR_CPUS; i++)
+	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++)
 		napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
 
 	edma_irq_disable(edma_cinfo);
-- 
2.7.4