--- zzzz-none-000/linux-3.10.107/net/rds/ib.h 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/net/rds/ib.h 2021-02-04 17:41:59.000000000 +0000 @@ -9,8 +9,11 @@ #include "rds.h" #include "rdma_transport.h" -#define RDS_FMR_SIZE 256 -#define RDS_FMR_POOL_SIZE 8192 +#define RDS_FMR_1M_POOL_SIZE (8192 / 2) +#define RDS_FMR_1M_MSG_SIZE 256 +#define RDS_FMR_8K_MSG_SIZE 2 +#define RDS_MR_8K_SCALE (256 / (RDS_FMR_8K_MSG_SIZE + 1)) +#define RDS_FMR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2)) #define RDS_IB_MAX_SGE 8 #define RDS_IB_RECV_SGE 2 @@ -24,6 +27,9 @@ #define RDS_IB_RECYCLE_BATCH_COUNT 32 +#define RDS_IB_WC_MAX 32 +#define RDS_IB_SEND_OP BIT_ULL(63) + extern struct rw_semaphore rds_ib_devices_lock; extern struct list_head rds_ib_devices; @@ -69,7 +75,11 @@ struct rds_ib_send_work { void *s_op; - struct ib_send_wr s_wr; + union { + struct ib_send_wr s_wr; + struct ib_rdma_wr s_rdma_wr; + struct ib_atomic_wr s_atomic_wr; + }; struct ib_sge s_sge[RDS_IB_MAX_SGE]; unsigned long s_queued; }; @@ -89,6 +99,20 @@ atomic_t w_free_ctr; }; +/* Rings are posted with all the allocations they'll need to queue the + * incoming message to the receiving socket so this can't fail. + * All fragments start with a header, so we can make sure we're not receiving + * garbage, and we can tell a small 8 byte fragment from an ACK frame. + */ +struct rds_ib_ack_state { + u64 ack_next; + u64 ack_recv; + unsigned int ack_required:1; + unsigned int ack_next_valid:1; + unsigned int ack_recv_valid:1; +}; + + struct rds_ib_device; struct rds_ib_connection { @@ -100,9 +124,14 @@ /* alphabet soup, IBTA style */ struct rdma_cm_id *i_cm_id; struct ib_pd *i_pd; - struct ib_mr *i_mr; struct ib_cq *i_send_cq; struct ib_cq *i_recv_cq; + struct ib_wc i_send_wc[RDS_IB_WC_MAX]; + struct ib_wc i_recv_wc[RDS_IB_WC_MAX]; + + /* interrupt handling */ + struct tasklet_struct i_send_tasklet; + struct tasklet_struct i_recv_tasklet; /* tx */ struct rds_ib_work_ring i_send_ring; @@ -113,7 +142,6 @@ atomic_t i_signaled_sends; /* rx */ - struct tasklet_struct i_recv_tasklet; struct mutex i_recv_mutex; struct rds_ib_work_ring i_recv_ring; struct rds_ib_incoming *i_ibinc; @@ -165,6 +193,12 @@ struct rds_ib_ipaddr { struct list_head list; __be32 ipaddr; + struct rcu_head rcu; +}; + +enum { + RDS_IB_MR_8K_POOL, + RDS_IB_MR_1M_POOL, }; struct rds_ib_device { @@ -173,10 +207,12 @@ struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *mr; - struct rds_ib_mr_pool *mr_pool; - unsigned int fmr_max_remaps; unsigned int max_fmrs; + struct rds_ib_mr_pool *mr_1m_pool; + struct rds_ib_mr_pool *mr_8k_pool; + unsigned int fmr_max_remaps; + unsigned int max_8k_fmrs; + unsigned int max_1m_fmrs; int max_sge; unsigned int max_wrs; unsigned int max_initiator_depth; @@ -199,14 +235,14 @@ struct rds_ib_statistics { uint64_t s_ib_connect_raced; uint64_t s_ib_listen_closed_stale; - uint64_t s_ib_tx_cq_call; + uint64_t s_ib_evt_handler_call; + uint64_t s_ib_tasklet_call; uint64_t s_ib_tx_cq_event; uint64_t s_ib_tx_ring_full; uint64_t s_ib_tx_throttle; uint64_t s_ib_tx_sg_mapping_failure; uint64_t s_ib_tx_stalled; uint64_t s_ib_tx_credit_updates; - uint64_t s_ib_rx_cq_call; uint64_t s_ib_rx_cq_event; uint64_t s_ib_rx_ring_empty; uint64_t s_ib_rx_refill_from_cq; @@ -218,12 +254,18 @@ uint64_t s_ib_ack_send_delayed; uint64_t s_ib_ack_send_piggybacked; uint64_t s_ib_ack_received; - uint64_t s_ib_rdma_mr_alloc; - uint64_t s_ib_rdma_mr_free; - uint64_t s_ib_rdma_mr_used; - uint64_t s_ib_rdma_mr_pool_flush; - uint64_t s_ib_rdma_mr_pool_wait; - uint64_t s_ib_rdma_mr_pool_depleted; + uint64_t s_ib_rdma_mr_8k_alloc; + uint64_t s_ib_rdma_mr_8k_free; + uint64_t s_ib_rdma_mr_8k_used; + uint64_t s_ib_rdma_mr_8k_pool_flush; + uint64_t s_ib_rdma_mr_8k_pool_wait; + uint64_t s_ib_rdma_mr_8k_pool_depleted; + uint64_t s_ib_rdma_mr_1m_alloc; + uint64_t s_ib_rdma_mr_1m_free; + uint64_t s_ib_rdma_mr_1m_used; + uint64_t s_ib_rdma_mr_1m_pool_flush; + uint64_t s_ib_rdma_mr_1m_pool_wait; + uint64_t s_ib_rdma_mr_1m_pool_depleted; uint64_t s_ib_atomic_cswp; uint64_t s_ib_atomic_fadd; }; @@ -235,28 +277,34 @@ * doesn't define it. */ static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev, - struct scatterlist *sg, unsigned int sg_dma_len, int direction) + struct scatterlist *sglist, + unsigned int sg_dma_len, + int direction) { + struct scatterlist *sg; unsigned int i; - for (i = 0; i < sg_dma_len; ++i) { + for_each_sg(sglist, sg, sg_dma_len, i) { ib_dma_sync_single_for_cpu(dev, - ib_sg_dma_address(dev, &sg[i]), - ib_sg_dma_len(dev, &sg[i]), + ib_sg_dma_address(dev, sg), + ib_sg_dma_len(dev, sg), direction); } } #define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev, - struct scatterlist *sg, unsigned int sg_dma_len, int direction) + struct scatterlist *sglist, + unsigned int sg_dma_len, + int direction) { + struct scatterlist *sg; unsigned int i; - for (i = 0; i < sg_dma_len; ++i) { + for_each_sg(sglist, sg, sg_dma_len, i) { ib_dma_sync_single_for_device(dev, - ib_sg_dma_address(dev, &sg[i]), - ib_sg_dma_len(dev, &sg[i]), + ib_sg_dma_address(dev, sg), + ib_sg_dma_len(dev, sg), direction); } } @@ -269,7 +317,8 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev); extern struct ib_client rds_ib_client; -extern unsigned int fmr_message_size; +extern unsigned int rds_ib_fmr_1m_pool_size; +extern unsigned int rds_ib_fmr_8k_pool_size; extern unsigned int rds_ib_retry_count; extern spinlock_t ib_nodev_conns_lock; @@ -299,7 +348,8 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); void rds_ib_destroy_nodev_conns(void); -struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); +struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, + int npages); void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, @@ -307,6 +357,8 @@ void rds_ib_sync_mr(void *trans_private, int dir); void rds_ib_free_mr(void *trans_private, int invalidate); void rds_ib_flush_mrs(void); +int rds_ib_fmr_init(void); +void rds_ib_fmr_exit(void); /* ib_recv.c */ int rds_ib_recv_init(void); @@ -314,11 +366,11 @@ int rds_ib_recv(struct rds_connection *conn); int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic); void rds_ib_recv_free_caches(struct rds_ib_connection *ic); -void rds_ib_recv_refill(struct rds_connection *conn, int prefill); +void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); void rds_ib_inc_free(struct rds_incoming *inc); -int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, - size_t size); -void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context); +int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); +void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc, + struct rds_ib_ack_state *state); void rds_ib_recv_tasklet_fn(unsigned long data); void rds_ib_recv_init_ring(struct rds_ib_connection *ic); void rds_ib_recv_clear_ring(struct rds_ib_connection *ic); @@ -326,6 +378,7 @@ void rds_ib_attempt_ack(struct rds_ib_connection *ic); void rds_ib_ack_send_complete(struct rds_ib_connection *ic); u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic); +void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required); /* ib_ring.c */ void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr); @@ -340,11 +393,10 @@ extern wait_queue_head_t rds_ib_ring_empty_wait; /* ib_send.c */ -char *rds_ib_wc_status_str(enum ib_wc_status status); void rds_ib_xmit_complete(struct rds_connection *conn); int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); -void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); void rds_ib_send_init_ring(struct rds_ib_connection *ic); void rds_ib_send_clear_ring(struct rds_ib_connection *ic); int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);