11#ifndef __INCLUDE_DAO_DMA_H__
12#define __INCLUDE_DAO_DMA_H__
16#include <rte_dmadev.h>
18#include <rte_mempool.h>
19#include <rte_prefetch.h>
22#include <dao_config.h>
/* Capacity of the src[] and dst[] struct rte_dma_sge arrays declared in this header. */
27#define DAO_DMA_MAX_POINTER 15u
/* Capacity of the ptr[], val[], pend_ptr[] and pend_val[] arrays declared in this header. */
30#define DAO_DMA_MAX_META_POINTER 48
/* Capacity of the dev2mem[] and mem2dev[] arrays (vchan state and vchan stats). */
33#define DAO_DMA_MAX_VCHAN_PER_LCORE 64
/* Capacity of the mdata[] array of struct dao_dma_cmpl_mdata. */
36#define DAO_DMA_MAX_INFLIGHT_MDATA 4096
/* NOTE(review): no use of this value is visible in this extract; presumably the
 * pending-operation count at which the doorbell is rung - confirm against the users. */
39#define DAO_DMA_DOORBELL_THRESHOLD 1024
298static __rte_always_inline
int
/* NOTE(review): name and parameter list are missing from this extract; presumably
 * dao_dma_burst_capacity(), whose signature is listed further down in this file. */
316static __rte_always_inline uint16_t
/* Forwards to rte_dma_burst_capacity() for this channel's devid/vchan pair and
 * returns its result unchanged. */
319 return rte_dma_burst_capacity(vchan->
devid, vchan->
vchan);
/* NOTE(review): name, parameter list and several body lines are missing from this
 * extract; presumably dao_dma_desc_avail_get(vchan, nb_src, nb_dst). The origin of
 * src_avail, dst_avail and flush_thr is not visible here. */
334static __rte_always_inline
bool
/* Distance from head to tail, truncated to 16 bits so it stays correct when the
 * counters wrap. */
339 uint16_t inflight = vchan->
tail - vchan->
head;
340 uint32_t n_src, n_dst, nb_desc;
/* Expected case: each side either has room for the request or currently holds no
 * queued entries (index is zero). The statement this guards is not visible. */
343 if (likely((src_avail >= (
int)nb_src || !vchan->
src_i) &&
344 (dst_avail >= (
int)nb_dst || !vchan->
dst_i)))
/* Round-up division: how many flush_thr-sized groups the source / destination
 * entries occupy. */
348 n_src = (nb_src + flush_thr - 1) / flush_thr;
349 n_dst = (nb_dst + flush_thr - 1) / flush_thr;
/* The larger of the two group counts. */
350 nb_desc = RTE_MAX(n_src, n_dst);
/* NOTE(review): name and parameter list are missing from this extract; presumably
 * dao_dma_op_status(vchan, op_idx). */
372static __rte_always_inline
bool
375 uint16_t head = vchan->
head;
376 uint16_t tail = vchan->
tail;
/* Special case: source entries are queued and op_idx equals the current tail.
 * The statement this guards is not visible in this extract. */
378 if (vchan->
src_i && (tail == op_idx))
/* True when op_idx lies outside the half-open window [head, tail). The second arm
 * covers the case where tail is numerically below head (the window has wrapped). */
381 return head <= tail ? (op_idx < head || op_idx >= tail) : (op_idx < head && op_idx >= tail);
/* NOTE(review): name, parameter list and several body lines are missing from this
 * extract; presumably dao_dma_flush(vchan, avail). */
394static __rte_always_inline
bool
/* Place the channel's auto_free setting at bit 3 of the operation flags.
 * NOTE(review): presumably RTE_DMA_OP_FLAG_AUTO_FREE - confirm against rte_dmadev.h. */
399 uint64_t flags = (uint64_t)vchan->
auto_free << 3;
/* Expected case: each side either has at least `avail` free entries or holds no
 * queued entries. The statement this guards is not visible. */
402 if (likely((src_avail >= (
int)avail || !vchan->
src_i) &&
403 (dst_avail >= (
int)avail || !vchan->
dst_i)))
/* Request submission together with the enqueue. NOTE(review): whether this line is
 * conditional cannot be told from this extract. */
407 flags |= RTE_DMA_OP_FLAG_SUBMIT;
/* Tail of an enqueue call whose beginning is not visible; dst_i and flags are its
 * last two arguments. */
410 vchan->
dst_i, flags);
/* A negative return code is treated as the unlikely failure path. */
411 if (unlikely(rc < 0)) {
/* Follow-up work that applies only when submission was requested above. */
419 if (flags & RTE_DMA_OP_FLAG_SUBMIT)
440static __rte_always_inline uint16_t
456static __rte_always_inline uint16_t
472static __rte_always_inline
struct rte_dma_sge *
486static __rte_always_inline
struct rte_dma_sge *
/* NOTE(review): name and parameter list are missing from this extract; presumably
 * dao_dma_enq_x1(vchan, src, src_len, dst, dst_len). No capacity check is visible
 * in the lines shown here. */
509static __rte_always_inline
void
/* Snapshot the current fill positions of the source and destination arrays. */
513 uint16_t src_i = vchan->
src_i;
514 uint16_t dst_i = vchan->
dst_i;
/* Record the destination address/length at the current destination slot. */
516 vchan->
dst[dst_i].addr = dst;
517 vchan->
dst[dst_i].length = dst_len;
/* Record the source address/length at the current source slot. */
518 vchan->
src[src_i].addr = src;
519 vchan->
src[src_i].length = src_len;
/* Advance both fill positions by one entry. */
521 vchan->
src_i = src_i + 1;
522 vchan->
dst_i = dst_i + 1;
/* NOTE(review): name and parameter list are missing from this extract; presumably
 * dao_dma_enq_dst_x1(vchan, dst, dst_len). No capacity check is visible here. */
538static __rte_always_inline
void
541 uint16_t dst_i = vchan->
dst_i;
/* Record one destination address/length at the current slot; the source side is
 * not touched by the lines shown. */
543 vchan->
dst[dst_i].addr = dst;
544 vchan->
dst[dst_i].length = dst_len;
/* Advance the destination fill position by one entry. */
546 vchan->
dst_i = dst_i + 1;
/* NOTE(review): name and parameter list are missing from this extract; presumably
 * dao_dma_enq_src_x1(vchan, src, src_len). No capacity check is visible here. */
562static __rte_always_inline
void
565 uint16_t src_i = vchan->
src_i;
/* Record one source address/length at the current slot; the destination side is
 * not touched by the lines shown. */
567 vchan->
src[src_i].addr = src;
568 vchan->
src[src_i].length = src_len;
/* Advance the source fill position by one entry. */
570 vchan->
src_i = src_i + 1;
/* NOTE(review): name, parameter list and several body lines (including the updates
 * of `i` and `src_avail` inside the loops) are missing from this extract;
 * presumably dao_dma_enq_x4(vchan, vsrc, vdst). */
585static __rte_always_inline uint16_t
588 struct rte_dma_sge *src, *dst;
589 uint16_t src_i = vchan->
src_i;
590 uint16_t dst_i = vchan->
dst_i;
/* Number of source entries that still fit before src_i reaches flush_thr. */
591 int src_avail = vchan->
flush_thr - src_i;
/* Cursors to the next free source / destination entry. */
594 src = vchan->
src + src_i;
595 dst = vchan->
dst + dst_i;
/* Fast path: room for all four entries. Each vst1q_u64 stores one 128-bit vector
 * at the start of an entry. NOTE(review): assumes struct rte_dma_sge is 16 bytes
 * so one store fills exactly one entry - confirm. */
596 if (src_avail >= 4) {
597 vst1q_u64((uint64_t *)&src[0], vsrc[0]);
598 vst1q_u64((uint64_t *)&src[1], vsrc[1]);
599 vst1q_u64((uint64_t *)&src[2], vsrc[2]);
600 vst1q_u64((uint64_t *)&src[3], vsrc[3]);
602 vst1q_u64((uint64_t *)&dst[0], vdst[0]);
603 vst1q_u64((uint64_t *)&dst[1], vdst[1]);
604 vst1q_u64((uint64_t *)&dst[2], vdst[2]);
605 vst1q_u64((uint64_t *)&dst[3], vdst[3]);
/* Advance both fill positions by the four entries just written. */
607 vchan->
src_i = src_i + 4;
608 vchan->
dst_i = dst_i + 4;
/* Slow path: store one source/destination pair per iteration while entries remain
 * (i < 4) and src_avail is still positive. */
613 while (i < 4 && src_avail > 0) {
614 vst1q_u64((uint64_t *)src, vsrc[i]);
615 vst1q_u64((uint64_t *)dst, vdst[i]);
/* Publish how many entries the loop above stored. */
621 vchan->
src_i = src_i + i;
622 vchan->
dst_i = dst_i + i;
/* Re-read the fill positions and rebuild the cursors from the channel state.
 * NOTE(review): presumably follows a flush that changed src_i/dst_i; the
 * intervening lines are not visible. */
627 src_i = vchan->
src_i;
628 dst_i = vchan->
dst_i;
629 src = vchan->
src + src_i;
630 dst = vchan->
dst + dst_i;
/* Continue storing from the current value of i under the same loop conditions. */
633 while (i < 4 && src_avail > 0) {
634 vst1q_u64((uint64_t *)src, vsrc[i]);
635 vst1q_u64((uint64_t *)dst, vdst[i]);
/* NOTE(review): name, parameter list and most of the body are missing from this
 * extract; presumably dao_dma_check_compl(vchan). */
652static __rte_always_inline
void
/* Ask the device for up to 128 completed operations on this channel. The index of
 * the last completed operation is not requested (NULL); has_err receives the
 * error indication. */
659 cmpl = rte_dma_completed(vchan->
devid, vchan->
vchan, 128, NULL, &has_err);
/* Error handling branch; its body is not visible in this extract. */
660 if (unlikely(has_err)) {
675static __rte_always_inline uint16_t
691static __rte_always_inline
struct rte_dma_op **
708static __rte_always_inline
void
722static __rte_always_inline
void
/* NOTE(review): name and parameter list are missing from this extract; presumably
 * dao_dma_op_set_cmpl(op, ptr, val, pend_ptr, pend_val). */
736static __rte_always_inline
void
/* Stash the two pointers in the operation's 64-bit metadata fields. */
740 op->user_meta = (uint64_t)(uintptr_t)ptr;
741 op->event_meta = (uint64_t)(uintptr_t)pend_ptr;
/* Pack both values into one 32-bit word: val in bits 31..16, pend_val OR-ed into
 * the low bits. */
742 op->rsvd = ((uint32_t)val << 16) | pend_val;
/* NOTE(review): name, parameter list and most of the body are missing from this
 * extract; presumably dao_dma_check_meta_compl(vchan, mem_order). How idx is
 * derived from i is not visible here. */
754static __rte_always_inline
void
757 uint32_t cmpl, i, j, idx = 0;
/* Ask the device for up to 128 completed operations on this channel; has_err
 * receives the error indication. */
761 cmpl = rte_dma_completed(vchan->
devid, vchan->
vchan, 128, NULL, &has_err);
/* Error handling branch; its body is not visible in this extract. */
762 if (unlikely(has_err)) {
/* Visit each newly completed position, starting at the current head. */
766 for (i = vchan->
head; i < vchan->
head + cmpl; i++) {
/* Visit every entry recorded in this position's metadata (cnt of them). */
768 for (j = 0; j < vchan->
mdata[idx].
cnt; j++) {
/* NOTE(review): name, parameter list and several body lines are missing from this
 * extract; presumably dao_dma_check_meta_compl_ops(vchan, mem_order). Any guards
 * around the stores below are not visible here. */
790static __rte_always_inline
void
795 struct rte_dma_op *deq_ops[DEQ_SZ];
/* Dequeue up to DEQ_SZ operations from this channel into deq_ops. */
801 cmpl = rte_dma_dequeue_ops(vchan->
devid, vchan->
vchan, deq_ops, DEQ_SZ);
805 for (i = 0; i < cmpl; i++) {
806 struct rte_dma_op *op = deq_ops[i];
/* Operations that did not finish successfully take a separate path (not visible). */
808 if (unlikely(op->status != RTE_DMA_STATUS_SUCCESSFUL))
/* Recover the pointers from the operation's two metadata fields. */
812 uint16_t *ptr = (uint16_t *)(uintptr_t)op->user_meta;
813 uint16_t *pend_ptr = (uint16_t *)(uintptr_t)op->event_meta;
/* Unpack rsvd: upper 16 bits are val, lower 16 bits are pend_val. */
814 uint16_t val = op->rsvd >> 16;
815 uint16_t pend_val = op->rsvd & 0xFFFF;
/* Store val through ptr with release ordering. */
818 __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
/* Plain (non-atomic) decrement of the counter behind pend_ptr. */
821 *pend_ptr -= pend_val;
847static __rte_always_inline
void
849 uint16_t *pend_ptr, uint16_t pend_val, uint16_t tail)
/* NOTE(review): name, parameter list and most of the body are missing from this
 * extract; presumably dao_dma_check_meta_compl_v2(vchan, mem_order). How idx is
 * derived from i is not visible here. */
870static __rte_always_inline
void
873 uint32_t cmpl, i, j, idx = 0;
/* Ask the device for up to 128 completed operations on this channel; has_err
 * receives the error indication. */
877 cmpl = rte_dma_completed(vchan->
devid, vchan->
vchan, 128, NULL, &has_err);
/* Error handling branch; its body is not visible in this extract. */
878 if (unlikely(has_err)) {
/* Visit each newly completed position, starting at the current head. */
882 for (i = vchan->
head; i < vchan->
head + cmpl; i++) {
/* Visit every entry recorded in this position's metadata (cnt of them). */
884 for (j = 0; j < vchan->
mdata[idx].
cnt; j++) {
908static __rte_always_inline
void
static __rte_always_inline void dao_dma_enq_src_x1(struct dao_dma_vchan_state *vchan, rte_iova_t src, uint32_t src_len)
static __rte_always_inline int dao_dma_has_stats_feature(void)
static __rte_always_inline struct rte_dma_sge * dao_dma_sge_dst(struct dao_dma_vchan_state *vchan)
static __rte_always_inline void dao_dma_check_meta_compl(struct dao_dma_vchan_state *vchan, const int mem_order)
static __rte_always_inline uint16_t dao_dma_burst_capacity(struct dao_dma_vchan_state *vchan)
#define DAO_DMA_DOORBELL_THRESHOLD
int dao_dma_lcore_mem2dev_autofree_set(int16_t dma_devid, uint16_t vchan, bool enable)
static __rte_always_inline void dao_dma_ops_release(struct dao_dma_vchan_state *vchan, uint16_t n)
#define DAO_DMA_MAX_INFLIGHT_MDATA
static __rte_always_inline struct rte_dma_op ** dao_dma_ops_get(struct dao_dma_vchan_state *vchan, uint16_t n)
static __rte_always_inline void dao_dma_ops_put(struct dao_dma_vchan_state *vchan, uint16_t n)
int16_t dao_dma_ctrl_mem2dev(void)
void dao_dma_compl_wait_inflight(uint16_t vchan)
static __rte_always_inline void dao_dma_check_compl(struct dao_dma_vchan_state *vchan)
int dao_dma_lcore_mem2dev_set(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr)
static __rte_always_inline bool dao_dma_desc_avail_get(struct dao_dma_vchan_state *vchan, uint32_t nb_src, uint32_t nb_dst)
static __rte_always_inline void dao_dma_enq_x1(struct dao_dma_vchan_state *vchan, rte_iova_t src, uint32_t src_len, rte_iova_t dst, uint32_t dst_len)
int dao_dma_lcore_dev2mem_set_ops(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr, uint16_t nb_ops)
int dao_dma_stats_get(uint16_t lcore_id, struct dao_dma_stats *stats)
static __rte_always_inline void dao_dma_check_meta_compl_ops(struct dao_dma_vchan_state *vchan, const int mem_order)
int dao_dma_flush_submit_ops(void)
#define DAO_DMA_MAX_VCHAN_PER_LCORE
#define DAO_DMA_MAX_META_POINTER
static __rte_always_inline uint16_t dao_dma_src_avail(struct dao_dma_vchan_state *vchan)
int dao_dma_flush_submit(void)
RTE_DECLARE_PER_LCORE(struct dao_dma_vchan_info *, dao_dma_vchan_info)
static __rte_always_inline void dao_dma_enq_dst_x1(struct dao_dma_vchan_state *vchan, rte_iova_t dst, uint32_t dst_len)
static __rte_always_inline void dao_dma_check_meta_compl_v2(struct dao_dma_vchan_state *vchan, const int mem_order)
static __rte_always_inline void dao_dma_op_set_cmpl(struct rte_dma_op *op, uint16_t *ptr, uint16_t val, uint16_t *pend_ptr, uint16_t pend_val)
static __rte_always_inline uint16_t dao_dma_dst_avail(struct dao_dma_vchan_state *vchan)
static __rte_always_inline bool dao_dma_flush(struct dao_dma_vchan_state *vchan, const uint8_t avail)
static __rte_always_inline struct rte_dma_sge * dao_dma_sge_src(struct dao_dma_vchan_state *vchan)
static __rte_always_inline void dao_dma_update_cmpl_meta(struct dao_dma_vchan_state *vchan, uint16_t *ptr, uint16_t val, uint16_t *pend_ptr, uint16_t pend_val, uint16_t tail)
int dao_dma_flush_submit_v2(void)
static __rte_always_inline void dao_dma_update_cmpl_meta_v2(struct dao_dma_vchan_state *vchan, uint16_t *ptr, uint16_t val, uint16_t tail)
static __rte_always_inline bool dao_dma_op_status(struct dao_dma_vchan_state *vchan, uint16_t op_idx)
static __rte_always_inline uint16_t dao_dma_ops_avail(struct dao_dma_vchan_state *vchan)
int dao_dma_lcore_mem2dev_set_ops(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr, uint16_t nb_ops)
int16_t dao_dma_ctrl_dev2mem(void)
static __rte_always_inline uint16_t dao_dma_enq_x4(struct dao_dma_vchan_state *vchan, uint64x2_t *vsrc, uint64x2_t *vdst)
int dao_dma_ctrl_dev_set(int16_t dev2mem_id, int16_t mem2dev_id)
int dao_dma_lcore_dev2mem_set(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr)
#define DAO_DMA_MAX_POINTER
uint16_t val[DAO_DMA_MAX_META_POINTER]
uint16_t * pend_ptr[DAO_DMA_MAX_META_POINTER]
uint16_t * ptr[DAO_DMA_MAX_META_POINTER]
uint16_t pend_val[DAO_DMA_MAX_META_POINTER]
struct dao_dma_vchan_stats dev2mem[DAO_DMA_MAX_VCHAN_PER_LCORE]
struct dao_dma_vchan_stats mem2dev[DAO_DMA_MAX_VCHAN_PER_LCORE]
struct dao_dma_vchan_state mem2dev[DAO_DMA_MAX_VCHAN_PER_LCORE]
struct dao_dma_vchan_state dev2mem[DAO_DMA_MAX_VCHAN_PER_LCORE]
struct rte_dma_sge dst[DAO_DMA_MAX_POINTER]
struct rte_dma_op ** dma_ops
struct rte_dma_sge src[DAO_DMA_MAX_POINTER]
struct dao_dma_cmpl_mdata mdata[DAO_DMA_MAX_INFLIGHT_MDATA]