Data Accelerator Offload
dao_dma.h

/* SPDX-License-Identifier: Marvell-MIT
 * Copyright (c) 2023 Marvell.
 */

/**
 * @file
 *
 * DAO DMA helpers built on top of DPDK rte_dmadev.
 */

#ifndef __INCLUDE_DAO_DMA_H__
#define __INCLUDE_DAO_DMA_H__

#include <rte_eal.h>

#include <rte_dmadev.h>
#include <rte_lcore.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <dao_config.h>

#include "dao_log.h"

/** Maximum number of scatter-gather pointers per DMA op */
#define DAO_DMA_MAX_POINTER 15u

/** Maximum number of completion metadata pointers per op */
#define DAO_DMA_MAX_META_POINTER 48

/** Maximum number of vchans per lcore, per direction */
#define DAO_DMA_MAX_VCHAN_PER_LCORE 64

/** Depth of the inflight completion-metadata ring */
#define DAO_DMA_MAX_INFLIGHT_MDATA 4096

/** DMA completion metadata, recorded per inflight op */
struct dao_dma_cmpl_mdata {
	/** Pending-count words to decrement on completion */
	uint16_t *pend_ptr[DAO_DMA_MAX_META_POINTER];
	/** Amounts to subtract from the pending-count words */
	uint16_t pend_val[DAO_DMA_MAX_META_POINTER];
	/** Values to store on completion */
	uint16_t val[DAO_DMA_MAX_META_POINTER];
	/** Words to update with the values on completion */
	uint16_t *ptr[DAO_DMA_MAX_META_POINTER];
	/** Count of valid metadata entries */
	uint16_t cnt;
};

/** DMA virtual channel state */
struct dao_dma_vchan_state {
	/** Tail: index of the next op to be enqueued */
	uint16_t tail;
	/** Head: index of the next op to complete */
	uint16_t head;
	/** DMA device id */
	int16_t devid;
	/** DMA virtual channel id */
	uint8_t vchan;
	uint8_t rsvd;
	/** Number of valid source SG entries */
	uint16_t src_i;
	/** Number of valid destination SG entries */
	uint16_t dst_i;
	/** SG entry count that triggers a flush */
	uint8_t flush_thr;
	/** Enable hardware auto-free of source buffers (mem2dev) */
	uint8_t auto_free : 1;
	uint8_t rsvd2 : 7;
	/** Ops flushed but not yet submitted via doorbell */
	uint16_t pend_ops;
	/** Source SG entries being batched */
	struct rte_dma_sge src[DAO_DMA_MAX_POINTER];
	/** Destination SG entries being batched */
	struct rte_dma_sge dst[DAO_DMA_MAX_POINTER];
	/** Stats: total pointers enqueued */
	uint64_t ptrs;
	/** Stats: total ops enqueued */
	uint64_t ops;
	/** Stats: total doorbells rung */
	uint64_t dbells;
	/** Stats: enqueue errors */
	uint64_t dma_enq_errs;
	/** Stats: completion errors */
	uint64_t dma_compl_errs;
	/** Backing memory for the DMA ops ring */
	void *ops_mem;
	/** Ring of rte_dma_op pointers */
	struct rte_dma_op **dma_ops;
	/** Ops ring head (entries returned) */
	uint16_t ops_head;
	/** Ops ring tail (entries reserved) */
	uint16_t ops_tail;
	/** Ops ring size mask */
	uint16_t ops_mask;
	/** Inflight completion metadata, indexed by op index */
	struct dao_dma_cmpl_mdata mdata[DAO_DMA_MAX_INFLIGHT_MDATA];
};

/** Per-lcore DMA virtual channel info */
struct dao_dma_vchan_info {
	/** Number of dev2mem vchans */
	uint16_t nb_dev2mem;
	/** Number of mem2dev vchans */
	uint16_t nb_mem2dev;
	/** Device-to-memory vchan state */
	struct dao_dma_vchan_state dev2mem[DAO_DMA_MAX_VCHAN_PER_LCORE];
	/** Memory-to-device vchan state */
	struct dao_dma_vchan_state mem2dev[DAO_DMA_MAX_VCHAN_PER_LCORE];
};

/** DMA per-vchan statistics */
struct dao_dma_vchan_stats {
	/** Total pointers enqueued */
	uint64_t ptrs;
	/** Total ops enqueued */
	uint64_t ops;
	/** Total doorbells rung */
	uint64_t dbells;
	/** Enqueue errors */
	uint64_t enq_errs;
};

/** Per-lcore DMA statistics */
struct dao_dma_stats {
	/** Number of dev2mem vchans */
	uint16_t nb_dev2mem;
	/** Number of mem2dev vchans */
	uint16_t nb_mem2dev;
	/** Device-to-memory vchan stats */
	struct dao_dma_vchan_stats dev2mem[DAO_DMA_MAX_VCHAN_PER_LCORE];
	/** Memory-to-device vchan stats */
	struct dao_dma_vchan_stats mem2dev[DAO_DMA_MAX_VCHAN_PER_LCORE];
};

/** Per-lcore DMA vchan info */
RTE_DECLARE_PER_LCORE(struct dao_dma_vchan_info *, dao_dma_vchan_info);
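
/*
 * Illustrative sketch (not part of the API above): once a worker lcore has
 * configured its vchans, their state is reachable through the per-lcore
 * pointer. The vchan index 0 is a placeholder.
 *
 *	struct dao_dma_vchan_info *info = RTE_PER_LCORE(dao_dma_vchan_info);
 *	struct dao_dma_vchan_state *d2m = &info->dev2mem[0];
 *	struct dao_dma_vchan_state *m2d = &info->mem2dev[0];
 */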

/** Flush and submit pending DMA ops on all vchans of the calling lcore */
int dao_dma_flush_submit(void);

/** Flush and submit pending DMA ops on vchans configured in ops mode */
int dao_dma_flush_submit_ops(void);

/** Flush and submit pending DMA ops, v2 completion-metadata scheme */
int dao_dma_flush_submit_v2(void);

/** Get DMA statistics of an lcore */
int dao_dma_stats_get(uint16_t lcore_id, struct dao_dma_stats *stats);
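
/*
 * Illustrative sketch: read the enqueue counters of an lcore's first dev2mem
 * vchan. Counters are only maintained when DAO_DMA_STATS is enabled; the
 * lcore id and error handling are placeholders.
 *
 *	struct dao_dma_stats st;
 *
 *	if (dao_dma_has_stats_feature() && dao_dma_stats_get(1, &st) == 0)
 *		printf("ops=%" PRIu64 " ptrs=%" PRIu64 " dbells=%" PRIu64 "\n",
 *		       st.dev2mem[0].ops, st.dev2mem[0].ptrs, st.dev2mem[0].dbells);
 */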

/** Assign dev2mem DMA device vchans to the calling lcore */
int dao_dma_lcore_dev2mem_set(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr);

/** Assign dev2mem DMA device vchans to the calling lcore, in ops mode */
int dao_dma_lcore_dev2mem_set_ops(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr,
				  uint16_t nb_ops);

/** Assign mem2dev DMA device vchans to the calling lcore */
int dao_dma_lcore_mem2dev_set(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr);

/** Assign mem2dev DMA device vchans to the calling lcore, in ops mode */
int dao_dma_lcore_mem2dev_set_ops(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr,
				  uint16_t nb_ops);

/** Enable or disable buffer auto-free on a mem2dev vchan */
int dao_dma_lcore_mem2dev_autofree_set(int16_t dma_devid, uint16_t vchan, bool enable);

/** Set control-plane DMA devices for dev2mem and mem2dev transfers */
int dao_dma_ctrl_dev_set(int16_t dev2mem_id, int16_t mem2dev_id);
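
/*
 * Illustrative worker-setup sketch, assuming one DMA device per direction
 * (dev2mem_id and mem2dev_id are placeholders obtained from rte_dma_*
 * device discovery): one vchan each, a flush threshold of 8 SG entries,
 * and auto-free enabled on the mem2dev side.
 *
 *	if (dao_dma_lcore_dev2mem_set(dev2mem_id, 1, 8) < 0)
 *		return -1;
 *	if (dao_dma_lcore_mem2dev_set(mem2dev_id, 1, 8) < 0)
 *		return -1;
 *	if (dao_dma_lcore_mem2dev_autofree_set(mem2dev_id, 0, true) < 0)
 *		return -1;
 */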

/** Get the control-plane dev2mem DMA device id */
int16_t dao_dma_ctrl_dev2mem(void);

/** Get the control-plane mem2dev DMA device id */
int16_t dao_dma_ctrl_mem2dev(void);

/** Wait until all inflight DMA ops on the given vchan complete */
void dao_dma_compl_wait(uint16_t vchan);

/** Wait until all inflight DMA ops complete, ops-mode variant */
void dao_dma_compl_wait_ops(uint16_t vchan);

/** Wait for inflight DMA ops on the given vchan to complete (sp variant) */
void dao_dma_compl_wait_sp(uint16_t vchan);

/** Wait until ops up to the current tail of the given vchan complete */
void dao_dma_compl_wait_for_curr_tail(uint16_t vchan);

/** Check if DMA statistics are enabled at compile time */
static __rte_always_inline int
dao_dma_has_stats_feature(void)
{
#if DAO_DMA_STATS
	return 1;
#else
	return 0;
#endif
}

/** Check whether the DMA op at the given index has completed */
static __rte_always_inline bool
dao_dma_op_status(struct dao_dma_vchan_state *vchan, uint16_t op_idx)
{
	uint16_t head = vchan->head;
	uint16_t tail = vchan->tail;

	/* The op at tail is still being batched while SG entries are pending */
	if (vchan->src_i && (tail == op_idx))
		return false;

	/* Complete if op_idx lies outside the inflight [head, tail) window */
	return head <= tail ? (op_idx < head || op_idx >= tail) : (op_idx < head && op_idx >= tail);
}
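
/*
 * Illustrative sketch: the index an entry is batched into is the vchan tail
 * at enqueue time; poll it after fetching completions. src_iova, dst_iova
 * and len are placeholders.
 *
 *	uint16_t op_idx = vchan->tail;
 *
 *	dao_dma_enq_x1(vchan, src_iova, len, dst_iova, len);
 *	...
 *	dao_dma_check_compl(vchan);
 *	bool done = dao_dma_op_status(vchan, op_idx);
 */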

/**
 * Flush batched SG entries to the DMA device if fewer than @p avail slots
 * remain before the flush threshold.
 *
 * @return
 *   false on enqueue failure, true otherwise.
 */
static __rte_always_inline bool
dao_dma_flush(struct dao_dma_vchan_state *vchan, const uint8_t avail)
{
	int src_avail = vchan->flush_thr - vchan->src_i;
	int dst_avail = vchan->flush_thr - vchan->dst_i;
	/* Bit 3 is RTE_DMA_OP_FLAG_AUTO_FREE */
	uint64_t flags = (uint64_t)vchan->auto_free << 3;
	int rc;

	/* Nothing to do if enough room remains or nothing is batched */
	if (likely((src_avail >= (int)avail || !vchan->src_i) &&
		   (dst_avail >= (int)avail || !vchan->dst_i)))
		goto exit;

	rc = rte_dma_copy_sg(vchan->devid, vchan->vchan, vchan->src, vchan->dst, vchan->src_i,
			     vchan->dst_i, flags);
	if (unlikely(rc < 0)) {
		if (dao_dma_has_stats_feature())
			vchan->dma_enq_errs++;
		return false;
	}
	vchan->tail++;
	vchan->pend_ops++;
	if (dao_dma_has_stats_feature()) {
		vchan->ptrs += vchan->src_i;
		vchan->ops++;
	}
	vchan->src_i = 0;
	vchan->dst_i = 0;
exit:
	return true;
}

/** Number of source SG slots still available before the flush threshold */
static __rte_always_inline uint16_t
dao_dma_src_avail(struct dao_dma_vchan_state *vchan)
{
	int src_avail = vchan->flush_thr - vchan->src_i;

	return src_avail;
}

/** Number of destination SG slots still available before the flush threshold */
static __rte_always_inline uint16_t
dao_dma_dst_avail(struct dao_dma_vchan_state *vchan)
{
	int dst_avail = vchan->flush_thr - vchan->dst_i;

	return dst_avail;
}

/** Pointer to the next free source SG entry */
static __rte_always_inline struct rte_dma_sge *
dao_dma_sge_src(struct dao_dma_vchan_state *vchan)
{
	return &vchan->src[vchan->src_i];
}

/** Pointer to the next free destination SG entry */
static __rte_always_inline struct rte_dma_sge *
dao_dma_sge_dst(struct dao_dma_vchan_state *vchan)
{
	return &vchan->dst[vchan->dst_i];
}
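
/*
 * Illustrative sketch: fill SG entries in place when source and destination
 * counts differ (two sources gathered into one destination). The iovas and
 * lengths are placeholders; src_i/dst_i must be advanced by the caller.
 *
 *	if (dao_dma_flush(vchan, 2)) {
 *		struct rte_dma_sge *src = dao_dma_sge_src(vchan);
 *		struct rte_dma_sge *dst = dao_dma_sge_dst(vchan);
 *
 *		src[0].addr = iova_a;   src[0].length = 64;
 *		src[1].addr = iova_b;   src[1].length = 64;
 *		dst[0].addr = dst_iova; dst[0].length = 128;
 *		vchan->src_i += 2;
 *		vchan->dst_i += 1;
 *	}
 */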

/** Enqueue one source and one destination SG entry */
static __rte_always_inline void
dao_dma_enq_x1(struct dao_dma_vchan_state *vchan, rte_iova_t src, uint32_t src_len, rte_iova_t dst,
	       uint32_t dst_len)
{
	uint16_t src_i = vchan->src_i;
	uint16_t dst_i = vchan->dst_i;

	vchan->dst[dst_i].addr = dst;
	vchan->dst[dst_i].length = dst_len;
	vchan->src[src_i].addr = src;
	vchan->src[src_i].length = src_len;

	vchan->src_i = src_i + 1;
	vchan->dst_i = dst_i + 1;
}
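
/*
 * Illustrative data-path sketch: dao_dma_flush() guarantees room for the
 * entry, dao_dma_enq_x1() batches it, and a later dao_dma_flush_submit()
 * pushes all pending ops and rings the doorbell. The iovas and len are
 * placeholders.
 *
 *	if (likely(dao_dma_flush(vchan, 1)))
 *		dao_dma_enq_x1(vchan, src_iova, len, dst_iova, len);
 *	...
 *	dao_dma_flush_submit();
 */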

/** Enqueue one destination SG entry only */
static __rte_always_inline void
dao_dma_enq_dst_x1(struct dao_dma_vchan_state *vchan, rte_iova_t dst, uint32_t dst_len)
{
	uint16_t dst_i = vchan->dst_i;

	vchan->dst[dst_i].addr = dst;
	vchan->dst[dst_i].length = dst_len;

	vchan->dst_i = dst_i + 1;
}

/** Enqueue one source SG entry only */
static __rte_always_inline void
dao_dma_enq_src_x1(struct dao_dma_vchan_state *vchan, rte_iova_t src, uint32_t src_len)
{
	uint16_t src_i = vchan->src_i;

	vchan->src[src_i].addr = src;
	vchan->src[src_i].length = src_len;

	vchan->src_i = src_i + 1;
}

/**
 * Enqueue four source/destination SG pairs; each uint64x2_t holds one
 * rte_dma_sge (address, length).
 *
 * @return
 *   Number of pairs actually enqueued.
 */
static __rte_always_inline uint16_t
dao_dma_enq_x4(struct dao_dma_vchan_state *vchan, uint64x2_t *vsrc, uint64x2_t *vdst)
{
	struct rte_dma_sge *src, *dst;
	uint16_t src_i = vchan->src_i;
	uint16_t dst_i = vchan->dst_i;
	int src_avail = vchan->flush_thr - src_i;
	int i;

	src = vchan->src + src_i;
	dst = vchan->dst + dst_i;
	/* Fast path: all four pairs fit before the flush threshold */
	if (src_avail >= 4) {
		vst1q_u64((uint64_t *)&src[0], vsrc[0]);
		vst1q_u64((uint64_t *)&src[1], vsrc[1]);
		vst1q_u64((uint64_t *)&src[2], vsrc[2]);
		vst1q_u64((uint64_t *)&src[3], vsrc[3]);

		vst1q_u64((uint64_t *)&dst[0], vdst[0]);
		vst1q_u64((uint64_t *)&dst[1], vdst[1]);
		vst1q_u64((uint64_t *)&dst[2], vdst[2]);
		vst1q_u64((uint64_t *)&dst[3], vdst[3]);

		vchan->src_i = src_i + 4;
		vchan->dst_i = dst_i + 4;
		return 4;
	}

	/* Slow path: store as many pairs as fit */
	i = 0;
	while (i < 4 && src_avail > 0) {
		vst1q_u64((uint64_t *)src, vsrc[i]);
		vst1q_u64((uint64_t *)dst, vdst[i]);
		src++;
		dst++;
		i++;
		src_avail--;
	}
	vchan->src_i = src_i + i;
	vchan->dst_i = dst_i + i;

	/* Flush enqueued pointers */
	dao_dma_flush(vchan, 4);

	src_i = vchan->src_i;
	dst_i = vchan->dst_i;
	src = vchan->src + src_i;
	dst = vchan->dst + dst_i;
	src_avail = vchan->flush_thr - src_i;

	/* Store the remaining pairs now that the flush made room */
	while (i < 4 && src_avail > 0) {
		vst1q_u64((uint64_t *)src, vsrc[i]);
		vst1q_u64((uint64_t *)dst, vdst[i]);
		i++;
		src++;
		dst++;
		src_avail--;
		vchan->src_i++;
		vchan->dst_i++;
	}
	return i;
}
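
/*
 * Illustrative sketch: pack four equal-length copies into NEON registers,
 * one rte_dma_sge per uint64x2_t (lane 0 = iova, lane 1 = length, assumed
 * to fit in 32 bits). The iova arrays and len are placeholders; the return
 * value may be less than 4 if an intermediate flush failed.
 *
 *	uint64x2_t vsrc[4], vdst[4];
 *	uint16_t done;
 *	int i;
 *
 *	for (i = 0; i < 4; i++) {
 *		vsrc[i] = vsetq_lane_u64(src_iova[i], vdupq_n_u64(len), 0);
 *		vdst[i] = vsetq_lane_u64(dst_iova[i], vdupq_n_u64(len), 0);
 *	}
 *	done = dao_dma_enq_x4(vchan, vsrc, vdst);
 */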

/** Fetch DMA completions and advance the vchan head */
static __rte_always_inline void
dao_dma_check_compl(struct dao_dma_vchan_state *vchan)
{
	uint16_t cmpl;
	bool has_err = false;

	/* Fetch all DMA completed status */
	cmpl = rte_dma_completed(vchan->devid, vchan->vchan, 128, NULL, &has_err);
	if (unlikely(has_err)) {
		vchan->dma_compl_errs++;
		/* Account for the failed op as consumed */
		cmpl += 1;
	}
	vchan->head += cmpl;
}
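
/*
 * Illustrative sketch: retire completions once per worker-loop iteration so
 * head catches up with tail and dao_dma_op_status() can report progress.
 *
 *	struct dao_dma_vchan_info *info = RTE_PER_LCORE(dao_dma_vchan_info);
 *	uint16_t i;
 *
 *	for (i = 0; i < info->nb_dev2mem; i++)
 *		dao_dma_check_compl(&info->dev2mem[i]);
 *	for (i = 0; i < info->nb_mem2dev; i++)
 *		dao_dma_check_compl(&info->mem2dev[i]);
 */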

/** Number of rte_dma_op entries available for reservation */
static __rte_always_inline uint16_t
dao_dma_ops_avail(struct dao_dma_vchan_state *vchan)
{
	return vchan->ops_head - vchan->ops_tail;
}

/** Reserve @p n ops from the ring and return a pointer to the first */
static __rte_always_inline struct rte_dma_op **
dao_dma_ops_get(struct dao_dma_vchan_state *vchan, uint16_t n)
{
	uint16_t tail = vchan->ops_tail;

	vchan->ops_tail = tail + n;
	return &vchan->dma_ops[tail & vchan->ops_mask];
}

/** Return @p n completed ops to the ring */
static __rte_always_inline void
dao_dma_ops_put(struct dao_dma_vchan_state *vchan, uint16_t n)
{
	vchan->ops_head += n;
}

/** Undo a reservation of @p n ops */
static __rte_always_inline void
dao_dma_ops_release(struct dao_dma_vchan_state *vchan, uint16_t n)
{
	vchan->ops_tail -= n;
}
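
/*
 * Illustrative ops-ring sketch (requires a vchan configured with one of the
 * *_set_ops() variants): reserve ops, fill and enqueue them, and undo the
 * reservation if the enqueue is refused. Filling of the rte_dma_op fields
 * is elided; completed ops are returned to the ring by
 * dao_dma_check_meta_compl_ops().
 *
 *	struct rte_dma_op **ops;
 *
 *	if (dao_dma_ops_avail(vchan) < n)
 *		return -ENOSPC;
 *	ops = dao_dma_ops_get(vchan, n);
 *	... fill ops[0..n-1] and enqueue them to the dmadev ...
 *	if (the enqueue failed)
 *		dao_dma_ops_release(vchan, n);
 *	else
 *		vchan->tail += n;
 */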

/**
 * Attach completion metadata to a DMA op: on completion, *ptr is set to
 * @p val and *pend_ptr is decremented by @p pend_val.
 */
static __rte_always_inline void
dao_dma_op_set_cmpl(struct rte_dma_op *op, uint16_t *ptr, uint16_t val, uint16_t *pend_ptr,
		    uint16_t pend_val)
{
	op->user_meta = (uint64_t)(uintptr_t)ptr;
	op->event_meta = (uint64_t)(uintptr_t)pend_ptr;
	op->rsvd = ((uint32_t)val << 16) | pend_val;
}

/**
 * Fetch DMA completions and publish the recorded completion metadata. With
 * a non-zero @p mem_order, values are stored with release ordering.
 */
static __rte_always_inline void
dao_dma_check_meta_compl(struct dao_dma_vchan_state *vchan, const int mem_order)
{
	uint32_t cmpl, i, j, idx = 0;
	bool has_err = false;

	/* Fetch all DMA completed status */
	cmpl = rte_dma_completed(vchan->devid, vchan->vchan, 128, NULL, &has_err);
	if (unlikely(has_err)) {
		vchan->dma_compl_errs++;
		cmpl += 1;
	}
	for (i = vchan->head; i < vchan->head + cmpl; i++) {
		idx = i % DAO_DMA_MAX_INFLIGHT_MDATA;
		for (j = 0; j < vchan->mdata[idx].cnt; j++) {
			if (mem_order)
				__atomic_store_n(vchan->mdata[idx].ptr[j], vchan->mdata[idx].val[j],
						 __ATOMIC_RELEASE);
			else
				*vchan->mdata[idx].ptr[j] = vchan->mdata[idx].val[j];
			*vchan->mdata[idx].pend_ptr[j] -= vchan->mdata[idx].pend_val[j];
		}
		vchan->mdata[idx].cnt = 0;
	}
	vchan->head += cmpl;
}

/**
 * Dequeue completed ops, publish their completion metadata, and return them
 * to the ops ring.
 */
static __rte_always_inline void
dao_dma_check_meta_compl_ops(struct dao_dma_vchan_state *vchan, const int mem_order)
{
#define DEQ_SZ 128
	uint32_t cmpl, i;
	struct rte_dma_op *deq_ops[DEQ_SZ];

	/* Skip costly dequeue call when no ops are inflight */
	if (vchan->head == vchan->tail)
		return;

	cmpl = rte_dma_dequeue_ops(vchan->devid, vchan->vchan, deq_ops, DEQ_SZ);
	if (!cmpl)
		return;

	for (i = 0; i < cmpl; i++) {
		struct rte_dma_op *op = deq_ops[i];

		if (unlikely(op->status != RTE_DMA_STATUS_SUCCESSFUL))
			vchan->dma_compl_errs++;

		if (op->rsvd) {
			uint16_t *ptr = (uint16_t *)(uintptr_t)op->user_meta;
			uint16_t *pend_ptr = (uint16_t *)(uintptr_t)op->event_meta;
			uint16_t val = op->rsvd >> 16;
			uint16_t pend_val = op->rsvd & 0xFFFF;

			if (mem_order)
				__atomic_store_n(ptr, val, __ATOMIC_RELEASE);
			else
				*ptr = val;
			*pend_ptr -= pend_val;
			op->rsvd = 0;
		}
	}

	/* Return ops to ring buffer */
	dao_dma_ops_put(vchan, cmpl);
	vchan->head += cmpl;
}
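
/*
 * Illustrative pairing sketch: stamp metadata on an op before enqueue, then
 * let dao_dma_check_meta_compl_ops() publish it. desc_compl and inflight
 * are hypothetical caller-owned words; on completion desc_compl is set to 1
 * and inflight is decremented by 1.
 *
 *	dao_dma_op_set_cmpl(op, &desc_compl, 1, &inflight, 1);
 *	... enqueue op and advance vchan->tail ...
 *
 *	dao_dma_check_meta_compl_ops(vchan, 1);
 */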

/**
 * Record completion metadata against the op index @p tail, to be published
 * by dao_dma_check_meta_compl().
 */
static __rte_always_inline void
dao_dma_update_cmpl_meta(struct dao_dma_vchan_state *vchan, uint16_t *ptr, uint16_t val,
			 uint16_t *pend_ptr, uint16_t pend_val, uint16_t tail)
{
	uint16_t idx = tail % DAO_DMA_MAX_INFLIGHT_MDATA;
	uint16_t j = vchan->mdata[idx].cnt;

	vchan->mdata[idx].ptr[j] = ptr;
	vchan->mdata[idx].val[j] = val;
	vchan->mdata[idx].pend_ptr[j] = pend_ptr;
	vchan->mdata[idx].pend_val[j] = pend_val;
	vchan->mdata[idx].cnt = j + 1;
}
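
/*
 * Illustrative pairing sketch for the SG path: metadata recorded against the
 * current tail belongs to the op that the next flush submits. desc_compl
 * and inflight are hypothetical caller-owned words.
 *
 *	dao_dma_enq_x1(vchan, src_iova, len, dst_iova, len);
 *	dao_dma_update_cmpl_meta(vchan, &desc_compl, 1, &inflight, 1, vchan->tail);
 *	...
 *	dao_dma_check_meta_compl(vchan, 1);
 */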

/**
 * Fetch DMA completions and publish metadata recorded via
 * dao_dma_update_cmpl_meta_v2(). With a non-zero @p mem_order, values are
 * stored with relaxed atomic ordering.
 */
static __rte_always_inline void
dao_dma_check_meta_compl_v2(struct dao_dma_vchan_state *vchan, const int mem_order)
{
	uint32_t cmpl, i, j, idx = 0;
	bool has_err = false;

	/* Fetch all DMA completed status */
	cmpl = rte_dma_completed(vchan->devid, vchan->vchan, 128, NULL, &has_err);
	if (unlikely(has_err)) {
		vchan->dma_compl_errs++;
		cmpl += 1;
	}
	for (i = vchan->head; i < vchan->head + cmpl; i++) {
		idx = i % DAO_DMA_MAX_INFLIGHT_MDATA;
		for (j = 0; j < vchan->mdata[idx].cnt; j++) {
			if (mem_order)
				__atomic_store_n(vchan->mdata[idx].ptr[j], vchan->mdata[idx].val[j],
						 __ATOMIC_RELAXED);
			else
				*vchan->mdata[idx].ptr[j] = vchan->mdata[idx].val[j];
		}
		vchan->mdata[idx].cnt = 0;
	}
	vchan->head += cmpl;
}

/**
 * Record a value-only completion metadata entry against op index @p tail,
 * to be published by dao_dma_check_meta_compl_v2().
 */
static __rte_always_inline void
dao_dma_update_cmpl_meta_v2(struct dao_dma_vchan_state *vchan, uint16_t *ptr, uint16_t val,
			    uint16_t tail)
{
	uint16_t idx = tail % DAO_DMA_MAX_INFLIGHT_MDATA;
	uint16_t j = vchan->mdata[idx].cnt;

	vchan->mdata[idx].ptr[j] = ptr;
	vchan->mdata[idx].val[j] = val;
	vchan->mdata[idx].cnt = j + 1;
}
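
/*
 * Illustrative sketch: the v2 scheme records value-only metadata (no pending
 * counter) and publishes it with relaxed ordering; pair it with
 * dao_dma_check_meta_compl_v2() and dao_dma_flush_submit_v2().
 *
 *	dao_dma_update_cmpl_meta_v2(vchan, &desc_compl, 1, vchan->tail);
 *	...
 *	dao_dma_check_meta_compl_v2(vchan, 1);
 */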

#endif /* __INCLUDE_DAO_DMA_H__ */