Data Accelerator Offload
Loading...
Searching...
No Matches
dao_dma.h
Go to the documentation of this file.
1/* SPDX-License-Identifier: Marvell-MIT
2 * Copyright (c) 2023 Marvell.
3 */
4
11#ifndef __INCLUDE_DAO_DMA_H__
12#define __INCLUDE_DAO_DMA_H__
13
14#include <rte_eal.h>
15
16#include <rte_dmadev.h>
17#include <rte_lcore.h>
18#include <rte_mempool.h>
19#include <rte_prefetch.h>
20#include <rte_vect.h>
21
22#include <dao_config.h>
23
24#include "dao_log.h"
25
27#define DAO_DMA_MAX_POINTER 15u
28
30#define DAO_DMA_MAX_META_POINTER 48
31
33#define DAO_DMA_MAX_VCHAN_PER_LCORE 64
34
36#define DAO_DMA_MAX_INFLIGHT_MDATA 4096
37
51
55 uint16_t tail;
57 uint16_t head;
59 int16_t devid;
61 uint8_t vchan;
62 uint8_t rsvd;
64 uint16_t src_i;
66 uint16_t dst_i;
68 uint8_t flush_thr;
70 uint8_t auto_free : 1;
71 uint8_t rsvd2 : 7;
73 uint16_t pend_ops;
75 struct rte_dma_sge src[DAO_DMA_MAX_POINTER];
77 struct rte_dma_sge dst[DAO_DMA_MAX_POINTER];
79 uint64_t ptrs;
81 uint64_t ops;
83 uint64_t dbells;
85 uint64_t dma_enq_errs;
89 void *ops_mem;
91 struct rte_dma_op **dma_ops;
93 uint16_t ops_head;
95 uint16_t ops_tail;
97 uint16_t ops_mask;
101
113
117 uint64_t ptrs;
119 uint64_t ops;
121 uint64_t dbells;
123 uint64_t enq_errs;
124};
125
137
140
148
156
164
175int dao_dma_stats_get(uint16_t lcore_id, struct dao_dma_stats *stats);
176
189int dao_dma_lcore_dev2mem_set(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr);
190
205int dao_dma_lcore_dev2mem_set_ops(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr,
206 uint16_t nb_ops);
207
220int dao_dma_lcore_mem2dev_set(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr);
221
236int dao_dma_lcore_mem2dev_set_ops(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr,
237 uint16_t nb_ops);
238
251int dao_dma_lcore_mem2dev_autofree_set(int16_t dma_devid, uint16_t vchan, bool enable);
252
263int dao_dma_ctrl_dev_set(int16_t dev2mem_id, int16_t mem2dev_id);
264
272
280
287void dao_dma_compl_wait_inflight(uint16_t vchan);
288
295static __rte_always_inline int
297{
298#if DAO_DMA_STATS
299 return 1;
300#else
301 return 0;
302#endif
303}
304
316static __rte_always_inline bool
317dao_dma_op_status(struct dao_dma_vchan_state *vchan, uint16_t op_idx)
318{
319 uint16_t head = vchan->head;
320 uint16_t tail = vchan->tail;
321
322 if (vchan->src_i && (tail == op_idx))
323 return false;
324
325 return head <= tail ? (op_idx < head || op_idx >= tail) : (op_idx < head && op_idx >= tail);
326}
327
338static __rte_always_inline bool
339dao_dma_flush(struct dao_dma_vchan_state *vchan, const uint8_t avail)
340{
341 int src_avail = vchan->flush_thr - vchan->src_i;
342 int dst_avail = vchan->flush_thr - vchan->dst_i;
343 uint64_t flags = (uint64_t)vchan->auto_free << 3;
344 int rc;
345
346 if (likely((src_avail >= (int)avail || !vchan->src_i) &&
347 (dst_avail >= (int)avail || !vchan->dst_i)))
348 goto exit;
349
350 rc = rte_dma_copy_sg(vchan->devid, vchan->vchan, vchan->src, vchan->dst, vchan->src_i,
351 vchan->dst_i, flags);
352 if (unlikely(rc < 0)) {
354 vchan->dma_enq_errs++;
355 return false;
356 }
357 vchan->tail++;
358 vchan->pend_ops++;
360 vchan->ptrs += vchan->src_i;
361 vchan->ops++;
362 }
363 vchan->src_i = 0;
364 vchan->dst_i = 0;
365exit:
366 return true;
367}
368
377static __rte_always_inline uint16_t
379{
380 int src_avail = vchan->flush_thr - vchan->src_i;
381
382 return src_avail;
383}
384
393static __rte_always_inline uint16_t
395{
396 int dst_avail = vchan->flush_thr - vchan->dst_i;
397
398 return dst_avail;
399}
400
409static __rte_always_inline struct rte_dma_sge *
411{
412 return &vchan->src[vchan->src_i];
413}
414
423static __rte_always_inline struct rte_dma_sge *
425{
426 return &vchan->dst[vchan->dst_i];
427}
428
446static __rte_always_inline void
447dao_dma_enq_x1(struct dao_dma_vchan_state *vchan, rte_iova_t src, uint32_t src_len, rte_iova_t dst,
448 uint32_t dst_len)
449{
450 uint16_t src_i = vchan->src_i;
451 uint16_t dst_i = vchan->dst_i;
452
453 vchan->dst[dst_i].addr = dst;
454 vchan->dst[dst_i].length = dst_len;
455 vchan->src[src_i].addr = src;
456 vchan->src[src_i].length = src_len;
457
458 vchan->src_i = src_i + 1;
459 vchan->dst_i = dst_i + 1;
460}
461
475static __rte_always_inline void
476dao_dma_enq_dst_x1(struct dao_dma_vchan_state *vchan, rte_iova_t dst, uint32_t dst_len)
477{
478 uint16_t dst_i = vchan->dst_i;
479
480 vchan->dst[dst_i].addr = dst;
481 vchan->dst[dst_i].length = dst_len;
482
483 vchan->dst_i = dst_i + 1;
484}
485
499static __rte_always_inline void
500dao_dma_enq_src_x1(struct dao_dma_vchan_state *vchan, rte_iova_t src, uint32_t src_len)
501{
502 uint16_t src_i = vchan->src_i;
503
504 vchan->src[src_i].addr = src;
505 vchan->src[src_i].length = src_len;
506
507 vchan->src_i = src_i + 1;
508}
509
522static __rte_always_inline uint16_t
523dao_dma_enq_x4(struct dao_dma_vchan_state *vchan, uint64x2_t *vsrc, uint64x2_t *vdst)
524{
525 struct rte_dma_sge *src, *dst;
526 uint16_t src_i = vchan->src_i;
527 uint16_t dst_i = vchan->dst_i;
528 int src_avail = vchan->flush_thr - src_i;
529 int i;
530
531 src = vchan->src + src_i;
532 dst = vchan->dst + dst_i;
533 if (src_avail >= 4) {
534 vst1q_u64((uint64_t *)&src[0], vsrc[0]);
535 vst1q_u64((uint64_t *)&src[1], vsrc[1]);
536 vst1q_u64((uint64_t *)&src[2], vsrc[2]);
537 vst1q_u64((uint64_t *)&src[3], vsrc[3]);
538
539 vst1q_u64((uint64_t *)&dst[0], vdst[0]);
540 vst1q_u64((uint64_t *)&dst[1], vdst[1]);
541 vst1q_u64((uint64_t *)&dst[2], vdst[2]);
542 vst1q_u64((uint64_t *)&dst[3], vdst[3]);
543
544 vchan->src_i = src_i + 4;
545 vchan->dst_i = dst_i + 4;
546 return 4;
547 }
548
549 i = 0;
550 while (i < 4 && src_avail > 0) {
551 vst1q_u64((uint64_t *)src, vsrc[i]);
552 vst1q_u64((uint64_t *)dst, vdst[i]);
553 src++;
554 dst++;
555 i++;
556 src_avail--;
557 };
558 vchan->src_i = src_i + i;
559 vchan->dst_i = dst_i + i;
560
561 /* Flush enqueued pointers */
562 dao_dma_flush(vchan, 4);
563
564 src_i = vchan->src_i;
565 dst_i = vchan->dst_i;
566 src = vchan->src + src_i;
567 dst = vchan->dst + dst_i;
568 src_avail = vchan->flush_thr - src_i;
569
570 while (i < 4 && src_avail > 0) {
571 vst1q_u64((uint64_t *)src, vsrc[i]);
572 vst1q_u64((uint64_t *)dst, vdst[i]);
573 i++;
574 src++;
575 dst++;
576 src_avail--;
577 vchan->src_i++;
578 vchan->dst_i++;
579 };
580 return i;
581}
582
589static __rte_always_inline void
591{
592 uint16_t cmpl;
593 bool has_err = 0;
594
595 /* Fetch all DMA completed status */
596 cmpl = rte_dma_completed(vchan->devid, vchan->vchan, 128, NULL, &has_err);
597 if (unlikely(has_err)) {
598 vchan->dma_compl_errs++;
599 cmpl += 1;
600 }
601 vchan->head += cmpl;
602}
603
612static __rte_always_inline uint16_t
614{
615 return vchan->ops_head - vchan->ops_tail;
616}
617
628static __rte_always_inline struct rte_dma_op **
629dao_dma_ops_get(struct dao_dma_vchan_state *vchan, uint16_t n)
630{
631 uint16_t tail = vchan->ops_tail;
632
633 vchan->ops_tail = tail + n;
634 return &vchan->dma_ops[tail & vchan->ops_mask];
635}
636
645static __rte_always_inline void
646dao_dma_ops_put(struct dao_dma_vchan_state *vchan, uint16_t n)
647{
648 vchan->ops_head += n;
649}
650
659static __rte_always_inline void
660dao_dma_ops_release(struct dao_dma_vchan_state *vchan, uint16_t n)
661{
662 vchan->ops_tail -= n;
663}
664
673static __rte_always_inline void
674dao_dma_op_set_cmpl(struct rte_dma_op *op, uint16_t *ptr, uint16_t val, uint16_t *pend_ptr,
675 uint16_t pend_val)
676{
677 op->user_meta = (uint64_t)(uintptr_t)ptr;
678 op->event_meta = (uint64_t)(uintptr_t)pend_ptr;
679 op->rsvd = ((uint32_t)val << 16) | pend_val;
680}
681
691static __rte_always_inline void
692dao_dma_check_meta_compl(struct dao_dma_vchan_state *vchan, const int mem_order)
693{
694 uint32_t cmpl, i, j, idx = 0;
695 bool has_err = 0;
696
697 /* Fetch all DMA completed status */
698 cmpl = rte_dma_completed(vchan->devid, vchan->vchan, 128, NULL, &has_err);
699 if (unlikely(has_err)) {
700 vchan->dma_compl_errs++;
701 cmpl += 1;
702 }
703 for (i = vchan->head; i < vchan->head + cmpl; i++) {
705 for (j = 0; j < vchan->mdata[idx].cnt; j++) {
706 if (mem_order)
707 __atomic_store_n(vchan->mdata[idx].ptr[j], vchan->mdata[idx].val[j],
708 __ATOMIC_RELEASE);
709 else
710 *vchan->mdata[idx].ptr[j] = vchan->mdata[idx].val[j];
711 *vchan->mdata[idx].pend_ptr[j] -= vchan->mdata[idx].pend_val[j];
712 }
713 vchan->mdata[idx].cnt = 0;
714 }
715 vchan->head += cmpl;
716}
717
727static __rte_always_inline void
728dao_dma_check_meta_compl_ops(struct dao_dma_vchan_state *vchan, const int mem_order)
729{
730#define DEQ_SZ 128
731 uint32_t cmpl, i;
732 struct rte_dma_op *deq_ops[DEQ_SZ];
733
734 /* Skip costly dequeue call when no ops are inflight */
735 if (vchan->head == vchan->tail)
736 return;
737
738 cmpl = rte_dma_dequeue_ops(vchan->devid, vchan->vchan, deq_ops, DEQ_SZ);
739 if (!cmpl)
740 return;
741
742 for (i = 0; i < cmpl; i++) {
743 struct rte_dma_op *op = deq_ops[i];
744
745 if (unlikely(op->status != RTE_DMA_STATUS_SUCCESSFUL))
746 vchan->dma_compl_errs++;
747
748 if (op->rsvd) {
749 uint16_t *ptr = (uint16_t *)(uintptr_t)op->user_meta;
750 uint16_t *pend_ptr = (uint16_t *)(uintptr_t)op->event_meta;
751 uint16_t val = op->rsvd >> 16;
752 uint16_t pend_val = op->rsvd & 0xFFFF;
753
754 if (mem_order)
755 __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
756 else
757 *ptr = val;
758 *pend_ptr -= pend_val;
759 op->rsvd = 0;
760 }
761 }
762
763 /* Return ops to ring buffer */
764 dao_dma_ops_put(vchan, cmpl);
765 vchan->head += cmpl;
766}
767
784static __rte_always_inline void
785dao_dma_update_cmpl_meta(struct dao_dma_vchan_state *vchan, uint16_t *ptr, uint16_t val,
786 uint16_t *pend_ptr, uint16_t pend_val, uint16_t tail)
787{
788 uint16_t idx = tail % DAO_DMA_MAX_INFLIGHT_MDATA;
789 uint16_t j = vchan->mdata[idx].cnt;
790
791 vchan->mdata[idx].ptr[j] = ptr;
792 vchan->mdata[idx].val[j] = val;
793 vchan->mdata[idx].pend_ptr[j] = pend_ptr;
794 vchan->mdata[idx].pend_val[j] = pend_val;
795 vchan->mdata[idx].cnt = j + 1;
796}
797
807static __rte_always_inline void
808dao_dma_check_meta_compl_v2(struct dao_dma_vchan_state *vchan, const int mem_order)
809{
810 uint32_t cmpl, i, j, idx = 0;
811 bool has_err = 0;
812
813 /* Fetch all DMA completed status */
814 cmpl = rte_dma_completed(vchan->devid, vchan->vchan, 128, NULL, &has_err);
815 if (unlikely(has_err)) {
816 vchan->dma_compl_errs++;
817 cmpl += 1;
818 }
819 for (i = vchan->head; i < vchan->head + cmpl; i++) {
821 for (j = 0; j < vchan->mdata[idx].cnt; j++) {
822 if (mem_order)
823 __atomic_store_n(vchan->mdata[idx].ptr[j], vchan->mdata[idx].val[j],
824 __ATOMIC_RELAXED);
825 else
826 *vchan->mdata[idx].ptr[j] = vchan->mdata[idx].val[j];
827 }
828 vchan->mdata[idx].cnt = 0;
829 }
830 vchan->head += cmpl;
831}
832
845static __rte_always_inline void
846dao_dma_update_cmpl_meta_v2(struct dao_dma_vchan_state *vchan, uint16_t *ptr, uint16_t val,
847 uint16_t tail)
848{
849 uint16_t idx = tail % DAO_DMA_MAX_INFLIGHT_MDATA;
850 uint16_t j = vchan->mdata[idx].cnt;
851
852 vchan->mdata[idx].ptr[j] = ptr;
853 vchan->mdata[idx].val[j] = val;
854 vchan->mdata[idx].cnt = j + 1;
855}
856
857#endif /* __INCLUDE_DAO_DMA_H__ */
static __rte_always_inline void dao_dma_enq_src_x1(struct dao_dma_vchan_state *vchan, rte_iova_t src, uint32_t src_len)
Definition dao_dma.h:500
static __rte_always_inline int dao_dma_has_stats_feature(void)
Definition dao_dma.h:296
static __rte_always_inline struct rte_dma_sge * dao_dma_sge_dst(struct dao_dma_vchan_state *vchan)
Definition dao_dma.h:424
static __rte_always_inline void dao_dma_check_meta_compl(struct dao_dma_vchan_state *vchan, const int mem_order)
Definition dao_dma.h:692
int dao_dma_lcore_mem2dev_autofree_set(int16_t dma_devid, uint16_t vchan, bool enable)
static __rte_always_inline void dao_dma_ops_release(struct dao_dma_vchan_state *vchan, uint16_t n)
Definition dao_dma.h:660
#define DAO_DMA_MAX_INFLIGHT_MDATA
Definition dao_dma.h:36
static __rte_always_inline struct rte_dma_op ** dao_dma_ops_get(struct dao_dma_vchan_state *vchan, uint16_t n)
Definition dao_dma.h:629
static __rte_always_inline void dao_dma_ops_put(struct dao_dma_vchan_state *vchan, uint16_t n)
Definition dao_dma.h:646
int16_t dao_dma_ctrl_mem2dev(void)
void dao_dma_compl_wait_inflight(uint16_t vchan)
static __rte_always_inline void dao_dma_check_compl(struct dao_dma_vchan_state *vchan)
Definition dao_dma.h:590
int dao_dma_lcore_mem2dev_set(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr)
static __rte_always_inline void dao_dma_enq_x1(struct dao_dma_vchan_state *vchan, rte_iova_t src, uint32_t src_len, rte_iova_t dst, uint32_t dst_len)
Definition dao_dma.h:447
int dao_dma_lcore_dev2mem_set_ops(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr, uint16_t nb_ops)
int dao_dma_stats_get(uint16_t lcore_id, struct dao_dma_stats *stats)
static __rte_always_inline void dao_dma_check_meta_compl_ops(struct dao_dma_vchan_state *vchan, const int mem_order)
Definition dao_dma.h:728
int dao_dma_flush_submit_ops(void)
#define DAO_DMA_MAX_VCHAN_PER_LCORE
Definition dao_dma.h:33
#define DAO_DMA_MAX_META_POINTER
Definition dao_dma.h:30
static __rte_always_inline uint16_t dao_dma_src_avail(struct dao_dma_vchan_state *vchan)
Definition dao_dma.h:378
int dao_dma_flush_submit(void)
RTE_DECLARE_PER_LCORE(struct dao_dma_vchan_info *, dao_dma_vchan_info)
static __rte_always_inline void dao_dma_enq_dst_x1(struct dao_dma_vchan_state *vchan, rte_iova_t dst, uint32_t dst_len)
Definition dao_dma.h:476
static __rte_always_inline void dao_dma_check_meta_compl_v2(struct dao_dma_vchan_state *vchan, const int mem_order)
Definition dao_dma.h:808
static __rte_always_inline void dao_dma_op_set_cmpl(struct rte_dma_op *op, uint16_t *ptr, uint16_t val, uint16_t *pend_ptr, uint16_t pend_val)
Definition dao_dma.h:674
static __rte_always_inline uint16_t dao_dma_dst_avail(struct dao_dma_vchan_state *vchan)
Definition dao_dma.h:394
static __rte_always_inline bool dao_dma_flush(struct dao_dma_vchan_state *vchan, const uint8_t avail)
Definition dao_dma.h:339
static __rte_always_inline struct rte_dma_sge * dao_dma_sge_src(struct dao_dma_vchan_state *vchan)
Definition dao_dma.h:410
static __rte_always_inline void dao_dma_update_cmpl_meta(struct dao_dma_vchan_state *vchan, uint16_t *ptr, uint16_t val, uint16_t *pend_ptr, uint16_t pend_val, uint16_t tail)
Definition dao_dma.h:785
int dao_dma_flush_submit_v2(void)
static __rte_always_inline void dao_dma_update_cmpl_meta_v2(struct dao_dma_vchan_state *vchan, uint16_t *ptr, uint16_t val, uint16_t tail)
Definition dao_dma.h:846
static __rte_always_inline bool dao_dma_op_status(struct dao_dma_vchan_state *vchan, uint16_t op_idx)
Definition dao_dma.h:317
static __rte_always_inline uint16_t dao_dma_ops_avail(struct dao_dma_vchan_state *vchan)
Definition dao_dma.h:613
int dao_dma_lcore_mem2dev_set_ops(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr, uint16_t nb_ops)
int16_t dao_dma_ctrl_dev2mem(void)
static __rte_always_inline uint16_t dao_dma_enq_x4(struct dao_dma_vchan_state *vchan, uint64x2_t *vsrc, uint64x2_t *vdst)
Definition dao_dma.h:523
int dao_dma_ctrl_dev_set(int16_t dev2mem_id, int16_t mem2dev_id)
int dao_dma_lcore_dev2mem_set(int16_t dma_devid, uint16_t nb_vchans, uint16_t flush_thr)
#define DAO_DMA_MAX_POINTER
Definition dao_dma.h:27
uint16_t val[DAO_DMA_MAX_META_POINTER]
Definition dao_dma.h:45
uint16_t * pend_ptr[DAO_DMA_MAX_META_POINTER]
Definition dao_dma.h:41
uint16_t * ptr[DAO_DMA_MAX_META_POINTER]
Definition dao_dma.h:47
uint16_t pend_val[DAO_DMA_MAX_META_POINTER]
Definition dao_dma.h:43
uint16_t nb_mem2dev
Definition dao_dma.h:131
struct dao_dma_vchan_stats dev2mem[DAO_DMA_MAX_VCHAN_PER_LCORE]
Definition dao_dma.h:133
struct dao_dma_vchan_stats mem2dev[DAO_DMA_MAX_VCHAN_PER_LCORE]
Definition dao_dma.h:135
uint16_t nb_dev2mem
Definition dao_dma.h:129
struct dao_dma_vchan_state mem2dev[DAO_DMA_MAX_VCHAN_PER_LCORE]
Definition dao_dma.h:111
uint16_t nb_mem2dev
Definition dao_dma.h:107
struct dao_dma_vchan_state dev2mem[DAO_DMA_MAX_VCHAN_PER_LCORE]
Definition dao_dma.h:109
uint16_t nb_dev2mem
Definition dao_dma.h:105
uint64_t dma_enq_errs
Definition dao_dma.h:85
uint16_t ops_head
Definition dao_dma.h:93
uint16_t ops_tail
Definition dao_dma.h:95
uint16_t ops_mask
Definition dao_dma.h:97
uint8_t flush_thr
Definition dao_dma.h:68
struct rte_dma_sge dst[DAO_DMA_MAX_POINTER]
Definition dao_dma.h:77
struct rte_dma_op ** dma_ops
Definition dao_dma.h:91
uint8_t auto_free
Definition dao_dma.h:70
uint16_t pend_ops
Definition dao_dma.h:73
uint64_t dma_compl_errs
Definition dao_dma.h:87
uint64_t dbells
Definition dao_dma.h:83
struct rte_dma_sge src[DAO_DMA_MAX_POINTER]
Definition dao_dma.h:75
struct dao_dma_cmpl_mdata mdata[DAO_DMA_MAX_INFLIGHT_MDATA]
Definition dao_dma.h:99
uint64_t enq_errs
Definition dao_dma.h:123