[PATCH v3 1/2] dmaengine: idxd: Expose DSA3.0 capabilities through sysfs

Yi Sun posted 2 patches 1 month, 1 week ago
[PATCH v3 1/2] dmaengine: idxd: Expose DSA3.0 capabilities through sysfs
Posted by Yi Sun 1 month, 1 week ago
Introduce sysfs interfaces for 3 new Data Streaming Accelerator (DSA)
capability registers (dsacap0-2) to enable userspace awareness of hardware
features in DSA version 3 and later devices.

Userspace components (e.g. configuration libraries, workload apps) require this
information to:
1. Select optimal data transfer strategies based on SGL capabilities
2. Enable hardware-specific optimizations for floating-point operations
3. Configure memory operations with proper numerical handling
4. Verify compute operation compatibility before submitting jobs

The output format is <dsacap2>,<dsacap1>,<dsacap0>, where each DSA
capability value is a 64-bit hexadecimal number, separated by commas.
The ordering follows the DSA 3.0 specification layout:
 Offset:    0x190    0x188    0x180
 Reg:       dsacap2  dsacap1  dsacap0

Example:
cat /sys/bus/dsa/devices/dsa0/dsacaps
 000000000000f18d,0014000e000007aa,00fa01ff01ff03ff

According to the DSA 3.0 specification, there are 15 fields defined for
the three dsacap registers. However, there's no need to define all
register structures unless a use case requires them. At this point,
support for the Scatter-Gather List (SGL) located in dsacap0 is necessary,
so only dsacap0 is defined accordingly.

For reference, the DSA 3.0 specification is available at:
Link: https://software.intel.com/content/www/us/en/develop/articles/intel-data-streaming-accelerator-architecture-specification.html

Signed-off-by: Yi Sun <yi.sun@intel.com>
Co-developed-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Yi Lai <yi1.lai@intel.com>
Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>

diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index 4a355e6747ae..bd281063d626 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -136,6 +136,21 @@ Description:	The last executed device administrative command's status/error.
 		Also last configuration error overloaded.
 		Writing to it will clear the status.
 
+What:		/sys/bus/dsa/devices/dsa<m>/dsacaps
+Date:		Oct 5, 2025
+KernelVersion:	6.17.0
+Contact:	dmaengine@vger.kernel.org
+Description:	The DSA3 specification introduces three new capability
+		registers: dsacap[0-2]. User components (e.g., configuration
+		libraries and workload applications) require this information
+		to properly utilize the DSA3 features.
+		This includes SGL capability support, enabling hardware-specific
+		optimizations, configuring memory, etc.
+		The output format is '<dsacap2>,<dsacap1>,<dsacap0>' where each
+		DSA cap value is a zero-padded 64-bit hex value (no 0x prefix).
+		This attribute is only visible on DSA devices of version
+		3 or later.
+
 What:		/sys/bus/dsa/devices/dsa<m>/iaa_cap
 Date:		Sept 14, 2022
 KernelVersion: 6.0.0
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 74e6695881e6..cc0a3fe1c957 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -252,6 +252,9 @@ struct idxd_hw {
 	struct opcap opcap;
 	u32 cmd_cap;
 	union iaa_cap_reg iaa_cap;
+	union dsacap0_reg dsacap0;
+	union dsacap1_reg dsacap1;
+	union dsacap2_reg dsacap2;
 };
 
 enum idxd_device_state {
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 35bdefd3728b..084df60d407b 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -582,6 +582,12 @@ static void idxd_read_caps(struct idxd_device *idxd)
 	}
 	multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4);
 
+	if (idxd->hw.version >= DEVICE_VERSION_3) {
+		idxd->hw.dsacap0.bits = ioread64(idxd->reg_base + IDXD_DSACAP0_OFFSET);
+		idxd->hw.dsacap1.bits = ioread64(idxd->reg_base + IDXD_DSACAP1_OFFSET);
+		idxd->hw.dsacap2.bits = ioread64(idxd->reg_base + IDXD_DSACAP2_OFFSET);
+	}
+
 	/* read iaa cap */
 	if (idxd->data->type == IDXD_TYPE_IAX && idxd->hw.version >= DEVICE_VERSION_2)
 		idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET);
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index 9c1c546fe443..439bbc311591 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -13,6 +13,7 @@
 
 #define DEVICE_VERSION_1		0x100
 #define DEVICE_VERSION_2		0x200
+#define DEVICE_VERSION_3		0x300
 
 #define IDXD_MMIO_BAR		0
 #define IDXD_WQ_BAR		2
@@ -582,6 +583,30 @@ union evl_status_reg {
 	u64 bits;
 };
 
+#define IDXD_DSACAP0_OFFSET		0x180
+union dsacap0_reg {
+	u64 bits;
+	struct {
+		u64 max_sgl_shift:4;
+		u64 max_gr_block_shift:4;
+		u64 ops_inter_domain:7;
+		u64 rsvd1:17;
+		u64 sgl_formats:16;
+		u64 max_sg_process:8;
+		u64 rsvd2:8;
+	};
+};
+
+#define IDXD_DSACAP1_OFFSET		0x188
+union dsacap1_reg {
+	u64 bits;
+};
+
+#define IDXD_DSACAP2_OFFSET		0x190
+union dsacap2_reg {
+	u64 bits;
+};
+
 #define IDXD_MAX_BATCH_IDENT	256
 
 struct __evl_entry {
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 9f0701021af0..cc2c83d7f710 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -1713,6 +1713,18 @@ static ssize_t event_log_size_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(event_log_size);
 
+static ssize_t dsacaps_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd = confdev_to_idxd(dev);
+
+	return sysfs_emit(buf, "%016llx,%016llx,%016llx\n",
+			  (u64)idxd->hw.dsacap2.bits,
+			  (u64)idxd->hw.dsacap1.bits,
+			  (u64)idxd->hw.dsacap0.bits);
+}
+static DEVICE_ATTR_RO(dsacaps);
+
 static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr,
 						      struct idxd_device *idxd)
 {
@@ -1750,6 +1762,14 @@ static bool idxd_device_attr_event_log_size_invisible(struct attribute *attr,
 		!idxd->hw.gen_cap.evl_support);
 }
 
+static bool idxd_device_attr_dsacaps_invisible(struct attribute *attr,
+					       struct idxd_device *idxd)
+{
+	return attr == &dev_attr_dsacaps.attr &&
+		(idxd->data->type != IDXD_TYPE_DSA ||
+		idxd->hw.version < DEVICE_VERSION_3);
+}
+
 static umode_t idxd_device_attr_visible(struct kobject *kobj,
 					struct attribute *attr, int n)
 {
@@ -1768,6 +1788,9 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj,
 	if (idxd_device_attr_event_log_size_invisible(attr, idxd))
 		return 0;
 
+	if (idxd_device_attr_dsacaps_invisible(attr, idxd))
+		return 0;
+
 	return attr->mode;
 }
 
@@ -1795,6 +1818,7 @@ static struct attribute *idxd_device_attributes[] = {
 	&dev_attr_cmd_status.attr,
 	&dev_attr_iaa_cap.attr,
 	&dev_attr_event_log_size.attr,
+	&dev_attr_dsacaps.attr,
 	NULL,
 };
 
-- 
2.43.0
Re: [PATCH v3 1/2] dmaengine: idxd: Expose DSA3.0 capabilities through sysfs
Posted by Vinod Koul 1 month ago
On 21-08-25, 16:51, Yi Sun wrote:
> Introduce sysfs interfaces for 3 new Data Streaming Accelerator (DSA)
> capability registers (dsacap0-2) to enable userspace awareness of hardware
> features in DSA version 3 and later devices.
> 
> Userspace components (e.g. configure libraries, workload Apps) require this
> information to:
> 1. Select optimal data transfer strategies based on SGL capabilities
> 2. Enable hardware-specific optimizations for floating-point operations
> 3. Configure memory operations with proper numerical handling
> 4. Verify compute operation compatibility before submitting jobs
> 
> The output format is <dsacap2>,<dsacap1>,<dsacap0>, where each DSA
> capability value is a 64-bit hexadecimal number, separated by commas.
> The ordering follows the DSA 3.0 specification layout:
>  Offset:    0x190    0x188    0x180
>  Reg:       dsacap2  dsacap1  dsacap0
> 
> Example:
> cat /sys/bus/dsa/devices/dsa0/dsacaps
>  000000000000f18d,0014000e000007aa,00fa01ff01ff03ff

sysfs are supposed to be single values only, should we rather do per
capability? Also in future if you have more than three...? what happens
then?

-- 
~Vinod
RE: [PATCH v3 1/2] dmaengine: idxd: Expose DSA3.0 capabilities through sysfs
Posted by Sun, Yi 1 month ago
Hi Vinod,

The three capability registers are consecutive in BAR0 (0x180, 0x188, 0x190) and represent a single functionality. Exposing them as one sysfs entry makes their relationship clearer and avoids clutter, since there are already many files under dsa<x>.

If more capability registers are added at sequential offsets and for the same function in the future, they can be appended in order (higher offsets placed left-to-right) to maintain consistency.

We considered exposing them as separate files, but agreed that a single file provides better clarity and reduces noise.

Thanks
   --Sun, Yi

-----Original Message-----
From: Vinod Koul <vkoul@kernel.org> 
Sent: Tuesday, September 2, 2025 15:25
To: Sun, Yi <yi.sun@intel.com>
Cc: Gomes, Vinicius <vinicius.gomes@intel.com>; Jiang, Dave <dave.jiang@intel.com>; dmaengine@vger.kernel.org; linux-kernel@vger.kernel.org; Jin, Gordon <gordon.jin@intel.com>; fenghuay@nvidia.com; Lai, Yi1 <yi1.lai@intel.com>; Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Subject: Re: [PATCH v3 1/2] dmaengine: idxd: Expose DSA3.0 capabilities through sysfs

On 21-08-25, 16:51, Yi Sun wrote:
> Introduce sysfs interfaces for 3 new Data Streaming Accelerator (DSA)
> capability registers (dsacap0-2) to enable userspace awareness of hardware
> features in DSA version 3 and later devices.
> 
> Userspace components (e.g. configure libraries, workload Apps) require this
> information to:
> 1. Select optimal data transfer strategies based on SGL capabilities
> 2. Enable hardware-specific optimizations for floating-point operations
> 3. Configure memory operations with proper numerical handling
> 4. Verify compute operation compatibility before submitting jobs
> 
> The output format is <dsacap2>,<dsacap1>,<dsacap0>, where each DSA
> capability value is a 64-bit hexadecimal number, separated by commas.
> The ordering follows the DSA 3.0 specification layout:
>  Offset:    0x190    0x188    0x180
>  Reg:       dsacap2  dsacap1  dsacap0
> 
> Example:
> cat /sys/bus/dsa/devices/dsa0/dsacaps
>  000000000000f18d,0014000e000007aa,00fa01ff01ff03ff

sysfs are supposed to be single values only, should we rather do per
capability? Also in future if you have more than three...? what happens
then?

-- 
~Vinod
Re: [PATCH v3 1/2] dmaengine: idxd: Expose DSA3.0 capabilities through sysfs
Posted by Sun, Yi 2 weeks, 6 days ago
Hi Vinod,

Gentle ping — if splitting into three sysfs entries is really required, please let me know so I can adjust in time and avoid missing the v6.17 merge window.

Thanks
    --Sun, Yi

On 02.09.2025 16:34, Sun, Yi wrote:
>Hi Vinod,
>
>The three capability registers are consecutive in BAR0 (0x180, 0x188, 0x190) and represent a single functionality. Exposing them as one sysfs entry makes their relationship clearer and avoids clutter, since there are already many files under dsa<x>.
>
>If more capability registers are added at sequential offsets and for the same function in the future, they can be appended in order(higher offsets placed left-to-right) to maintain consistency.
>
>We considered exposing them as separate files, but agreed that a single file provides better clarity and reduces noise.
>
>Thanks
>   --Sun, Yi
>
>-----Original Message-----
>From: Vinod Koul <vkoul@kernel.org>
>Sent: Tuesday, September 2, 2025 15:25
>To: Sun, Yi <yi.sun@intel.com>
>Cc: Gomes, Vinicius <vinicius.gomes@intel.com>; Jiang, Dave <dave.jiang@intel.com>; dmaengine@vger.kernel.org; linux-kernel@vger.kernel.org; Jin, Gordon <gordon.jin@intel.com>; fenghuay@nvidia.com; Lai, Yi1 <yi1.lai@intel.com>; Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
>Subject: Re: [PATCH v3 1/2] dmaengine: idxd: Expose DSA3.0 capabilities through sysfs
>
>On 21-08-25, 16:51, Yi Sun wrote:
>> Introduce sysfs interfaces for 3 new Data Streaming Accelerator (DSA)
>> capability registers (dsacap0-2) to enable userspace awareness of hardware
>> features in DSA version 3 and later devices.
>>
>> Userspace components (e.g. configure libraries, workload Apps) require this
>> information to:
>> 1. Select optimal data transfer strategies based on SGL capabilities
>> 2. Enable hardware-specific optimizations for floating-point operations
>> 3. Configure memory operations with proper numerical handling
>> 4. Verify compute operation compatibility before submitting jobs
>>
>> The output format is <dsacap2>,<dsacap1>,<dsacap0>, where each DSA
>> capability value is a 64-bit hexadecimal number, separated by commas.
>> The ordering follows the DSA 3.0 specification layout:
>>  Offset:    0x190    0x188    0x180
>>  Reg:       dsacap2  dsacap1  dsacap0
>>
>> Example:
>> cat /sys/bus/dsa/devices/dsa0/dsacaps
>>  000000000000f18d,0014000e000007aa,00fa01ff01ff03ff
>
>sysfs are supposed to be single values only, should we rather do per
>capability? Also in future if you have more than three...? what happens
>then?
>
>-- 
>~Vinod