Set up a triggered buffer for the accel and angl_vel values.
Signed-off-by: Justin Weiss <justin@justinweiss.com>
---
drivers/iio/imu/bmi270/Kconfig | 1 +
drivers/iio/imu/bmi270/bmi270.h | 8 +++++
drivers/iio/imu/bmi270/bmi270_core.c | 47 ++++++++++++++++++++++++++++
3 files changed, 56 insertions(+)
diff --git a/drivers/iio/imu/bmi270/Kconfig b/drivers/iio/imu/bmi270/Kconfig
index 0ffd29794fda..6362acc706da 100644
--- a/drivers/iio/imu/bmi270/Kconfig
+++ b/drivers/iio/imu/bmi270/Kconfig
@@ -6,6 +6,7 @@
config BMI270
tristate
select IIO_BUFFER
+ select IIO_TRIGGERED_BUFFER
config BMI270_I2C
tristate "Bosch BMI270 I2C driver"
diff --git a/drivers/iio/imu/bmi270/bmi270.h b/drivers/iio/imu/bmi270/bmi270.h
index 51e374fd4290..335400c34b0d 100644
--- a/drivers/iio/imu/bmi270/bmi270.h
+++ b/drivers/iio/imu/bmi270/bmi270.h
@@ -11,6 +11,14 @@ struct bmi270_data {
struct device *dev;
struct regmap *regmap;
const struct bmi270_chip_info *chip_info;
+
+ /*
+ * Ensure natural alignment for timestamp if present.
+ * Max length needed: 2 * 3 channels + 4 bytes padding + 8 byte ts.
+ * If fewer channels are enabled, less space may be needed, as
+ * long as the timestamp is still aligned to 8 bytes.
+ */
+ __le16 buf[12] __aligned(8);
};
enum bmi270_device_type {
diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c
index e5ee80c12166..f49db5d1bffd 100644
--- a/drivers/iio/imu/bmi270/bmi270_core.c
+++ b/drivers/iio/imu/bmi270/bmi270_core.c
@@ -7,6 +7,8 @@
#include <linux/regmap.h>
#include <linux/iio/iio.h>
+#include <linux/iio/triggered_buffer.h>
+#include <linux/iio/trigger_consumer.h>
#include "bmi270.h"
@@ -66,6 +68,7 @@ enum bmi270_scan {
BMI270_SCAN_GYRO_X,
BMI270_SCAN_GYRO_Y,
BMI270_SCAN_GYRO_Z,
+ BMI270_SCAN_TIMESTAMP,
};
const struct bmi270_chip_info bmi270_chip_info[] = {
@@ -82,6 +85,29 @@ const struct bmi270_chip_info bmi270_chip_info[] = {
};
EXPORT_SYMBOL_NS_GPL(bmi270_chip_info, IIO_BMI270);
+static irqreturn_t bmi270_trigger_handler(int irq, void *p)
+{
+ struct iio_poll_func *pf = p;
+ struct iio_dev *indio_dev = pf->indio_dev;
+ struct bmi270_data *bmi270_device = iio_priv(indio_dev);
+ int i, ret, j = 0, base = BMI270_ACCEL_X_REG;
+ __le16 sample;
+
+ for_each_set_bit(i, indio_dev->active_scan_mask, indio_dev->masklength) {
+ ret = regmap_bulk_read(bmi270_device->regmap,
+ base + i * sizeof(sample),
+ &sample, sizeof(sample));
+ if (ret)
+ goto done;
+ bmi270_device->buf[j++] = sample;
+ }
+
+ iio_push_to_buffers_with_timestamp(indio_dev, bmi270_device->buf, pf->timestamp);
+done:
+ iio_trigger_notify_done(indio_dev->trig);
+ return IRQ_HANDLED;
+}
+
static int bmi270_get_data(struct bmi270_data *bmi270_device,
int chan_type, int axis, int *val)
{
@@ -139,6 +165,13 @@ static const struct iio_info bmi270_info = {
.info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \
.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \
BIT(IIO_CHAN_INFO_FREQUENCY), \
+ .scan_index = BMI270_SCAN_ACCEL_##_axis, \
+ .scan_type = { \
+ .sign = 's', \
+ .realbits = 16, \
+ .storagebits = 16, \
+ .endianness = IIO_LE \
+ }, \
}
#define BMI270_ANG_VEL_CHANNEL(_axis) { \
@@ -148,6 +181,13 @@ static const struct iio_info bmi270_info = {
.info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \
.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \
BIT(IIO_CHAN_INFO_FREQUENCY), \
+ .scan_index = BMI270_SCAN_GYRO_##_axis, \
+ .scan_type = { \
+ .sign = 's', \
+ .realbits = 16, \
+ .storagebits = 16, \
+ .endianness = IIO_LE \
+ }, \
}
static const struct iio_chan_spec bmi270_channels[] = {
@@ -157,6 +197,7 @@ static const struct iio_chan_spec bmi270_channels[] = {
BMI270_ANG_VEL_CHANNEL(X),
BMI270_ANG_VEL_CHANNEL(Y),
BMI270_ANG_VEL_CHANNEL(Z),
+ IIO_CHAN_SOFT_TIMESTAMP(BMI270_SCAN_TIMESTAMP),
};
static int bmi270_validate_chip_id(struct bmi270_data *bmi270_device)
@@ -317,6 +358,12 @@ int bmi270_core_probe(struct device *dev, struct regmap *regmap,
indio_dev->modes = INDIO_DIRECT_MODE;
indio_dev->info = &bmi270_info;
+ ret = devm_iio_triggered_buffer_setup(dev, indio_dev,
+ iio_pollfunc_store_time,
+ bmi270_trigger_handler, NULL);
+ if (ret)
+ return ret;
+
return devm_iio_device_register(dev, indio_dev);
}
EXPORT_SYMBOL_NS_GPL(bmi270_core_probe, IIO_BMI270);
--
2.47.0
On Fri, 11 Oct 2024 08:37:48 -0700 Justin Weiss <justin@justinweiss.com> wrote: > Set up a triggered buffer for the accel and angl_vel values. > > Signed-off-by: Justin Weiss <justin@justinweiss.com> Hi Justin A few suggestions inline. Other than the DMA safe buffer thing, looks good but you might want to consider using a single bulk read. My cynical view is that if someone paid for an IMU they probably want all the channels, so optimizing for that case is a good plan. > --- > drivers/iio/imu/bmi270/Kconfig | 1 + > drivers/iio/imu/bmi270/bmi270.h | 8 +++++ > drivers/iio/imu/bmi270/bmi270_core.c | 47 ++++++++++++++++++++++++++++ > 3 files changed, 56 insertions(+) > > diff --git a/drivers/iio/imu/bmi270/Kconfig b/drivers/iio/imu/bmi270/Kconfig > index 0ffd29794fda..6362acc706da 100644 > --- a/drivers/iio/imu/bmi270/Kconfig > +++ b/drivers/iio/imu/bmi270/Kconfig > @@ -6,6 +6,7 @@ > config BMI270 > tristate > select IIO_BUFFER Hmm. The IIO_BUFFER select shouldn't have been here as no obvious use in the driver. Ah well - this patch 'fixes' that :) > + select IIO_TRIGGERED_BUFFER > > config BMI270_I2C > tristate "Bosch BMI270 I2C driver" > diff --git a/drivers/iio/imu/bmi270/bmi270.h b/drivers/iio/imu/bmi270/bmi270.h > index 51e374fd4290..335400c34b0d 100644 > --- a/drivers/iio/imu/bmi270/bmi270.h > +++ b/drivers/iio/imu/bmi270/bmi270.h > @@ -11,6 +11,14 @@ struct bmi270_data { > struct device *dev; > struct regmap *regmap; > const struct bmi270_chip_info *chip_info; > + > + /* > + * Ensure natural alignment for timestamp if present. > + * Max length needed: 2 * 3 channels + 4 bytes padding + 8 byte ts. > + * If fewer channels are enabled, less space may be needed, as > + * long as the timestamp is still aligned to 8 bytes. > + */ > + __le16 buf[12] __aligned(8); > }; > > enum bmi270_device_type { > diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c > index e5ee80c12166..f49db5d1bffd 100644 > --- a/drivers/iio/imu/bmi270/bmi270_core.c > +++ b/drivers/iio/imu/bmi270/bmi270_core.c > @@ -7,6 +7,8 @@ > #include <linux/regmap.h> > > #include <linux/iio/iio.h> > +#include <linux/iio/triggered_buffer.h> > +#include <linux/iio/trigger_consumer.h> > > #include "bmi270.h" > > @@ -66,6 +68,7 @@ enum bmi270_scan { > BMI270_SCAN_GYRO_X, > BMI270_SCAN_GYRO_Y, > BMI270_SCAN_GYRO_Z, > + BMI270_SCAN_TIMESTAMP, > }; > > const struct bmi270_chip_info bmi270_chip_info[] = { > @@ -82,6 +85,29 @@ const struct bmi270_chip_info bmi270_chip_info[] = { > }; > EXPORT_SYMBOL_NS_GPL(bmi270_chip_info, IIO_BMI270); > > +static irqreturn_t bmi270_trigger_handler(int irq, void *p) > +{ > + struct iio_poll_func *pf = p; > + struct iio_dev *indio_dev = pf->indio_dev; > + struct bmi270_data *bmi270_device = iio_priv(indio_dev); > + int i, ret, j = 0, base = BMI270_ACCEL_X_REG; > + __le16 sample; > + > + for_each_set_bit(i, indio_dev->active_scan_mask, indio_dev->masklength) { > + ret = regmap_bulk_read(bmi270_device->regmap, > + base + i * sizeof(sample), > + &sample, sizeof(sample)); This is always a fun corner. regmap doesn't guarantee to bounce buffer the data used by the underlying transport. In the case of SPI that means we need a DMA safe buffer for bulk accesses. In practice it may well bounce the data today but there are optmizations that would make it zero copy that might get applied in future. Easiest way to do that is put your sample variable in the iio_priv structure at the end and mark it __aligned(IIO_DMA_MINALIGN) Given you are reading a bunch of contiguous registers here it may well make sense to do a single bulk read directly into buf and then use the available_scan_masks to let the IIO core know it always gets a full set of samples. Then if the user selects a subset the IIO core will reorganize the data that they get presented with. Whether that makes sense from a performance point of view depends on the speed of the spi transfers vs the cost of setting up the individual ones. You could optimize contiguous reads in here, but probably not worth that complexity. > + if (ret) > + goto done; > + bmi270_device->buf[j++] = sample; It's not a huge buffer and you aren't DMAing into it, so maybe just put this on the stack? > + } > + > + iio_push_to_buffers_with_timestamp(indio_dev, bmi270_device->buf, pf->timestamp); > +done: > + iio_trigger_notify_done(indio_dev->trig); > + return IRQ_HANDLED; > +}
Jonathan Cameron <jic23@kernel.org> writes: > On Fri, 11 Oct 2024 08:37:48 -0700 > Justin Weiss <justin@justinweiss.com> wrote: > >> Set up a triggered buffer for the accel and angl_vel values. >> >> Signed-off-by: Justin Weiss <justin@justinweiss.com> > Hi Justin > > A few suggestions inline. Other than the DMA safe buffer thing, looks good > but you might want to consider using a single bulk read. > > My cynical view is that if someone paid for an IMU they probably want all > the channels, so optimizing for that case is a good plan. > >> --- >> drivers/iio/imu/bmi270/Kconfig | 1 + >> drivers/iio/imu/bmi270/bmi270.h | 8 +++++ >> drivers/iio/imu/bmi270/bmi270_core.c | 47 ++++++++++++++++++++++++++++ >> 3 files changed, 56 insertions(+) >> >> diff --git a/drivers/iio/imu/bmi270/Kconfig b/drivers/iio/imu/bmi270/Kconfig >> index 0ffd29794fda..6362acc706da 100644 >> --- a/drivers/iio/imu/bmi270/Kconfig >> +++ b/drivers/iio/imu/bmi270/Kconfig >> @@ -6,6 +6,7 @@ >> config BMI270 >> tristate >> select IIO_BUFFER > > Hmm. The IIO_BUFFER select shouldn't have been here as no obvious use > in the driver. Ah well - this patch 'fixes' that :) > >> + select IIO_TRIGGERED_BUFFER >> >> config BMI270_I2C >> tristate "Bosch BMI270 I2C driver" >> diff --git a/drivers/iio/imu/bmi270/bmi270.h b/drivers/iio/imu/bmi270/bmi270.h >> index 51e374fd4290..335400c34b0d 100644 >> --- a/drivers/iio/imu/bmi270/bmi270.h >> +++ b/drivers/iio/imu/bmi270/bmi270.h >> @@ -11,6 +11,14 @@ struct bmi270_data { >> struct device *dev; >> struct regmap *regmap; >> const struct bmi270_chip_info *chip_info; >> + >> + /* >> + * Ensure natural alignment for timestamp if present. >> + * Max length needed: 2 * 3 channels + 4 bytes padding + 8 byte ts. >> + * If fewer channels are enabled, less space may be needed, as >> + * long as the timestamp is still aligned to 8 bytes. >> + */ >> + __le16 buf[12] __aligned(8); >> }; >> >> enum bmi270_device_type { >> diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c >> index e5ee80c12166..f49db5d1bffd 100644 >> --- a/drivers/iio/imu/bmi270/bmi270_core.c >> +++ b/drivers/iio/imu/bmi270/bmi270_core.c >> @@ -7,6 +7,8 @@ >> #include <linux/regmap.h> >> >> #include <linux/iio/iio.h> >> +#include <linux/iio/triggered_buffer.h> >> +#include <linux/iio/trigger_consumer.h> >> >> #include "bmi270.h" >> >> @@ -66,6 +68,7 @@ enum bmi270_scan { >> BMI270_SCAN_GYRO_X, >> BMI270_SCAN_GYRO_Y, >> BMI270_SCAN_GYRO_Z, >> + BMI270_SCAN_TIMESTAMP, >> }; >> >> const struct bmi270_chip_info bmi270_chip_info[] = { >> @@ -82,6 +85,29 @@ const struct bmi270_chip_info bmi270_chip_info[] = { >> }; >> EXPORT_SYMBOL_NS_GPL(bmi270_chip_info, IIO_BMI270); >> >> +static irqreturn_t bmi270_trigger_handler(int irq, void *p) >> +{ >> + struct iio_poll_func *pf = p; >> + struct iio_dev *indio_dev = pf->indio_dev; >> + struct bmi270_data *bmi270_device = iio_priv(indio_dev); >> + int i, ret, j = 0, base = BMI270_ACCEL_X_REG; >> + __le16 sample; >> + >> + for_each_set_bit(i, indio_dev->active_scan_mask, indio_dev->masklength) { >> + ret = regmap_bulk_read(bmi270_device->regmap, >> + base + i * sizeof(sample), >> + &sample, sizeof(sample)); > > This is always a fun corner. > regmap doesn't guarantee to bounce buffer the data used by the underlying > transport. In the case of SPI that means we need a DMA safe buffer for bulk > accesses. In practice it may well bounce the data today but there are optmizations > that would make it zero copy that might get applied in future. > > Easiest way to do that is put your sample variable in the iio_priv structure > at the end and mark it __aligned(IIO_DMA_MINALIGN) > > Given you are reading a bunch of contiguous registers here it may well make > sense to do a single bulk read directly into buf and then use > the available_scan_masks to let the IIO core know it always gets a full set > of samples. Then if the user selects a subset the IIO core will reorganize > the data that they get presented with. That's convenient :-) It should make this much simpler. To clarify, I'll use regmap_bulk_read to read all of the registers at once into a stack-allocated buffer, and then push that buffer. Then I can remove bmi270_device->buf entirely, and avoid the DMA problem that way. Then I'll provide one avail_mask that sets all of the BMI270_SCAN_ACCEL_* and BMI270_SCAN_GYRO_* bits. > Whether that makes sense from a performance point of view depends on > the speed of the spi transfers vs the cost of setting up the individual ones. > > You could optimize contiguous reads in here, but probably not worth that > complexity. > >> + if (ret) >> + goto done; >> + bmi270_device->buf[j++] = sample; > > It's not a huge buffer and you aren't DMAing into it, so maybe just put this > on the stack? > >> + } >> + >> + iio_push_to_buffers_with_timestamp(indio_dev, bmi270_device->buf, pf->timestamp); >> +done: >> + iio_trigger_notify_done(indio_dev->trig); >> + return IRQ_HANDLED; >> +}
On Sat, 12 Oct 2024 19:43:19 -0700 Justin Weiss <justin@justinweiss.com> wrote: > Jonathan Cameron <jic23@kernel.org> writes: > > > On Fri, 11 Oct 2024 08:37:48 -0700 > > Justin Weiss <justin@justinweiss.com> wrote: > > > >> Set up a triggered buffer for the accel and angl_vel values. > >> > >> Signed-off-by: Justin Weiss <justin@justinweiss.com> > > Hi Justin > > > > A few suggestions inline. Other than the DMA safe buffer thing, looks good > > but you might want to consider using a single bulk read. > > > > My cynical view is that if someone paid for an IMU they probably want all > > the channels, so optimizing for that case is a good plan. > > > >> --- > >> drivers/iio/imu/bmi270/Kconfig | 1 + > >> drivers/iio/imu/bmi270/bmi270.h | 8 +++++ > >> drivers/iio/imu/bmi270/bmi270_core.c | 47 ++++++++++++++++++++++++++++ > >> 3 files changed, 56 insertions(+) > >> > >> diff --git a/drivers/iio/imu/bmi270/Kconfig b/drivers/iio/imu/bmi270/Kconfig > >> index 0ffd29794fda..6362acc706da 100644 > >> --- a/drivers/iio/imu/bmi270/Kconfig > >> +++ b/drivers/iio/imu/bmi270/Kconfig > >> @@ -6,6 +6,7 @@ > >> config BMI270 > >> tristate > >> select IIO_BUFFER > > > > Hmm. The IIO_BUFFER select shouldn't have been here as no obvious use > > in the driver. Ah well - this patch 'fixes' that :) > > > >> + select IIO_TRIGGERED_BUFFER > >> > >> config BMI270_I2C > >> tristate "Bosch BMI270 I2C driver" > >> diff --git a/drivers/iio/imu/bmi270/bmi270.h b/drivers/iio/imu/bmi270/bmi270.h > >> index 51e374fd4290..335400c34b0d 100644 > >> --- a/drivers/iio/imu/bmi270/bmi270.h > >> +++ b/drivers/iio/imu/bmi270/bmi270.h > >> @@ -11,6 +11,14 @@ struct bmi270_data { > >> struct device *dev; > >> struct regmap *regmap; > >> const struct bmi270_chip_info *chip_info; > >> + > >> + /* > >> + * Ensure natural alignment for timestamp if present. > >> + * Max length needed: 2 * 3 channels + 4 bytes padding + 8 byte ts. > >> + * If fewer channels are enabled, less space may be needed, as > >> + * long as the timestamp is still aligned to 8 bytes. > >> + */ > >> + __le16 buf[12] __aligned(8); > >> }; > >> > >> enum bmi270_device_type { > >> diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c > >> index e5ee80c12166..f49db5d1bffd 100644 > >> --- a/drivers/iio/imu/bmi270/bmi270_core.c > >> +++ b/drivers/iio/imu/bmi270/bmi270_core.c > >> @@ -7,6 +7,8 @@ > >> #include <linux/regmap.h> > >> > >> #include <linux/iio/iio.h> > >> +#include <linux/iio/triggered_buffer.h> > >> +#include <linux/iio/trigger_consumer.h> > >> > >> #include "bmi270.h" > >> > >> @@ -66,6 +68,7 @@ enum bmi270_scan { > >> BMI270_SCAN_GYRO_X, > >> BMI270_SCAN_GYRO_Y, > >> BMI270_SCAN_GYRO_Z, > >> + BMI270_SCAN_TIMESTAMP, > >> }; > >> > >> const struct bmi270_chip_info bmi270_chip_info[] = { > >> @@ -82,6 +85,29 @@ const struct bmi270_chip_info bmi270_chip_info[] = { > >> }; > >> EXPORT_SYMBOL_NS_GPL(bmi270_chip_info, IIO_BMI270); > >> > >> +static irqreturn_t bmi270_trigger_handler(int irq, void *p) > >> +{ > >> + struct iio_poll_func *pf = p; > >> + struct iio_dev *indio_dev = pf->indio_dev; > >> + struct bmi270_data *bmi270_device = iio_priv(indio_dev); > >> + int i, ret, j = 0, base = BMI270_ACCEL_X_REG; > >> + __le16 sample; > >> + > >> + for_each_set_bit(i, indio_dev->active_scan_mask, indio_dev->masklength) { > >> + ret = regmap_bulk_read(bmi270_device->regmap, > >> + base + i * sizeof(sample), > >> + &sample, sizeof(sample)); > > > > This is always a fun corner. > > regmap doesn't guarantee to bounce buffer the data used by the underlying > > transport. In the case of SPI that means we need a DMA safe buffer for bulk > > accesses. In practice it may well bounce the data today but there are optmizations > > that would make it zero copy that might get applied in future. > > > > Easiest way to do that is put your sample variable in the iio_priv structure > > at the end and mark it __aligned(IIO_DMA_MINALIGN) > > > > Given you are reading a bunch of contiguous registers here it may well make > > sense to do a single bulk read directly into buf and then use > > the available_scan_masks to let the IIO core know it always gets a full set > > of samples. Then if the user selects a subset the IIO core will reorganize > > the data that they get presented with. > > That's convenient :-) > > It should make this much simpler. To clarify, I'll use regmap_bulk_read > to read all of the registers at once into a stack-allocated buffer, and > then push that buffer. Then I can remove bmi270_device->buf entirely, > and avoid the DMA problem that way. Given this supports SPI. The target buffer can't be on the stack. You still need the __aligned(IIO_DMA_MINALIGN) element in your iio_priv() structure. > > Then I'll provide one avail_mask that sets all of the > BMI270_SCAN_ACCEL_* and BMI270_SCAN_GYRO_* bits. Otherwise your description is what I'd expect to see. > > > Whether that makes sense from a performance point of view depends on > > the speed of the spi transfers vs the cost of setting up the individual ones. > > > > You could optimize contiguous reads in here, but probably not worth that > > complexity. > > > >> + if (ret) > >> + goto done; > >> + bmi270_device->buf[j++] = sample; > > > > It's not a huge buffer and you aren't DMAing into it, so maybe just put this > > on the stack? This would only be correct for the case where you aren't DMAing directly into it. I guess I confused the message above with this point! Jonathan > > > >> + } > >> + > >> + iio_push_to_buffers_with_timestamp(indio_dev, bmi270_device->buf, pf->timestamp); > >> +done: > >> + iio_trigger_notify_done(indio_dev->trig); > >> + return IRQ_HANDLED; > >> +}
Jonathan Cameron <jic23@kernel.org> writes: > On Sat, 12 Oct 2024 19:43:19 -0700 > Justin Weiss <justin@justinweiss.com> wrote: > >> Jonathan Cameron <jic23@kernel.org> writes: >> >> > On Fri, 11 Oct 2024 08:37:48 -0700 >> > Justin Weiss <justin@justinweiss.com> wrote: >> > >> >> Set up a triggered buffer for the accel and angl_vel values. >> >> >> >> Signed-off-by: Justin Weiss <justin@justinweiss.com> >> > Hi Justin >> > >> > A few suggestions inline. Other than the DMA safe buffer thing, looks good >> > but you might want to consider using a single bulk read. >> > >> > My cynical view is that if someone paid for an IMU they probably want all >> > the channels, so optimizing for that case is a good plan. >> > >> >> ... >> >> >> >> + __le16 sample; >> >> + >> >> + for_each_set_bit(i, indio_dev->active_scan_mask, indio_dev->masklength) { >> >> + ret = regmap_bulk_read(bmi270_device->regmap, >> >> + base + i * sizeof(sample), >> >> + &sample, sizeof(sample)); >> > >> > This is always a fun corner. >> > regmap doesn't guarantee to bounce buffer the data used by the underlying >> > transport. In the case of SPI that means we need a DMA safe buffer for bulk >> > accesses. In practice it may well bounce the data today but there are optmizations >> > that would make it zero copy that might get applied in future. >> > >> > Easiest way to do that is put your sample variable in the iio_priv structure >> > at the end and mark it __aligned(IIO_DMA_MINALIGN) >> > >> > Given you are reading a bunch of contiguous registers here it may well make >> > sense to do a single bulk read directly into buf and then use >> > the available_scan_masks to let the IIO core know it always gets a full set >> > of samples. Then if the user selects a subset the IIO core will reorganize >> > the data that they get presented with. >> >> That's convenient :-) >> >> It should make this much simpler. To clarify, I'll use regmap_bulk_read >> to read all of the registers at once into a stack-allocated buffer, and >> then push that buffer. Then I can remove bmi270_device->buf entirely, >> and avoid the DMA problem that way. > > Given this supports SPI. The target buffer can't be on the stack. > You still need the __aligned(IIO_DMA_MINALIGN) element in your iio_priv() > structure. > Got it. I see that the BMI323 driver does the regmap_read into the DMA-aligned buffer, and then copies it to the timestamp-aligned buffer, which it then sends to iio_push_to_buffers_with_timestamp. Is that a good way to handle this? I think the timestamp-aligned buffer could still be stack-allocated in that case. Or maybe a second buffer isn't even necessary, if DMA_MINALIGN is at least the correct alignment for iio_push_to_buffers_with_timestamp and I could pass the DMA-aligned buffer in. Justin >> >> Then I'll provide one avail_mask that sets all of the >> BMI270_SCAN_ACCEL_* and BMI270_SCAN_GYRO_* bits. > Otherwise your description is what I'd expect to see. > >> >> > Whether that makes sense from a performance point of view depends on >> > the speed of the spi transfers vs the cost of setting up the individual ones. >> > >> > You could optimize contiguous reads in here, but probably not worth that >> > complexity. >> > >> >> + if (ret) >> >> + goto done; >> >> + bmi270_device->buf[j++] = sample; >> > >> > It's not a huge buffer and you aren't DMAing into it, so maybe just put this >> > on the stack? > This would only be correct for the case where you aren't DMAing directly into it. > I guess I confused the message above with this point! > > Jonathan > >> > >> >> + } >> >> + >> >> + iio_push_to_buffers_with_timestamp(indio_dev, bmi270_device->buf, pf->timestamp); >> >> +done: >> >> + iio_trigger_notify_done(indio_dev->trig); >> >> + return IRQ_HANDLED; >> >> +}
On Sun, 13 Oct 2024 13:54:36 -0700 Justin Weiss <justin@justinweiss.com> wrote: > Jonathan Cameron <jic23@kernel.org> writes: > > > On Sat, 12 Oct 2024 19:43:19 -0700 > > Justin Weiss <justin@justinweiss.com> wrote: > > > >> Jonathan Cameron <jic23@kernel.org> writes: > >> > >> > On Fri, 11 Oct 2024 08:37:48 -0700 > >> > Justin Weiss <justin@justinweiss.com> wrote: > >> > > >> >> Set up a triggered buffer for the accel and angl_vel values. > >> >> > >> >> Signed-off-by: Justin Weiss <justin@justinweiss.com> > >> > Hi Justin > >> > > >> > A few suggestions inline. Other than the DMA safe buffer thing, looks good > >> > but you might want to consider using a single bulk read. > >> > > >> > My cynical view is that if someone paid for an IMU they probably want all > >> > the channels, so optimizing for that case is a good plan. > >> > > >> >> ... > >> >> > >> >> + __le16 sample; > >> >> + > >> >> + for_each_set_bit(i, indio_dev->active_scan_mask, indio_dev->masklength) { > >> >> + ret = regmap_bulk_read(bmi270_device->regmap, > >> >> + base + i * sizeof(sample), > >> >> + &sample, sizeof(sample)); > >> > > >> > This is always a fun corner. > >> > regmap doesn't guarantee to bounce buffer the data used by the underlying > >> > transport. In the case of SPI that means we need a DMA safe buffer for bulk > >> > accesses. In practice it may well bounce the data today but there are optmizations > >> > that would make it zero copy that might get applied in future. > >> > > >> > Easiest way to do that is put your sample variable in the iio_priv structure > >> > at the end and mark it __aligned(IIO_DMA_MINALIGN) > >> > > >> > Given you are reading a bunch of contiguous registers here it may well make > >> > sense to do a single bulk read directly into buf and then use > >> > the available_scan_masks to let the IIO core know it always gets a full set > >> > of samples. Then if the user selects a subset the IIO core will reorganize > >> > the data that they get presented with. > >> > >> That's convenient :-) > >> > >> It should make this much simpler. To clarify, I'll use regmap_bulk_read > >> to read all of the registers at once into a stack-allocated buffer, and > >> then push that buffer. Then I can remove bmi270_device->buf entirely, > >> and avoid the DMA problem that way. > > > > Given this supports SPI. The target buffer can't be on the stack. > > You still need the __aligned(IIO_DMA_MINALIGN) element in your iio_priv() > > structure. > > > > Got it. I see that the BMI323 driver does the regmap_read into the > DMA-aligned buffer, and then copies it to the timestamp-aligned buffer, > which it then sends to iio_push_to_buffers_with_timestamp. Is that a > good way to handle this? Yes. We don't have a zero copy path so that's the best we can do. > > I think the timestamp-aligned buffer could still be stack-allocated in > that case. No it can't. __aligned doesn't do anything useful for us on the stack as we don't control what comes after it. You could in theory force alignment and pad - as long as compilers now respect __aligned for this case (they didn't use to!) It's potentially a few hundred bytes on the stack, so better on the heap. > Or maybe a second buffer isn't even necessary, if > DMA_MINALIGN is at least the correct alignment for > iio_push_to_buffers_with_timestamp and I could pass the DMA-aligned > buffer in. Common trick is to just DMA into almost what you already have in bmi270_data. But now the timestamp is in a fixed place you can use a structure to handle the alignment for you. /* + * Where IIO_DMA_MINALIGN is larger than 8 bytes, align to that + * to ensure a DMA safe buffer. + */ + //__le16 buf[12] __aligned(IIO_DMA_MINALIGN); struct { __le16 channels[6]; aligned_s64 timestamp; //type is only in iio tree currently. } data __aligned(IIO_DMA_MINALIGN); The aligned_s64 is needed for x86_32 where s64 is only aligned to 4 bytes whereas IIO assume natural alignment of everything (so 8 bytes). }; Then pass that to the push_to_buffers call just as you currently do. Jonathan
© 2016 - 2024 Red Hat, Inc.