For any remote call to DSP, after sending an invocation message,
fastRPC driver waits for glink response and during this time the
CPU can go into low power modes. This adds latency to overall fastrpc
call as CPU wakeup and scheduling latencies are included. Add polling
mode support with which fastRPC driver will poll continuously on a
memory location after sending a message to the remote subsystem, which
will eliminate CPU wakeup and scheduling latencies and reduce fastRPC
overhead. Poll mode can be enabled by the user with the
FASTRPC_IOCTL_SET_OPTION ioctl request and the FASTRPC_POLL_MODE
request id.
Signed-off-by: Ekansh Gupta <ekansh.gupta@oss.qualcomm.com>
---
drivers/misc/fastrpc.c | 142 ++++++++++++++++++++++++++++++++++--
include/uapi/misc/fastrpc.h | 10 +++
2 files changed, 145 insertions(+), 7 deletions(-)
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index e935ae3776b4..c1e67dbacf2c 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -23,6 +23,8 @@
#include <uapi/misc/fastrpc.h>
#include <linux/of_reserved_mem.h>
#include <linux/bits.h>
+#include <linux/compiler.h>
+#include <linux/iopoll.h>
#define ADSP_DOMAIN_ID (0)
#define MDSP_DOMAIN_ID (1)
@@ -37,6 +39,7 @@
#define FASTRPC_CTX_MAX (256)
#define FASTRPC_INIT_HANDLE 1
#define FASTRPC_DSP_UTILITIES_HANDLE 2
+#define FASTRPC_MAX_STATIC_HANDLE (20)
#define FASTRPC_CTXID_MASK GENMASK(15, 8)
#define INIT_FILELEN_MAX (2 * 1024 * 1024)
#define INIT_FILE_NAMELEN_MAX (128)
@@ -105,6 +108,12 @@
#define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
+/* Poll response number from remote processor for call completion */
+#define FASTRPC_POLL_RESPONSE (0xdecaf)
+
+/* Polling mode timeout limit */
+#define FASTRPC_POLL_MAX_TIMEOUT_US (10000)
+
struct fastrpc_phy_page {
dma_addr_t addr; /* dma address */
u64 size; /* size of contiguous region */
@@ -235,8 +244,14 @@ struct fastrpc_invoke_ctx {
u32 sc;
u64 *fdlist;
u32 *crc;
+ /* Poll memory that DSP updates */
+ u32 *poll;
u64 ctxid;
u64 msg_sz;
+ /* work done status flag */
+ bool is_work_done;
+ /* process updates poll memory instead of glink response */
+ bool is_polled;
struct kref refcount;
struct list_head node; /* list of ctxs */
struct completion work;
@@ -307,6 +322,8 @@ struct fastrpc_user {
int client_id;
int pd;
bool is_secure_dev;
+ /* Flags poll mode state */
+ bool poll_mode;
/* Lock for lists */
spinlock_t lock;
/* lock for allocations */
@@ -924,7 +941,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
sizeof(struct fastrpc_invoke_buf) +
sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
sizeof(u64) * FASTRPC_MAX_FDLIST +
- sizeof(u32) * FASTRPC_MAX_CRCLIST;
+ sizeof(u32) * FASTRPC_MAX_CRCLIST +
+ sizeof(u32);
return size;
}
@@ -1020,6 +1038,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);
pages = fastrpc_phy_page_start(list, ctx->nscalars);
ctx->fdlist = (u64 *)(pages + ctx->nscalars);
+ ctx->poll = (u32 *)((uintptr_t)ctx->fdlist + sizeof(u64) * FASTRPC_MAX_FDLIST +
+ sizeof(u32) * FASTRPC_MAX_CRCLIST);
+
args = (uintptr_t)ctx->buf->virt + metalen;
rlen = pkt_size - metalen;
ctx->rpra = rpra;
@@ -1188,6 +1209,75 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
}
+static inline u32 fastrpc_poll_op(void *p)
+{
+ struct fastrpc_invoke_ctx *ctx = p;
+
+ dma_rmb();
+ return READ_ONCE(*ctx->poll);
+}
+
+static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx)
+{
+ u32 val;
+ int ret;
+
+ /*
+ * Poll until DSP writes FASTRPC_POLL_RESPONSE into *ctx->poll
+ * or until another path marks the work done.
+ */
+ ret = read_poll_timeout_atomic(fastrpc_poll_op, val,
+ (val == FASTRPC_POLL_RESPONSE) ||
+ ctx->is_work_done, 1,
+ FASTRPC_POLL_MAX_TIMEOUT_US, false, ctx);
+
+ if (!ret && val == FASTRPC_POLL_RESPONSE) {
+ ctx->is_work_done = true;
+ ctx->retval = 0;
+ }
+
+ if (ret == -ETIMEDOUT)
+ ret = -EIO;
+
+ return ret;
+}
+
+static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx,
+ u32 kernel)
+{
+ int err = 0;
+
+ if (kernel) {
+ if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
+ err = -ETIMEDOUT;
+ } else {
+ err = wait_for_completion_interruptible(&ctx->work);
+ }
+
+ return err;
+}
+
+static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx,
+ u32 kernel)
+{
+ int err;
+
+ do {
+ if (ctx->is_polled) {
+ err = poll_for_remote_response(ctx);
+ /* If polling timed out, move to normal response mode */
+ if (err)
+ ctx->is_polled = false;
+ } else {
+ err = fastrpc_wait_for_response(ctx, kernel);
+ if (err)
+ return err;
+ }
+ } while (!ctx->is_work_done);
+
+ return err;
+}
+
static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
u32 handle, u32 sc,
struct fastrpc_invoke_args *args)
@@ -1223,16 +1313,26 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
if (err)
goto bail;
- if (kernel) {
- if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
- err = -ETIMEDOUT;
- } else {
- err = wait_for_completion_interruptible(&ctx->work);
- }
+ /*
+ * Set message context as polled if the call is for a user PD
+ * dynamic module and user has enabled poll mode.
+ */
+ if (handle > FASTRPC_MAX_STATIC_HANDLE && fl->pd == USER_PD &&
+ fl->poll_mode)
+ ctx->is_polled = true;
+
+ err = fastrpc_wait_for_completion(ctx, kernel);
if (err)
goto bail;
+ if (!ctx->is_work_done) {
+ err = -ETIMEDOUT;
+ dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, sc 0x%x\n",
+ handle, sc);
+ goto bail;
+ }
+
/* make sure that all memory writes by DSP are seen by CPU */
dma_rmb();
/* populate all the output buffers with results */
@@ -1812,6 +1912,30 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
return 0;
}
+static int fastrpc_set_option(struct fastrpc_user *fl, char __user *argp)
+{
+ struct fastrpc_ioctl_set_option opt = {0};
+ int i;
+
+ if (copy_from_user(&opt, argp, sizeof(opt)))
+ return -EFAULT;
+
+ for (i = 0; i < ARRAY_SIZE(opt.reserved); i++) {
+ if (opt.reserved[i] != 0)
+ return -EINVAL;
+ }
+
+ if (opt.req != FASTRPC_POLL_MODE)
+ return -EINVAL;
+
+ if (opt.value)
+ fl->poll_mode = true;
+ else
+ fl->poll_mode = false;
+
+ return 0;
+}
+
static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
{
struct fastrpc_ioctl_capability cap = {0};
@@ -2167,6 +2291,9 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
case FASTRPC_IOCTL_MEM_UNMAP:
err = fastrpc_req_mem_unmap(fl, argp);
break;
+ case FASTRPC_IOCTL_SET_OPTION:
+ err = fastrpc_set_option(fl, argp);
+ break;
case FASTRPC_IOCTL_GET_DSP_INFO:
err = fastrpc_get_dsp_info(fl, argp);
break;
@@ -2518,6 +2645,7 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
}
ctx->retval = rsp->retval;
+ ctx->is_work_done = true;
complete(&ctx->work);
/*
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index c6e2925f47e6..c37e24a764ae 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -16,6 +16,7 @@
#define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static)
#define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map)
#define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap)
+#define FASTRPC_IOCTL_SET_OPTION _IOWR('R', 12, struct fastrpc_ioctl_set_option)
#define FASTRPC_IOCTL_GET_DSP_INFO _IOWR('R', 13, struct fastrpc_ioctl_capability)
/**
@@ -67,6 +68,9 @@ enum fastrpc_proc_attr {
/* Fastrpc attribute for memory protection of buffers */
#define FASTRPC_ATTR_SECUREMAP (1)
+/* Set option request ID to enable poll mode */
+#define FASTRPC_POLL_MODE (1)
+
struct fastrpc_invoke_args {
__u64 ptr;
__u64 length;
@@ -133,6 +137,12 @@ struct fastrpc_mem_unmap {
__s32 reserved[5];
};
+struct fastrpc_ioctl_set_option {
+ __u32 req; /* request id */
+ __u32 value; /* value */
+ __s32 reserved[6];
+};
+
struct fastrpc_ioctl_capability {
__u32 unused; /* deprecated, ignored by the kernel */
__u32 attribute_id;
--
2.34.1
On Sun, Feb 15, 2026 at 11:51:35PM +0530, Ekansh Gupta wrote:
> For any remote call to DSP, after sending an invocation message,
> fastRPC driver waits for glink response and during this time the
> CPU can go into low power modes. This adds latency to overall fastrpc
> call as CPU wakeup and scheduling latencies are included. Add polling
> mode support with which fastRPC driver will poll continuously on a
> memory after sending a message to remote subsystem which will eliminate
> CPU wakeup and scheduling latencies and reduce fastRPC overhead. Poll
> mode can be enabled by user by using FASTRPC_IOCTL_SET_OPTION ioctl
> request with FASTRPC_POLL_MODE request id.
>
> Signed-off-by: Ekansh Gupta <ekansh.gupta@oss.qualcomm.com>
> ---
> drivers/misc/fastrpc.c | 142 ++++++++++++++++++++++++++++++++++--
> include/uapi/misc/fastrpc.h | 10 +++
> 2 files changed, 145 insertions(+), 7 deletions(-)
>
> @@ -1812,6 +1912,30 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
> return 0;
> }
>
> +static int fastrpc_set_option(struct fastrpc_user *fl, char __user *argp)
> +{
> + struct fastrpc_ioctl_set_option opt = {0};
> + int i;
> +
> + if (copy_from_user(&opt, argp, sizeof(opt)))
> + return -EFAULT;
> +
> + for (i = 0; i < ARRAY_SIZE(opt.reserved); i++) {
> + if (opt.reserved[i] != 0)
> + return -EINVAL;
> + }
> +
> + if (opt.req != FASTRPC_POLL_MODE)
> + return -EINVAL;
> +
> + if (opt.value)
> + fl->poll_mode = true;
> + else
> + fl->poll_mode = false;
I think I've raised this question before. This implementation will
return success to the userspace even on the platforms where polling is
not supported. This is not correct.
> +
> + return 0;
> +}
> +
> static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
> {
> struct fastrpc_ioctl_capability cap = {0};
--
With best wishes
Dmitry
On Sun, Feb 15, 2026 at 11:51:35PM +0530, Ekansh Gupta wrote:
> For any remote call to DSP, after sending an invocation message,
> fastRPC driver waits for glink response and during this time the
> CPU can go into low power modes. This adds latency to overall fastrpc
> call as CPU wakeup and scheduling latencies are included. Add polling
> mode support with which fastRPC driver will poll continuously on a
> memory after sending a message to remote subsystem which will eliminate
> CPU wakeup and scheduling latencies and reduce fastRPC overhead. Poll
> mode can be enabled by user by using FASTRPC_IOCTL_SET_OPTION ioctl
> request with FASTRPC_POLL_MODE request id.
>
> Signed-off-by: Ekansh Gupta <ekansh.gupta@oss.qualcomm.com>
> ---
> drivers/misc/fastrpc.c | 142 ++++++++++++++++++++++++++++++++++--
> include/uapi/misc/fastrpc.h | 10 +++
> 2 files changed, 145 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
> index e935ae3776b4..c1e67dbacf2c 100644
> --- a/drivers/misc/fastrpc.c
> +++ b/drivers/misc/fastrpc.c
> @@ -23,6 +23,8 @@
> #include <uapi/misc/fastrpc.h>
> #include <linux/of_reserved_mem.h>
> #include <linux/bits.h>
> +#include <linux/compiler.h>
> +#include <linux/iopoll.h>
>
> #define ADSP_DOMAIN_ID (0)
> #define MDSP_DOMAIN_ID (1)
> @@ -37,6 +39,7 @@
> #define FASTRPC_CTX_MAX (256)
> #define FASTRPC_INIT_HANDLE 1
> #define FASTRPC_DSP_UTILITIES_HANDLE 2
> +#define FASTRPC_MAX_STATIC_HANDLE (20)
> #define FASTRPC_CTXID_MASK GENMASK(15, 8)
> #define INIT_FILELEN_MAX (2 * 1024 * 1024)
> #define INIT_FILE_NAMELEN_MAX (128)
> @@ -105,6 +108,12 @@
>
> #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
>
> +/* Poll response number from remote processor for call completion */
> +#define FASTRPC_POLL_RESPONSE (0xdecaf)
> +
> +/* Polling mode timeout limit */
> +#define FASTRPC_POLL_MAX_TIMEOUT_US (10000)
> +
> struct fastrpc_phy_page {
> dma_addr_t addr; /* dma address */
> u64 size; /* size of contiguous region */
> @@ -235,8 +244,14 @@ struct fastrpc_invoke_ctx {
> u32 sc;
> u64 *fdlist;
> u32 *crc;
> + /* Poll memory that DSP updates */
> + u32 *poll;
> u64 ctxid;
> u64 msg_sz;
> + /* work done status flag */
> + bool is_work_done;
> + /* process updates poll memory instead of glink response */
> + bool is_polled;
> struct kref refcount;
> struct list_head node; /* list of ctxs */
> struct completion work;
> @@ -307,6 +322,8 @@ struct fastrpc_user {
> int client_id;
> int pd;
> bool is_secure_dev;
> + /* Flags poll mode state */
> + bool poll_mode;
> /* Lock for lists */
> spinlock_t lock;
> /* lock for allocations */
> @@ -924,7 +941,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
> sizeof(struct fastrpc_invoke_buf) +
> sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
> sizeof(u64) * FASTRPC_MAX_FDLIST +
> - sizeof(u32) * FASTRPC_MAX_CRCLIST;
> + sizeof(u32) * FASTRPC_MAX_CRCLIST +
> + sizeof(u32);
>
> return size;
> }
> @@ -1020,6 +1038,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
> list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);
> pages = fastrpc_phy_page_start(list, ctx->nscalars);
> ctx->fdlist = (u64 *)(pages + ctx->nscalars);
> + ctx->poll = (u32 *)((uintptr_t)ctx->fdlist + sizeof(u64) * FASTRPC_MAX_FDLIST +
> + sizeof(u32) * FASTRPC_MAX_CRCLIST);
> +
> args = (uintptr_t)ctx->buf->virt + metalen;
> rlen = pkt_size - metalen;
> ctx->rpra = rpra;
> @@ -1188,6 +1209,75 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
>
> }
>
> +static inline u32 fastrpc_poll_op(void *p)
> +{
> + struct fastrpc_invoke_ctx *ctx = p;
> +
> + dma_rmb();
> + return READ_ONCE(*ctx->poll);
> +}
> +
> +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx)
> +{
> + u32 val;
> + int ret;
> +
> + /*
> + * Poll until DSP writes FASTRPC_POLL_RESPONSE into *ctx->poll
> + * or until another path marks the work done.
> + */
> + ret = read_poll_timeout_atomic(fastrpc_poll_op, val,
> + (val == FASTRPC_POLL_RESPONSE) ||
> + ctx->is_work_done, 1,
Weird line wrap of the conditional, please put the val == and the
ctx->is_work_done on the same line - it's just 90 characters.
> + FASTRPC_POLL_MAX_TIMEOUT_US, false, ctx);
> +
> + if (!ret && val == FASTRPC_POLL_RESPONSE) {
> + ctx->is_work_done = true;
> + ctx->retval = 0;
> + }
> +
> + if (ret == -ETIMEDOUT)
> + ret = -EIO;
> +
> + return ret;
> +}
> +
> +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx,
> + u32 kernel)
> +{
> + int err = 0;
> +
> + if (kernel) {
> + if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
> + err = -ETIMEDOUT;
> + } else {
> + err = wait_for_completion_interruptible(&ctx->work);
> + }
> +
> + return err;
> +}
> +
> +static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx,
> + u32 kernel)
> +{
> + int err;
> +
> + do {
> + if (ctx->is_polled) {
> + err = poll_for_remote_response(ctx);
> + /* If polling timed out, move to normal response mode */
I had already written to question the lack of fallback to non-polling
mode and how this would prohibit me from mixing expected long and short
calls...
Would certainly be nice to clarify this behavior in the commit
message...
> + if (err)
> + ctx->is_polled = false;
> + } else {
> + err = fastrpc_wait_for_response(ctx, kernel);
> + if (err)
> + return err;
> + }
> + } while (!ctx->is_work_done);
> +
> + return err;
Isn't 0 the only value of err you can get here?
> +}
> +
> static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
> u32 handle, u32 sc,
> struct fastrpc_invoke_args *args)
> @@ -1223,16 +1313,26 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
> if (err)
> goto bail;
>
> - if (kernel) {
> - if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
> - err = -ETIMEDOUT;
> - } else {
> - err = wait_for_completion_interruptible(&ctx->work);
> - }
> + /*
> + * Set message context as polled if the call is for a user PD
> + * dynamic module and user has enabled poll mode.
> + */
> + if (handle > FASTRPC_MAX_STATIC_HANDLE && fl->pd == USER_PD &&
> + fl->poll_mode)
The line is 85 characters if you don't break it. You're allowed to use
up to 100 characters if it makes the code easier to read - and it does.
> + ctx->is_polled = true;
> +
> + err = fastrpc_wait_for_completion(ctx, kernel);
>
Ugly blank line between the assignment and error check...
> if (err)
> goto bail;
>
> + if (!ctx->is_work_done) {
"err" is the return value of the wait, and this checks the outcome of
the wait... Returning "success" and pass "failure" through a sideband
channel is confusing.
That said, as far as I can see, there are three ways
fastrpc_wait_for_completion() can exit:
1) err = 0 && ctx->is_work_done = true after polling
2) err = 0 && ctx->is_work_done = true after wait
3) err != 0 && ctx->is_work_done is undefined after wait
For #1 and #2 we won't hit either if statement here.
For #3 we already hit above condition and went to bail.
So do we ever enter here?
> + err = -ETIMEDOUT;
> + dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, sc 0x%x\n",
> + handle, sc);
jfyi, you can use %#x to format 0x%x
> + goto bail;
> + }
> +
> /* make sure that all memory writes by DSP are seen by CPU */
> dma_rmb();
> /* populate all the output buffers with results */
> @@ -1812,6 +1912,30 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
> return 0;
> }
>
> +static int fastrpc_set_option(struct fastrpc_user *fl, char __user *argp)
> +{
> + struct fastrpc_ioctl_set_option opt = {0};
> + int i;
> +
> + if (copy_from_user(&opt, argp, sizeof(opt)))
> + return -EFAULT;
> +
> + for (i = 0; i < ARRAY_SIZE(opt.reserved); i++) {
> + if (opt.reserved[i] != 0)
> + return -EINVAL;
> + }
> +
> + if (opt.req != FASTRPC_POLL_MODE)
> + return -EINVAL;
> +
> + if (opt.value)
Would it make sense to allow the caller to affect the poll timeout using
the other 31 bits of this value?
Regards,
Bjorn
> + fl->poll_mode = true;
> + else
> + fl->poll_mode = false;
> +
> + return 0;
> +}
> +
> static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
> {
> struct fastrpc_ioctl_capability cap = {0};
> @@ -2167,6 +2291,9 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
> case FASTRPC_IOCTL_MEM_UNMAP:
> err = fastrpc_req_mem_unmap(fl, argp);
> break;
> + case FASTRPC_IOCTL_SET_OPTION:
> + err = fastrpc_set_option(fl, argp);
> + break;
> case FASTRPC_IOCTL_GET_DSP_INFO:
> err = fastrpc_get_dsp_info(fl, argp);
> break;
> @@ -2518,6 +2645,7 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
> }
>
> ctx->retval = rsp->retval;
> + ctx->is_work_done = true;
> complete(&ctx->work);
>
> /*
> diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
> index c6e2925f47e6..c37e24a764ae 100644
> --- a/include/uapi/misc/fastrpc.h
> +++ b/include/uapi/misc/fastrpc.h
> @@ -16,6 +16,7 @@
> #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static)
> #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map)
> #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap)
> +#define FASTRPC_IOCTL_SET_OPTION _IOWR('R', 12, struct fastrpc_ioctl_set_option)
> #define FASTRPC_IOCTL_GET_DSP_INFO _IOWR('R', 13, struct fastrpc_ioctl_capability)
>
> /**
> @@ -67,6 +68,9 @@ enum fastrpc_proc_attr {
> /* Fastrpc attribute for memory protection of buffers */
> #define FASTRPC_ATTR_SECUREMAP (1)
>
> +/* Set option request ID to enable poll mode */
> +#define FASTRPC_POLL_MODE (1)
> +
> struct fastrpc_invoke_args {
> __u64 ptr;
> __u64 length;
> @@ -133,6 +137,12 @@ struct fastrpc_mem_unmap {
> __s32 reserved[5];
> };
>
> +struct fastrpc_ioctl_set_option {
> + __u32 req; /* request id */
> + __u32 value; /* value */
> + __s32 reserved[6];
> +};
> +
> struct fastrpc_ioctl_capability {
> __u32 unused; /* deprecated, ignored by the kernel */
> __u32 attribute_id;
> --
> 2.34.1
>
>
On 2/16/2026 8:51 AM, Bjorn Andersson wrote:
> On Sun, Feb 15, 2026 at 11:51:35PM +0530, Ekansh Gupta wrote:
>> For any remote call to DSP, after sending an invocation message,
>> fastRPC driver waits for glink response and during this time the
>> CPU can go into low power modes. This adds latency to overall fastrpc
>> call as CPU wakeup and scheduling latencies are included. Add polling
>> mode support with which fastRPC driver will poll continuously on a
>> memory after sending a message to remote subsystem which will eliminate
>> CPU wakeup and scheduling latencies and reduce fastRPC overhead. Poll
>> mode can be enabled by user by using FASTRPC_IOCTL_SET_OPTION ioctl
>> request with FASTRPC_POLL_MODE request id.
>>
>> Signed-off-by: Ekansh Gupta <ekansh.gupta@oss.qualcomm.com>
>> ---
>> drivers/misc/fastrpc.c | 142 ++++++++++++++++++++++++++++++++++--
>> include/uapi/misc/fastrpc.h | 10 +++
>> 2 files changed, 145 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
>> index e935ae3776b4..c1e67dbacf2c 100644
>> --- a/drivers/misc/fastrpc.c
>> +++ b/drivers/misc/fastrpc.c
>> @@ -23,6 +23,8 @@
>> #include <uapi/misc/fastrpc.h>
>> #include <linux/of_reserved_mem.h>
>> #include <linux/bits.h>
>> +#include <linux/compiler.h>
>> +#include <linux/iopoll.h>
>>
>> #define ADSP_DOMAIN_ID (0)
>> #define MDSP_DOMAIN_ID (1)
>> @@ -37,6 +39,7 @@
>> #define FASTRPC_CTX_MAX (256)
>> #define FASTRPC_INIT_HANDLE 1
>> #define FASTRPC_DSP_UTILITIES_HANDLE 2
>> +#define FASTRPC_MAX_STATIC_HANDLE (20)
>> #define FASTRPC_CTXID_MASK GENMASK(15, 8)
>> #define INIT_FILELEN_MAX (2 * 1024 * 1024)
>> #define INIT_FILE_NAMELEN_MAX (128)
>> @@ -105,6 +108,12 @@
>>
>> #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
>>
>> +/* Poll response number from remote processor for call completion */
>> +#define FASTRPC_POLL_RESPONSE (0xdecaf)
>> +
>> +/* Polling mode timeout limit */
>> +#define FASTRPC_POLL_MAX_TIMEOUT_US (10000)
>> +
>> struct fastrpc_phy_page {
>> dma_addr_t addr; /* dma address */
>> u64 size; /* size of contiguous region */
>> @@ -235,8 +244,14 @@ struct fastrpc_invoke_ctx {
>> u32 sc;
>> u64 *fdlist;
>> u32 *crc;
>> + /* Poll memory that DSP updates */
>> + u32 *poll;
>> u64 ctxid;
>> u64 msg_sz;
>> + /* work done status flag */
>> + bool is_work_done;
>> + /* process updates poll memory instead of glink response */
>> + bool is_polled;
>> struct kref refcount;
>> struct list_head node; /* list of ctxs */
>> struct completion work;
>> @@ -307,6 +322,8 @@ struct fastrpc_user {
>> int client_id;
>> int pd;
>> bool is_secure_dev;
>> + /* Flags poll mode state */
>> + bool poll_mode;
>> /* Lock for lists */
>> spinlock_t lock;
>> /* lock for allocations */
>> @@ -924,7 +941,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
>> sizeof(struct fastrpc_invoke_buf) +
>> sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
>> sizeof(u64) * FASTRPC_MAX_FDLIST +
>> - sizeof(u32) * FASTRPC_MAX_CRCLIST;
>> + sizeof(u32) * FASTRPC_MAX_CRCLIST +
>> + sizeof(u32);
>>
>> return size;
>> }
>> @@ -1020,6 +1038,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
>> list = fastrpc_invoke_buf_start(rpra, ctx->nscalars);
>> pages = fastrpc_phy_page_start(list, ctx->nscalars);
>> ctx->fdlist = (u64 *)(pages + ctx->nscalars);
>> + ctx->poll = (u32 *)((uintptr_t)ctx->fdlist + sizeof(u64) * FASTRPC_MAX_FDLIST +
>> + sizeof(u32) * FASTRPC_MAX_CRCLIST);
>> +
>> args = (uintptr_t)ctx->buf->virt + metalen;
>> rlen = pkt_size - metalen;
>> ctx->rpra = rpra;
>> @@ -1188,6 +1209,75 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
>>
>> }
>>
>> +static inline u32 fastrpc_poll_op(void *p)
>> +{
>> + struct fastrpc_invoke_ctx *ctx = p;
>> +
>> + dma_rmb();
>> + return READ_ONCE(*ctx->poll);
>> +}
>> +
>> +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx)
>> +{
>> + u32 val;
>> + int ret;
>> +
>> + /*
>> + * Poll until DSP writes FASTRPC_POLL_RESPONSE into *ctx->poll
>> + * or until another path marks the work done.
>> + */
>> + ret = read_poll_timeout_atomic(fastrpc_poll_op, val,
>> + (val == FASTRPC_POLL_RESPONSE) ||
>> + ctx->is_work_done, 1,
> Weird line wrap of the conditional, please put the val == and the
> ctx->is_work_done on the same line - it's just 90 characters.
Ack.
>
>> + FASTRPC_POLL_MAX_TIMEOUT_US, false, ctx);
>> +
>> + if (!ret && val == FASTRPC_POLL_RESPONSE) {
>> + ctx->is_work_done = true;
>> + ctx->retval = 0;
>> + }
>> +
>> + if (ret == -ETIMEDOUT)
>> + ret = -EIO;
>> +
>> + return ret;
>> +}
>> +
>> +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx,
>> + u32 kernel)
>> +{
>> + int err = 0;
>> +
>> + if (kernel) {
>> + if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
>> + err = -ETIMEDOUT;
>> + } else {
>> + err = wait_for_completion_interruptible(&ctx->work);
>> + }
>> +
>> + return err;
>> +}
>> +
>> +static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx,
>> + u32 kernel)
>> +{
>> + int err;
>> +
>> + do {
>> + if (ctx->is_polled) {
>> + err = poll_for_remote_response(ctx);
>> + /* If polling timed out, move to normal response mode */
> I had already written to question the lack of fallback to non-polling
> mode and how this would prohibit me from mixing expected long and short
> calls...
>
> Would certainly be nice to clarify this behavior in the commit
> message...
I'll add more details for this.
>
>> + if (err)
>> + ctx->is_polled = false;
>> + } else {
>> + err = fastrpc_wait_for_response(ctx, kernel);
>> + if (err)
>> + return err;
>> + }
>> + } while (!ctx->is_work_done);
>> +
>> + return err;
> Isn't 0 the only value of err you can get here with?
yes, it's always going to be return 0; I'll update this.
>
>> +}
>> +
>> static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
>> u32 handle, u32 sc,
>> struct fastrpc_invoke_args *args)
>> @@ -1223,16 +1313,26 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
>> if (err)
>> goto bail;
>>
>> - if (kernel) {
>> - if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
>> - err = -ETIMEDOUT;
>> - } else {
>> - err = wait_for_completion_interruptible(&ctx->work);
>> - }
>> + /*
>> + * Set message context as polled if the call is for a user PD
>> + * dynamic module and user has enabled poll mode.
>> + */
>> + if (handle > FASTRPC_MAX_STATIC_HANDLE && fl->pd == USER_PD &&
>> + fl->poll_mode)
> The line is 85 characters if you don't break it. You're allowed to use
> up to 100 characters if it makes the code easier to read - and it does.
Ack.
>
>> + ctx->is_polled = true;
>> +
>> + err = fastrpc_wait_for_completion(ctx, kernel);
>>
> Ugly blank line between the assignment and error check...
I'll remove this.
>
>> if (err)
>> goto bail;
>>
>> + if (!ctx->is_work_done) {
> "err" is the return value of the wait, and this checks the outcome of
> the wait... Returning "success" and pass "failure" through a sideband
> channel is confusing.
>
> That said, as far as I can see, there are three ways
> fastrpc_wait_for_completion() can exit:
>
> 1) err = 0 && ctx->is_work_done = true after polling
> 2) err = 0 && ctx->is_work_done = true after wait
> 3) err != 0 && ctx->is_work_done is undefined after wait
>
> For #1 and #2 we won't hit either if statement here.
> For #3 we already hit above condition and went to bail.
>
> So do we ever enter here?
You're right, the check is not getting encountered due to the following reasons:
1) Poll success -> err = 0, is_work_done =true.
2) Wait success -> err = 0, is_work_done =true.
3) Poll failed -> fallback to wait.
4) Wait failed -> err check before this if condition.
I'll remove this check.
>
>> + err = -ETIMEDOUT;
>> + dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, sc 0x%x\n",
>> + handle, sc);
> jfyi, you can use %#x to format 0x%x
Ack.
>
>> + goto bail;
>> + }
>> +
>> /* make sure that all memory writes by DSP are seen by CPU */
>> dma_rmb();
>> /* populate all the output buffers with results */
>> @@ -1812,6 +1912,30 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
>> return 0;
>> }
>>
>> +static int fastrpc_set_option(struct fastrpc_user *fl, char __user *argp)
>> +{
>> + struct fastrpc_ioctl_set_option opt = {0};
>> + int i;
>> +
>> + if (copy_from_user(&opt, argp, sizeof(opt)))
>> + return -EFAULT;
>> +
>> + for (i = 0; i < ARRAY_SIZE(opt.reserved); i++) {
>> + if (opt.reserved[i] != 0)
>> + return -EINVAL;
>> + }
>> +
>> + if (opt.req != FASTRPC_POLL_MODE)
>> + return -EINVAL;
>> +
>> + if (opt.value)
> Would it make sense to allow the caller to affect the poll timeout using
> the other 31 bits of this value?
I was planning to bring that control[1], but it might be difficult for the caller
[1] https://lore.kernel.org/all/20250127044239.578540-5-quic_ekangupt@quicinc.com/
//Ekansh
>
> Regards,
> Bjorn
>
>> + fl->poll_mode = true;
>> + else
>> + fl->poll_mode = false;
>> +
>> + return 0;
>> +}
>> +
>> static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
>> {
>> struct fastrpc_ioctl_capability cap = {0};
>> @@ -2167,6 +2291,9 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
>> case FASTRPC_IOCTL_MEM_UNMAP:
>> err = fastrpc_req_mem_unmap(fl, argp);
>> break;
>> + case FASTRPC_IOCTL_SET_OPTION:
>> + err = fastrpc_set_option(fl, argp);
>> + break;
>> case FASTRPC_IOCTL_GET_DSP_INFO:
>> err = fastrpc_get_dsp_info(fl, argp);
>> break;
>> @@ -2518,6 +2645,7 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
>> }
>>
>> ctx->retval = rsp->retval;
>> + ctx->is_work_done = true;
>> complete(&ctx->work);
>>
>> /*
>> diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
>> index c6e2925f47e6..c37e24a764ae 100644
>> --- a/include/uapi/misc/fastrpc.h
>> +++ b/include/uapi/misc/fastrpc.h
>> @@ -16,6 +16,7 @@
>> #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static)
>> #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map)
>> #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap)
>> +#define FASTRPC_IOCTL_SET_OPTION _IOWR('R', 12, struct fastrpc_ioctl_set_option)
>> #define FASTRPC_IOCTL_GET_DSP_INFO _IOWR('R', 13, struct fastrpc_ioctl_capability)
>>
>> /**
>> @@ -67,6 +68,9 @@ enum fastrpc_proc_attr {
>> /* Fastrpc attribute for memory protection of buffers */
>> #define FASTRPC_ATTR_SECUREMAP (1)
>>
>> +/* Set option request ID to enable poll mode */
>> +#define FASTRPC_POLL_MODE (1)
>> +
>> struct fastrpc_invoke_args {
>> __u64 ptr;
>> __u64 length;
>> @@ -133,6 +137,12 @@ struct fastrpc_mem_unmap {
>> __s32 reserved[5];
>> };
>>
>> +struct fastrpc_ioctl_set_option {
>> + __u32 req; /* request id */
>> + __u32 value; /* value */
>> + __s32 reserved[6];
>> +};
>> +
>> struct fastrpc_ioctl_capability {
>> __u32 unused; /* deprecated, ignored by the kernel */
>> __u32 attribute_id;
>> --
>> 2.34.1
>>
>>
On Mon, Feb 16, 2026 at 02:36:40PM +0530, Ekansh Gupta wrote:
> On 2/16/2026 8:51 AM, Bjorn Andersson wrote:
> > On Sun, Feb 15, 2026 at 11:51:35PM +0530, Ekansh Gupta wrote:
> >> @@ -1812,6 +1912,30 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
> >> return 0;
> >> }
> >>
> >> +static int fastrpc_set_option(struct fastrpc_user *fl, char __user *argp)
> >> +{
> >> + struct fastrpc_ioctl_set_option opt = {0};
> >> + int i;
> >> +
> >> + if (copy_from_user(&opt, argp, sizeof(opt)))
> >> + return -EFAULT;
> >> +
> >> + for (i = 0; i < ARRAY_SIZE(opt.reserved); i++) {
> >> + if (opt.reserved[i] != 0)
> >> + return -EINVAL;
> >> + }
> >> +
> >> + if (opt.req != FASTRPC_POLL_MODE)
> >> + return -EINVAL;
> >> +
> >> + if (opt.value)
> > Would it make sense to allow the caller to affect the poll timeout using
> > the other 31 bits of this value?
> I was planning to bring that control[1], but it's might be difficult for the caller
>
Skimming through the thread, it seems you're discussing how to determine
if the DSP supports polling or not; that sounds like a separate problem
in my view. Not sure if you settled that discussion, but couldn't that
be handled through FASTRPC_IOCTL_GET_DSP_INFO?
I assume though, this would be subject to firmware changes. How do you
determine downstream if polling should be used or not today?
For my specific question here, I'm merely wondering if the timeout value
should be a boolean or have a unit. We could punt on that question, to
not block this feature from making progress upstream, by defining that
only 0 and 1 are valid values today (all other result in -EINVAL).
This would leave the door open for having 0 == off, 1 == default, > 1
represent the actual timeout in microseconds in the future.
Treating any non-zero value as "the default timeout" means that you
would have to assume that there's userspace who might pass other values
and you can't add additional meaning to the field in the future.
Regards,
Bjorn
> [1] https://lore.kernel.org/all/20250127044239.578540-5-quic_ekangupt@quicinc.com/
>
> //Ekansh
On 18-Feb-26 15:36, Bjorn Andersson wrote:
> On Mon, Feb 16, 2026 at 02:36:40PM +0530, Ekansh Gupta wrote:
>> On 2/16/2026 8:51 AM, Bjorn Andersson wrote:
>>> On Sun, Feb 15, 2026 at 11:51:35PM +0530, Ekansh Gupta wrote:
>>>> @@ -1812,6 +1912,30 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
>>>> return 0;
>>>> }
>>>>
>>>> +static int fastrpc_set_option(struct fastrpc_user *fl, char __user *argp)
>>>> +{
>>>> + struct fastrpc_ioctl_set_option opt = {0};
>>>> + int i;
>>>> +
>>>> + if (copy_from_user(&opt, argp, sizeof(opt)))
>>>> + return -EFAULT;
>>>> +
>>>> + for (i = 0; i < ARRAY_SIZE(opt.reserved); i++) {
>>>> + if (opt.reserved[i] != 0)
>>>> + return -EINVAL;
>>>> + }
>>>> +
>>>> + if (opt.req != FASTRPC_POLL_MODE)
>>>> + return -EINVAL;
>>>> +
>>>> + if (opt.value)
>>> Would it make sense to allow the caller to affect the poll timeout using
>>> the other 31 bits of this value?
>> I was planning to bring that control[1], but it's might be difficult for the caller
>>
>
> Skimming through the thread, it seems you're discussing how to determine
> if the DSP supports polling or not; that sounds like a separate problem
> in my view. Not sure if you settled that discussion, but couldn't that
> be handled through FASTRPC_IOCTL_GET_DSP_INFO?
>
> I assume though, this would be subject to firmware changes. How do you
> determine downstream if polling should be used or not today?
>
>
> For my specific question here, I'm merely wondering if the timeout value
> should be a boolean or have a unit. We could punt on that question, to
> not block this feature from making progress upstream, by defining that
> only 0 and 1 are valid values today (all other result in -EINVAL).
>
> This would leave the door open for having 0 == off, 1 == default, > 1
Giving '1' a special non-numerical meaning sounds odd... maybe 0:default,
-1:off (or the opposite)?
Konrad
On Wed, Feb 18, 2026 at 03:38:28PM +0100, Konrad Dybcio wrote:
>
>
> On 18-Feb-26 15:36, Bjorn Andersson wrote:
> > On Mon, Feb 16, 2026 at 02:36:40PM +0530, Ekansh Gupta wrote:
> >> On 2/16/2026 8:51 AM, Bjorn Andersson wrote:
> >>> On Sun, Feb 15, 2026 at 11:51:35PM +0530, Ekansh Gupta wrote:
> >>>> @@ -1812,6 +1912,30 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap,
> >>>> return 0;
> >>>> }
> >>>>
> >>>> +static int fastrpc_set_option(struct fastrpc_user *fl, char __user *argp)
> >>>> +{
> >>>> + struct fastrpc_ioctl_set_option opt = {0};
> >>>> + int i;
> >>>> +
> >>>> + if (copy_from_user(&opt, argp, sizeof(opt)))
> >>>> + return -EFAULT;
> >>>> +
> >>>> + for (i = 0; i < ARRAY_SIZE(opt.reserved); i++) {
> >>>> + if (opt.reserved[i] != 0)
> >>>> + return -EINVAL;
> >>>> + }
> >>>> +
> >>>> + if (opt.req != FASTRPC_POLL_MODE)
> >>>> + return -EINVAL;
> >>>> +
> >>>> + if (opt.value)
> >>> Would it make sense to allow the caller to affect the poll timeout using
> >>> the other 31 bits of this value?
> >> I was planning to bring that control[1], but it's might be difficult for the caller
> >>
> >
> > Skimming through the thread, it seems you're discussing how to determine
> > if the DSP supports polling or not; that sounds like a separate problem
> > in my view. Not sure if you settled that discussion, but couldn't that
> > be handled through FASTRPC_IOCTL_GET_DSP_INFO?
> >
> > I assume though, this would be subject to firmware changes. How do you
> > determine downstream if polling should be used or not today?
> >
> >
> > For my specific question here, I'm merely wondering if the timeout value
> > should be a boolean or have a unit. We could punt on that question, to
> > not block this feature from making progress upstream, by defining that
> > only 0 and 1 are valid values today (all other result in -EINVAL).
> >
> > This would leave the door open for having 0 == off, 1 == default, > 1
>
> Giving '1' a special non-numerical meaning sounds odd.. maybe 0:default,
> -1:off (or the opposite)?
>
I guess it comes down to the question of how likely it is that you
would want a different value than the default. We should provide sane
defaults and avoid sprinkling unergonomic knobs throughout the system,
but [0,1] and the rest of the bits reserved would leave the door open
for future use of the upper 31 bits.
I find 0 == "enabled" to be unintuitive...
And using -1 means that the bits aren't reserved for future use.
Regards,
Bjorn
> Konrad
© 2016 - 2026 Red Hat, Inc.