For any remote call to the DSP, after sending an invocation message,
the fastRPC driver waits for the glink response, and during this time
the CPU can go into low power modes. Add polling mode support, with
which the fastRPC driver polls continuously on a memory location
after sending a message to the remote subsystem. This eliminates
CPU wakeup and scheduling latencies and reduces fastRPC overhead.
With this change, the DSP always sends a glink response, which is
ignored if polling mode did not time out.
Signed-off-by: Ekansh Gupta <quic_ekangupt@quicinc.com>
---
drivers/misc/fastrpc.c | 127 +++++++++++++++++++++++++++++++++---
include/uapi/misc/fastrpc.h | 3 +-
2 files changed, 121 insertions(+), 9 deletions(-)
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index c008fcd95e15..a25673c22db8 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -111,6 +111,19 @@
#define FASTRPC_KERNEL_PERF_LIST (PERF_KEY_MAX)
#define FASTRPC_DSP_PERF_LIST 12
+/* Poll response number from remote processor for call completion */
+#define FASTRPC_POLL_RESPONSE (0xdecaf)
+/* number of 1 us polling iterations between read memory barriers */
+#define FASTRPC_POLL_TIME_MEM_UPDATE (500)
+
+/* How the driver waits for completion of an RPC call (ctx->rsp_flags) */
+enum fastrpc_response_flags {
+ /* wait on the context completion, which the glink response signals */
+ NORMAL_RESPONSE = 0,
+ /* busy-poll the poll memory; on timeout fall back to NORMAL_RESPONSE */
+ POLL_MODE = 1,
+};
+
static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
"sdsp", "cdsp", "cdsp1" };
struct fastrpc_phy_page {
@@ -258,6 +271,12 @@ struct fastrpc_invoke_ctx {
u64 *perf_dsp;
u64 ctxid;
u64 msg_sz;
+ /* Threads poll for specified timeout and fall back to glink wait */
+ u64 poll_timeout;
+ /* work done status flag */
+ bool is_work_done;
+ /* response flags from remote processor */
+ enum fastrpc_response_flags rsp_flags;
struct kref refcount;
struct list_head node; /* list of ctxs */
struct completion work;
@@ -682,6 +701,7 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
ctx->crc = (u32 *)(uintptr_t)inv2->crc;
ctx->perf_dsp = (u64 *)(uintptr_t)inv2->perf_dsp;
ctx->perf_kernel = (u64 *)(uintptr_t)inv2->perf_kernel;
+ ctx->poll_timeout = (u64)inv2->poll_timeout;
if (ctx->perf_kernel) {
ctx->perf = kzalloc(sizeof(*(ctx->perf)), GFP_KERNEL);
if (!ctx->perf)
@@ -692,6 +712,8 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
ctx->pid = current->pid;
ctx->tgid = user->tgid;
ctx->cctx = cctx;
+ ctx->rsp_flags = NORMAL_RESPONSE;
+ ctx->is_work_done = false;
init_completion(&ctx->work);
INIT_WORK(&ctx->put_work, fastrpc_context_put_wq);
@@ -1256,6 +1278,87 @@ static void fastrpc_update_invoke_count(u32 handle, u64 *perf_counter,
*count++;
}
+/*
+ * poll_for_remote_response() - busy-poll the invoke buffer for completion
+ * @ctx: invoke context; the poll word is located inside the ctx->rpra buffer
+ * @timeout: maximum number of polling iterations, each followed by udelay(1)
+ *
+ * The poll word sits after the invoke-buf list, the phy-page entries for all
+ * in/out buffers and handles, the fd list (FASTRPC_MAX_FDLIST u64 entries)
+ * and the CRC list (FASTRPC_MAX_CRCLIST u32 entries).
+ *
+ * When the poll word equals FASTRPC_POLL_RESPONSE the context is marked as
+ * done and its retval is set to 0.
+ *
+ * NOTE(review): @timeout comes from the caller-supplied poll_timeout and is
+ * not capped here; consider bounding it so a caller cannot request an
+ * arbitrarily long busy-wait.
+ *
+ * Return: 0 if the poll response was observed, -EIO if @timeout iterations
+ * elapsed without seeing it.
+ */
+static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx, u64 timeout)
+{
+	u32 sc = ctx->sc;
+	struct fastrpc_invoke_buf *list;
+	struct fastrpc_phy_page *pages;
+	u64 *fdlist;
+	u32 *crclist, *poll;
+	unsigned int inbufs, outbufs, handles;
+	unsigned int since_barrier = 0;
+	u64 i;
+
+	/* calculate poll memory location */
+	inbufs = REMOTE_SCALARS_INBUFS(sc);
+	outbufs = REMOTE_SCALARS_OUTBUFS(sc);
+	handles = REMOTE_SCALARS_INHANDLES(sc) + REMOTE_SCALARS_OUTHANDLES(sc);
+	list = fastrpc_invoke_buf_start(ctx->rpra, ctx->nscalars);
+	pages = fastrpc_phy_page_start(list, ctx->nscalars);
+	fdlist = (u64 *)(pages + inbufs + outbufs + handles);
+	crclist = (u32 *)(fdlist + FASTRPC_MAX_FDLIST);
+	poll = (u32 *)(crclist + FASTRPC_MAX_CRCLIST);
+
+	/*
+	 * Poll on memory for the DSP response. The counter is u64 to match
+	 * @timeout: an int counter would overflow for large timeouts.
+	 */
+	for (i = 0; i < timeout; i++) {
+		/*
+		 * The poll word is written by the remote processor, so force
+		 * a fresh load on every iteration.
+		 */
+		if (READ_ONCE(*poll) == FASTRPC_POLL_RESPONSE) {
+			ctx->is_work_done = true;
+			ctx->retval = 0;
+			return 0;
+		}
+		if (since_barrier == FASTRPC_POLL_TIME_MEM_UPDATE) {
+			/* make sure that all poll memory writes by DSP are seen by CPU */
+			dma_rmb();
+			since_barrier = 0;
+		}
+		udelay(1);
+		since_barrier++;
+	}
+
+	/* timed out without observing the poll response */
+	return -EIO;
+}
+
+/*
+ * fastrpc_wait_for_response() - block on the context's completion
+ * @ctx: invoke context whose ->work completion is waited on
+ * @kernel: non-zero for kernel-originated calls
+ *
+ * Kernel calls wait with a 10 * HZ timeout; all other calls wait
+ * interruptibly with no timeout.
+ *
+ * Return: 0 on completion, -ETIMEDOUT if a kernel call timed out, otherwise
+ * the value returned by wait_for_completion_interruptible().
+ */
+static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx,
+					    u32 kernel)
+{
+	if (!kernel)
+		return wait_for_completion_interruptible(&ctx->work);
+
+	return wait_for_completion_timeout(&ctx->work, 10 * HZ) ? 0 : -ETIMEDOUT;
+}
+
+/*
+ * fastrpc_wait_for_completion() - wait for a remote call to finish
+ * @ctx: invoke context; ->rsp_flags selects the wait strategy
+ * @kernel: non-zero for kernel-originated calls (passed to the normal wait)
+ *
+ * In POLL_MODE the poll memory is polled for up to ctx->poll_timeout
+ * iterations; if that fails the context is switched to NORMAL_RESPONSE and
+ * the loop retries with a blocking wait. In NORMAL_RESPONSE the function
+ * returns as soon as the wait fails or the work is marked done; otherwise it
+ * waits again.
+ *
+ * Return: 0 once ctx->is_work_done is set, a negative errno from the
+ * blocking wait, or -EBADR for an unknown response type.
+ */
+static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx,
+				       u32 kernel)
+{
+	int err;
+
+	do {
+		switch (ctx->rsp_flags) {
+		case NORMAL_RESPONSE:
+			err = fastrpc_wait_for_response(ctx, kernel);
+			if (err || ctx->is_work_done)
+				return err;
+			break;
+		case POLL_MODE:
+			err = poll_for_remote_response(ctx, ctx->poll_timeout);
+			/* If polling timed out, move to normal response mode */
+			if (err)
+				ctx->rsp_flags = NORMAL_RESPONSE;
+			break;
+		default:
+			dev_dbg(ctx->fl->sctx->dev,
+				"unsupported response type:0x%x\n",
+				ctx->rsp_flags);
+			/*
+			 * Bail out: nothing in this branch sets is_work_done,
+			 * so looping again would spin forever.
+			 */
+			return -EBADR;
+		}
+	} while (!ctx->is_work_done);
+
+	return err;
+}
+
static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct fastrpc_invoke_v2 *inv2)
{
struct fastrpc_invoke_ctx *ctx = NULL;
@@ -1301,16 +1404,22 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct
if (err)
goto bail;
- if (kernel) {
- if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
- err = -ETIMEDOUT;
- } else {
- err = wait_for_completion_interruptible(&ctx->work);
- }
+ if (ctx->poll_timeout != 0 && handle > FASTRPC_MAX_STATIC_HANDLE &&
+ fl->cctx->domain_id == CDSP_DOMAIN_ID &&
+ fl->pd == USER_PD)
+ ctx->rsp_flags = POLL_MODE;
+ err = fastrpc_wait_for_completion(ctx, kernel);
if (err)
goto bail;
+ if (!ctx->is_work_done) {
+ err = -ETIMEDOUT;
+ dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, sc 0x%x\n",
+ handle, sc);
+ goto bail;
+ }
+
/* make sure that all memory writes by DSP are seen by CPU */
dma_rmb();
PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_PUTARGS),
@@ -2627,12 +2736,14 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
ctx = idr_find(&cctx->ctx_idr, ctxid);
spin_unlock_irqrestore(&cctx->lock, flags);
+ /* Ignore this failure as context returned will be NULL for polling mode */
if (!ctx) {
- dev_err(&rpdev->dev, "No context ID matches response\n");
- return -ENOENT;
+ dev_dbg(&rpdev->dev, "No context ID matches response\n");
+ return 0;
}
ctx->retval = rsp->retval;
+ ctx->is_work_done = true;
complete(&ctx->work);
/*
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index 1edc7c04b171..dde684554011 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -86,7 +86,8 @@ struct fastrpc_invoke_v2 {
__u64 crc;
__u64 perf_kernel;
__u64 perf_dsp;
- __u32 reserved[12];
+ __u64 poll_timeout;
+ __u32 reserved[10];
};
struct fastrpc_init_create {
--
2.34.1
© 2016 - 2024 Red Hat, Inc.