From: Per Larsen <perlarsen@google.com>
SMCCC 1.1 and prior allows four registers to be sent back as a result
of an FF-A interface. SMCCC 1.2 increases the number of results that can
be sent back to 8 and 16 for 32-bit and 64-bit SMC/HVCs respectively.
FF-A 1.0 references SMCCC 1.2 (reference [4] on page xi) and FF-A 1.2
explicitly requires SMCCC 1.2 so it should be safe to use this version
unconditionally. Moreover, it is simpler to implement FF-A features
without having to worry about compatibility with SMCCC 1.1 and older.
Update the FF-A initialization and host handler code to use SMCCC 1.2.
Signed-off-by: Per Larsen <perlarsen@google.com>
---
arch/arm64/kvm/hyp/nvhe/Makefile | 1 +
arch/arm64/kvm/hyp/nvhe/ffa.c | 193 +++++++++++++++++++++++++--------------
2 files changed, 125 insertions(+), 69 deletions(-)
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index a76522d63c3e630795db5972a99abc3d24bc5e26..f859a8fb41a25effea1edd977bef889423153399 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -27,6 +27,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
+hyp-obj-y += ../../../kernel/smccc-call.o
hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
hyp-obj-y += $(lib-objs)
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 2c199d40811efb5bfae199c4a67d8ae3d9307357..65d241ba32403d014b43cc4ef4d5bf9693813809 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -71,36 +71,68 @@ static u32 hyp_ffa_version;
static bool has_version_negotiated;
static hyp_spinlock_t version_lock;
-static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
+static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno)
{
- *res = (struct arm_smccc_res) {
+ *res = (struct arm_smccc_1_2_regs) {
.a0 = FFA_ERROR,
.a2 = ffa_errno,
};
}
-static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
+static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop)
{
if (ret == FFA_RET_SUCCESS) {
- *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
- .a2 = prop };
+ *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS,
+ .a2 = prop };
} else {
ffa_to_smccc_error(res, ret);
}
}
-static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
+static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret)
{
ffa_to_smccc_res_prop(res, ret, 0);
}
static void ffa_set_retval(struct kvm_cpu_context *ctxt,
- struct arm_smccc_res *res)
+ struct arm_smccc_1_2_regs *res)
{
+ DECLARE_REG(u64, func_id, ctxt, 0);
cpu_reg(ctxt, 0) = res->a0;
cpu_reg(ctxt, 1) = res->a1;
cpu_reg(ctxt, 2) = res->a2;
cpu_reg(ctxt, 3) = res->a3;
+ cpu_reg(ctxt, 4) = res->a4;
+ cpu_reg(ctxt, 5) = res->a5;
+ cpu_reg(ctxt, 6) = res->a6;
+ cpu_reg(ctxt, 7) = res->a7;
+
+ /*
+ * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
+ *
+ * The most straightforward approach is to look at the function ID
+ * sent by the caller. However, the caller could send FFA_MSG_WAIT
+ * which is a 32-bit interface but the reply could very well be 64-bit
+ * such as FFA_FN64_MSG_SEND_DIRECT_REQ or FFA_MSG_SEND_DIRECT_REQ2.
+ *
+ * Instead, we could look at the function ID in the response (a0) but
+ * that doesn't work either as FFA_VERSION responses put the version
+ * number (or error code) in w0.
+ *
+ * Set x8-x17 iff response contains 64-bit function ID in a0.
+ */
+ if (func_id != FFA_VERSION && ARM_SMCCC_IS_64(res->a0)) {
+ cpu_reg(ctxt, 8) = res->a8;
+ cpu_reg(ctxt, 9) = res->a9;
+ cpu_reg(ctxt, 10) = res->a10;
+ cpu_reg(ctxt, 11) = res->a11;
+ cpu_reg(ctxt, 12) = res->a12;
+ cpu_reg(ctxt, 13) = res->a13;
+ cpu_reg(ctxt, 14) = res->a14;
+ cpu_reg(ctxt, 15) = res->a15;
+ cpu_reg(ctxt, 16) = res->a16;
+ cpu_reg(ctxt, 17) = res->a17;
+ }
}
static bool is_ffa_call(u64 func_id)
@@ -113,82 +145,92 @@ static bool is_ffa_call(u64 func_id)
static int ffa_map_hyp_buffers(u64 ffa_page_count)
{
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
- arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP,
- hyp_virt_to_phys(hyp_buffers.tx),
- hyp_virt_to_phys(hyp_buffers.rx),
- ffa_page_count,
- 0, 0, 0, 0,
- &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_FN64_RXTX_MAP,
+ .a1 = hyp_virt_to_phys(hyp_buffers.tx),
+ .a2 = hyp_virt_to_phys(hyp_buffers.rx),
+ .a3 = ffa_page_count,
+ }, &res);
return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}
static int ffa_unmap_hyp_buffers(void)
{
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
- arm_smccc_1_1_smc(FFA_RXTX_UNMAP,
- HOST_FFA_ID,
- 0, 0, 0, 0, 0, 0,
- &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_RXTX_UNMAP,
+ .a1 = HOST_FFA_ID,
+ }, &res);
return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}
-static void ffa_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo,
+static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
u32 handle_hi, u32 fraglen, u32 endpoint_id)
{
- arm_smccc_1_1_smc(FFA_MEM_FRAG_TX,
- handle_lo, handle_hi, fraglen, endpoint_id,
- 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_MEM_FRAG_TX,
+ .a1 = handle_lo,
+ .a2 = handle_hi,
+ .a3 = fraglen,
+ .a4 = endpoint_id,
+ }, res);
}
-static void ffa_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo,
+static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
u32 handle_hi, u32 fragoff)
{
- arm_smccc_1_1_smc(FFA_MEM_FRAG_RX,
- handle_lo, handle_hi, fragoff, HOST_FFA_ID,
- 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_MEM_FRAG_RX,
+ .a1 = handle_lo,
+ .a2 = handle_hi,
+ .a3 = fragoff,
+ .a4 = HOST_FFA_ID,
+ }, res);
}
-static void ffa_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len,
+static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len,
u32 fraglen)
{
- arm_smccc_1_1_smc(func_id, len, fraglen,
- 0, 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = func_id,
+ .a1 = len,
+ .a2 = fraglen,
+ }, res);
}
-static void ffa_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo,
+static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo,
u32 handle_hi, u32 flags)
{
- arm_smccc_1_1_smc(FFA_MEM_RECLAIM,
- handle_lo, handle_hi, flags,
- 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_MEM_RECLAIM,
+ .a1 = handle_lo,
+ .a2 = handle_hi,
+ .a3 = flags,
+ }, res);
}
-static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
+static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len)
{
- arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ,
- len, len,
- 0, 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_FN64_MEM_RETRIEVE_REQ,
+ .a1 = len,
+ .a2 = len,
+ }, res);
}
-static void ffa_rx_release(struct arm_smccc_res *res)
+static void ffa_rx_release(struct arm_smccc_1_2_regs *res)
{
- arm_smccc_1_1_smc(FFA_RX_RELEASE,
- 0, 0,
- 0, 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_RX_RELEASE,
+ }, res);
}
-static void do_ffa_rxtx_map(struct arm_smccc_res *res,
+static void do_ffa_rxtx_map(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(phys_addr_t, tx, ctxt, 1);
@@ -267,7 +309,7 @@ static void do_ffa_rxtx_map(struct arm_smccc_res *res,
goto out_unlock;
}
-static void do_ffa_rxtx_unmap(struct arm_smccc_res *res,
+static void do_ffa_rxtx_unmap(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, id, ctxt, 1);
@@ -368,7 +410,7 @@ static int ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges,
return ret;
}
-static void do_ffa_mem_frag_tx(struct arm_smccc_res *res,
+static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, handle_lo, ctxt, 1);
@@ -427,7 +469,7 @@ static void do_ffa_mem_frag_tx(struct arm_smccc_res *res,
}
static void __do_ffa_mem_xfer(const u64 func_id,
- struct arm_smccc_res *res,
+ struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, len, ctxt, 1);
@@ -521,7 +563,7 @@ static void __do_ffa_mem_xfer(const u64 func_id,
__do_ffa_mem_xfer((fid), (res), (ctxt)); \
} while (0);
-static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
+static void do_ffa_mem_reclaim(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, handle_lo, ctxt, 1);
@@ -634,7 +676,7 @@ static bool ffa_call_supported(u64 func_id)
return true;
}
-static bool do_ffa_features(struct arm_smccc_res *res,
+static bool do_ffa_features(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, id, ctxt, 1);
@@ -666,17 +708,21 @@ static bool do_ffa_features(struct arm_smccc_res *res,
static int hyp_ffa_post_init(void)
{
size_t min_rxtx_sz;
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
- arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
+ .a0 = FFA_ID_GET,
+ }, &res);
if (res.a0 != FFA_SUCCESS)
return -EOPNOTSUPP;
if (res.a2 != HOST_FFA_ID)
return -EINVAL;
- arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP,
- 0, 0, 0, 0, 0, 0, &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
+ .a0 = FFA_FEATURES,
+ .a1 = FFA_FN64_RXTX_MAP,
+ }, &res);
if (res.a0 != FFA_SUCCESS)
return -EOPNOTSUPP;
@@ -700,7 +746,7 @@ static int hyp_ffa_post_init(void)
return 0;
}
-static void do_ffa_version(struct arm_smccc_res *res,
+static void do_ffa_version(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, ffa_req_version, ctxt, 1);
@@ -724,9 +770,10 @@ static void do_ffa_version(struct arm_smccc_res *res,
* first if TEE supports it.
*/
if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) {
- arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0,
- 0, 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_VERSION,
+ .a1 = ffa_req_version,
+ }, res);
if (res->a0 == FFA_RET_NOT_SUPPORTED)
goto unlock;
@@ -743,7 +790,7 @@ static void do_ffa_version(struct arm_smccc_res *res,
hyp_spin_unlock(&version_lock);
}
-static void do_ffa_part_get(struct arm_smccc_res *res,
+static void do_ffa_part_get(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, uuid0, ctxt, 1);
@@ -759,9 +806,14 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
goto out_unlock;
}
- arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1,
- uuid2, uuid3, flags, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_PARTITION_INFO_GET,
+ .a1 = uuid0,
+ .a2 = uuid1,
+ .a3 = uuid2,
+ .a4 = uuid3,
+ .a5 = flags,
+ }, res);
if (res->a0 != FFA_SUCCESS)
goto out_unlock;
@@ -794,7 +846,7 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
{
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
/*
* There's no way we can tell what a non-standard SMC call might
@@ -863,13 +915,16 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
int hyp_ffa_init(void *pages)
{
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
void *tx, *rx;
if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2)
return 0;
- arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_VERSION,
+ .a1 = FFA_VERSION_1_1,
+ }, &res);
if (res.a0 == FFA_RET_NOT_SUPPORTED)
return 0;
--
2.50.0.727.gbf7dc18ff4-goog
On Tue, 01 Jul 2025 23:06:35 +0100,
Per Larsen via B4 Relay <devnull+perlarsen.google.com@kernel.org> wrote:
>
> From: Per Larsen <perlarsen@google.com>
>
> SMCCC 1.1 and prior allows four registers to be sent back as a result
> of an FF-A interface. SMCCC 1.2 increases the number of results that can
> be sent back to 8 and 16 for 32-bit and 64-bit SMC/HVCs respectively.
>
> FF-A 1.0 references SMCCC 1.2 (reference [4] on page xi) and FF-A 1.2
> explicitly requires SMCCC 1.2 so it should be safe to use this version
> unconditionally. Moreover, it is simpler to implement FF-A features
> without having to worry about compatibility with SMCCC 1.1 and older.
>
> Update the FF-A initialization and host handler code to use SMCCC 1.2.
>
> Signed-off-by: Per Larsen <perlarsen@google.com>
> ---
> arch/arm64/kvm/hyp/nvhe/Makefile | 1 +
> arch/arm64/kvm/hyp/nvhe/ffa.c | 193 +++++++++++++++++++++++++--------------
> 2 files changed, 125 insertions(+), 69 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> index a76522d63c3e630795db5972a99abc3d24bc5e26..f859a8fb41a25effea1edd977bef889423153399 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -27,6 +27,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
> cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
> hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
> ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
> +hyp-obj-y += ../../../kernel/smccc-call.o
> hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
> hyp-obj-y += $(lib-objs)
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> index 2c199d40811efb5bfae199c4a67d8ae3d9307357..65d241ba32403d014b43cc4ef4d5bf9693813809 100644
> --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> @@ -71,36 +71,68 @@ static u32 hyp_ffa_version;
> static bool has_version_negotiated;
> static hyp_spinlock_t version_lock;
>
> -static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
> +static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno)
> {
> - *res = (struct arm_smccc_res) {
> + *res = (struct arm_smccc_1_2_regs) {
> .a0 = FFA_ERROR,
> .a2 = ffa_errno,
> };
> }
>
> -static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
> +static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop)
> {
> if (ret == FFA_RET_SUCCESS) {
> - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
> - .a2 = prop };
> + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS,
> + .a2 = prop };
> } else {
> ffa_to_smccc_error(res, ret);
> }
> }
>
> -static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
> +static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret)
> {
> ffa_to_smccc_res_prop(res, ret, 0);
> }
>
> static void ffa_set_retval(struct kvm_cpu_context *ctxt,
> - struct arm_smccc_res *res)
> + struct arm_smccc_1_2_regs *res)
> {
> + DECLARE_REG(u64, func_id, ctxt, 0);
> cpu_reg(ctxt, 0) = res->a0;
> cpu_reg(ctxt, 1) = res->a1;
> cpu_reg(ctxt, 2) = res->a2;
> cpu_reg(ctxt, 3) = res->a3;
> + cpu_reg(ctxt, 4) = res->a4;
> + cpu_reg(ctxt, 5) = res->a5;
> + cpu_reg(ctxt, 6) = res->a6;
> + cpu_reg(ctxt, 7) = res->a7;
From DEN0028G 2.6:
<quote>
Registers W4-W7 must be preserved unless they contain results, as
specified in the function definition.
</quote>
On what grounds can you blindly change these registers?
> +
> + /*
> + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
> + *
> + * The most straightforward approach is to look at the function ID
> + * sent by the caller. However, the caller could send FFA_MSG_WAIT
> + * which is a 32-bit interface but the reply could very well be 64-bit
> + * such as FFA_FN64_MSG_SEND_DIRECT_REQ or FFA_MSG_SEND_DIRECT_REQ2.
> + *
> + * Instead, we could look at the function ID in the response (a0) but
> + * that doesn't work either as FFA_VERSION responses put the version
> + * number (or error code) in w0.
> + *
> + * Set x8-x17 iff response contains 64-bit function ID in a0.
> + */
> + if (func_id != FFA_VERSION && ARM_SMCCC_IS_64(res->a0)) {
> + cpu_reg(ctxt, 8) = res->a8;
> + cpu_reg(ctxt, 9) = res->a9;
> + cpu_reg(ctxt, 10) = res->a10;
> + cpu_reg(ctxt, 11) = res->a11;
> + cpu_reg(ctxt, 12) = res->a12;
> + cpu_reg(ctxt, 13) = res->a13;
> + cpu_reg(ctxt, 14) = res->a14;
> + cpu_reg(ctxt, 15) = res->a15;
> + cpu_reg(ctxt, 16) = res->a16;
> + cpu_reg(ctxt, 17) = res->a17;
> + }
> }
I don't see how that can ever work.
Irrespective of how FFA_MSG_WAIT actually works (and I couldn't find
anything in the spec that supports the above), the requester will
fully expect its registers to be preserved based on the initial
function type, and that alone. No ifs, no buts.
If what you describe can happen (please provide a convincing example),
then the spec is doomed.
M.
--
Without deviation from the norm, progress is not possible.
On 7/3/25 5:38 AM, Marc Zyngier wrote:
> On Tue, 01 Jul 2025 23:06:35 +0100,
> Per Larsen via B4 Relay <devnull+perlarsen.google.com@kernel.org> wrote:
>>
>> From: Per Larsen <perlarsen@google.com>
>>
>> SMCCC 1.1 and prior allows four registers to be sent back as a result
>> of an FF-A interface. SMCCC 1.2 increases the number of results that can
>> be sent back to 8 and 16 for 32-bit and 64-bit SMC/HVCs respectively.
>>
>> FF-A 1.0 references SMCCC 1.2 (reference [4] on page xi) and FF-A 1.2
>> explicitly requires SMCCC 1.2 so it should be safe to use this version
>> unconditionally. Moreover, it is simpler to implement FF-A features
>> without having to worry about compatibility with SMCCC 1.1 and older.
>>
>> Update the FF-A initialization and host handler code to use SMCCC 1.2.
>>
>> Signed-off-by: Per Larsen <perlarsen@google.com>
>> ---
>> arch/arm64/kvm/hyp/nvhe/Makefile | 1 +
>> arch/arm64/kvm/hyp/nvhe/ffa.c | 193 +++++++++++++++++++++++++--------------
>> 2 files changed, 125 insertions(+), 69 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
>> index a76522d63c3e630795db5972a99abc3d24bc5e26..f859a8fb41a25effea1edd977bef889423153399 100644
>> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
>> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
>> @@ -27,6 +27,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
>> cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
>> hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
>> ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
>> +hyp-obj-y += ../../../kernel/smccc-call.o
>> hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
>> hyp-obj-y += $(lib-objs)
>>
>> diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
>> index 2c199d40811efb5bfae199c4a67d8ae3d9307357..65d241ba32403d014b43cc4ef4d5bf9693813809 100644
>> --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
>> +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
>> @@ -71,36 +71,68 @@ static u32 hyp_ffa_version;
>> static bool has_version_negotiated;
>> static hyp_spinlock_t version_lock;
>>
>> -static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
>> +static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno)
>> {
>> - *res = (struct arm_smccc_res) {
>> + *res = (struct arm_smccc_1_2_regs) {
>> .a0 = FFA_ERROR,
>> .a2 = ffa_errno,
>> };
>> }
>>
>> -static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
>> +static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop)
>> {
>> if (ret == FFA_RET_SUCCESS) {
>> - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
>> - .a2 = prop };
>> + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS,
>> + .a2 = prop };
>> } else {
>> ffa_to_smccc_error(res, ret);
>> }
>> }
>>
>> -static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
>> +static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret)
>> {
>> ffa_to_smccc_res_prop(res, ret, 0);
>> }
>>
>> static void ffa_set_retval(struct kvm_cpu_context *ctxt,
>> - struct arm_smccc_res *res)
>> + struct arm_smccc_1_2_regs *res)
>> {
>> + DECLARE_REG(u64, func_id, ctxt, 0);
>> cpu_reg(ctxt, 0) = res->a0;
>> cpu_reg(ctxt, 1) = res->a1;
>> cpu_reg(ctxt, 2) = res->a2;
>> cpu_reg(ctxt, 3) = res->a3;
>> + cpu_reg(ctxt, 4) = res->a4;
>> + cpu_reg(ctxt, 5) = res->a5;
>> + cpu_reg(ctxt, 6) = res->a6;
>> + cpu_reg(ctxt, 7) = res->a7;
>
> From DEN0028G 2.6:
>
> <quote>
> Registers W4-W7 must be preserved unless they contain results, as
> specified in the function definition.
> </quote>
>
> On what grounds can you blindly change these registers?
From DEN0077A 1.2 Section 11.2: Reserved parameter convention
<quote>
Unused parameter registers in FF-A ABIs are reserved for future use by
the Framework.
[...]
The caller is expected to write zeroes to these registers. The callee
ignores the values in these registers.
</quote>
My read is that, as long as we are writing zeroes into reserved
registers (which I believe we are), we comply with DEN0026G 2.6.>
>> +
>> + /*
>> + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
>> + *
>> + * The most straightforward approach is to look at the function ID
>> + * sent by the caller. However, the caller could send FFA_MSG_WAIT
>> + * which is a 32-bit interface but the reply could very well be 64-bit
>> + * such as FFA_FN64_MSG_SEND_DIRECT_REQ or FFA_MSG_SEND_DIRECT_REQ2.
>> + *
>> + * Instead, we could look at the function ID in the response (a0) but
>> + * that doesn't work either as FFA_VERSION responses put the version
>> + * number (or error code) in w0.
>> + *
>> + * Set x8-x17 iff response contains 64-bit function ID in a0.
>> + */
>> + if (func_id != FFA_VERSION && ARM_SMCCC_IS_64(res->a0)) {
>> + cpu_reg(ctxt, 8) = res->a8;
>> + cpu_reg(ctxt, 9) = res->a9;
>> + cpu_reg(ctxt, 10) = res->a10;
>> + cpu_reg(ctxt, 11) = res->a11;
>> + cpu_reg(ctxt, 12) = res->a12;
>> + cpu_reg(ctxt, 13) = res->a13;
>> + cpu_reg(ctxt, 14) = res->a14;
>> + cpu_reg(ctxt, 15) = res->a15;
>> + cpu_reg(ctxt, 16) = res->a16;
>> + cpu_reg(ctxt, 17) = res->a17;
>> + }
>> }
>
> I don't see how that can ever work.
>
> Irrespective of how FFA_MSG_WAIT actually works (and I couldn't find
> anything in the spec that supports the above), the requester will
> fully expect its registers to be preserved based on the initial
> function type, and that alone. No ifs, no buts.
>
> If what you describe can happen (please provide a convincing example),
> then the spec is doomed.
DEN0077A 1.2 Section 8.2 (Runtime Model for FFA_RUN) and 8.3 (Runtime
Model for Direct request ABIs) contains Figures 8.1 and 8.2. Each figure
shows transitions between states "waiting", "blocked", "running", and
"preempted". Key to my understanding is that the waiting state in Figure
8.1 and Figure 8.2 is the exact same state. This appears to be the case
per Section 4.10.
So we have to consider the ways to get in and out of the waiting state
by joining the state machines in Figures 8.1 and 8.2. Figure 8.1 has an
edge between "running" and "waiting" caused by FFA_MSG_WAIT. Figure 8.2
has an edge between "waiting" and "running" caused by a "Direct request
ABI".
Direct request ABIs include FFA_MSG_SEND_DIRECT_REQ2 which is why the
FF-A 1.2 spec, in my read, permits the response to a 32-bit FFA_MSG_WAIT
call can be a 64-bit FFA_MSG_SEND_DIRECT_REQ2 reply.
The Trusty TEE OS calls FFA_MSG_WAIT here:
https://cs.android.com/android/platform/superproject/main/+/main:trusty/kernel/lib/sm/sm.c;l=844?q=sm.c&ss=android
and anticipates a direct request ABI as one of the potential replies.
Thanks,
Per
PS: I'm replying from perl@immunant.com as I cannot use
perlarsen@google.com to send replies to review comments.
On Mon, Jul 07, 2025 at 05:06:23PM -0700, Per Larsen wrote:
> On 7/3/25 5:38 AM, Marc Zyngier wrote:
> > On Tue, 01 Jul 2025 23:06:35 +0100,
> > Per Larsen via B4 Relay <devnull+perlarsen.google.com@kernel.org> wrote:
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > index 2c199d40811efb5bfae199c4a67d8ae3d9307357..65d241ba32403d014b43cc4ef4d5bf9693813809 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > @@ -71,36 +71,68 @@ static u32 hyp_ffa_version;
> > > static bool has_version_negotiated;
> > > static hyp_spinlock_t version_lock;
> > > -static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
> > > +static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno)
> > > {
> > > - *res = (struct arm_smccc_res) {
> > > + *res = (struct arm_smccc_1_2_regs) {
> > > .a0 = FFA_ERROR,
> > > .a2 = ffa_errno,
> > > };
> > > }
> > > -static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
> > > +static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop)
> > > {
> > > if (ret == FFA_RET_SUCCESS) {
> > > - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
> > > - .a2 = prop };
> > > + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS,
> > > + .a2 = prop };
> > > } else {
> > > ffa_to_smccc_error(res, ret);
> > > }
> > > }
> > > -static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
> > > +static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret)
> > > {
> > > ffa_to_smccc_res_prop(res, ret, 0);
> > > }
> > > static void ffa_set_retval(struct kvm_cpu_context *ctxt,
> > > - struct arm_smccc_res *res)
> > > + struct arm_smccc_1_2_regs *res)
> > > {
> > > + DECLARE_REG(u64, func_id, ctxt, 0);
> > > cpu_reg(ctxt, 0) = res->a0;
> > > cpu_reg(ctxt, 1) = res->a1;
> > > cpu_reg(ctxt, 2) = res->a2;
> > > cpu_reg(ctxt, 3) = res->a3;
> > > + cpu_reg(ctxt, 4) = res->a4;
> > > + cpu_reg(ctxt, 5) = res->a5;
> > > + cpu_reg(ctxt, 6) = res->a6;
> > > + cpu_reg(ctxt, 7) = res->a7;
> >
> > From DEN0028G 2.6:
> >
> > <quote>
> > Registers W4-W7 must be preserved unless they contain results, as
> > specified in the function definition.
> > </quote>
> >
> > On what grounds can you blindly change these registers?
> From DEN0077A 1.2 Section 11.2: Reserved parameter convention
>
> <quote>
> Unused parameter registers in FF-A ABIs are reserved for future use by the
> Framework.
>
> [...]
>
> The caller is expected to write zeroes to these registers. The callee
> ignores the values in these registers.
> </quote>
>
> My read is that, as long as we are writing zeroes into reserved registers
> (which I believe we are), we comply with DEN0026G 2.6.>
Right, the specs make this far more difficult to decipher than necessary
but I think SMCCC defers to the definition of the specific call being
made and then FF-A is basically saying that unused argument registers
are always zeroed.
Rather than have the EL2 proxy treat each call differently based on the
used argument registers, we can rely on EL3 doing the right thing and
blindly copy everything back, which is what you've done. So I think
that's ok.
> > > +
> > > + /*
> > > + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
> > > + *
> > > + * The most straightforward approach is to look at the function ID
> > > + * sent by the caller. However, the caller could send FFA_MSG_WAIT
> > > + * which is a 32-bit interface but the reply could very well be 64-bit
> > > + * such as FFA_FN64_MSG_SEND_DIRECT_REQ or FFA_MSG_SEND_DIRECT_REQ2.
> > > + *
> > > + * Instead, we could look at the function ID in the response (a0) but
> > > + * that doesn't work either as FFA_VERSION responses put the version
> > > + * number (or error code) in w0.
> > > + *
> > > + * Set x8-x17 iff response contains 64-bit function ID in a0.
> > > + */
> > > + if (func_id != FFA_VERSION && ARM_SMCCC_IS_64(res->a0)) {
> > > + cpu_reg(ctxt, 8) = res->a8;
> > > + cpu_reg(ctxt, 9) = res->a9;
> > > + cpu_reg(ctxt, 10) = res->a10;
> > > + cpu_reg(ctxt, 11) = res->a11;
> > > + cpu_reg(ctxt, 12) = res->a12;
> > > + cpu_reg(ctxt, 13) = res->a13;
> > > + cpu_reg(ctxt, 14) = res->a14;
> > > + cpu_reg(ctxt, 15) = res->a15;
> > > + cpu_reg(ctxt, 16) = res->a16;
> > > + cpu_reg(ctxt, 17) = res->a17;
> > > + }
> > > }
> >
> > I don't see how that can ever work.
> >
> > Irrespective of how FFA_MSG_WAIT actually works (and I couldn't find
> > anything in the spec that supports the above), the requester will
> > fully expect its registers to be preserved based on the initial
> > function type, and that alone. No ifs, no buts.
> >
> > If what you describe can happen (please provide a convincing example),
> > then the spec is doomed.
>
> DEN0077A 1.2 Section 8.2 (Runtime Model for FFA_RUN) and 8.3 (Runtime Model
> for Direct request ABIs) contains Figures 8.1 and 8.2. Each figure shows
> transitions between states "waiting", "blocked", "running", and "preempted".
> Key to my understanding is that the waiting state in Figure 8.1 and Figure
> 8.2 is the exact same state. This appears to be the case per Section 4.10.
>
> So we have to consider the ways to get in and out of the waiting state by
> joining the state machines in Figures 8.1 and 8.2. Figure 8.1 has an edge
> between "running" and "waiting" caused by FFA_MSG_WAIT. Figure 8.2 has an
> edge between "waiting" and "running" caused by a "Direct request ABI".
>
> Direct request ABIs include FFA_MSG_SEND_DIRECT_REQ2 which is why the FF-A
> 1.2 spec, in my read, permits the response to a 32-bit FFA_MSG_WAIT call can
> be a 64-bit FFA_MSG_SEND_DIRECT_REQ2 reply.
That seems bonkers to me and I agree with Marc's assessment above. The
SMCCC is crystal clear that a caller using the SM32/HVC32 calling
convention can rely on x8-x30 (as well as the stack pointers) being
preserved. If FF-A has a problem with that then it needs to be fixed
there and not bodged in Linux.
Setting that aside, I'm still not sure I follow this part of your check:
if (... && ARM_SMCCC_IS_64(res->a0))
won't res->a0 contain something like FFA_SUCCESS? The FFA spec states:
FFA_SUCCESS64, is used only if any result register encodes a 64-bit
parameter.
which doesn't really seem related to whether or not the initial call
was using SMC32 or SMC64. What am I missing?
Will
On 7/18/25 6:37 AM, Will Deacon wrote:
> On Mon, Jul 07, 2025 at 05:06:23PM -0700, Per Larsen wrote:
>> On 7/3/25 5:38 AM, Marc Zyngier wrote:
>>> On Tue, 01 Jul 2025 23:06:35 +0100,
>>> Per Larsen via B4 Relay <devnull+perlarsen.google.com@kernel.org> wrote:
>>>> diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
>>>> index 2c199d40811efb5bfae199c4a67d8ae3d9307357..65d241ba32403d014b43cc4ef4d5bf9693813809 100644
>>>> --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
>>>> +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
>>>> @@ -71,36 +71,68 @@ static u32 hyp_ffa_version;
>>>> static bool has_version_negotiated;
>>>> static hyp_spinlock_t version_lock;
>>>> -static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
>>>> +static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno)
>>>> {
>>>> - *res = (struct arm_smccc_res) {
>>>> + *res = (struct arm_smccc_1_2_regs) {
>>>> .a0 = FFA_ERROR,
>>>> .a2 = ffa_errno,
>>>> };
>>>> }
>>>> -static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
>>>> +static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop)
>>>> {
>>>> if (ret == FFA_RET_SUCCESS) {
>>>> - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
>>>> - .a2 = prop };
>>>> + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS,
>>>> + .a2 = prop };
>>>> } else {
>>>> ffa_to_smccc_error(res, ret);
>>>> }
>>>> }
>>>> -static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
>>>> +static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret)
>>>> {
>>>> ffa_to_smccc_res_prop(res, ret, 0);
>>>> }
>>>> static void ffa_set_retval(struct kvm_cpu_context *ctxt,
>>>> - struct arm_smccc_res *res)
>>>> + struct arm_smccc_1_2_regs *res)
>>>> {
>>>> + DECLARE_REG(u64, func_id, ctxt, 0);
>>>> cpu_reg(ctxt, 0) = res->a0;
>>>> cpu_reg(ctxt, 1) = res->a1;
>>>> cpu_reg(ctxt, 2) = res->a2;
>>>> cpu_reg(ctxt, 3) = res->a3;
>>>> + cpu_reg(ctxt, 4) = res->a4;
>>>> + cpu_reg(ctxt, 5) = res->a5;
>>>> + cpu_reg(ctxt, 6) = res->a6;
>>>> + cpu_reg(ctxt, 7) = res->a7;
>>>
>>> From DEN0028G 2.6:
>>>
>>> <quote>
>>> Registers W4-W7 must be preserved unless they contain results, as
>>> specified in the function definition.
>>> </quote>
>>>
>>> On what grounds can you blindly change these registers?
>> From DEN0077A 1.2 Section 11.2: Reserved parameter convention
>>
>> <quote>
>> Unused parameter registers in FF-A ABIs are reserved for future use by the
>> Framework.
>>
>> [...]
>>
>> The caller is expected to write zeroes to these registers. The callee
>> ignores the values in these registers.
>> </quote>
>>
>> My read is that, as long as we are writing zeroes into reserved registers
>> (which I believe we are), we comply with DEN0026G 2.6.>
>
> Right, the specs make this far more difficult to decipher than necessary
> but I think SMCCC defers to the definition of the specific call being
> made and then FF-A is basically saying that unused argument registers
> are always zeroed.
>
> Rather than have the EL2 proxy treat each call differently based on the
> used argument registers, we can rely on EL3 doing the right thing and
> blindly copy everything back, which is what you've done. So I think
> that's ok.
>
>>>> +
>>>> + /*
>>>> + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
>>>> + *
>>>> + * The most straightforward approach is to look at the function ID
>>>> + * sent by the caller. However, the caller could send FFA_MSG_WAIT
>>>> + * which is a 32-bit interface but the reply could very well be 64-bit
>>>> + * such as FFA_FN64_MSG_SEND_DIRECT_REQ or FFA_MSG_SEND_DIRECT_REQ2.
>>>> + *
>>>> + * Instead, we could look at the function ID in the response (a0) but
>>>> + * that doesn't work either as FFA_VERSION responses put the version
>>>> + * number (or error code) in w0.
>>>> + *
>>>> + * Set x8-x17 iff response contains 64-bit function ID in a0.
>>>> + */
>>>> + if (func_id != FFA_VERSION && ARM_SMCCC_IS_64(res->a0)) {
>>>> + cpu_reg(ctxt, 8) = res->a8;
>>>> + cpu_reg(ctxt, 9) = res->a9;
>>>> + cpu_reg(ctxt, 10) = res->a10;
>>>> + cpu_reg(ctxt, 11) = res->a11;
>>>> + cpu_reg(ctxt, 12) = res->a12;
>>>> + cpu_reg(ctxt, 13) = res->a13;
>>>> + cpu_reg(ctxt, 14) = res->a14;
>>>> + cpu_reg(ctxt, 15) = res->a15;
>>>> + cpu_reg(ctxt, 16) = res->a16;
>>>> + cpu_reg(ctxt, 17) = res->a17;
>>>> + }
>>>> }
>>>
>>> I don't see how that can ever work.
>>>
>>> Irrespective of how FFA_MSG_WAIT actually works (and I couldn't find
>>> anything in the spec that supports the above), the requester will
>>> fully expect its registers to be preserved based on the initial
>>> function type, and that alone. No ifs, no buts.
>>>
>>> If what you describe can happen (please provide a convincing example),
>>> then the spec is doomed.
>>
>> DEN0077A 1.2 Section 8.2 (Runtime Model for FFA_RUN) and 8.3 (Runtime Model
>> for Direct request ABIs) contains Figures 8.1 and 8.2. Each figure shows
>> transitions between states "waiting", "blocked", "running", and "preempted".
>> Key to my understanding is that the waiting state in Figure 8.1 and Figure
>> 8.2 is the exact same state. This appears to be the case per Section 4.10.
>>
>> So we have to consider the ways to get in and out of the waiting state by
>> joining the state machines in Figures 8.1 and 8.2. Figure 8.1 has an edge
>> between "running" and "waiting" caused by FFA_MSG_WAIT. Figure 8.2 has an
>> edge between "waiting" and "running" caused by a "Direct request ABI".
>>
>> Direct request ABIs include FFA_MSG_SEND_DIRECT_REQ2 which is why the FF-A
>> 1.2 spec, in my read, permits the response to a 32-bit FFA_MSG_WAIT call can
>> be a 64-bit FFA_MSG_SEND_DIRECT_REQ2 reply.
>
> That seems bonkers to me and I agree with Marc's assessment above. The
> SMCCC is crystal clear that a caller using the SM32/HVC32 calling
> convention can rely on x8-x30 (as well as the stack pointers) being
> preserved. If FF-A has a problem with that then it needs to be fixed
> there and not bodged in Linux.
Ack. Patchset v8 addresses Marc's feedback by using the func_id detect
SMC64 instead.>
> Setting that aside, I'm still not sure I follow this part of your check:
>
> if (... && ARM_SMCCC_IS_64(res->a0))
>
> won't res->a0 contain something like FFA_SUCCESS? The FFA spec states:
>
> FFA_SUCCESS64, is used only if any result register encodes a 64-bit
> parameter.
>
> which doesn't really seem related to whether or not the initial call
> was using SMC32 or SMC64. What am I missing?I agree that we cannot use res->a0 to distinguish SMC32/SMC64 for the
reason you stated.
On Fri, Jul 18, 2025 at 10:54 PM Per Larsen <perl@immunant.com> wrote:
>
> On 7/18/25 6:37 AM, Will Deacon wrote:
> > On Mon, Jul 07, 2025 at 05:06:23PM -0700, Per Larsen wrote:
> >> On 7/3/25 5:38 AM, Marc Zyngier wrote:
> >>> On Tue, 01 Jul 2025 23:06:35 +0100,
> >>> Per Larsen via B4 Relay <devnull+perlarsen.google.com@kernel.org> wrote:
> >>>> diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> >>>> index 2c199d40811efb5bfae199c4a67d8ae3d9307357..65d241ba32403d014b43cc4ef4d5bf9693813809 100644
> >>>> --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> >>>> +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> >>>> @@ -71,36 +71,68 @@ static u32 hyp_ffa_version;
> >>>> static bool has_version_negotiated;
> >>>> static hyp_spinlock_t version_lock;
> >>>> -static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
> >>>> +static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno)
> >>>> {
> >>>> - *res = (struct arm_smccc_res) {
> >>>> + *res = (struct arm_smccc_1_2_regs) {
> >>>> .a0 = FFA_ERROR,
> >>>> .a2 = ffa_errno,
> >>>> };
> >>>> }
> >>>> -static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
> >>>> +static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop)
> >>>> {
> >>>> if (ret == FFA_RET_SUCCESS) {
> >>>> - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
> >>>> - .a2 = prop };
> >>>> + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS,
> >>>> + .a2 = prop };
> >>>> } else {
> >>>> ffa_to_smccc_error(res, ret);
> >>>> }
> >>>> }
> >>>> -static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
> >>>> +static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret)
> >>>> {
> >>>> ffa_to_smccc_res_prop(res, ret, 0);
> >>>> }
> >>>> static void ffa_set_retval(struct kvm_cpu_context *ctxt,
> >>>> - struct arm_smccc_res *res)
> >>>> + struct arm_smccc_1_2_regs *res)
> >>>> {
> >>>> + DECLARE_REG(u64, func_id, ctxt, 0);
> >>>> cpu_reg(ctxt, 0) = res->a0;
> >>>> cpu_reg(ctxt, 1) = res->a1;
> >>>> cpu_reg(ctxt, 2) = res->a2;
> >>>> cpu_reg(ctxt, 3) = res->a3;
> >>>> + cpu_reg(ctxt, 4) = res->a4;
> >>>> + cpu_reg(ctxt, 5) = res->a5;
> >>>> + cpu_reg(ctxt, 6) = res->a6;
> >>>> + cpu_reg(ctxt, 7) = res->a7;
> >>>
> >>> From DEN0028G 2.6:
> >>>
> >>> <quote>
> >>> Registers W4-W7 must be preserved unless they contain results, as
> >>> specified in the function definition.
> >>> </quote>
> >>>
> >>> On what grounds can you blindly change these registers?
> >> From DEN0077A 1.2 Section 11.2: Reserved parameter convention
> >>
> >> <quote>
> >> Unused parameter registers in FF-A ABIs are reserved for future use by the
> >> Framework.
> >>
> >> [...]
> >>
> >> The caller is expected to write zeroes to these registers. The callee
> >> ignores the values in these registers.
> >> </quote>
> >>
> >> My read is that, as long as we are writing zeroes into reserved registers
> >> (which I believe we are), we comply with DEN0026G 2.6.>
> >
> > Right, the specs make this far more difficult to decipher than necessary
> > but I think SMCCC defers to the definition of the specific call being
> > made and then FF-A is basically saying that unused argument registers
> > are always zeroed.
> >
> > Rather than have the EL2 proxy treat each call differently based on the
> > used argument registers, we can rely on EL3 doing the right thing and
> > blindly copy everything back, which is what you've done. So I think
> > that's ok.
> >
> >>>> +
> >>>> + /*
> >>>> + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
> >>>> + *
> >>>> + * The most straightforward approach is to look at the function ID
> >>>> + * sent by the caller. However, the caller could send FFA_MSG_WAIT
> >>>> + * which is a 32-bit interface but the reply could very well be 64-bit
> >>>> + * such as FFA_FN64_MSG_SEND_DIRECT_REQ or FFA_MSG_SEND_DIRECT_REQ2.
> >>>> + *
> >>>> + * Instead, we could look at the function ID in the response (a0) but
> >>>> + * that doesn't work either as FFA_VERSION responses put the version
> >>>> + * number (or error code) in w0.
> >>>> + *
> >>>> + * Set x8-x17 iff response contains 64-bit function ID in a0.
> >>>> + */
> >>>> + if (func_id != FFA_VERSION && ARM_SMCCC_IS_64(res->a0)) {
> >>>> + cpu_reg(ctxt, 8) = res->a8;
> >>>> + cpu_reg(ctxt, 9) = res->a9;
> >>>> + cpu_reg(ctxt, 10) = res->a10;
> >>>> + cpu_reg(ctxt, 11) = res->a11;
> >>>> + cpu_reg(ctxt, 12) = res->a12;
> >>>> + cpu_reg(ctxt, 13) = res->a13;
> >>>> + cpu_reg(ctxt, 14) = res->a14;
> >>>> + cpu_reg(ctxt, 15) = res->a15;
> >>>> + cpu_reg(ctxt, 16) = res->a16;
> >>>> + cpu_reg(ctxt, 17) = res->a17;
> >>>> + }
> >>>> }
> >>>
> >>> I don't see how that can ever work.
> >>>
> >>> Irrespective of how FFA_MSG_WAIT actually works (and I couldn't find
> >>> anything in the spec that supports the above), the requester will
> >>> fully expect its registers to be preserved based on the initial
> >>> function type, and that alone. No ifs, no buts.
> >>>
> >>> If what you describe can happen (please provide a convincing example),
> >>> then the spec is doomed.
> >>
> >> DEN0077A 1.2 Section 8.2 (Runtime Model for FFA_RUN) and 8.3 (Runtime Model
> >> for Direct request ABIs) contains Figures 8.1 and 8.2. Each figure shows
> >> transitions between states "waiting", "blocked", "running", and "preempted".
> >> Key to my understanding is that the waiting state in Figure 8.1 and Figure
> >> 8.2 is the exact same state. This appears to be the case per Section 4.10.
> >>
> >> So we have to consider the ways to get in and out of the waiting state by
> >> joining the state machines in Figures 8.1 and 8.2. Figure 8.1 has an edge
> >> between "running" and "waiting" caused by FFA_MSG_WAIT. Figure 8.2 has an
> >> edge between "waiting" and "running" caused by a "Direct request ABI".
> >>
> >> Direct request ABIs include FFA_MSG_SEND_DIRECT_REQ2 which is why the FF-A
> >> 1.2 spec, in my read, permits the response to a 32-bit FFA_MSG_WAIT call can
> >> be a 64-bit FFA_MSG_SEND_DIRECT_REQ2 reply.
> >
> > That seems bonkers to me and I agree with Marc's assessment above. The
> > SMCCC is crystal clear that a caller using the SM32/HVC32 calling
> > convention can rely on x8-x30 (as well as the stack pointers) being
> > preserved. If FF-A has a problem with that then it needs to be fixed
> > there and not bodged in Linux.
> Ack. Patchset v8 addresses Marc's feedback by using the func_id detect
> SMC64 instead.>
> > Setting that aside, I'm still not sure I follow this part of your check:
> >
> > if (... && ARM_SMCCC_IS_64(res->a0))
> >
> > won't res->a0 contain something like FFA_SUCCESS? The FFA spec states:
> >
> > FFA_SUCCESS64, is used only if any result register encodes a 64-bit
> > parameter.
> >
> > which doesn't really seem related to whether or not the initial call
> > was using SMC32 or SMC64. What am I missing?I agree that we cannot use res->a0 to distinguish SMC32/SMC64 for the
> reason you stated.
I don't think using the function-id of the original call works
correctly in this context though. If you look at
drivers/firmware/arm_ffa/driver.c:ffa_msg_send_direct_req2 it has the
same problem as the FFA_MSG_WAIT example in your comment. In the
simple case it will use FFA_MSG_SEND_DIRECT_REQ2 for the call and
FFA_MSG_SEND_DIRECT_RESP2 for the response, both 64 bit opcodes, and
either approach here will have the same correct result. However if
FFA_MSG_SEND_DIRECT_REQ2 responds with FFA_INTERRUPT or FFA_YIELD,
then the driver will resume the call with FFA_RUN, a 32 bit opcode,
and still expect a 64 bit FFA_MSG_SEND_DIRECT_RESP2 response with a
full response in x4-17. If you use ARM_SMCCC_IS_64(func_id) here
instead of ARM_SMCCC_IS_64(res->a0), then the part of response in
x8-x17 will be lost.
The FF-A 1.3 ALP2 fixes this by adding a 64 bit FF-A run opcode, but
at the current patchstack only adds ff-a 1.2 support and the linux
ff-a driver does not yet support the new 1.3 ALP2 call flow either so
I think the current v7 patch here is the best option for now.
--
Arve Hjønnevåg
On 7/21/25 4:01 AM, Arve Hjønnevåg wrote:
> On Fri, Jul 18, 2025 at 10:54 PM Per Larsen <perl@immunant.com> wrote:
>>
>> On 7/18/25 6:37 AM, Will Deacon wrote:
>>> On Mon, Jul 07, 2025 at 05:06:23PM -0700, Per Larsen wrote:
>>>> On 7/3/25 5:38 AM, Marc Zyngier wrote:
>>>>> On Tue, 01 Jul 2025 23:06:35 +0100,
>>>>> Per Larsen via B4 Relay <devnull+perlarsen.google.com@kernel.org> wrote:
>>>>>> diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
>>>>>> index 2c199d40811efb5bfae199c4a67d8ae3d9307357..65d241ba32403d014b43cc4ef4d5bf9693813809 100644
>>>>>> --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
>>>>>> +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
>>>>>> @@ -71,36 +71,68 @@ static u32 hyp_ffa_version;
>>>>>> static bool has_version_negotiated;
>>>>>> static hyp_spinlock_t version_lock;
>>>>>> -static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
>>>>>> +static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno)
>>>>>> {
>>>>>> - *res = (struct arm_smccc_res) {
>>>>>> + *res = (struct arm_smccc_1_2_regs) {
>>>>>> .a0 = FFA_ERROR,
>>>>>> .a2 = ffa_errno,
>>>>>> };
>>>>>> }
>>>>>> -static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
>>>>>> +static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop)
>>>>>> {
>>>>>> if (ret == FFA_RET_SUCCESS) {
>>>>>> - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
>>>>>> - .a2 = prop };
>>>>>> + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS,
>>>>>> + .a2 = prop };
>>>>>> } else {
>>>>>> ffa_to_smccc_error(res, ret);
>>>>>> }
>>>>>> }
>>>>>> -static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
>>>>>> +static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret)
>>>>>> {
>>>>>> ffa_to_smccc_res_prop(res, ret, 0);
>>>>>> }
>>>>>> static void ffa_set_retval(struct kvm_cpu_context *ctxt,
>>>>>> - struct arm_smccc_res *res)
>>>>>> + struct arm_smccc_1_2_regs *res)
>>>>>> {
>>>>>> + DECLARE_REG(u64, func_id, ctxt, 0);
>>>>>> cpu_reg(ctxt, 0) = res->a0;
>>>>>> cpu_reg(ctxt, 1) = res->a1;
>>>>>> cpu_reg(ctxt, 2) = res->a2;
>>>>>> cpu_reg(ctxt, 3) = res->a3;
>>>>>> + cpu_reg(ctxt, 4) = res->a4;
>>>>>> + cpu_reg(ctxt, 5) = res->a5;
>>>>>> + cpu_reg(ctxt, 6) = res->a6;
>>>>>> + cpu_reg(ctxt, 7) = res->a7;
>>>>>
>>>>> From DEN0028G 2.6:
>>>>>
>>>>> <quote>
>>>>> Registers W4-W7 must be preserved unless they contain results, as
>>>>> specified in the function definition.
>>>>> </quote>
>>>>>
>>>>> On what grounds can you blindly change these registers?
>>>> From DEN0077A 1.2 Section 11.2: Reserved parameter convention
>>>>
>>>> <quote>
>>>> Unused parameter registers in FF-A ABIs are reserved for future use by the
>>>> Framework.
>>>>
>>>> [...]
>>>>
>>>> The caller is expected to write zeroes to these registers. The callee
>>>> ignores the values in these registers.
>>>> </quote>
>>>>
>>>> My read is that, as long as we are writing zeroes into reserved registers
>>>> (which I believe we are), we comply with DEN0026G 2.6.>
>>>
>>> Right, the specs make this far more difficult to decipher than necessary
>>> but I think SMCCC defers to the definition of the specific call being
>>> made and then FF-A is basically saying that unused argument registers
>>> are always zeroed.
>>>
>>> Rather than have the EL2 proxy treat each call differently based on the
>>> used argument registers, we can rely on EL3 doing the right thing and
>>> blindly copy everything back, which is what you've done. So I think
>>> that's ok.
>>>
>>>>>> +
>>>>>> + /*
>>>>>> + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
>>>>>> + *
>>>>>> + * The most straightforward approach is to look at the function ID
>>>>>> + * sent by the caller. However, the caller could send FFA_MSG_WAIT
>>>>>> + * which is a 32-bit interface but the reply could very well be 64-bit
>>>>>> + * such as FFA_FN64_MSG_SEND_DIRECT_REQ or FFA_MSG_SEND_DIRECT_REQ2.
>>>>>> + *
>>>>>> + * Instead, we could look at the function ID in the response (a0) but
>>>>>> + * that doesn't work either as FFA_VERSION responses put the version
>>>>>> + * number (or error code) in w0.
>>>>>> + *
>>>>>> + * Set x8-x17 iff response contains 64-bit function ID in a0.
>>>>>> + */
>>>>>> + if (func_id != FFA_VERSION && ARM_SMCCC_IS_64(res->a0)) {
>>>>>> + cpu_reg(ctxt, 8) = res->a8;
>>>>>> + cpu_reg(ctxt, 9) = res->a9;
>>>>>> + cpu_reg(ctxt, 10) = res->a10;
>>>>>> + cpu_reg(ctxt, 11) = res->a11;
>>>>>> + cpu_reg(ctxt, 12) = res->a12;
>>>>>> + cpu_reg(ctxt, 13) = res->a13;
>>>>>> + cpu_reg(ctxt, 14) = res->a14;
>>>>>> + cpu_reg(ctxt, 15) = res->a15;
>>>>>> + cpu_reg(ctxt, 16) = res->a16;
>>>>>> + cpu_reg(ctxt, 17) = res->a17;
>>>>>> + }
>>>>>> }
>>>>>
>>>>> I don't see how that can ever work.
>>>>>
>>>>> Irrespective of how FFA_MSG_WAIT actually works (and I couldn't find
>>>>> anything in the spec that supports the above), the requester will
>>>>> fully expect its registers to be preserved based on the initial
>>>>> function type, and that alone. No ifs, no buts.
>>>>>
>>>>> If what you describe can happen (please provide a convincing example),
>>>>> then the spec is doomed.
>>>>
>>>> DEN0077A 1.2 Section 8.2 (Runtime Model for FFA_RUN) and 8.3 (Runtime Model
>>>> for Direct request ABIs) contains Figures 8.1 and 8.2. Each figure shows
>>>> transitions between states "waiting", "blocked", "running", and "preempted".
>>>> Key to my understanding is that the waiting state in Figure 8.1 and Figure
>>>> 8.2 is the exact same state. This appears to be the case per Section 4.10.
>>>>
>>>> So we have to consider the ways to get in and out of the waiting state by
>>>> joining the state machines in Figures 8.1 and 8.2. Figure 8.1 has an edge
>>>> between "running" and "waiting" caused by FFA_MSG_WAIT. Figure 8.2 has an
>>>> edge between "waiting" and "running" caused by a "Direct request ABI".
>>>>
>>>> Direct request ABIs include FFA_MSG_SEND_DIRECT_REQ2 which is why the FF-A
>>>> 1.2 spec, in my read, permits the response to a 32-bit FFA_MSG_WAIT call can
>>>> be a 64-bit FFA_MSG_SEND_DIRECT_REQ2 reply.
>>>
>>> That seems bonkers to me and I agree with Marc's assessment above. The
>>> SMCCC is crystal clear that a caller using the SM32/HVC32 calling
>>> convention can rely on x8-x30 (as well as the stack pointers) being
>>> preserved. If FF-A has a problem with that then it needs to be fixed
>>> there and not bodged in Linux.
>> Ack. Patchset v8 addresses Marc's feedback by using the func_id detect
>> SMC64 instead.>
>>> Setting that aside, I'm still not sure I follow this part of your check:
>>>
>>> if (... && ARM_SMCCC_IS_64(res->a0))
>>>
>>> won't res->a0 contain something like FFA_SUCCESS? The FFA spec states:
>>>
>>> FFA_SUCCESS64, is used only if any result register encodes a 64-bit
>>> parameter.
>>>
>>> which doesn't really seem related to whether or not the initial call
>>> was using SMC32 or SMC64. What am I missing?I agree that we cannot use res->a0 to distinguish SMC32/SMC64 for the
>> reason you stated.
>
> I don't think using the function-id of the original call works
> correctly in this context though. If you look at
> drivers/firmware/arm_ffa/driver.c:ffa_msg_send_direct_req2 it has the
> same problem as the FFA_MSG_WAIT example in your comment. In the
> simple case it will use FFA_MSG_SEND_DIRECT_REQ2 for the call and
> FFA_MSG_SEND_DIRECT_RESP2 for the response, both 64 bit opcodes, and
> either approach here will have the same correct result. However if
> FFA_MSG_SEND_DIRECT_REQ2 responds with FFA_INTERRUPT or FFA_YIELD,
> then the driver will resume the call with FFA_RUN, a 32 bit opcode,
> and still expect a 64 bit FFA_MSG_SEND_DIRECT_RESP2 response with a
> full response in x4-17. If you use ARM_SMCCC_IS_64(func_id) here
> instead of ARM_SMCCC_IS_64(res->a0), then the part of response in
> x8-x17 will be lost.
>
> The FF-A 1.3 ALP2 fixes this by adding a 64 bit FF-A run opcode, but
> at the current patchstack only adds ff-a 1.2 support and the linux
> ff-a driver does not yet support the new 1.3 ALP2 call flow either so
> I think the current v7 patch here is the best option for now.
>
FFA_RUN is passed through to EL3 by kvm_host_ffa_handler so I'm not sure
there is a code path where func_id == FFA_RUN in set_ffa_retval.
[Sudeep & Mark to To:]
On Mon, Jul 21, 2025 at 05:20:01PM -0700, Per Larsen wrote:
> On 7/21/25 4:01 AM, Arve Hjønnevåg wrote:
> > On Fri, Jul 18, 2025 at 10:54 PM Per Larsen <perl@immunant.com> wrote:
> > > On 7/18/25 6:37 AM, Will Deacon wrote:
> > > > On Mon, Jul 07, 2025 at 05:06:23PM -0700, Per Larsen wrote:
> > > > > On 7/3/25 5:38 AM, Marc Zyngier wrote:
> > > > > > On Tue, 01 Jul 2025 23:06:35 +0100,
> > > > > > Per Larsen via B4 Relay <devnull+perlarsen.google.com@kernel.org> wrote:
> > > > > > > + /*
> > > > > > > + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
> > > > > > > + *
> > > > > > > + * The most straightforward approach is to look at the function ID
> > > > > > > + * sent by the caller. However, the caller could send FFA_MSG_WAIT
> > > > > > > + * which is a 32-bit interface but the reply could very well be 64-bit
> > > > > > > + * such as FFA_FN64_MSG_SEND_DIRECT_REQ or FFA_MSG_SEND_DIRECT_REQ2.
> > > > > > > + *
> > > > > > > + * Instead, we could look at the function ID in the response (a0) but
> > > > > > > + * that doesn't work either as FFA_VERSION responses put the version
> > > > > > > + * number (or error code) in w0.
> > > > > > > + *
> > > > > > > + * Set x8-x17 iff response contains 64-bit function ID in a0.
> > > > > > > + */
> > > > > > > + if (func_id != FFA_VERSION && ARM_SMCCC_IS_64(res->a0)) {
> > > > > > > + cpu_reg(ctxt, 8) = res->a8;
> > > > > > > + cpu_reg(ctxt, 9) = res->a9;
> > > > > > > + cpu_reg(ctxt, 10) = res->a10;
> > > > > > > + cpu_reg(ctxt, 11) = res->a11;
> > > > > > > + cpu_reg(ctxt, 12) = res->a12;
> > > > > > > + cpu_reg(ctxt, 13) = res->a13;
> > > > > > > + cpu_reg(ctxt, 14) = res->a14;
> > > > > > > + cpu_reg(ctxt, 15) = res->a15;
> > > > > > > + cpu_reg(ctxt, 16) = res->a16;
> > > > > > > + cpu_reg(ctxt, 17) = res->a17;
> > > > > > > + }
> > > > > > > }
> > > > > >
> > > > > > I don't see how that can ever work.
> > > > > >
> > > > > > Irrespective of how FFA_MSG_WAIT actually works (and I couldn't find
> > > > > > anything in the spec that supports the above), the requester will
> > > > > > fully expect its registers to be preserved based on the initial
> > > > > > function type, and that alone. No ifs, no buts.
> > > > > >
> > > > > > If what you describe can happen (please provide a convincing example),
> > > > > > then the spec is doomed.
> > > > >
> > > > > DEN0077A 1.2 Section 8.2 (Runtime Model for FFA_RUN) and 8.3 (Runtime Model
> > > > > for Direct request ABIs) contains Figures 8.1 and 8.2. Each figure shows
> > > > > transitions between states "waiting", "blocked", "running", and "preempted".
> > > > > Key to my understanding is that the waiting state in Figure 8.1 and Figure
> > > > > 8.2 is the exact same state. This appears to be the case per Section 4.10.
> > > > >
> > > > > So we have to consider the ways to get in and out of the waiting state by
> > > > > joining the state machines in Figures 8.1 and 8.2. Figure 8.1 has an edge
> > > > > between "running" and "waiting" caused by FFA_MSG_WAIT. Figure 8.2 has an
> > > > > edge between "waiting" and "running" caused by a "Direct request ABI".
> > > > >
> > > > > Direct request ABIs include FFA_MSG_SEND_DIRECT_REQ2 which is why the FF-A
> > > > > 1.2 spec, in my read, permits the response to a 32-bit FFA_MSG_WAIT call can
> > > > > be a 64-bit FFA_MSG_SEND_DIRECT_REQ2 reply.
> > > >
> > > > That seems bonkers to me and I agree with Marc's assessment above. The
> > > > SMCCC is crystal clear that a caller using the SM32/HVC32 calling
> > > > convention can rely on x8-x30 (as well as the stack pointers) being
> > > > preserved. If FF-A has a problem with that then it needs to be fixed
> > > > there and not bodged in Linux.
> > > Ack. Patchset v8 addresses Marc's feedback by using the func_id detect
> > > SMC64 instead.>
> > > > Setting that aside, I'm still not sure I follow this part of your check:
> > > >
> > > > if (... && ARM_SMCCC_IS_64(res->a0))
> > > >
> > > > won't res->a0 contain something like FFA_SUCCESS? The FFA spec states:
> > > >
> > > > FFA_SUCCESS64, is used only if any result register encodes a 64-bit
> > > > parameter.
> > > >
> > > > which doesn't really seem related to whether or not the initial call
> > > > was using SMC32 or SMC64. What am I missing?I agree that we cannot use res->a0 to distinguish SMC32/SMC64 for the
> > > reason you stated.
> >
> > I don't think using the function-id of the original call works
> > correctly in this context though. If you look at
> > drivers/firmware/arm_ffa/driver.c:ffa_msg_send_direct_req2 it has the
> > same problem as the FFA_MSG_WAIT example in your comment. In the
> > simple case it will use FFA_MSG_SEND_DIRECT_REQ2 for the call and
> > FFA_MSG_SEND_DIRECT_RESP2 for the response, both 64 bit opcodes, and
> > either approach here will have the same correct result. However if
> > FFA_MSG_SEND_DIRECT_REQ2 responds with FFA_INTERRUPT or FFA_YIELD,
> > then the driver will resume the call with FFA_RUN, a 32 bit opcode,
> > and still expect a 64 bit FFA_MSG_SEND_DIRECT_RESP2 response with a
> > full response in x4-17. If you use ARM_SMCCC_IS_64(func_id) here
> > instead of ARM_SMCCC_IS_64(res->a0), then the part of response in
> > x8-x17 will be lost.
Can't we just update the FF-A driver? This is clearly all the result of
a half-baked spec...
Sudeep -- any chance you can add support for the hot-off-the-press
64-bit FFA_RUN call to the Linux driver, please? Without that, I don't
see how the REQ2/RESP2 calls can work properly.
> > The FF-A 1.3 ALP2 fixes this by adding a 64 bit FF-A run opcode, but
> > at the current patchstack only adds ff-a 1.2 support and the linux
> > ff-a driver does not yet support the new 1.3 ALP2 call flow either so
> > I think the current v7 patch here is the best option for now.
> >
> FFA_RUN is passed through to EL3 by kvm_host_ffa_handler so I'm not sure
> there is a code path where func_id == FFA_RUN in set_ffa_retval.
That's actually a really interesting point...
The passthrough code in __kvm_hyp_host_forward_smc() assumes SMC64 and
populates/reloads X0-X17 across the SMC. If the firmware replies to an
SMC32 call with an SMC64, then so be it, but it's not the hypervisor's
job to fix that up and the same problem would presumably happen even if
the hypervisor wasn't present.
So perhaps there's an argument that the proxy should be consistent with
that behaviour, otherwise we end up with different semantics depending
on whether we chose to proxy the call or pass it through. That would
mean always populating X8-X17 in the return value, regardless of the
function ID or anything else.
The spec should still be fixed because the current wording makes very
little sense in this area, but in the meantime maybe it's best just to
pass the hot potato between the host and the firmware rather than try to
fix it up ourselves.
Marc -- what do you think? We're damned if we do, damned if we don't,
but there's something to be said for being consistent when we get into
this messy situation.
Will
On Tue, Jul 22, 2025 at 04:55:31PM +0100, Will Deacon wrote:
> [Sudeep & Mark to To:]
>
> On Mon, Jul 21, 2025 at 05:20:01PM -0700, Per Larsen wrote:
> > On 7/21/25 4:01 AM, Arve Hjønnevåg wrote:
> > > On Fri, Jul 18, 2025 at 10:54 PM Per Larsen <perl@immunant.com> wrote:
> > > > On 7/18/25 6:37 AM, Will Deacon wrote:
> > > > > On Mon, Jul 07, 2025 at 05:06:23PM -0700, Per Larsen wrote:
> > > > > > On 7/3/25 5:38 AM, Marc Zyngier wrote:
> > > > > > > On Tue, 01 Jul 2025 23:06:35 +0100,
> > > > > > > Per Larsen via B4 Relay <devnull+perlarsen.google.com@kernel.org> wrote:
> > > > > > > > + /*
> > > > > > > > + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
> > > > > > > > + *
> > > > > > > > + * The most straightforward approach is to look at the function ID
> > > > > > > > + * sent by the caller. However, the caller could send FFA_MSG_WAIT
> > > > > > > > + * which is a 32-bit interface but the reply could very well be 64-bit
> > > > > > > > + * such as FFA_FN64_MSG_SEND_DIRECT_REQ or FFA_MSG_SEND_DIRECT_REQ2.
> > > > > > > > + *
> > > > > > > > + * Instead, we could look at the function ID in the response (a0) but
> > > > > > > > + * that doesn't work either as FFA_VERSION responses put the version
> > > > > > > > + * number (or error code) in w0.
> > > > > > > > + *
> > > > > > > > + * Set x8-x17 iff response contains 64-bit function ID in a0.
> > > > > > > > + */
> > > > > > > > + if (func_id != FFA_VERSION && ARM_SMCCC_IS_64(res->a0)) {
> > > > > > > > + cpu_reg(ctxt, 8) = res->a8;
> > > > > > > > + cpu_reg(ctxt, 9) = res->a9;
> > > > > > > > + cpu_reg(ctxt, 10) = res->a10;
> > > > > > > > + cpu_reg(ctxt, 11) = res->a11;
> > > > > > > > + cpu_reg(ctxt, 12) = res->a12;
> > > > > > > > + cpu_reg(ctxt, 13) = res->a13;
> > > > > > > > + cpu_reg(ctxt, 14) = res->a14;
> > > > > > > > + cpu_reg(ctxt, 15) = res->a15;
> > > > > > > > + cpu_reg(ctxt, 16) = res->a16;
> > > > > > > > + cpu_reg(ctxt, 17) = res->a17;
> > > > > > > > + }
> > > > > > > > }
> > > > > > >
> > > > > > > I don't see how that can ever work.
> > > > > > >
> > > > > > > Irrespective of how FFA_MSG_WAIT actually works (and I couldn't find
> > > > > > > anything in the spec that supports the above), the requester will
> > > > > > > fully expect its registers to be preserved based on the initial
> > > > > > > function type, and that alone. No ifs, no buts.
> > > > > > >
> > > > > > > If what you describe can happen (please provide a convincing example),
> > > > > > > then the spec is doomed.
> > > > > >
> > > > > > DEN0077A 1.2 Section 8.2 (Runtime Model for FFA_RUN) and 8.3 (Runtime Model
> > > > > > for Direct request ABIs) contains Figures 8.1 and 8.2. Each figure shows
> > > > > > transitions between states "waiting", "blocked", "running", and "preempted".
> > > > > > Key to my understanding is that the waiting state in Figure 8.1 and Figure
> > > > > > 8.2 is the exact same state. This appears to be the case per Section 4.10.
> > > > > >
> > > > > > So we have to consider the ways to get in and out of the waiting state by
> > > > > > joining the state machines in Figures 8.1 and 8.2. Figure 8.1 has an edge
> > > > > > between "running" and "waiting" caused by FFA_MSG_WAIT. Figure 8.2 has an
> > > > > > edge between "waiting" and "running" caused by a "Direct request ABI".
> > > > > >
> > > > > > Direct request ABIs include FFA_MSG_SEND_DIRECT_REQ2 which is why the FF-A
> > > > > > 1.2 spec, in my read, permits the response to a 32-bit FFA_MSG_WAIT call can
> > > > > > be a 64-bit FFA_MSG_SEND_DIRECT_REQ2 reply.
> > > > >
> > > > > That seems bonkers to me and I agree with Marc's assessment above. The
> > > > > SMCCC is crystal clear that a caller using the SM32/HVC32 calling
> > > > > convention can rely on x8-x30 (as well as the stack pointers) being
> > > > > preserved. If FF-A has a problem with that then it needs to be fixed
> > > > > there and not bodged in Linux.
> > > > Ack. Patchset v8 addresses Marc's feedback by using the func_id detect
> > > > SMC64 instead.>
> > > > > Setting that aside, I'm still not sure I follow this part of your check:
> > > > >
> > > > > if (... && ARM_SMCCC_IS_64(res->a0))
> > > > >
> > > > > won't res->a0 contain something like FFA_SUCCESS? The FFA spec states:
> > > > >
> > > > > FFA_SUCCESS64, is used only if any result register encodes a 64-bit
> > > > > parameter.
> > > > >
> > > > > which doesn't really seem related to whether or not the initial call
> > > > > was using SMC32 or SMC64. What am I missing?I agree that we cannot use res->a0 to distinguish SMC32/SMC64 for the
> > > > reason you stated.
> > >
> > > I don't think using the function-id of the original call works
> > > correctly in this context though. If you look at
> > > drivers/firmware/arm_ffa/driver.c:ffa_msg_send_direct_req2 it has the
> > > same problem as the FFA_MSG_WAIT example in your comment. In the
> > > simple case it will use FFA_MSG_SEND_DIRECT_REQ2 for the call and
> > > FFA_MSG_SEND_DIRECT_RESP2 for the response, both 64 bit opcodes, and
> > > either approach here will have the same correct result. However if
> > > FFA_MSG_SEND_DIRECT_REQ2 responds with FFA_INTERRUPT or FFA_YIELD,
> > > then the driver will resume the call with FFA_RUN, a 32 bit opcode,
> > > and still expect a 64 bit FFA_MSG_SEND_DIRECT_RESP2 response with a
> > > full response in x4-17. If you use ARM_SMCCC_IS_64(func_id) here
> > > instead of ARM_SMCCC_IS_64(res->a0), then the part of response in
> > > x8-x17 will be lost.
>
> Can't we just update the FF-A driver? This is clearly all the result of
> a half-baked spec...
>
> Sudeep -- any chance you can add support for the hot-off-the-press
> 64-bit FFA_RUN call to the Linux driver, please? Without that, I don't
> see how the REQ2/RESP2 calls can work properly.
>
Apologies for the delay in following up on this thread. Yes, we can certainly
add the 64-bit FFA_RUN, but we'll need to align with the spec authors first.
I'll follow up internally to ensure there's no conflict between how it's
defined in the spec and how it's used in the kernel.
It looks like the change in the spec is already WIP which I was made aware
just now, so I don't see any issue updating the driver.
--
Regards,
Sudeep
© 2016 - 2025 Red Hat, Inc.