Patch 1: This patch adds map/unmap ioctls which map the adapter set
indicator pages so the pages can be accessed when interrupts are
disabled. The mappings are cleaned up when the guest is removed.
Fencing of Fast Inject in Secure Execution environments is enabled in
this patch by not mapping adapter indicator pages. In Secure Execution
environments the path of execution available before this patch is followed.
Statistical counters to count map/unmap functions for adapter indicator
pages are added in this patch. The counters can be used to analyze
map/unmap functions in non-Secure Execution environments and similarly
can be used to analyze Secure Execution environments where the counters
should not be incremented as the adapter indicator pages are not mapped.
Signed-off-by: Douglas Freimuth <freimuth@linux.ibm.com>
---
arch/s390/include/asm/kvm_host.h | 5 ++
arch/s390/kvm/interrupt.c | 143 +++++++++++++++++++++++++------
arch/s390/kvm/kvm-s390.c | 2 +
3 files changed, 124 insertions(+), 26 deletions(-)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 64a50f0862aa..616be8ca4614 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -448,6 +448,8 @@ struct kvm_vcpu_arch {
struct kvm_vm_stat {
struct kvm_vm_stat_generic generic;
u64 inject_io;
+ u64 io_390_adapter_map;
+ u64 io_390_adapter_unmap;
u64 inject_float_mchk;
u64 inject_pfault_done;
u64 inject_service_signal;
@@ -479,6 +481,9 @@ struct s390_io_adapter {
bool masked;
bool swap;
bool suppressible;
+ struct rw_semaphore maps_lock;
+ struct list_head maps;
+ unsigned int nr_maps;
};
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 18932a65ca68..cafc03e20f8f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2426,6 +2426,9 @@ static int register_io_adapter(struct kvm_device *dev,
if (!adapter)
return -ENOMEM;
+ INIT_LIST_HEAD(&adapter->maps);
+ init_rwsem(&adapter->maps_lock);
+ adapter->nr_maps = 0;
adapter->id = adapter_info.id;
adapter->isc = adapter_info.isc;
adapter->maskable = adapter_info.maskable;
@@ -2450,12 +2453,103 @@ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
return ret;
}
+static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
+{
+ struct mm_struct *mm = kvm->mm;
+ struct page *page = NULL;
+ int locked = 1;
+
+ if (mmget_not_zero(mm)) {
+ mmap_read_lock(mm);
+ get_user_pages_remote(mm, uaddr, 1, FOLL_WRITE,
+ &page, &locked);
+ if (locked)
+ mmap_read_unlock(mm);
+ mmput(mm);
+ }
+
+ return page;
+}
+
+static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map;
+ unsigned long flags;
+ int ret;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ map = kzalloc_obj(*map, GFP_KERNEL);
+ if (!map)
+ return -ENOMEM;
+
+ map->page = get_map_page(kvm, addr);
+ if (!map->page) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&map->list);
+ map->guest_addr = addr;
+ map->addr = addr;
+ down_write(&adapter->maps_lock);
+ if (adapter->nr_maps++ < MAX_S390_ADAPTER_MAPS) {
+ list_add_tail(&map->list, &adapter->maps);
+ ret = 0;
+ } else {
+ put_page(map->page);
+ ret = -EINVAL;
+ }
+ up_write(&adapter->maps_lock);
+out:
+ if (ret)
+ kfree(map);
+ return ret;
+}
+
+static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map, *tmp;
+ int found = 0;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ down_write(&adapter->maps_lock);
+ list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
+ if (map->guest_addr == addr) {
+ found = 1;
+ adapter->nr_maps--;
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ break;
+ }
+ }
+ up_write(&adapter->maps_lock);
+
+ return found ? 0 : -ENOENT;
+}
+
void kvm_s390_destroy_adapters(struct kvm *kvm)
{
int i;
+ struct s390_map_info *map, *tmp;
- for (i = 0; i < MAX_S390_IO_ADAPTERS; i++)
+ for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
+ if (!kvm->arch.adapters[i])
+ continue;
+ list_for_each_entry_safe(map, tmp,
+ &kvm->arch.adapters[i]->maps, list) {
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ }
kfree(kvm->arch.adapters[i]);
+ }
}
static int modify_io_adapter(struct kvm_device *dev,
@@ -2463,7 +2557,8 @@ static int modify_io_adapter(struct kvm_device *dev,
{
struct kvm_s390_io_adapter_req req;
struct s390_io_adapter *adapter;
- int ret;
+ __u64 host_addr;
+ int ret, idx;
if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
return -EFAULT;
@@ -2477,14 +2572,28 @@ static int modify_io_adapter(struct kvm_device *dev,
if (ret > 0)
ret = 0;
break;
- /*
- * The following operations are no longer needed and therefore no-ops.
- * The gpa to hva translation is done when an IRQ route is set up. The
- * set_irq code uses get_user_pages_remote() to do the actual write.
- */
case KVM_S390_IO_ADAPTER_MAP:
case KVM_S390_IO_ADAPTER_UNMAP:
- ret = 0;
+ mutex_lock(&dev->kvm->lock);
+ if (kvm_s390_pv_is_protected(dev->kvm)) {
+ mutex_unlock(&dev->kvm->lock);
+ break;
+ }
+ mutex_unlock(&dev->kvm->lock);
+ idx = srcu_read_lock(&dev->kvm->srcu);
+ host_addr = gpa_to_hva(dev->kvm, req.addr);
+ if (kvm_is_error_hva(host_addr)) {
+ srcu_read_unlock(&dev->kvm->srcu, idx);
+ return -EFAULT;
+ }
+ srcu_read_unlock(&dev->kvm->srcu, idx);
+ if (req.type == KVM_S390_IO_ADAPTER_MAP) {
+ dev->kvm->stat.io_390_adapter_map++;
+ ret = kvm_s390_adapter_map(dev->kvm, req.id, host_addr);
+ } else {
+ dev->kvm->stat.io_390_adapter_unmap++;
+ ret = kvm_s390_adapter_unmap(dev->kvm, req.id, host_addr);
+ }
break;
default:
ret = -EINVAL;
@@ -2727,24 +2836,6 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
}
-static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
-{
- struct mm_struct *mm = kvm->mm;
- struct page *page = NULL;
- int locked = 1;
-
- if (mmget_not_zero(mm)) {
- mmap_read_lock(mm);
- get_user_pages_remote(mm, uaddr, 1, FOLL_WRITE,
- &page, &locked);
- if (locked)
- mmap_read_unlock(mm);
- mmput(mm);
- }
-
- return page;
-}
-
static int adapter_indicators_set(struct kvm *kvm,
struct s390_io_adapter *adapter,
struct kvm_s390_adapter_int *adapter_int)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bc7d6fa66eaf..8e6532f55a5a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -68,6 +68,8 @@
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
STATS_DESC_COUNTER(VM, inject_io),
+ STATS_DESC_COUNTER(VM, io_390_adapter_map),
+ STATS_DESC_COUNTER(VM, io_390_adapter_unmap),
STATS_DESC_COUNTER(VM, inject_float_mchk),
STATS_DESC_COUNTER(VM, inject_pfault_done),
STATS_DESC_COUNTER(VM, inject_service_signal),
--
2.52.0
On 3/7/26 10:04 PM, Douglas Freimuth wrote:
> Patch 1: This patch adds map/unmap ioctls which map the adapter set
This comment applies to all patches in the series:
Drop the Patch #: intro, it would otherwise end up in git history.
Also remove phrases like 'this patch' and switch to imperative phrasing
ref: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes
Also +1 to what Sean said, "KVM: s390: Add map..."
> indicator pages so the pages can be accessed when interrupts are
> disabled. The mappings are cleaned up when the guest is removed.
>
> Fencing of Fast Inject in Secure Execution environments is enabled in
This is a bit misleading. You haven't implemented kvm_arch_set_irq_inatomic at this point, so this patch can't be fencing it yet.
What you are doing is fencing the map/unmap ioctls so that you avoid the longterm pin in SE. I'd focus on that part here.
And then in patch 3, when you enable the feature, you can point out that it is fenced in SE because the mappings will never be available.
> this patch by not mapping adapter indicator pages. In Secure Execution
> environments the path of execution available before this patch is followed.
> Statistical counters to count map/unmap functions for adapter indicator
> pages are added in this patch. The counters can be used to analyze
> map/unmap functions in non-Secure Execution environments and similarly
> can be used to analyze Secure Execution environments where the counters
> should not be incremented as the adapter indicator pages are not mapped.
>
> Signed-off-by: Douglas Freimuth <freimuth@linux.ibm.com>
> @@ -2477,14 +2572,28 @@ static int modify_io_adapter(struct kvm_device *dev,
> if (ret > 0)
> ret = 0;
> break;
> - /*
> - * The following operations are no longer needed and therefore no-ops.
> - * The gpa to hva translation is done when an IRQ route is set up. The
> - * set_irq code uses get_user_pages_remote() to do the actual write.
> - */
> case KVM_S390_IO_ADAPTER_MAP:
> case KVM_S390_IO_ADAPTER_UNMAP:
> - ret = 0;
> + mutex_lock(&dev->kvm->lock);
> + if (kvm_s390_pv_is_protected(dev->kvm)) {
> + mutex_unlock(&dev->kvm->lock);
> + break;
Was mentioned by the kernel bot also -- because you removed the initialization of ret above you will now return something uninitialized if you take this if statement.
Either initialize ret above or return the intended value (0?) here.
Also I think this is worth a comment block above this check stating why you are doing it (e.g. avoid long-term pin for secure execution guest)
Please update the shortlogs on the patches to set the scope to "KVM: s390:". I had a moment of confusion because I was like "but kvm_arch_set_irq_inatomic() is already a thing!?!?" :-)
Am 08.03.26 um 04:04 schrieb Douglas Freimuth:
> Fencing of Fast Inject in Secure Execution environments is enabled in
> this patch by not mapping adapter indicator pages. In Secure Execution
[...]
> @@ -2477,14 +2572,28 @@ static int modify_io_adapter(struct kvm_device *dev,
> if (ret > 0)
> ret = 0;
> break;
> - /*
> - * The following operations are no longer needed and therefore no-ops.
> - * The gpa to hva translation is done when an IRQ route is set up. The
> - * set_irq code uses get_user_pages_remote() to do the actual write.
> - */
> case KVM_S390_IO_ADAPTER_MAP:
> case KVM_S390_IO_ADAPTER_UNMAP:
> - ret = 0;
> + mutex_lock(&dev->kvm->lock);
> + if (kvm_s390_pv_is_protected(dev->kvm)) {
> + mutex_unlock(&dev->kvm->lock);
> + break;
> + }
I guess this works for a well behaving userspaces, but a bad QEMU could in theory
not do the unmap on switch to secure.
Shall we maybe do -EINVAL on KVM_PV_ENABLE if there are still mapping left, or
to make it easier for userspace remove the old ADAPTER maps?
On 3/9/26 5:27 AM, Christian Borntraeger wrote:
> Am 08.03.26 um 04:04 schrieb Douglas Freimuth:
>> Fencing of Fast Inject in Secure Execution environments is enabled in
>> this patch by not mapping adapter indicator pages. In Secure Execution
> [...]
>
>> @@ -2477,14 +2572,28 @@ static int modify_io_adapter(struct kvm_device
>> *dev,
>> if (ret > 0)
>> ret = 0;
>> break;
>> - /*
>> - * The following operations are no longer needed and therefore
>> no-ops.
>> - * The gpa to hva translation is done when an IRQ route is set
>> up. The
>> - * set_irq code uses get_user_pages_remote() to do the actual write.
>> - */
>> case KVM_S390_IO_ADAPTER_MAP:
>> case KVM_S390_IO_ADAPTER_UNMAP:
>> - ret = 0;
>> + mutex_lock(&dev->kvm->lock);
>> + if (kvm_s390_pv_is_protected(dev->kvm)) {
>> + mutex_unlock(&dev->kvm->lock);
>> + break;
>> + }
>
>
> I guess this works for a well behaving userspaces, but a bad QEMU could
> in theory
> not do the unmap on switch to secure.
> Shall we maybe do -EINVAL on KVM_PV_ENABLE if there are still mapping
> left, or
> to make it easier for userspace remove the old ADAPTER maps?
>
Christian, thank you for your input. For this scenario, I will look into
adding/testing removing the old adapter maps. I will start in
kvm_s390_handle_pv() for the KVM_PV_ENABLE case and I will essentially use
most of the functionality in kvm_s390_destroy_adapters() where the maps
are deleted if they exist.
Discussion: During development and test I realized it appears a guest
can only change state between non-SE and SE during a reboot. Thus the
unmap and map are called, which hits the fencing in the current patch.
Additionally, a more draconian fencing could possibly be done if needed,
by checking for the existence of SE firmware in the CMDLINE and prevent
any mapping from occurring on those systems that support SE.
Am 09.03.26 um 18:12 schrieb Douglas Freimuth:
>
>
> On 3/9/26 5:27 AM, Christian Borntraeger wrote:
>> Am 08.03.26 um 04:04 schrieb Douglas Freimuth:
>>> Fencing of Fast Inject in Secure Execution environments is enabled in
>>> this patch by not mapping adapter indicator pages. In Secure Execution
>> [...]
>>
>>> @@ -2477,14 +2572,28 @@ static int modify_io_adapter(struct kvm_device *dev,
>>> if (ret > 0)
>>> ret = 0;
>>> break;
>>> - /*
>>> - * The following operations are no longer needed and therefore no-ops.
>>> - * The gpa to hva translation is done when an IRQ route is set up. The
>>> - * set_irq code uses get_user_pages_remote() to do the actual write.
>>> - */
>>> case KVM_S390_IO_ADAPTER_MAP:
>>> case KVM_S390_IO_ADAPTER_UNMAP:
>>> - ret = 0;
>>> + mutex_lock(&dev->kvm->lock);
>>> + if (kvm_s390_pv_is_protected(dev->kvm)) {
>>> + mutex_unlock(&dev->kvm->lock);
>>> + break;
>>> + }
>>
>>
>> I guess this works for a well behaving userspaces, but a bad QEMU could in theory
>> not do the unmap on switch to secure.
>> Shall we maybe do -EINVAL on KVM_PV_ENABLE if there are still mapping left, or
>> to make it easier for userspace remove the old ADAPTER maps?
>>
>
> Christian, thank you for your input. For this scenario, I will look into adding/testing removing the old adapter maps. I will start in kvm_s390_handle_pv() for CASE KVM_PV_ENABLE and I will essentially use most of the functionality in kvm_s390_destroy_adapters() where the maps are deleted if they exist.
Right, maybe just add a unmap_all_adapters function and call that.
>
> Discussion: During development and test I realized it appears a guest can only change state between non-SE and SE during a reboot. Thus the unmap and map is called which hits the fencing in the current patch. Additionally, a more draconian fencing could possibly be done if needed, by checking for the existence of SE firmware in the CMDLINE and prevent any mapping from occurring on those systems that support SE.
Yes, diagnose 308 code 10 switches in a reboot-like style to secure, and diagnose 308 code 3 or 4 switches back to non-secure.
Hi Douglas,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v7.0-rc2]
[also build test WARNING on linus/master next-20260306]
[cannot apply to kvms390/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Douglas-Freimuth/Add-map-unmap-ioctl-and-clean-mappings-post-guest/20260308-110653
base: v7.0-rc2
patch link: https://lore.kernel.org/r/20260308030438.88580-2-freimuth%40linux.ibm.com
patch subject: [PATCH v1 1/3] Add map/unmap ioctl and clean mappings post-guest
config: s390-allmodconfig (https://download.01.org/0day-ci/archive/20260308/202603082108.iY5mWhWR-lkp@intel.com/config)
compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260308/202603082108.iY5mWhWR-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603082108.iY5mWhWR-lkp@intel.com/
All warnings (new ones prefixed by >>):
arch/s390/kvm/interrupt.c:2478:16: warning: unused variable 'flags' [-Wunused-variable]
2478 | unsigned long flags;
| ^~~~~
>> arch/s390/kvm/interrupt.c:2578:7: warning: variable 'ret' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
2578 | if (kvm_s390_pv_is_protected(dev->kvm)) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/s390/kvm/interrupt.c:2602:9: note: uninitialized use occurs here
2602 | return ret;
| ^~~
arch/s390/kvm/interrupt.c:2578:3: note: remove the 'if' if its condition is always false
2578 | if (kvm_s390_pv_is_protected(dev->kvm)) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2579 | mutex_unlock(&dev->kvm->lock);
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2580 | break;
| ~~~~~~
2581 | }
| ~
arch/s390/kvm/interrupt.c:2561:9: note: initialize the variable 'ret' to silence this warning
2561 | int ret, idx;
| ^
| = 0
2 warnings generated.
vim +2578 arch/s390/kvm/interrupt.c
2554
2555 static int modify_io_adapter(struct kvm_device *dev,
2556 struct kvm_device_attr *attr)
2557 {
2558 struct kvm_s390_io_adapter_req req;
2559 struct s390_io_adapter *adapter;
2560 __u64 host_addr;
2561 int ret, idx;
2562
2563 if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
2564 return -EFAULT;
2565
2566 adapter = get_io_adapter(dev->kvm, req.id);
2567 if (!adapter)
2568 return -EINVAL;
2569 switch (req.type) {
2570 case KVM_S390_IO_ADAPTER_MASK:
2571 ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
2572 if (ret > 0)
2573 ret = 0;
2574 break;
2575 case KVM_S390_IO_ADAPTER_MAP:
2576 case KVM_S390_IO_ADAPTER_UNMAP:
2577 mutex_lock(&dev->kvm->lock);
> 2578 if (kvm_s390_pv_is_protected(dev->kvm)) {
2579 mutex_unlock(&dev->kvm->lock);
2580 break;
2581 }
2582 mutex_unlock(&dev->kvm->lock);
2583 idx = srcu_read_lock(&dev->kvm->srcu);
2584 host_addr = gpa_to_hva(dev->kvm, req.addr);
2585 if (kvm_is_error_hva(host_addr)) {
2586 srcu_read_unlock(&dev->kvm->srcu, idx);
2587 return -EFAULT;
2588 }
2589 srcu_read_unlock(&dev->kvm->srcu, idx);
2590 if (req.type == KVM_S390_IO_ADAPTER_MAP) {
2591 dev->kvm->stat.io_390_adapter_map++;
2592 ret = kvm_s390_adapter_map(dev->kvm, req.id, host_addr);
2593 } else {
2594 dev->kvm->stat.io_390_adapter_unmap++;
2595 ret = kvm_s390_adapter_unmap(dev->kvm, req.id, host_addr);
2596 }
2597 break;
2598 default:
2599 ret = -EINVAL;
2600 }
2601
2602 return ret;
2603 }
2604
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2026 Red Hat, Inc.