From: Xie Yongji <xieyongji@bytedance.com>
To: mst@redhat.com, jasowang@redhat.com, tglx@linutronix.de, hch@lst.de
Cc: virtualization@lists.linux-foundation.org, linux-kernel@vger.kernel.org
Subject: [PATCH v3 05/11] vduse: Support automatic irq callback affinity
Date: Tue, 28 Feb 2023 17:41:04 +0800
Message-Id: <20230228094110.37-6-xieyongji@bytedance.com>
In-Reply-To: <20230228094110.37-1-xieyongji@bytedance.com>
References: <20230228094110.37-1-xieyongji@bytedance.com>

This brings the current interrupt affinity spreading mechanism to vduse
devices. We use group_cpus_evenly() to create an irq callback affinity
mask for each virtqueue of the vduse device, and then spread IRQs
between the CPUs in the affinity mask, in a round-robin manner, to run
the irq callbacks.

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
 drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++++++++++++++++++++++++++--
 1 file changed, 123 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 98359d87a06f..bde28a8692d5 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -23,6 +23,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -41,6 +43,8 @@
 #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
 
+#define IRQ_UNBOUND -1
+
 struct vduse_virtqueue {
         u16 index;
         u16 num_max;
@@ -57,6 +61,8 @@ struct vduse_virtqueue {
         struct vdpa_callback cb;
         struct work_struct inject;
         struct work_struct kick;
+        int irq_effective_cpu;
+        struct cpumask irq_affinity;
 };
 
 struct vduse_dev;
@@ -128,6 +134,7 @@ static struct class *vduse_class;
 static struct cdev vduse_ctrl_cdev;
 static struct cdev vduse_cdev;
 static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
 
 static u32 allowed_device_id[] = {
         VIRTIO_ID_BLOCK,
@@ -708,6 +715,82 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
         return dev->generation;
 }
 
+static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
+{
+        affd->nr_sets = 1;
+        affd->set_size[0] = affvecs;
+}
+
+struct cpumask *
+create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
+{
+        unsigned int affvecs = 0, curvec, usedvecs, i;
+        struct cpumask *masks = NULL;
+
+        if (nvecs > affd->pre_vectors + affd->post_vectors)
+                affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
+
+        if (!affd->calc_sets)
+                affd->calc_sets = default_calc_sets;
+
+        affd->calc_sets(affd, affvecs);
+
+        if (!affvecs)
+                return NULL;
+
+        masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
+        if (!masks)
+                return NULL;
+
+        /* Fill out vectors at the beginning that don't need affinity */
+        for (curvec = 0; curvec < affd->pre_vectors; curvec++)
+                cpumask_setall(&masks[curvec]);
+
+        for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
+                unsigned int this_vecs = affd->set_size[i];
+                int j;
+                struct cpumask *result = group_cpus_evenly(this_vecs);
+
+                if (!result) {
+                        kfree(masks);
+                        return NULL;
+                }
+
+                for (j = 0; j < this_vecs; j++)
+                        cpumask_copy(&masks[curvec + j], &result[j]);
+                kfree(result);
+
+                curvec += this_vecs;
+                usedvecs += this_vecs;
+        }
+
+        /* Fill out vectors at the end that don't need affinity */
+        if (usedvecs >= affvecs)
+                curvec = affd->pre_vectors + affvecs;
+        else
+                curvec = affd->pre_vectors + usedvecs;
+        for (; curvec < nvecs; curvec++)
+                cpumask_setall(&masks[curvec]);
+
+        return masks;
+}
+
+static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
+                                        struct irq_affinity *desc)
+{
+        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+        struct cpumask *masks;
+        int i;
+
+        masks = create_affinity_masks(dev->vq_num, desc);
+        if (!masks)
+                return;
+
+        for (i = 0; i < dev->vq_num; i++)
+                cpumask_copy(&dev->vqs[i]->irq_affinity, &masks[i]);
+        kfree(masks);
+}
+
 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
                               unsigned int asid,
                               struct vhost_iotlb *iotlb)
@@ -758,6 +841,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
         .get_config = vduse_vdpa_get_config,
         .set_config = vduse_vdpa_set_config,
         .get_generation = vduse_vdpa_get_generation,
+        .set_irq_affinity = vduse_vdpa_set_irq_affinity,
         .reset = vduse_vdpa_reset,
         .set_map = vduse_vdpa_set_map,
         .free = vduse_vdpa_free,
@@ -917,7 +1001,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
 }
 
 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
-                                    struct work_struct *irq_work)
+                                    struct work_struct *irq_work,
+                                    int irq_effective_cpu)
 {
         int ret = -EINVAL;
 
@@ -926,7 +1011,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
                 goto unlock;
 
         ret = 0;
-        queue_work(vduse_irq_wq, irq_work);
+        if (irq_effective_cpu == IRQ_UNBOUND)
+                queue_work(vduse_irq_wq, irq_work);
+        else
+                queue_work_on(irq_effective_cpu,
+                              vduse_irq_bound_wq, irq_work);
 unlock:
         up_read(&dev->rwsem);
 
@@ -1029,6 +1118,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
         return ret;
 }
 
+static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
+{
+        int curr_cpu = vq->irq_effective_cpu;
+
+        while (true) {
+                curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
+                if (cpu_online(curr_cpu))
+                        break;
+
+                if (curr_cpu >= nr_cpu_ids)
+                        curr_cpu = -1;
+        }
+
+        vq->irq_effective_cpu = curr_cpu;
+}
+
 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                             unsigned long arg)
 {
@@ -1111,7 +1216,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                 break;
         }
         case VDUSE_DEV_INJECT_CONFIG_IRQ:
-                ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+                ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
                 break;
         case VDUSE_VQ_SETUP: {
                 struct vduse_vq_config config;
@@ -1198,7 +1303,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                         break;
 
                 index = array_index_nospec(index, dev->vq_num);
-                ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
+
+                vduse_vq_update_effective_cpu(dev->vqs[index]);
+                ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
+                                               dev->vqs[index]->irq_effective_cpu);
                 break;
         }
         case VDUSE_IOTLB_REG_UMEM: {
@@ -1367,10 +1475,12 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
                         goto err;
 
                 dev->vqs[i]->index = i;
+                dev->vqs[i]->irq_effective_cpu = -1;
                 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
                 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
                 spin_lock_init(&dev->vqs[i]->kick_lock);
                 spin_lock_init(&dev->vqs[i]->irq_lock);
+                cpumask_setall(&dev->vqs[i]->irq_affinity);
         }
 
         return 0;
@@ -1858,12 +1968,15 @@ static int vduse_init(void)
         if (ret)
                 goto err_cdev;
 
+        ret = -ENOMEM;
         vduse_irq_wq = alloc_workqueue("vduse-irq",
                                        WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
-        if (!vduse_irq_wq) {
-                ret = -ENOMEM;
+        if (!vduse_irq_wq)
                 goto err_wq;
-        }
+
+        vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+        if (!vduse_irq_bound_wq)
+                goto err_bound_wq;
 
         ret = vduse_domain_init();
         if (ret)
@@ -1877,6 +1990,8 @@ static int vduse_init(void)
 err_mgmtdev:
         vduse_domain_exit();
 err_domain:
+        destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
         destroy_workqueue(vduse_irq_wq);
 err_wq:
         cdev_del(&vduse_cdev);
@@ -1896,6 +2011,7 @@ static void vduse_exit(void)
 {
         vduse_mgmtdev_exit();
         vduse_domain_exit();
+        destroy_workqueue(vduse_irq_bound_wq);
         destroy_workqueue(vduse_irq_wq);
         cdev_del(&vduse_cdev);
         device_destroy(vduse_class, vduse_major);
-- 
2.20.1
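
[Editor's illustration, not part of the patch] To make the round-robin
behaviour described in the commit message easier to follow, below is a
minimal stand-alone user-space sketch that emulates what
vduse_vq_update_effective_cpu() does: each injected irq callback picks
the next online CPU in the virtqueue's affinity mask, wrapping around.
The helper next_effective_cpu(), the fixed NR_CPUS, and the plain bool
arrays are illustrative stand-ins for the kernel's cpumask and
cpu_online() APIs.

/*
 * Illustrative sketch only: round-robin over the online CPUs of an
 * affinity mask, wrapping around, as the patch does per virtqueue.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

/* Return the next online CPU in the mask after 'last', or -1 if none. */
static int next_effective_cpu(const bool affinity[NR_CPUS],
                              const bool online[NR_CPUS], int last)
{
        for (int step = 1; step <= NR_CPUS; step++) {
                int cpu = (last + step) % NR_CPUS;

                if (affinity[cpu] && online[cpu])
                        return cpu;
        }
        return -1;      /* no usable CPU: fall back to an unbound workqueue */
}

int main(void)
{
        bool affinity[NR_CPUS] = { [2] = true, [4] = true, [6] = true };
        bool online[NR_CPUS] = { true, true, true, true, false, true, true, true };
        int cpu = -1;

        /* Successive injections rotate over CPUs 2 and 6 (CPU 4 is offline). */
        for (int i = 0; i < 6; i++) {
                cpu = next_effective_cpu(affinity, online, cpu);
                printf("irq callback %d runs on CPU %d\n", i, cpu);
        }
        return 0;
}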