1
Based-on: <20250104-reuse-v18-0-c349eafd8673@daynix.com>
2
("[PATCH v18 00/14] hw/pci: SR-IOV related fixes and improvements")
3
1
Introduction
4
Introduction
2
------------
5
------------
3
6
4
This series is based on the RFC series submitted by Yui Washizu[1].
7
This series is based on the RFC series submitted by Yui Washizu[1].
5
See also [2] for the context.
8
See also [2] for the context.
...
...
50
behavior we need for the SR-IOV emulation.
53
behavior we need for the SR-IOV emulation.
51
54
52
Summary
55
Summary
53
-------
56
-------
54
57
55
Patches 1 to 5 refactor the PCI infrastructure code.
58
Patch 1 disables ROM BAR, which virtio-net-pci enables by default, for
56
Patches 6 to 10 add user-created SR-IOV VF infrastructure.
59
VFs.
57
Patch 11 makes virtio-pci work as SR-IOV PF for user-created VFs.
60
Patch 2 makes zero stride valid for 1 VF configuration.
58
Patch 12 allows users to create SR-IOV VFs with virtio-net-pci.
61
Patches 3 and 4 add validations.
62
Patch 5 adds user-created SR-IOV VF infrastructure.
63
Patch 6 makes virtio-pci work as SR-IOV PF for user-created VFs.
64
Patch 7 allows users to create SR-IOV VFs with virtio-net-pci.
59
65
60
[1] https://patchew.org/QEMU/1689731808-3009-1-git-send-email-yui.washidu@gmail.com/
66
[1] https://patchew.org/QEMU/1689731808-3009-1-git-send-email-yui.washidu@gmail.com/
61
[2] https://lore.kernel.org/all/5d46f455-f530-4e5e-9ae7-13a2297d4bc5@daynix.com/
67
[2] https://lore.kernel.org/all/5d46f455-f530-4e5e-9ae7-13a2297d4bc5@daynix.com/
62
68
63
Co-developed-by: Yui Washizu <yui.washidu@gmail.com>
69
Co-developed-by: Yui Washizu <yui.washidu@gmail.com>
64
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
70
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
65
---
71
---
72
Changes in v9:
73
- Rebased.
74
- Link to v8: https://lore.kernel.org/r/20250104-sriov-v8-0-56144cfdc7d9@daynix.com
75
76
Changes in v8:
77
- Rebased.
78
- Link to v7: https://lore.kernel.org/r/20240813-sriov-v7-0-8515e3774df7@daynix.com
79
80
Changes in v7:
81
- Removed #include <error-report.h>, which is no longer needed.
82
- Rebased.
83
- Link to v6: https://lore.kernel.org/r/20240802-sriov-v6-0-0c8ff49c4276@daynix.com
84
85
Changes in v6:
86
- Added ARI extended capability.
87
- Rebased.
88
- Link to v5: https://lore.kernel.org/r/20240715-sriov-v5-0-3f5539093ffc@daynix.com
89
90
Changes in v5:
91
- Dropped the RFC tag.
92
- Fixed device unrealization.
93
- Rebased.
94
- Link to v4: https://lore.kernel.org/r/20240428-sriov-v4-0-ac8ac6212982@daynix.com
95
96
Changes in v4:
97
- Added patch "hw/pci: Fix SR-IOV VF number calculation" to fix division
98
by zero reported by Yui Washizu.
99
- Rebased.
100
- Link to v3: https://lore.kernel.org/r/20240305-sriov-v3-0-abdb75770372@daynix.com
101
102
Changes in v3:
103
- Rebased.
104
- Link to v2: https://lore.kernel.org/r/20231210-sriov-v2-0-b959e8a6dfaf@daynix.com
105
66
Changes in v2:
106
Changes in v2:
67
- Changed to keep VF instances.
107
- Changed to keep VF instances.
68
- Link to v1: https://lore.kernel.org/r/20231202-sriov-v1-0-32b3570f7bd6@daynix.com
108
- Link to v1: https://lore.kernel.org/r/20231202-sriov-v1-0-32b3570f7bd6@daynix.com
69
109
70
---
110
---
71
Akihiko Odaki (12):
111
Akihiko Odaki (9):
72
hw/pci: Initialize PCI multifunction after realization
73
hw/pci: Determine if rombar is explicitly enabled
74
hw/pci: Do not add ROM BAR for SR-IOV VF
112
hw/pci: Do not add ROM BAR for SR-IOV VF
75
vfio: Avoid inspecting option QDict for rombar
113
hw/pci: Fix SR-IOV VF number calculation
76
hw/qdev: Remove opts member
77
pcie_sriov: Reuse SR-IOV VF device instances
78
pcie_sriov: Release VFs failed to realize
79
pcie_sriov: Ensure PF and VF are mutually exclusive
114
pcie_sriov: Ensure PF and VF are mutually exclusive
80
pcie_sriov: Check PCI Express for SR-IOV PF
115
pcie_sriov: Check PCI Express for SR-IOV PF
81
pcie_sriov: Allow user to create SR-IOV device
116
pcie_sriov: Allow user to create SR-IOV device
82
virtio-pci: Implement SR-IOV PF
117
virtio-pci: Implement SR-IOV PF
83
virtio-net: Implement SR-IOV VF
118
virtio-net: Implement SR-IOV VF
119
docs: Document composable SR-IOV device
120
pcie_sriov: Make a PCI device with user-created VF ARI-capable
84
121
85
docs/pcie_sriov.txt | 8 +-
122
MAINTAINERS | 1 +
86
include/hw/pci/pci.h | 2 +-
123
docs/system/index.rst | 1 +
87
include/hw/pci/pci_device.h | 13 +-
124
docs/system/sriov.rst | 37 ++++++
88
include/hw/pci/pcie_sriov.h | 25 ++-
125
include/hw/pci/pci_device.h | 6 +-
89
include/hw/qdev-core.h | 4 -
126
include/hw/pci/pcie_sriov.h | 21 +++
90
hw/core/qdev.c | 1 -
127
include/hw/virtio/virtio-pci.h | 1 +
91
hw/net/igb.c | 3 +-
128
hw/pci/pci.c | 76 +++++++----
92
hw/nvme/ctrl.c | 3 +-
129
hw/pci/pcie_sriov.c | 294 +++++++++++++++++++++++++++++++++--------
93
hw/pci/pci.c | 98 +++++++-----
130
hw/virtio/virtio-net-pci.c | 1 +
94
hw/pci/pci_host.c | 4 +-
131
hw/virtio/virtio-pci.c | 24 +++-
95
hw/pci/pcie.c | 4 +-
132
10 files changed, 378 insertions(+), 84 deletions(-)
96
hw/pci/pcie_sriov.c | 360 +++++++++++++++++++++++++++++++++-----------
97
hw/vfio/pci.c | 3 +-
98
hw/virtio/virtio-net-pci.c | 1 +
99
hw/virtio/virtio-pci.c | 7 +
100
system/qdev-monitor.c | 12 +-
101
16 files changed, 395 insertions(+), 153 deletions(-)
102
---
133
---
103
base-commit: 4705fc0c8511d073bee4751c3c974aab2b10a970
134
base-commit: 825b96dbcee23d134b691fc75618b59c5f53da32
104
change-id: 20231202-sriov-9402fb262be8
135
change-id: 20231202-sriov-9402fb262be8
105
136
106
Best regards,
137
Best regards,
107
--
138
--
108
Akihiko Odaki <akihiko.odaki@daynix.com>
139
Akihiko Odaki <akihiko.odaki@daynix.com>
diff view generated by jsdifflib
...
...
13
@@ -XXX,XX +XXX,XX @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
13
@@ -XXX,XX +XXX,XX @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
14
return;
14
return;
15
}
15
}
16
16
17
+ if (pci_is_vf(pdev)) {
17
+ if (pci_is_vf(pdev)) {
18
+ if (pdev->rom_bar && pdev->rom_bar != -1) {
18
+ if (pdev->rom_bar > 0) {
19
+ error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF");
19
+ error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF");
20
+ }
20
+ }
21
+
21
+
22
+ return;
22
+ return;
23
+ }
23
+ }
24
+
24
+
25
if (load_file || pdev->romsize == -1) {
25
if (load_file || pdev->romsize == UINT32_MAX) {
26
path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
26
path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
27
if (path == NULL) {
27
if (path == NULL) {
28
28
29
--
29
--
30
2.43.0
30
2.48.1
diff view generated by jsdifflib
1
The device realization code may enable PCI multifunction for SR-IOV.
1
pci_config_get_bar_addr() had a division by vf_stride. vf_stride needs
2
to be non-zero when there are multiple VFs, but the specification does
3
not prohibit making it zero when there is only one VF.
4
5
Do not perform the division for the first VF to avoid division by zero.
2
6
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
---
8
---
5
hw/pci/pci.c | 17 +++++++++--------
9
hw/pci/pci.c | 6 +++++-
6
1 file changed, 9 insertions(+), 8 deletions(-)
10
1 file changed, 5 insertions(+), 1 deletion(-)
7
11
8
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
12
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
9
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
10
--- a/hw/pci/pci.c
14
--- a/hw/pci/pci.c
11
+++ b/hw/pci/pci.c
15
+++ b/hw/pci/pci.c
12
@@ -XXX,XX +XXX,XX @@ static void pci_init_mask_bridge(PCIDevice *d)
16
@@ -XXX,XX +XXX,XX @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
13
PCI_PREF_RANGE_TYPE_MASK);
17
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
14
}
18
uint16_t vf_stride =
15
19
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
16
-static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
20
- uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
17
+static void pci_init_multifunction(PCIDevice *dev, Error **errp)
21
+ uint32_t vf_num = d->devfn - (pf->devfn + vf_offset);
18
{
19
+ PCIBus *bus = pci_get_bus(dev);
20
uint8_t slot = PCI_SLOT(dev->devfn);
21
uint8_t func;
22
23
@@ -XXX,XX +XXX,XX @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
24
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
25
PCIConfigReadFunc *config_read = pc->config_read;
26
PCIConfigWriteFunc *config_write = pc->config_write;
27
- Error *local_err = NULL;
28
DeviceState *dev = DEVICE(pci_dev);
29
PCIBus *bus = pci_get_bus(pci_dev);
30
bool is_bridge = IS_PCI_BRIDGE(pci_dev);
31
@@ -XXX,XX +XXX,XX @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
32
if (is_bridge) {
33
pci_init_mask_bridge(pci_dev);
34
}
35
- pci_init_multifunction(bus, pci_dev, &local_err);
36
- if (local_err) {
37
- error_propagate(errp, local_err);
38
- do_pci_unregister_device(pci_dev);
39
- return NULL;
40
- }
41
42
if (!config_read)
43
config_read = pci_default_read_config;
44
@@ -XXX,XX +XXX,XX @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
45
}
46
}
47
48
+ pci_init_multifunction(pci_dev, &local_err);
49
+ if (local_err) {
50
+ error_propagate(errp, local_err);
51
+ pci_qdev_unrealize(DEVICE(pci_dev));
52
+ return;
53
+ }
54
+
22
+
55
/*
23
+ if (vf_num) {
56
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
24
+ vf_num /= vf_stride;
57
* associate only Device 0 with the device attached to the bus
25
+ }
26
27
if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
28
new_addr = pci_get_quad(pf->config + bar);
58
29
59
--
30
--
60
2.43.0
31
2.48.1
diff view generated by jsdifflib
Deleted patch
1
vfio determines if rombar is explicitly enabled by inspecting QDict.
2
Inspecting QDict is not nice because QDict is untyped and depends on the
3
details of the external interface. Add infrastructure to determine if
4
rombar is explicitly enabled to hw/pci. PCIDevice::rom_bar is changed to
5
have -1 by default to tell whether rombar is explicitly enabled. It is
6
consistent with other properties like addr and romsize.
7
1
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
9
---
10
include/hw/pci/pci_device.h | 5 +++++
11
hw/pci/pci.c | 2 +-
12
2 files changed, 6 insertions(+), 1 deletion(-)
13
14
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/hw/pci/pci_device.h
17
+++ b/include/hw/pci/pci_device.h
18
@@ -XXX,XX +XXX,XX @@ static inline uint16_t pci_get_bdf(PCIDevice *dev)
19
return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn);
20
}
21
22
+static inline bool pci_rom_bar_explicitly_enabled(PCIDevice *dev)
23
+{
24
+ return dev->rom_bar && dev->rom_bar != -1;
25
+}
26
+
27
uint16_t pci_requester_id(PCIDevice *dev);
28
29
/* DMA access functions */
30
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/hw/pci/pci.c
33
+++ b/hw/pci/pci.c
34
@@ -XXX,XX +XXX,XX @@ static Property pci_props[] = {
35
DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
36
DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
37
DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, -1),
38
- DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1),
39
+ DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, -1),
40
DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present,
41
QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false),
42
DEFINE_PROP_BIT("x-pcie-lnksta-dllla", PCIDevice, cap_present,
43
44
--
45
2.43.0
diff view generated by jsdifflib
Deleted patch
1
Use pci_rom_bar_explicitly_enabled() to determine if rombar is explicitly
2
enabled.
3
1
4
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
5
---
6
hw/vfio/pci.c | 3 +--
7
1 file changed, 1 insertion(+), 2 deletions(-)
8
9
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/hw/vfio/pci.c
12
+++ b/hw/vfio/pci.c
13
@@ -XXX,XX +XXX,XX @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
14
{
15
uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
16
off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
17
- DeviceState *dev = DEVICE(vdev);
18
char *name;
19
int fd = vdev->vbasedev.fd;
20
21
@@ -XXX,XX +XXX,XX @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
22
}
23
24
if (vfio_opt_rom_in_denylist(vdev)) {
25
- if (dev->opts && qdict_haskey(dev->opts, "rombar")) {
26
+ if (pci_rom_bar_explicitly_enabled(&vdev->pdev)) {
27
warn_report("Device at %s is known to cause system instability"
28
" issues during option rom execution",
29
vdev->vbasedev.name);
30
31
--
32
2.43.0
diff view generated by jsdifflib
Deleted patch
1
It is no longer used.
2
1
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
---
5
include/hw/qdev-core.h | 4 ----
6
hw/core/qdev.c | 1 -
7
system/qdev-monitor.c | 12 +++++++-----
8
3 files changed, 7 insertions(+), 10 deletions(-)
9
10
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/hw/qdev-core.h
13
+++ b/include/hw/qdev-core.h
14
@@ -XXX,XX +XXX,XX @@ struct DeviceState {
15
* @pending_deleted_expires_ms: optional timeout for deletion events
16
*/
17
int64_t pending_deleted_expires_ms;
18
- /**
19
- * @opts: QDict of options for the device
20
- */
21
- QDict *opts;
22
/**
23
* @hotplugged: was device added after PHASE_MACHINE_READY?
24
*/
25
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/hw/core/qdev.c
28
+++ b/hw/core/qdev.c
29
@@ -XXX,XX +XXX,XX @@ static void device_finalize(Object *obj)
30
dev->canonical_path = NULL;
31
}
32
33
- qobject_unref(dev->opts);
34
g_free(dev->id);
35
}
36
37
diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/system/qdev-monitor.c
40
+++ b/system/qdev-monitor.c
41
@@ -XXX,XX +XXX,XX @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
42
char *id;
43
DeviceState *dev = NULL;
44
BusState *bus = NULL;
45
+ QDict *properties;
46
47
driver = qdict_get_try_str(opts, "driver");
48
if (!driver) {
49
@@ -XXX,XX +XXX,XX @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
50
}
51
52
/* set properties */
53
- dev->opts = qdict_clone_shallow(opts);
54
- qdict_del(dev->opts, "driver");
55
- qdict_del(dev->opts, "bus");
56
- qdict_del(dev->opts, "id");
57
+ properties = qdict_clone_shallow(opts);
58
+ qdict_del(properties, "driver");
59
+ qdict_del(properties, "bus");
60
+ qdict_del(properties, "id");
61
62
- object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json,
63
+ object_set_properties_from_keyval(&dev->parent_obj, properties, from_json,
64
errp);
65
+ qobject_unref(properties);
66
if (*errp) {
67
goto err_del_dev;
68
}
69
70
--
71
2.43.0
diff view generated by jsdifflib
...
...
16
+ if (pci_is_vf(dev)) {
16
+ if (pci_is_vf(dev)) {
17
+ error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time");
17
+ error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time");
18
+ return false;
18
+ return false;
19
+ }
19
+ }
20
+
20
+
21
pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
21
if (total_vfs &&
22
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
22
(uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) {
23
dev->exp.sriov_cap = offset;
23
error_setg(errp, "VF addr overflows");
24
24
25
--
25
--
26
2.43.0
26
2.48.1
diff view generated by jsdifflib
...
...
21
if (pci_is_vf(dev)) {
21
if (pci_is_vf(dev)) {
22
error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time");
22
error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time");
23
return false;
23
return false;
24
24
25
--
25
--
26
2.43.0
26
2.48.1
diff view generated by jsdifflib
...
...
10
pcie_sriov_pf_exit() when exiting.
10
pcie_sriov_pf_exit() when exiting.
11
11
12
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
12
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
13
---
13
---
14
include/hw/pci/pci_device.h | 6 +-
14
include/hw/pci/pci_device.h | 6 +-
15
include/hw/pci/pcie_sriov.h | 19 +++
15
include/hw/pci/pcie_sriov.h | 18 +++
16
hw/pci/pci.c | 53 ++++++---
16
hw/pci/pci.c | 62 ++++++----
17
hw/pci/pcie_sriov.c | 279 +++++++++++++++++++++++++++++++++++---------
17
hw/pci/pcie_sriov.c | 278 +++++++++++++++++++++++++++++++++++---------
18
4 files changed, 283 insertions(+), 74 deletions(-)
18
4 files changed, 286 insertions(+), 78 deletions(-)
19
19
20
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
20
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
21
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/pci/pci_device.h
22
--- a/include/hw/pci/pci_device.h
23
+++ b/include/hw/pci/pci_device.h
23
+++ b/include/hw/pci/pci_device.h
...
...
29
+ bool sriov_vf_user_creatable;
29
+ bool sriov_vf_user_creatable;
30
};
30
};
31
31
32
enum PCIReqIDType {
32
enum PCIReqIDType {
33
@@ -XXX,XX +XXX,XX @@ struct PCIDevice {
33
@@ -XXX,XX +XXX,XX @@ struct PCIDevice {
34
/* ID of standby device in net_failover pair */
34
* realizing the device.
35
char *failover_pair_id;
35
*/
36
uint32_t acpi_index;
36
uint32_t max_bounce_buffer_size;
37
+
37
+
38
+ char *sriov_pf;
38
+ char *sriov_pf;
39
};
39
};
40
40
41
static inline int pci_intx(PCIDevice *pci_dev)
41
static inline int pci_intx(PCIDevice *pci_dev)
...
...
50
static inline uint32_t pci_config_size(const PCIDevice *d)
50
static inline uint32_t pci_config_size(const PCIDevice *d)
51
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
51
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
52
index XXXXXXX..XXXXXXX 100644
52
index XXXXXXX..XXXXXXX 100644
53
--- a/include/hw/pci/pcie_sriov.h
53
--- a/include/hw/pci/pcie_sriov.h
54
+++ b/include/hw/pci/pcie_sriov.h
54
+++ b/include/hw/pci/pcie_sriov.h
55
@@ -XXX,XX +XXX,XX @@ struct PCIESriovPF {
55
@@ -XXX,XX +XXX,XX @@
56
uint16_t num_vfs; /* Number of virtual functions created */
56
typedef struct PCIESriovPF {
57
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
57
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
58
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
58
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
59
+ bool vf_user_created; /* If VFs are created by user */
59
+ bool vf_user_created; /* If VFs are created by user */
60
};
60
} PCIESriovPF;
61
61
62
struct PCIESriovVF {
62
typedef struct PCIESriovVF {
63
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
63
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
64
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
64
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
65
MemoryRegion *memory);
65
MemoryRegion *memory);
66
66
67
+/**
67
+/**
68
+ * pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created
68
+ * pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created
69
+ * VFs.
69
+ * VFs.
70
+ * @dev: A PCIe device being realized.
70
+ * @dev: A PCIe device being realized.
71
+ * @offset: The offset of the SR-IOV capability.
71
+ * @offset: The offset of the SR-IOV capability.
72
+ * @errp: pointer to Error*, to store an error if it happens.
72
+ * @errp: pointer to Error*, to store an error if it happens.
73
+ *
73
+ *
74
+ * Return:
74
+ * Return: The size of added capability. 0 if the user did not create VFs.
75
+ * * true - @dev is initialized as a PCIe SR-IOV PF.
75
+ * -1 if failed.
76
+ * * false - @dev is not initialized because there is no SR-IOV VFs or an error
77
+ * occurred.
78
+ */
76
+ */
79
+bool pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, uint16_t offset,
77
+int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
80
+ Error **errp);
78
+ uint16_t offset,
79
+ Error **errp);
81
+
80
+
82
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp);
81
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp);
83
+void pcie_sriov_unregister_device(PCIDevice *dev);
82
+void pcie_sriov_unregister_device(PCIDevice *dev);
84
+
83
+
85
/*
84
/*
86
* Default (minimal) page size support values
85
* Default (minimal) page size support values
87
* as required by the SR/IOV standard:
86
* as required by the SR/IOV standard:
88
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
87
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
89
index XXXXXXX..XXXXXXX 100644
88
index XXXXXXX..XXXXXXX 100644
90
--- a/hw/pci/pci.c
89
--- a/hw/pci/pci.c
91
+++ b/hw/pci/pci.c
90
+++ b/hw/pci/pci.c
92
@@ -XXX,XX +XXX,XX @@ static Property pci_props[] = {
91
@@ -XXX,XX +XXX,XX @@ static const Property pci_props[] = {
93
QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
94
DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
95
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
92
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
93
DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice,
94
max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE),
96
+ DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
95
+ DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
97
DEFINE_PROP_END_OF_LIST()
96
DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present,
98
};
97
QEMU_PCIE_EXT_TAG_BITNR, true),
99
98
{ .name = "busnr", .info = &prop_pci_busnr },
100
@@ -XXX,XX +XXX,XX @@ static void pci_init_multifunction(PCIDevice *dev, Error **errp)
99
@@ -XXX,XX +XXX,XX @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
101
* device, as it may just be a VF that ended up with function 0 in
100
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
102
* the legacy PCI interpretation. Avoid failing in such cases:
101
}
103
*/
102
103
- /*
104
- * With SR/IOV and ARI, a device at function 0 need not be a multifunction
105
- * device, as it may just be a VF that ended up with function 0 in
106
- * the legacy PCI interpretation. Avoid failing in such cases:
107
- */
104
- if (pci_is_vf(dev) &&
108
- if (pci_is_vf(dev) &&
105
- dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
109
- dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
110
+ /* SR/IOV is not handled here. */
106
+ if (pci_is_vf(dev)) {
111
+ if (pci_is_vf(dev)) {
107
return;
112
return;
108
}
113
}
109
114
115
@@ -XXX,XX +XXX,XX @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
116
}
117
/* function 0 indicates single function, so function > 0 must be NULL */
118
for (func = 1; func < PCI_FUNC_MAX; ++func) {
119
- if (bus->devices[PCI_DEVFN(slot, func)]) {
120
+ PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
121
+ if (device && !pci_is_vf(device)) {
122
error_setg(errp, "PCI: %x.0 indicates single function, "
123
"but %x.%x is already populated.",
124
slot, slot, func);
110
@@ -XXX,XX +XXX,XX @@ static void pci_qdev_unrealize(DeviceState *dev)
125
@@ -XXX,XX +XXX,XX @@ static void pci_qdev_unrealize(DeviceState *dev)
111
126
112
pci_unregister_io_regions(pci_dev);
127
pci_unregister_io_regions(pci_dev);
113
pci_del_option_rom(pci_dev);
128
pci_del_option_rom(pci_dev);
114
+ pcie_sriov_unregister_device(pci_dev);
129
+ pcie_sriov_unregister_device(pci_dev);
...
...
122
- assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
137
- assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
123
assert(region_num >= 0);
138
assert(region_num >= 0);
124
assert(region_num < PCI_NUM_REGIONS);
139
assert(region_num < PCI_NUM_REGIONS);
125
assert(is_power_of_2(size));
140
assert(is_power_of_2(size));
126
@@ -XXX,XX +XXX,XX @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
141
@@ -XXX,XX +XXX,XX @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
127
assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
128
142
129
r = &pci_dev->io_regions[region_num];
143
r = &pci_dev->io_regions[region_num];
144
assert(!r->size);
130
- r->addr = PCI_BAR_UNMAPPED;
145
- r->addr = PCI_BAR_UNMAPPED;
131
r->size = size;
146
r->size = size;
132
r->type = type;
147
r->type = type;
133
r->memory = memory;
148
r->memory = memory;
134
@@ -XXX,XX +XXX,XX @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
149
@@ -XXX,XX +XXX,XX @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
...
...
188
+ if (!pcie_sriov_register_device(pci_dev, errp)) {
203
+ if (!pcie_sriov_register_device(pci_dev, errp)) {
189
+ pci_qdev_unrealize(DEVICE(pci_dev));
204
+ pci_qdev_unrealize(DEVICE(pci_dev));
190
+ return;
205
+ return;
191
+ }
206
+ }
192
+
207
+
193
pci_init_multifunction(pci_dev, &local_err);
208
/*
194
if (local_err) {
209
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
195
error_propagate(errp, local_err);
210
* associate only Device 0 with the device attached to the bus
196
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
211
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
197
index XXXXXXX..XXXXXXX 100644
212
index XXXXXXX..XXXXXXX 100644
198
--- a/hw/pci/pcie_sriov.c
213
--- a/hw/pci/pcie_sriov.c
199
+++ b/hw/pci/pcie_sriov.c
214
+++ b/hw/pci/pcie_sriov.c
200
@@ -XXX,XX +XXX,XX @@
215
@@ -XXX,XX +XXX,XX @@
216
#include "hw/pci/pcie.h"
217
#include "hw/pci/pci_bus.h"
218
#include "hw/qdev-properties.h"
219
-#include "qemu/error-report.h"
220
#include "qemu/range.h"
201
#include "qapi/error.h"
221
#include "qapi/error.h"
202
#include "trace.h"
222
#include "trace.h"
203
223
204
+static GHashTable *pfs;
224
+static GHashTable *pfs;
205
+
225
+
206
static void unrealize_vfs(PCIDevice *dev, uint16_t total_vfs)
226
static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
207
{
227
{
208
for (uint16_t i = 0; i < total_vfs; i++) {
228
for (uint16_t i = 0; i < total_vfs; i++) {
209
@@ -XXX,XX +XXX,XX @@ static void unrealize_vfs(PCIDevice *dev, uint16_t total_vfs)
229
@@ -XXX,XX +XXX,XX @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
210
dev->exp.sriov_pf.vf = NULL;
230
dev->exp.sriov_pf.vf = NULL;
211
}
231
}
212
232
213
-bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
233
-bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
214
- const char *vfname, uint16_t vf_dev_id,
234
- const char *vfname, uint16_t vf_dev_id,
...
...
221
+ uint16_t i;
241
+ uint16_t i;
222
+ uint16_t sriov_cap = dev->exp.sriov_cap;
242
+ uint16_t sriov_cap = dev->exp.sriov_cap;
223
+
243
+
224
+ assert(sriov_cap > 0);
244
+ assert(sriov_cap > 0);
225
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
245
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
226
+ if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
227
+ return;
228
+ }
229
+
246
+
230
+ trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
247
+ trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
231
+ PCI_FUNC(dev->devfn), num_vfs);
248
+ PCI_FUNC(dev->devfn), num_vfs);
232
+ for (i = 0; i < num_vfs; i++) {
249
+ for (i = 0; i < num_vfs; i++) {
233
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
250
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
234
+ }
251
+ }
235
+ dev->exp.sriov_pf.num_vfs = num_vfs;
252
+
253
+ pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
236
+}
254
+}
237
+
255
+
238
+static void unregister_vfs(PCIDevice *dev)
256
+static void unregister_vfs(PCIDevice *dev)
239
+{
257
+{
240
+ uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
258
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
241
+ uint16_t i;
259
+ uint16_t i;
242
+
260
+
243
+ trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
261
+ trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
244
+ PCI_FUNC(dev->devfn), num_vfs);
262
+ PCI_FUNC(dev->devfn));
245
+ for (i = 0; i < num_vfs; i++) {
263
+ for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
246
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
264
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
247
+ }
265
+ }
248
+ dev->exp.sriov_pf.num_vfs = 0;
266
+
249
+ pci_set_word(dev->config + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0);
267
+ pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
250
+}
268
+}
251
+
269
+
252
+static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
270
+static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
253
+ uint16_t vf_dev_id, uint16_t init_vfs,
271
+ uint16_t vf_dev_id, uint16_t init_vfs,
254
+ uint16_t total_vfs, uint16_t vf_offset,
272
+ uint16_t total_vfs, uint16_t vf_offset,
255
+ uint16_t vf_stride, Error **errp)
273
+ uint16_t vf_stride, Error **errp)
256
{
274
{
257
- BusState *bus = qdev_get_parent_bus(&dev->qdev);
275
- BusState *bus = qdev_get_parent_bus(&dev->qdev);
258
- int32_t devfn = dev->devfn + vf_offset;
276
int32_t devfn = dev->devfn + vf_offset;
259
uint8_t *cfg = dev->config + offset;
277
uint8_t *cfg = dev->config + offset;
260
uint8_t *wmask;
278
uint8_t *wmask;
261
262
@@ -XXX,XX +XXX,XX @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
279
@@ -XXX,XX +XXX,XX @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
263
280
264
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
281
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
265
282
266
+ return true;
283
+ return true;
...
...
284
+ total_vfs, vf_offset, vf_stride, errp)) {
301
+ total_vfs, vf_offset, vf_stride, errp)) {
285
+ return false;
302
+ return false;
286
+ }
303
+ }
287
+
304
+
288
dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
305
dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
289
assert(dev->exp.sriov_pf.vf);
306
290
307
for (uint16_t i = 0; i < total_vfs; i++) {
291
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_pf_exit(PCIDevice *dev)
308
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_pf_exit(PCIDevice *dev)
292
{
309
{
293
uint8_t *cfg = dev->config + dev->exp.sriov_cap;
310
uint8_t *cfg = dev->config + dev->exp.sriov_cap;
294
311
295
- unrealize_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
312
- unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
296
+ if (dev->exp.sriov_pf.vf_user_created) {
313
+ if (dev->exp.sriov_pf.vf_user_created) {
297
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
314
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
298
+ uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF);
315
+ uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF);
299
+ uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID);
316
+ uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID);
300
+
317
+
301
+ unregister_vfs(dev);
318
+ unregister_vfs(dev);
302
+
319
+
303
+ for (uint16_t i = 0; i < total_vfs; i++) {
320
+ for (uint16_t i = 0; i < total_vfs; i++) {
321
+ dev->exp.sriov_pf.vf[i]->exp.sriov_vf.pf = NULL;
322
+
304
+ pci_config_set_vendor_id(dev->exp.sriov_pf.vf[i]->config, ven_id);
323
+ pci_config_set_vendor_id(dev->exp.sriov_pf.vf[i]->config, ven_id);
305
+ pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id);
324
+ pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id);
306
+ }
325
+ }
307
+ } else {
326
+ } else {
308
+ unrealize_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
327
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
309
+ }
328
+ }
310
}
329
}
311
330
312
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
331
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
313
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
332
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
314
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
333
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
315
MemoryRegion *memory)
334
MemoryRegion *memory)
...
...
352
+{
371
+{
353
+ return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn;
372
+ return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn;
354
}
373
}
355
374
356
-static void register_vfs(PCIDevice *dev)
375
-static void register_vfs(PCIDevice *dev)
357
+bool pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, uint16_t offset,
376
+int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
358
+ Error **errp)
377
+ uint16_t offset,
378
+ Error **errp)
359
{
379
{
360
- uint16_t num_vfs;
380
- uint16_t num_vfs;
361
+ GPtrArray *pf;
381
+ GPtrArray *pf;
362
+ PCIDevice **vfs;
382
+ PCIDevice **vfs;
363
+ BusState *bus = qdev_get_parent_bus(DEVICE(dev));
383
+ BusState *bus = qdev_get_parent_bus(DEVICE(dev));
...
...
368
uint16_t i;
388
uint16_t i;
369
- uint16_t sriov_cap = dev->exp.sriov_cap;
389
- uint16_t sriov_cap = dev->exp.sriov_cap;
370
390
371
- assert(sriov_cap > 0);
391
- assert(sriov_cap > 0);
372
- num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
392
- num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
373
- if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
374
- return;
375
+ if (!pfs || !dev->qdev.id) {
393
+ if (!pfs || !dev->qdev.id) {
376
+ return false;
394
+ return 0;
377
}
395
+ }
378
396
379
- trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
397
- trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
380
- PCI_FUNC(dev->devfn), num_vfs);
398
- PCI_FUNC(dev->devfn), num_vfs);
381
- for (i = 0; i < num_vfs; i++) {
399
- for (i = 0; i < num_vfs; i++) {
382
- pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
400
- pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
383
+ pf = g_hash_table_lookup(pfs, dev->qdev.id);
401
+ pf = g_hash_table_lookup(pfs, dev->qdev.id);
384
+ if (!pf) {
402
+ if (!pf) {
385
+ return false;
403
+ return 0;
386
}
404
}
387
- dev->exp.sriov_pf.num_vfs = num_vfs;
405
388
+
406
- pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
389
+ if (pf->len > UINT16_MAX) {
407
+ if (pf->len > UINT16_MAX) {
390
+ error_setg(errp, "too many VFs");
408
+ error_setg(errp, "too many VFs");
391
+ return false;
409
+ return -1;
392
+ }
410
+ }
393
+
411
+
394
+ g_ptr_array_sort(pf, compare_vf_devfns);
412
+ g_ptr_array_sort(pf, compare_vf_devfns);
395
+ vfs = (void *)pf->pdata;
413
+ vfs = (void *)pf->pdata;
396
+
414
+
397
+ if (vfs[0]->devfn <= dev->devfn) {
415
+ if (vfs[0]->devfn <= dev->devfn) {
398
+ error_setg(errp, "a VF function number is less than the PF function number");
416
+ error_setg(errp, "a VF function number is less than the PF function number");
399
+ return false;
417
+ return -1;
400
+ }
418
+ }
401
+
419
+
402
+ vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID);
420
+ vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID);
403
+ vf_offset = vfs[0]->devfn - dev->devfn;
421
+ vf_offset = vfs[0]->devfn - dev->devfn;
404
+ vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn;
422
+ vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn;
405
+
423
+
406
+ for (i = 0; i < pf->len; i++) {
424
+ for (i = 0; i < pf->len; i++) {
407
+ if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) {
425
+ if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) {
408
+ error_setg(errp, "SR-IOV VF parent bus mismatches with PF");
426
+ error_setg(errp, "SR-IOV VF parent bus mismatches with PF");
409
+ return false;
427
+ return -1;
410
+ }
428
+ }
411
+
429
+
412
+ if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) {
430
+ if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) {
413
+ error_setg(errp, "SR-IOV VF vendor ID mismatches with PF");
431
+ error_setg(errp, "SR-IOV VF vendor ID mismatches with PF");
414
+ return false;
432
+ return -1;
415
+ }
433
+ }
416
+
434
+
417
+ if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) {
435
+ if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) {
418
+ error_setg(errp, "inconsistent SR-IOV VF device IDs");
436
+ error_setg(errp, "inconsistent SR-IOV VF device IDs");
419
+ return false;
437
+ return -1;
420
+ }
438
+ }
421
+
439
+
422
+ for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
440
+ for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
423
+ if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size ||
441
+ if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size ||
424
+ vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) {
442
+ vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) {
425
+ error_setg(errp, "inconsistent SR-IOV BARs");
443
+ error_setg(errp, "inconsistent SR-IOV BARs");
426
+ return false;
444
+ return -1;
427
+ }
445
+ }
428
+ }
446
+ }
429
+
447
+
430
+ if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) {
448
+ if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) {
431
+ error_setg(errp, "inconsistent SR-IOV stride");
449
+ error_setg(errp, "inconsistent SR-IOV stride");
432
+ return false;
450
+ return -1;
433
+ }
451
+ }
434
+ }
452
+ }
435
+
453
+
436
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len,
454
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len,
437
+ pf->len, vf_offset, vf_stride, errp)) {
455
+ pf->len, vf_offset, vf_stride, errp)) {
438
+ return false;
456
+ return -1;
439
+ }
457
+ }
440
+
458
+
441
+ for (i = 0; i < pf->len; i++) {
459
+ for (i = 0; i < pf->len; i++) {
442
+ vfs[i]->exp.sriov_vf.pf = dev;
460
+ vfs[i]->exp.sriov_vf.pf = dev;
461
+ vfs[i]->exp.sriov_vf.vf_number = i;
443
+
462
+
444
+ /* set vid/did according to sr/iov spec - they are not used */
463
+ /* set vid/did according to sr/iov spec - they are not used */
445
+ pci_config_set_vendor_id(vfs[i]->config, 0xffff);
464
+ pci_config_set_vendor_id(vfs[i]->config, 0xffff);
446
+ pci_config_set_device_id(vfs[i]->config, 0xffff);
465
+ pci_config_set_device_id(vfs[i]->config, 0xffff);
447
+ }
466
+ }
448
+
467
+
449
+ dev->exp.sriov_pf.vf = vfs;
468
+ dev->exp.sriov_pf.vf = vfs;
450
+ dev->exp.sriov_pf.vf_user_created = true;
469
+ dev->exp.sriov_pf.vf_user_created = true;
451
+
470
+
452
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
471
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
453
+ uint8_t type = vfs[0]->io_regions[i].type;
472
+ PCIIORegion *region = &vfs[0]->io_regions[i];
454
+ pcibus_t size = vfs[0]->io_regions[i].size;
473
+
455
+
474
+ if (region->size) {
456
+ if (size) {
475
+ pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size);
457
+ pcie_sriov_pf_init_vf_bar(dev, i, type, size);
476
+ }
458
+ }
477
+ }
459
+ }
478
+
460
+
479
+ return PCI_EXT_CAP_SRIOV_SIZEOF;
461
+ return true;
480
}
462
}
463
481
464
-static void unregister_vfs(PCIDevice *dev)
482
-static void unregister_vfs(PCIDevice *dev)
465
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp)
483
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp)
466
{
484
{
467
- uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
485
- uint8_t *cfg = dev->config + dev->exp.sriov_cap;
468
- uint16_t i;
486
- uint16_t i;
469
+ if (!dev->exp.sriov_pf.vf && dev->qdev.id &&
487
+ if (!dev->exp.sriov_pf.vf && dev->qdev.id &&
470
+ pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
488
+ pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
471
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
489
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
472
+ return false;
490
+ return false;
473
+ }
491
+ }
474
492
475
- trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
493
- trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
476
- PCI_FUNC(dev->devfn), num_vfs);
494
- PCI_FUNC(dev->devfn));
477
- for (i = 0; i < num_vfs; i++) {
495
- for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
478
- pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
496
- pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
479
+ if (dev->sriov_pf) {
497
+ if (dev->sriov_pf) {
480
+ PCIDevice *pci_pf;
498
+ PCIDevice *pci_pf;
481
+ GPtrArray *pf;
499
+ GPtrArray *pf;
482
+
500
+
...
...
504
+ pf = g_ptr_array_new();
522
+ pf = g_ptr_array_new();
505
+ g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf);
523
+ g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf);
506
+ }
524
+ }
507
+
525
+
508
+ g_ptr_array_add(pf, dev);
526
+ g_ptr_array_add(pf, dev);
509
+ }
527
}
510
+
528
529
- pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
511
+ return true;
530
+ return true;
512
+}
531
+}
513
+
532
+
514
+void pcie_sriov_unregister_device(PCIDevice *dev)
533
+void pcie_sriov_unregister_device(PCIDevice *dev)
515
+{
534
+{
516
+ if (dev->sriov_pf && pfs) {
535
+ if (dev->sriov_pf && pfs) {
517
+ GPtrArray *pf = g_hash_table_lookup(pfs, dev->qdev.id);
536
+ GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf);
518
+
537
+
519
+ if (pf) {
538
+ if (pf) {
520
+ g_ptr_array_remove_fast(pf, dev);
539
+ g_ptr_array_remove_fast(pf, dev);
521
+
540
+
522
+ if (!pf->len) {
541
+ if (!pf->len) {
523
+ g_hash_table_remove(pfs, dev->qdev.id);
542
+ g_hash_table_remove(pfs, dev->sriov_pf);
524
+ g_ptr_array_free(pf, FALSE);
543
+ g_ptr_array_free(pf, FALSE);
525
+ }
544
+ }
526
+ }
545
+ }
527
}
546
+ }
528
- dev->exp.sriov_pf.num_vfs = 0;
547
}
529
- pci_set_word(dev->config + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0);
530
}
531
548
532
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
549
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
550
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
551
552
uint16_t pcie_sriov_vf_number(PCIDevice *dev)
553
{
554
- assert(pci_is_vf(dev));
555
+ assert(dev->exp.sriov_vf.pf);
556
return dev->exp.sriov_vf.vf_number;
557
}
558
533
559
534
--
560
--
535
2.43.0
561
2.48.1
diff view generated by jsdifflib
1
Allow user to attach SR-IOV VF to a virtio-pci PF.
1
Allow user to attach SR-IOV VF to a virtio-pci PF.
2
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
---
4
---
5
hw/virtio/virtio-pci.c | 7 +++++++
5
include/hw/virtio/virtio-pci.h | 1 +
6
1 file changed, 7 insertions(+)
6
hw/virtio/virtio-pci.c | 20 +++++++++++++++-----
7
2 files changed, 16 insertions(+), 5 deletions(-)
7
8
9
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
10
index XXXXXXX..XXXXXXX 100644
11
--- a/include/hw/virtio/virtio-pci.h
12
+++ b/include/hw/virtio/virtio-pci.h
13
@@ -XXX,XX +XXX,XX @@ struct VirtIOPCIProxy {
14
uint32_t modern_io_bar_idx;
15
uint32_t modern_mem_bar_idx;
16
int config_cap;
17
+ uint16_t last_pcie_cap_offset;
18
uint32_t flags;
19
bool disable_modern;
20
bool ignore_backend_features;
8
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
21
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
9
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
10
--- a/hw/virtio/virtio-pci.c
23
--- a/hw/virtio/virtio-pci.c
11
+++ b/hw/virtio/virtio-pci.c
24
+++ b/hw/virtio/virtio-pci.c
12
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
25
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
26
uint8_t *config;
27
uint32_t size;
28
VirtIODevice *vdev = virtio_bus_get_device(bus);
29
+ int16_t res;
30
31
/*
32
* Virtio capabilities present without
33
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
13
pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
34
pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
14
PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
35
PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
15
}
36
}
16
+
37
+
17
+ if (pcie_sriov_pf_init_from_user_created_vfs(&proxy->pci_dev,
38
+ res = pcie_sriov_pf_init_from_user_created_vfs(&proxy->pci_dev,
18
+ PCI_CONFIG_SPACE_SIZE,
39
+ proxy->last_pcie_cap_offset,
19
+ errp)) {
40
+ errp);
41
+ if (res > 0) {
42
+ proxy->last_pcie_cap_offset += res;
20
+ virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
43
+ virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
21
+ }
44
+ }
22
}
45
}
23
46
24
static void virtio_pci_device_unplugged(DeviceState *d)
47
static void virtio_pci_device_unplugged(DeviceState *d)
25
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_device_unplugged(DeviceState *d)
48
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
26
bool modern = virtio_pci_modern(proxy);
49
50
if (pcie_port && pci_is_express(pci_dev)) {
51
int pos;
52
- uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;
53
+ proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;
54
55
pos = pcie_endpoint_cap_init(pci_dev, 0);
56
assert(pos > 0);
57
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
58
pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
59
60
if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
61
- pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset,
62
+ pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset,
63
PCI_ERR_SIZEOF, NULL);
64
- last_pcie_cap_offset += PCI_ERR_SIZEOF;
65
+ proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF;
66
}
67
68
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
69
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
70
}
71
72
if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
73
- pcie_ats_init(pci_dev, last_pcie_cap_offset,
74
+ pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset,
75
proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
76
- last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
77
+ proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
78
}
79
80
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
81
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_exit(PCIDevice *pci_dev)
82
!pci_bus_is_root(pci_get_bus(pci_dev));
27
bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
83
bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
28
84
29
+ pcie_sriov_pf_exit(&proxy->pci_dev);
85
+ pcie_sriov_pf_exit(&proxy->pci_dev);
30
virtio_pci_stop_ioeventfd(proxy);
86
msix_uninit_exclusive_bar(pci_dev);
31
87
if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port &&
32
if (modern) {
88
pci_is_express(pci_dev)) {
33
89
34
--
90
--
35
2.43.0
91
2.48.1
diff view generated by jsdifflib
...
...
18
set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
18
set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
19
device_class_set_props(dc, virtio_net_properties);
19
device_class_set_props(dc, virtio_net_properties);
20
vpciklass->realize = virtio_net_pci_realize;
20
vpciklass->realize = virtio_net_pci_realize;
21
21
22
--
22
--
23
2.43.0
23
2.48.1
diff view generated by jsdifflib
1
Release VFs that failed to realize, just as we do in unregister_vfs().
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
1
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
---
2
---
5
hw/pci/pcie_sriov.c | 2 ++
3
MAINTAINERS | 1 +
6
1 file changed, 2 insertions(+)
4
docs/system/index.rst | 1 +
5
docs/system/sriov.rst | 36 ++++++++++++++++++++++++++++++++++++
6
3 files changed, 38 insertions(+)
7
7
8
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
8
diff --git a/MAINTAINERS b/MAINTAINERS
9
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
10
--- a/hw/pci/pcie_sriov.c
10
--- a/MAINTAINERS
11
+++ b/hw/pci/pcie_sriov.c
11
+++ b/MAINTAINERS
12
@@ -XXX,XX +XXX,XX @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
12
@@ -XXX,XX +XXX,XX @@ F: hw/pci-bridge/*
13
vf->exp.sriov_vf.vf_number = i;
13
F: qapi/pci.json
14
14
F: docs/pci*
15
if (!qdev_realize(&vf->qdev, bus, errp)) {
15
F: docs/specs/*pci*
16
+ object_unparent(OBJECT(vf));
16
+F: docs/system/sriov.rst
17
+ object_unref(vf);
17
18
unrealize_vfs(dev, i);
18
PCIE DOE
19
return false;
19
M: Huai-Cheng Kuo <hchkuo@avery-design.com.tw>
20
}
20
diff --git a/docs/system/index.rst b/docs/system/index.rst
21
index XXXXXXX..XXXXXXX 100644
22
--- a/docs/system/index.rst
23
+++ b/docs/system/index.rst
24
@@ -XXX,XX +XXX,XX @@ or Hypervisor.Framework.
25
multi-process
26
confidential-guest-support
27
vm-templating
28
+ sriov
29
diff --git a/docs/system/sriov.rst b/docs/system/sriov.rst
30
new file mode 100644
31
index XXXXXXX..XXXXXXX
32
--- /dev/null
33
+++ b/docs/system/sriov.rst
34
@@ -XXX,XX +XXX,XX @@
35
+.. SPDX-License-Identifier: GPL-2.0-or-later
36
+
37
+Composable SR-IOV device
38
+========================
39
+
40
+SR-IOV (Single Root I/O Virtualization) is an optional extended capability of a
41
+PCI Express device. It allows a single physical function (PF) to appear as
42
+multiple virtual functions (VFs) for the main purpose of eliminating software
43
+overhead in I/O from virtual machines.
44
+
45
+There are devices with predefined SR-IOV configurations, but it is also possible
46
+to compose an SR-IOV device yourself. Composing an SR-IOV device is currently
47
+only supported by virtio-net-pci.
48
+
49
+Users can configure an SR-IOV-capable virtio-net device by adding
50
+virtio-net-pci functions to a bus. Below is a command line example:
51
+
52
+.. code-block:: shell
53
+
54
+ -netdev user,id=n -netdev user,id=o
55
+ -netdev user,id=p -netdev user,id=q
56
+ -device pcie-root-port,id=b
57
+ -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f
58
+ -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f
59
+ -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f
60
+ -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f
61
+
62
+The VFs specify the paired PF with ``sriov-pf`` property. The PF must be
63
+added after all VFs. It is the user's responsibility to ensure that VFs have
64
+function numbers larger than one of the PF, and that the function numbers
65
+have a consistent stride.
66
+
67
+You may also need to perform additional steps to activate the SR-IOV feature on
68
+your guest. For Linux, refer to [1]_.
69
+
70
+.. [1] https://docs.kernel.org/PCI/pci-iov-howto.html
21
71
22
--
72
--
23
2.43.0
73
2.48.1
diff view generated by jsdifflib
1
Disable SR-IOV VF devices by reusing code to power down PCI devices
2
instead of removing them when the guest requests to disable VFs. This
3
allows realizing devices and reporting VF realization errors at PF
4
realization time.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
1
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
---
2
---
8
docs/pcie_sriov.txt | 8 ++--
3
docs/system/sriov.rst | 3 ++-
9
include/hw/pci/pci.h | 2 +-
4
include/hw/pci/pcie_sriov.h | 7 +++++--
10
include/hw/pci/pci_device.h | 2 +-
5
hw/pci/pcie_sriov.c | 8 +++++++-
11
include/hw/pci/pcie_sriov.h | 6 +--
6
hw/virtio/virtio-pci.c | 16 ++++++++++------
12
hw/net/igb.c | 3 +-
7
4 files changed, 24 insertions(+), 10 deletions(-)
13
hw/nvme/ctrl.c | 3 +-
14
hw/pci/pci.c | 18 ++++----
15
hw/pci/pci_host.c | 4 +-
16
hw/pci/pcie.c | 4 +-
17
hw/pci/pcie_sriov.c | 105 +++++++++++++++++++++-----------------------
18
10 files changed, 79 insertions(+), 76 deletions(-)
19
8
20
diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
9
diff --git a/docs/system/sriov.rst b/docs/system/sriov.rst
21
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
22
--- a/docs/pcie_sriov.txt
11
--- a/docs/system/sriov.rst
23
+++ b/docs/pcie_sriov.txt
12
+++ b/docs/system/sriov.rst
24
@@ -XXX,XX +XXX,XX @@ setting up a BAR for a VF.
13
@@ -XXX,XX +XXX,XX @@ virtio-net-pci functions to a bus. Below is a command line example:
25
...
14
The VFs specify the paired PF with ``sriov-pf`` property. The PF must be
26
15
added after all VFs. It is the user's responsibility to ensure that VFs have
27
/* Add and initialize the SR/IOV capability */
16
function numbers larger than one of the PF, and that the function numbers
28
- pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
17
-have a consistent stride.
29
- vf_devid, initial_vfs, total_vfs,
18
+have a consistent stride. Both the PF and VFs are ARI-capable so you can have
30
- fun_offset, stride);
19
+up to 255 VFs.
31
+ if (!pcie_sriov_pf_init(d, 0x200, "your_virtual_dev",
20
32
+ vf_devid, initial_vfs, total_vfs,
21
You may also need to perform additional steps to activate the SR-IOV feature on
33
+ fun_offset, stride, errp)) {
22
your guest. For Linux, refer to [1]_.
34
+ return;
35
+ }
36
37
/* Set up individual VF BARs (parameters as for normal BARs) */
38
pcie_sriov_pf_init_vf_bar( ... )
39
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
40
index XXXXXXX..XXXXXXX 100644
41
--- a/include/hw/pci/pci.h
42
+++ b/include/hw/pci/pci.h
43
@@ -XXX,XX +XXX,XX @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
44
}
45
46
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
47
-void pci_set_power(PCIDevice *pci_dev, bool state);
48
+void pci_set_enabled(PCIDevice *pci_dev, bool state);
49
50
#endif
51
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
52
index XXXXXXX..XXXXXXX 100644
53
--- a/include/hw/pci/pci_device.h
54
+++ b/include/hw/pci/pci_device.h
55
@@ -XXX,XX +XXX,XX @@ typedef struct PCIReqIDCache PCIReqIDCache;
56
struct PCIDevice {
57
DeviceState qdev;
58
bool partially_hotplugged;
59
- bool has_power;
60
+ bool is_enabled;
61
62
/* PCI config space */
63
uint8_t *config;
64
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
23
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
65
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
66
--- a/include/hw/pci/pcie_sriov.h
25
--- a/include/hw/pci/pcie_sriov.h
67
+++ b/include/hw/pci/pcie_sriov.h
26
+++ b/include/hw/pci/pcie_sriov.h
68
@@ -XXX,XX +XXX,XX @@
27
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
69
struct PCIESriovPF {
28
70
uint16_t num_vfs; /* Number of virtual functions created */
29
/**
71
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
30
* pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created
72
- const char *vfname; /* Reference to the device type used for the VFs */
31
- * VFs.
73
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
32
+ * VFs, adding ARI to PF
74
};
33
* @dev: A PCIe device being realized.
75
34
* @offset: The offset of the SR-IOV capability.
76
@@ -XXX,XX +XXX,XX @@ struct PCIESriovVF {
35
* @errp: pointer to Error*, to store an error if it happens.
77
uint16_t vf_number; /* Logical VF number of this function */
36
*
78
};
37
- * Return: The size of added capability. 0 if the user did not create VFs.
79
38
+ * Initializes a PF with user-created VFs, adding the ARI extended capability to
80
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
39
+ * the PF. The VFs should call pcie_ari_init() to form an ARI device.
81
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
40
+ *
82
const char *vfname, uint16_t vf_dev_id,
41
+ * Return: The size of added capabilities. 0 if the user did not create VFs.
83
uint16_t init_vfs, uint16_t total_vfs,
42
* -1 if failed.
84
- uint16_t vf_offset, uint16_t vf_stride);
43
*/
85
+ uint16_t vf_offset, uint16_t vf_stride,
44
int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
86
+ Error **errp);
87
void pcie_sriov_pf_exit(PCIDevice *dev);
88
89
/* Set up a VF bar in the SR/IOV bar area */
90
diff --git a/hw/net/igb.c b/hw/net/igb.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/hw/net/igb.c
93
+++ b/hw/net/igb.c
94
@@ -XXX,XX +XXX,XX @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
95
96
pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
97
IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
98
- IGB_VF_OFFSET, IGB_VF_STRIDE);
99
+ IGB_VF_OFFSET, IGB_VF_STRIDE,
100
+ &error_abort);
101
102
pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX,
103
PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
104
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
105
index XXXXXXX..XXXXXXX 100644
106
--- a/hw/nvme/ctrl.c
107
+++ b/hw/nvme/ctrl.c
108
@@ -XXX,XX +XXX,XX @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
109
110
pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
111
n->params.sriov_max_vfs, n->params.sriov_max_vfs,
112
- NVME_VF_OFFSET, NVME_VF_STRIDE);
113
+ NVME_VF_OFFSET, NVME_VF_STRIDE,
114
+ &error_abort);
115
116
pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
117
PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size);
118
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/hw/pci/pci.c
121
+++ b/hw/pci/pci.c
122
@@ -XXX,XX +XXX,XX @@ static void pci_update_mappings(PCIDevice *d)
123
continue;
124
125
new_addr = pci_bar_address(d, i, r->type, r->size);
126
- if (!d->has_power) {
127
+ if (!d->is_enabled) {
128
new_addr = PCI_BAR_UNMAPPED;
129
}
130
131
@@ -XXX,XX +XXX,XX @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
132
pci_update_irq_disabled(d, was_irq_disabled);
133
memory_region_set_enabled(&d->bus_master_enable_region,
134
(pci_get_word(d->config + PCI_COMMAND)
135
- & PCI_COMMAND_MASTER) && d->has_power);
136
+ & PCI_COMMAND_MASTER) && d->is_enabled);
137
}
138
139
msi_write_config(d, addr, val_in, l);
140
@@ -XXX,XX +XXX,XX @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
141
return;
142
}
143
144
- pci_set_power(pci_dev, true);
145
+ if (!pci_is_vf(pci_dev)) {
146
+ pci_set_enabled(pci_dev, true);
147
+ }
148
149
pci_dev->msi_trigger = pci_msi_trigger;
150
}
151
@@ -XXX,XX +XXX,XX @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
152
return msg;
153
}
154
155
-void pci_set_power(PCIDevice *d, bool state)
156
+void pci_set_enabled(PCIDevice *d, bool state)
157
{
158
- if (d->has_power == state) {
159
+ if (d->is_enabled == state) {
160
return;
161
}
162
163
- d->has_power = state;
164
+ d->is_enabled = state;
165
pci_update_mappings(d);
166
memory_region_set_enabled(&d->bus_master_enable_region,
167
(pci_get_word(d->config + PCI_COMMAND)
168
- & PCI_COMMAND_MASTER) && d->has_power);
169
- if (!d->has_power) {
170
+ & PCI_COMMAND_MASTER) && d->is_enabled);
171
+ if (!d->is_enabled) {
172
pci_device_reset(d);
173
}
174
}
175
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
176
index XXXXXXX..XXXXXXX 100644
177
--- a/hw/pci/pci_host.c
178
+++ b/hw/pci/pci_host.c
179
@@ -XXX,XX +XXX,XX @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
180
* allowing direct removal of unexposed functions.
181
*/
182
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
183
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
184
+ !pci_dev->is_enabled || is_pci_dev_ejected(pci_dev)) {
185
return;
186
}
187
188
@@ -XXX,XX +XXX,XX @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
189
* allowing direct removal of unexposed functions.
190
*/
191
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
192
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
193
+ !pci_dev->is_enabled || is_pci_dev_ejected(pci_dev)) {
194
return ~0x0;
195
}
196
197
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
198
index XXXXXXX..XXXXXXX 100644
199
--- a/hw/pci/pcie.c
200
+++ b/hw/pci/pcie.c
201
@@ -XXX,XX +XXX,XX @@ static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque)
202
{
203
bool *power = opaque;
204
205
- pci_set_power(dev, *power);
206
+ if (!pci_is_vf(dev)) {
207
+ pci_set_enabled(dev, *power);
208
+ }
209
}
210
211
static void pcie_cap_update_power(PCIDevice *hotplug_dev)
212
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
45
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
213
index XXXXXXX..XXXXXXX 100644
46
index XXXXXXX..XXXXXXX 100644
214
--- a/hw/pci/pcie_sriov.c
47
--- a/hw/pci/pcie_sriov.c
215
+++ b/hw/pci/pcie_sriov.c
48
+++ b/hw/pci/pcie_sriov.c
216
@@ -XXX,XX +XXX,XX @@
49
@@ -XXX,XX +XXX,XX @@ int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
217
#include "qapi/error.h"
50
PCIDevice **vfs;
218
#include "trace.h"
51
BusState *bus = qdev_get_parent_bus(DEVICE(dev));
219
52
uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
220
-static PCIDevice *register_vf(PCIDevice *pf, int devfn,
53
+ uint16_t size = PCI_EXT_CAP_SRIOV_SIZEOF;
221
- const char *name, uint16_t vf_num);
54
uint16_t vf_dev_id;
222
-static void unregister_vfs(PCIDevice *dev);
55
uint16_t vf_offset;
223
+static void unrealize_vfs(PCIDevice *dev, uint16_t total_vfs)
56
uint16_t vf_stride;
224
+{
57
@@ -XXX,XX +XXX,XX @@ int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
225
+ for (uint16_t i = 0; i < total_vfs; i++) {
58
return -1;
226
+ Error *err = NULL;
59
}
227
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
60
228
+ if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
61
+ if (!pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI)) {
229
+ error_reportf_err(err, "Failed to unplug: ");
62
+ pcie_ari_init(dev, offset + size);
230
+ }
63
+ size += PCI_ARI_SIZEOF;
231
+ object_unparent(OBJECT(vf));
232
+ object_unref(OBJECT(vf));
233
+ }
234
+ g_free(dev->exp.sriov_pf.vf);
235
+ dev->exp.sriov_pf.vf = NULL;
236
+}
237
238
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
239
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
240
const char *vfname, uint16_t vf_dev_id,
241
uint16_t init_vfs, uint16_t total_vfs,
242
- uint16_t vf_offset, uint16_t vf_stride)
243
+ uint16_t vf_offset, uint16_t vf_stride,
244
+ Error **errp)
245
{
246
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
247
+ int32_t devfn = dev->devfn + vf_offset;
248
uint8_t *cfg = dev->config + offset;
249
uint8_t *wmask;
250
251
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
252
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
253
dev->exp.sriov_cap = offset;
254
dev->exp.sriov_pf.num_vfs = 0;
255
- dev->exp.sriov_pf.vfname = g_strdup(vfname);
256
dev->exp.sriov_pf.vf = NULL;
257
258
pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
259
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
260
pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
261
262
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
263
+
264
+ dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
265
+ assert(dev->exp.sriov_pf.vf);
266
+
267
+ for (uint16_t i = 0; i < total_vfs; i++) {
268
+ PCIDevice *vf = pci_new(devfn, vfname);
269
+ vf->exp.sriov_vf.pf = dev;
270
+ vf->exp.sriov_vf.vf_number = i;
271
+
272
+ if (!qdev_realize(&vf->qdev, bus, errp)) {
273
+ unrealize_vfs(dev, i);
274
+ return false;
275
+ }
276
+
277
+ /* set vid/did according to sr/iov spec - they are not used */
278
+ pci_config_set_vendor_id(vf->config, 0xffff);
279
+ pci_config_set_device_id(vf->config, 0xffff);
280
+
281
+ dev->exp.sriov_pf.vf[i] = vf;
282
+ devfn += vf_stride;
283
+ }
64
+ }
284
+
65
+
285
+ return true;
66
for (i = 0; i < pf->len; i++) {
67
vfs[i]->exp.sriov_vf.pf = dev;
68
vfs[i]->exp.sriov_vf.vf_number = i;
69
@@ -XXX,XX +XXX,XX @@ int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
70
}
71
}
72
73
- return PCI_EXT_CAP_SRIOV_SIZEOF;
74
+ return size;
286
}
75
}
287
76
288
void pcie_sriov_pf_exit(PCIDevice *dev)
77
bool pcie_sriov_register_device(PCIDevice *dev, Error **errp)
289
{
78
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
290
- unregister_vfs(dev);
79
index XXXXXXX..XXXXXXX 100644
291
- g_free((char *)dev->exp.sriov_pf.vfname);
80
--- a/hw/virtio/virtio-pci.c
292
- dev->exp.sriov_pf.vfname = NULL;
81
+++ b/hw/virtio/virtio-pci.c
293
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
82
@@ -XXX,XX +XXX,XX @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
294
+
83
PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
295
+ unrealize_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
84
}
296
}
85
297
86
- res = pcie_sriov_pf_init_from_user_created_vfs(&proxy->pci_dev,
298
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
87
- proxy->last_pcie_cap_offset,
299
@@ -XXX,XX +XXX,XX @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
88
- errp);
89
- if (res > 0) {
90
- proxy->last_pcie_cap_offset += res;
91
- virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
92
+ if (pci_is_vf(&proxy->pci_dev)) {
93
+ pcie_ari_init(&proxy->pci_dev, proxy->last_pcie_cap_offset);
94
+ proxy->last_pcie_cap_offset += PCI_ARI_SIZEOF;
95
+ } else {
96
+ res = pcie_sriov_pf_init_from_user_created_vfs(
97
+ &proxy->pci_dev, proxy->last_pcie_cap_offset, errp);
98
+ if (res > 0) {
99
+ proxy->last_pcie_cap_offset += res;
100
+ virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
101
+ }
300
}
102
}
301
}
103
}
302
104
303
-static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
304
- uint16_t vf_num)
305
-{
306
- PCIDevice *dev = pci_new(devfn, name);
307
- dev->exp.sriov_vf.pf = pf;
308
- dev->exp.sriov_vf.vf_number = vf_num;
309
- PCIBus *bus = pci_get_bus(pf);
310
- Error *local_err = NULL;
311
-
312
- qdev_realize(&dev->qdev, &bus->qbus, &local_err);
313
- if (local_err) {
314
- error_report_err(local_err);
315
- return NULL;
316
- }
317
-
318
- /* set vid/did according to sr/iov spec - they are not used */
319
- pci_config_set_vendor_id(dev->config, 0xffff);
320
- pci_config_set_device_id(dev->config, 0xffff);
321
-
322
- return dev;
323
-}
324
-
325
static void register_vfs(PCIDevice *dev)
326
{
327
uint16_t num_vfs;
328
uint16_t i;
329
uint16_t sriov_cap = dev->exp.sriov_cap;
330
- uint16_t vf_offset =
331
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
332
- uint16_t vf_stride =
333
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
334
- int32_t devfn = dev->devfn + vf_offset;
335
336
assert(sriov_cap > 0);
337
num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
338
-
339
- dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
340
- assert(dev->exp.sriov_pf.vf);
341
+ if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
342
+ return;
343
+ }
344
345
trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
346
PCI_FUNC(dev->devfn), num_vfs);
347
for (i = 0; i < num_vfs; i++) {
348
- dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
349
- dev->exp.sriov_pf.vfname, i);
350
- if (!dev->exp.sriov_pf.vf[i]) {
351
- num_vfs = i;
352
- break;
353
- }
354
- devfn += vf_stride;
355
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
356
}
357
dev->exp.sriov_pf.num_vfs = num_vfs;
358
}
359
@@ -XXX,XX +XXX,XX @@ static void unregister_vfs(PCIDevice *dev)
360
trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
361
PCI_FUNC(dev->devfn), num_vfs);
362
for (i = 0; i < num_vfs; i++) {
363
- Error *err = NULL;
364
- PCIDevice *vf = dev->exp.sriov_pf.vf[i];
365
- if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
366
- error_reportf_err(err, "Failed to unplug: ");
367
- }
368
- object_unparent(OBJECT(vf));
369
- object_unref(OBJECT(vf));
370
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
371
}
372
- g_free(dev->exp.sriov_pf.vf);
373
- dev->exp.sriov_pf.vf = NULL;
374
dev->exp.sriov_pf.num_vfs = 0;
375
pci_set_word(dev->config + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0);
376
}
377
105
378
--
106
--
379
2.43.0
107
2.48.1
diff view generated by jsdifflib