01: allocate one device table per PCI segment 02: allow callers to request allocate_buffer() to skip its memset() 03: pre-fill all DTEs right after table allocation Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Having a single device table for all segments can't possibly be right. (Even worse, the symbol wasn't static despite being used in just one source file.) Attach the device tables to their respective IVRS mapping ones. Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Paul Durrant <paul.durrant@citrix.com> --- v6: New. --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -39,7 +39,6 @@ unsigned int __read_mostly ivrs_bdf_entr u8 __read_mostly ivhd_type; static struct radix_tree_root ivrs_maps; LIST_HEAD_READ_MOSTLY(amd_iommu_head); -struct table_struct device_table; bool_t iommuv2_enabled; static bool iommu_has_ht_flag(struct amd_iommu *iommu, u8 mask) @@ -989,6 +988,12 @@ static void disable_iommu(struct amd_iom spin_unlock_irqrestore(&iommu->lock, flags); } +static unsigned int __init dt_alloc_size(void) +{ + return PAGE_SIZE << get_order_from_bytes(ivrs_bdf_entries * + IOMMU_DEV_TABLE_ENTRY_SIZE); +} + static void __init deallocate_buffer(void *buf, uint32_t sz) { int order = 0; @@ -999,12 +1004,6 @@ static void __init deallocate_buffer(voi } } -static void __init deallocate_device_table(struct table_struct *table) -{ - deallocate_buffer(table->buffer, table->alloc_size); - table->buffer = NULL; -} - static void __init deallocate_ring_buffer(struct ring_buffer *ring_buf) { deallocate_buffer(ring_buf->buffer, ring_buf->alloc_size); @@ -1068,8 +1067,29 @@ static void * __init allocate_ppr_log(st IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); } +/* + * Within ivrs_mappings[] we allocate an extra array element to store + * - segment number, + * - device table. + */ +#define IVRS_MAPPINGS_SEG(m) (m)[ivrs_bdf_entries].dte_requestor_id +#define IVRS_MAPPINGS_DEVTAB(m) (m)[ivrs_bdf_entries].intremap_table + +static void __init free_ivrs_mapping(void *ptr) +{ + const struct ivrs_mappings *ivrs_mappings = ptr; + + if ( IVRS_MAPPINGS_DEVTAB(ivrs_mappings) ) + deallocate_buffer(IVRS_MAPPINGS_DEVTAB(ivrs_mappings), + dt_alloc_size()); + + xfree(ptr); +} + static int __init amd_iommu_init_one(struct amd_iommu *iommu, bool intr) { + const struct ivrs_mappings *ivrs_mappings; + if ( allocate_cmd_buffer(iommu) == NULL ) goto error_out; @@ -1082,13 +1102,15 @@ static int __init amd_iommu_init_one(str if ( intr && !set_iommu_interrupt_handler(iommu) ) goto error_out; - /* To make sure that device_table.buffer has been successfully allocated */ - if ( device_table.buffer == NULL ) + /* Make sure that the device table has been successfully allocated. */ + ivrs_mappings = get_ivrs_mappings(iommu->seg); + if ( !IVRS_MAPPINGS_DEVTAB(ivrs_mappings) ) goto error_out; - iommu->dev_table.alloc_size = device_table.alloc_size; - iommu->dev_table.entries = device_table.entries; - iommu->dev_table.buffer = device_table.buffer; + iommu->dev_table.alloc_size = dt_alloc_size(); + iommu->dev_table.entries = iommu->dev_table.alloc_size / + IOMMU_DEV_TABLE_ENTRY_SIZE; + iommu->dev_table.buffer = IVRS_MAPPINGS_DEVTAB(ivrs_mappings); enable_iommu(iommu); printk("AMD-Vi: IOMMU %d Enabled.\n", nr_amd_iommus ); @@ -1135,11 +1157,8 @@ static void __init amd_iommu_init_cleanu xfree(iommu); } - /* free device table */ - deallocate_device_table(&device_table); - - /* free ivrs_mappings[] */ - radix_tree_destroy(&ivrs_maps, xfree); + /* Free ivrs_mappings[] and their device tables. */ + radix_tree_destroy(&ivrs_maps, free_ivrs_mapping); iommu_enabled = 0; iommu_hwdom_passthrough = false; @@ -1147,12 +1166,6 @@ static void __init amd_iommu_init_cleanu iommuv2_enabled = 0; } -/* - * We allocate an extra array element to store the segment number - * (and in the future perhaps other global information). - */ -#define IVRS_MAPPINGS_SEG(m) m[ivrs_bdf_entries].dte_requestor_id - struct ivrs_mappings *get_ivrs_mappings(u16 seg) { return radix_tree_lookup(&ivrs_maps, seg); @@ -1235,24 +1248,18 @@ static int __init alloc_ivrs_mappings(u1 static int __init amd_iommu_setup_device_table( u16 seg, struct ivrs_mappings *ivrs_mappings) { + struct amd_iommu_dte *dt = IVRS_MAPPINGS_DEVTAB(ivrs_mappings); unsigned int bdf; BUG_ON( (ivrs_bdf_entries == 0) ); - if ( !device_table.buffer ) + if ( !dt ) { /* allocate 'device table' on a 4K boundary */ - device_table.alloc_size = PAGE_SIZE << - get_order_from_bytes( - PAGE_ALIGN(ivrs_bdf_entries * - IOMMU_DEV_TABLE_ENTRY_SIZE)); - device_table.entries = device_table.alloc_size / - IOMMU_DEV_TABLE_ENTRY_SIZE; - - device_table.buffer = allocate_buffer(device_table.alloc_size, - "Device Table"); + dt = IVRS_MAPPINGS_DEVTAB(ivrs_mappings) = + allocate_buffer(dt_alloc_size(), "Device Table"); } - if ( !device_table.buffer ) + if ( !dt ) return -ENOMEM; /* Add device table entries */ @@ -1260,12 +1267,10 @@ static int __init amd_iommu_setup_device { if ( ivrs_mappings[bdf].valid ) { - void *dte; const struct pci_dev *pdev = NULL; /* add device table entry */ - dte = device_table.buffer + (bdf * IOMMU_DEV_TABLE_ENTRY_SIZE); - iommu_dte_add_device_entry(dte, &ivrs_mappings[bdf]); + iommu_dte_add_device_entry(&dt[bdf], &ivrs_mappings[bdf]); if ( iommu_intremap && ivrs_mappings[bdf].dte_requestor_id == bdf && @@ -1308,7 +1313,7 @@ static int __init amd_iommu_setup_device } amd_iommu_set_intremap_table( - dte, ivrs_mappings[bdf].intremap_table, + &dt[bdf], ivrs_mappings[bdf].intremap_table, ivrs_mappings[bdf].iommu, iommu_intremap); } } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 26/09/2019 15:28, Jan Beulich wrote: > @@ -1068,8 +1067,29 @@ static void * __init allocate_ppr_log(st > IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); > } > > +/* > + * Within ivrs_mappings[] we allocate an extra array element to store > + * - segment number, > + * - device table. > + */ > +#define IVRS_MAPPINGS_SEG(m) (m)[ivrs_bdf_entries].dte_requestor_id > +#define IVRS_MAPPINGS_DEVTAB(m) (m)[ivrs_bdf_entries].intremap_table > + > +static void __init free_ivrs_mapping(void *ptr) > +{ > + const struct ivrs_mappings *ivrs_mappings = ptr; How absolutely certain are we that ptr will never be NULL? It might be better to rename this to radix_tree_free_ivrs_mappings() to make it clear who calls it, and also provide a hint as to why the parameter is void. > + > + if ( IVRS_MAPPINGS_DEVTAB(ivrs_mappings) ) > + deallocate_buffer(IVRS_MAPPINGS_DEVTAB(ivrs_mappings), > + dt_alloc_size()); > + > + xfree(ptr); > +} > + > static int __init amd_iommu_init_one(struct amd_iommu *iommu, bool intr) > { > + const struct ivrs_mappings *ivrs_mappings; > + > if ( allocate_cmd_buffer(iommu) == NULL ) > goto error_out; > > @@ -1082,13 +1102,15 @@ static int __init amd_iommu_init_one(str > if ( intr && !set_iommu_interrupt_handler(iommu) ) > goto error_out; > > - /* To make sure that device_table.buffer has been successfully allocated */ > - if ( device_table.buffer == NULL ) > + /* Make sure that the device table has been successfully allocated. */ > + ivrs_mappings = get_ivrs_mappings(iommu->seg); > + if ( !IVRS_MAPPINGS_DEVTAB(ivrs_mappings) ) This is still going to crash with a NULL pointer deference in the case described by the comment. (Then again, it may not crash, and hit userspace at the 64M mark.) You absolutely need to check ivrs_mappings being non NULL before using IVRS_MAPPINGS_DEVTAB(), or perhaps roll the check into the macro. ~Andrew _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 04.10.2019 15:18, Andrew Cooper wrote: > On 26/09/2019 15:28, Jan Beulich wrote: >> @@ -1068,8 +1067,29 @@ static void * __init allocate_ppr_log(st >> IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); >> } >> >> +/* >> + * Within ivrs_mappings[] we allocate an extra array element to store >> + * - segment number, >> + * - device table. >> + */ >> +#define IVRS_MAPPINGS_SEG(m) (m)[ivrs_bdf_entries].dte_requestor_id >> +#define IVRS_MAPPINGS_DEVTAB(m) (m)[ivrs_bdf_entries].intremap_table >> + >> +static void __init free_ivrs_mapping(void *ptr) >> +{ >> + const struct ivrs_mappings *ivrs_mappings = ptr; > > How absolutely certain are we that ptr will never be NULL? As certain as we can be by never installing a NULL pointer into the radix tree, and by observing that neither radix_tree_destroy() nor radix_tree_node_destroy() would ever call the callback for a NULL node. > It might be better to rename this to radix_tree_free_ivrs_mappings() to > make it clear who calls it, and also provide a hint as to why the > parameter is void. I'm not happy to add a radix_tree_ prefix; I'd be fine with adding e.g. do_ instead, in case this provides enough of a hint for your taste that this is actually a callback function. >> @@ -1082,13 +1102,15 @@ static int __init amd_iommu_init_one(str >> if ( intr && !set_iommu_interrupt_handler(iommu) ) >> goto error_out; >> >> - /* To make sure that device_table.buffer has been successfully allocated */ >> - if ( device_table.buffer == NULL ) >> + /* Make sure that the device table has been successfully allocated. */ >> + ivrs_mappings = get_ivrs_mappings(iommu->seg); >> + if ( !IVRS_MAPPINGS_DEVTAB(ivrs_mappings) ) > > This is still going to crash with a NULL pointer deference in the case > described by the comment. (Then again, it may not crash, and hit > userspace at the 64M mark.) > > You absolutely need to check ivrs_mappings being non NULL before using > IVRS_MAPPINGS_DEVTAB(), or perhaps roll the check into the macro. I can only repeat what I've said in reply to your respective v6 remark: We won't come here for an IOMMU which didn't have its ivrs_mappings successfully allocated. You also seem to be mixing up this and the device table allocation - the comment refers to the latter, while your NULL deref concern is about the former. (If you go through the code you'll find that we have numerous other places utilizing the fact that get_ivrs_mappings() can't fail in cases like the one above.) Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 04/10/2019 14:30, Jan Beulich wrote: > On 04.10.2019 15:18, Andrew Cooper wrote: >> On 26/09/2019 15:28, Jan Beulich wrote: >>> @@ -1068,8 +1067,29 @@ static void * __init allocate_ppr_log(st >>> IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); >>> } >>> >>> +/* >>> + * Within ivrs_mappings[] we allocate an extra array element to store >>> + * - segment number, >>> + * - device table. >>> + */ >>> +#define IVRS_MAPPINGS_SEG(m) (m)[ivrs_bdf_entries].dte_requestor_id >>> +#define IVRS_MAPPINGS_DEVTAB(m) (m)[ivrs_bdf_entries].intremap_table >>> + >>> +static void __init free_ivrs_mapping(void *ptr) >>> +{ >>> + const struct ivrs_mappings *ivrs_mappings = ptr; >> How absolutely certain are we that ptr will never be NULL? > As certain as we can be by never installing a NULL pointer into the > radix tree, and by observing that neither radix_tree_destroy() nor > radix_tree_node_destroy() would ever call the callback for a NULL > node. > >> It might be better to rename this to radix_tree_free_ivrs_mappings() to >> make it clear who calls it, and also provide a hint as to why the >> parameter is void. > I'm not happy to add a radix_tree_ prefix; I'd be fine with adding > e.g. do_ instead, in case this provides enough of a hint for your > taste that this is actually a callback function. How about a _callback() suffix? I'm looking to make it obvious that you code shouldn't simply call it directly. A "do_" prefix, in particular, provides no useful information to the reader. >>> @@ -1082,13 +1102,15 @@ static int __init amd_iommu_init_one(str >>> if ( intr && !set_iommu_interrupt_handler(iommu) ) >>> goto error_out; >>> >>> - /* To make sure that device_table.buffer has been successfully allocated */ >>> - if ( device_table.buffer == NULL ) >>> + /* Make sure that the device table has been successfully allocated. */ >>> + ivrs_mappings = get_ivrs_mappings(iommu->seg); >>> + if ( !IVRS_MAPPINGS_DEVTAB(ivrs_mappings) ) >> This is still going to crash with a NULL pointer deference in the case >> described by the comment. (Then again, it may not crash, and hit >> userspace at the 64M mark.) >> >> You absolutely need to check ivrs_mappings being non NULL before using >> IVRS_MAPPINGS_DEVTAB(), or perhaps roll the check into the macro. > I can only repeat what I've said in reply to your respective v6 remark: > We won't come here for an IOMMU which didn't have its ivrs_mappings > successfully allocated. Right, but to a first approximation, I don't care. I can picture exactly what Coverity will say about this, in that radix_tree_lookup() may return NULL, and it is used here unconditionally where in most other contexts, the pointer gets checked before use. > You also seem to be mixing up this and the > device table allocation - the comment refers to the latter, while your > NULL deref concern is about the former. (If you go through the code > you'll find that we have numerous other places utilizing the fact that > get_ivrs_mappings() can't fail in cases like the one above.) The existing code being terrible isn't a reasonable justification for adding to the mess. It appears we have: 1x assert not null 14x blind use 3x check which isn't a great statement about the quality of the code. Seeing as we are pushed to the deadline for 4.13, begrudgingly A-by (preferably with the _callback() suffix), but I'm still not happy with the overall quality of the code. At least it isn't getting substantially worse as a consequence here. ~Andrew _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 04.10.2019 19:28, Andrew Cooper wrote: > On 04/10/2019 14:30, Jan Beulich wrote: >> On 04.10.2019 15:18, Andrew Cooper wrote: >>> On 26/09/2019 15:28, Jan Beulich wrote: >>>> @@ -1068,8 +1067,29 @@ static void * __init allocate_ppr_log(st >>>> IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); >>>> } >>>> >>>> +/* >>>> + * Within ivrs_mappings[] we allocate an extra array element to store >>>> + * - segment number, >>>> + * - device table. >>>> + */ >>>> +#define IVRS_MAPPINGS_SEG(m) (m)[ivrs_bdf_entries].dte_requestor_id >>>> +#define IVRS_MAPPINGS_DEVTAB(m) (m)[ivrs_bdf_entries].intremap_table >>>> + >>>> +static void __init free_ivrs_mapping(void *ptr) >>>> +{ >>>> + const struct ivrs_mappings *ivrs_mappings = ptr; >>> How absolutely certain are we that ptr will never be NULL? >> As certain as we can be by never installing a NULL pointer into the >> radix tree, and by observing that neither radix_tree_destroy() nor >> radix_tree_node_destroy() would ever call the callback for a NULL >> node. >> >>> It might be better to rename this to radix_tree_free_ivrs_mappings() to >>> make it clear who calls it, and also provide a hint as to why the >>> parameter is void. >> I'm not happy to add a radix_tree_ prefix; I'd be fine with adding >> e.g. do_ instead, in case this provides enough of a hint for your >> taste that this is actually a callback function. > > How about a _callback() suffix? I'm looking to make it obvious that you > code shouldn't simply call it directly. As indicated I've done this. >>>> @@ -1082,13 +1102,15 @@ static int __init amd_iommu_init_one(str >>>> if ( intr && !set_iommu_interrupt_handler(iommu) ) >>>> goto error_out; >>>> >>>> - /* To make sure that device_table.buffer has been successfully allocated */ >>>> - if ( device_table.buffer == NULL ) >>>> + /* Make sure that the device table has been successfully allocated. */ >>>> + ivrs_mappings = get_ivrs_mappings(iommu->seg); >>>> + if ( !IVRS_MAPPINGS_DEVTAB(ivrs_mappings) ) >>> This is still going to crash with a NULL pointer deference in the case >>> described by the comment. (Then again, it may not crash, and hit >>> userspace at the 64M mark.) >>> >>> You absolutely need to check ivrs_mappings being non NULL before using >>> IVRS_MAPPINGS_DEVTAB(), or perhaps roll the check into the macro. >> I can only repeat what I've said in reply to your respective v6 remark: >> We won't come here for an IOMMU which didn't have its ivrs_mappings >> successfully allocated. > > Right, but to a first approximation, I don't care. I can picture > exactly what Coverity will say about this, in that radix_tree_lookup() > may return NULL, and it is used here unconditionally where in most other > contexts, the pointer gets checked before use. Just one more word on top of the prior discussion: Would you also insist on an explicit check here (when ... >> You also seem to be mixing up this and the >> device table allocation - the comment refers to the latter, while your >> NULL deref concern is about the former. (If you go through the code >> you'll find that we have numerous other places utilizing the fact that >> get_ivrs_mappings() can't fail in cases like the one above.) > > The existing code being terrible isn't a reasonable justification for > adding to the mess. > > It appears we have: > > 1x assert not null > 14x blind use > 3x check ... none exists on basically all similar paths elsewhere) if the IVRS mappings array hung off of struct amd_iommu as a plain pointer, rather than being taken from a guaranteed populated (by this point in time) radix tree slot? > Seeing as we are pushed to the deadline for 4.13, begrudgingly A-by > (preferably with the _callback() suffix), but I'm still not happy with > the overall quality of the code. At least it isn't getting > substantially worse as a consequence here. Juergen, since I didn't hear back from Andrew, would you be willing to give a release ack on this series, as at this point I don't see any good alternative to using the "begrudgingly A-by" give above? Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 10.10.19 07:57, Jan Beulich wrote: > On 04.10.2019 19:28, Andrew Cooper wrote: >> On 04/10/2019 14:30, Jan Beulich wrote: >>> On 04.10.2019 15:18, Andrew Cooper wrote: >>>> On 26/09/2019 15:28, Jan Beulich wrote: >>>>> @@ -1068,8 +1067,29 @@ static void * __init allocate_ppr_log(st >>>>> IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); >>>>> } >>>>> >>>>> +/* >>>>> + * Within ivrs_mappings[] we allocate an extra array element to store >>>>> + * - segment number, >>>>> + * - device table. >>>>> + */ >>>>> +#define IVRS_MAPPINGS_SEG(m) (m)[ivrs_bdf_entries].dte_requestor_id >>>>> +#define IVRS_MAPPINGS_DEVTAB(m) (m)[ivrs_bdf_entries].intremap_table >>>>> + >>>>> +static void __init free_ivrs_mapping(void *ptr) >>>>> +{ >>>>> + const struct ivrs_mappings *ivrs_mappings = ptr; >>>> How absolutely certain are we that ptr will never be NULL? >>> As certain as we can be by never installing a NULL pointer into the >>> radix tree, and by observing that neither radix_tree_destroy() nor >>> radix_tree_node_destroy() would ever call the callback for a NULL >>> node. >>> >>>> It might be better to rename this to radix_tree_free_ivrs_mappings() to >>>> make it clear who calls it, and also provide a hint as to why the >>>> parameter is void. >>> I'm not happy to add a radix_tree_ prefix; I'd be fine with adding >>> e.g. do_ instead, in case this provides enough of a hint for your >>> taste that this is actually a callback function. >> >> How about a _callback() suffix? I'm looking to make it obvious that you >> code shouldn't simply call it directly. > > As indicated I've done this. > >>>>> @@ -1082,13 +1102,15 @@ static int __init amd_iommu_init_one(str >>>>> if ( intr && !set_iommu_interrupt_handler(iommu) ) >>>>> goto error_out; >>>>> >>>>> - /* To make sure that device_table.buffer has been successfully allocated */ >>>>> - if ( device_table.buffer == NULL ) >>>>> + /* Make sure that the device table has been successfully allocated. */ >>>>> + ivrs_mappings = get_ivrs_mappings(iommu->seg); >>>>> + if ( !IVRS_MAPPINGS_DEVTAB(ivrs_mappings) ) >>>> This is still going to crash with a NULL pointer deference in the case >>>> described by the comment. (Then again, it may not crash, and hit >>>> userspace at the 64M mark.) >>>> >>>> You absolutely need to check ivrs_mappings being non NULL before using >>>> IVRS_MAPPINGS_DEVTAB(), or perhaps roll the check into the macro. >>> I can only repeat what I've said in reply to your respective v6 remark: >>> We won't come here for an IOMMU which didn't have its ivrs_mappings >>> successfully allocated. >> >> Right, but to a first approximation, I don't care. I can picture >> exactly what Coverity will say about this, in that radix_tree_lookup() >> may return NULL, and it is used here unconditionally where in most other >> contexts, the pointer gets checked before use. > > Just one more word on top of the prior discussion: Would you also > insist on an explicit check here (when ... > >>> You also seem to be mixing up this and the >>> device table allocation - the comment refers to the latter, while your >>> NULL deref concern is about the former. (If you go through the code >>> you'll find that we have numerous other places utilizing the fact that >>> get_ivrs_mappings() can't fail in cases like the one above.) >> >> The existing code being terrible isn't a reasonable justification for >> adding to the mess. >> >> It appears we have: >> >> 1x assert not null >> 14x blind use >> 3x check > > ... none exists on basically all similar paths elsewhere) if the > IVRS mappings array hung off of struct amd_iommu as a plain pointer, > rather than being taken from a guaranteed populated (by this point > in time) radix tree slot? > >> Seeing as we are pushed to the deadline for 4.13, begrudgingly A-by >> (preferably with the _callback() suffix), but I'm still not happy with >> the overall quality of the code. At least it isn't getting >> substantially worse as a consequence here. > > Juergen, since I didn't hear back from Andrew, would you be willing > to give a release ack on this series, as at this point I don't see > any good alternative to using the "begrudgingly A-by" give above? Release-acked-by: Juergen Gross <jgross@suse.com> Juergen _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 04.10.2019 19:28, Andrew Cooper wrote: > On 04/10/2019 14:30, Jan Beulich wrote: >> On 04.10.2019 15:18, Andrew Cooper wrote: >>> On 26/09/2019 15:28, Jan Beulich wrote: >>>> @@ -1068,8 +1067,29 @@ static void * __init allocate_ppr_log(st >>>> IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); >>>> } >>>> >>>> +/* >>>> + * Within ivrs_mappings[] we allocate an extra array element to store >>>> + * - segment number, >>>> + * - device table. >>>> + */ >>>> +#define IVRS_MAPPINGS_SEG(m) (m)[ivrs_bdf_entries].dte_requestor_id >>>> +#define IVRS_MAPPINGS_DEVTAB(m) (m)[ivrs_bdf_entries].intremap_table >>>> + >>>> +static void __init free_ivrs_mapping(void *ptr) >>>> +{ >>>> + const struct ivrs_mappings *ivrs_mappings = ptr; >>> How absolutely certain are we that ptr will never be NULL? >> As certain as we can be by never installing a NULL pointer into the >> radix tree, and by observing that neither radix_tree_destroy() nor >> radix_tree_node_destroy() would ever call the callback for a NULL >> node. >> >>> It might be better to rename this to radix_tree_free_ivrs_mappings() to >>> make it clear who calls it, and also provide a hint as to why the >>> parameter is void. >> I'm not happy to add a radix_tree_ prefix; I'd be fine with adding >> e.g. do_ instead, in case this provides enough of a hint for your >> taste that this is actually a callback function. > > How about a _callback() suffix? I'm looking to make it obvious that you > code shouldn't simply call it directly. Well, okay, done. > A "do_" prefix, in particular, provides no useful information to the reader. Depends, I guess: There are a couple of places where we already use such naming. People aware of this may make this implication. >>>> @@ -1082,13 +1102,15 @@ static int __init amd_iommu_init_one(str >>>> if ( intr && !set_iommu_interrupt_handler(iommu) ) >>>> goto error_out; >>>> >>>> - /* To make sure that device_table.buffer has been successfully allocated */ >>>> - if ( device_table.buffer == NULL ) >>>> + /* Make sure that the device table has been successfully allocated. */ >>>> + ivrs_mappings = get_ivrs_mappings(iommu->seg); >>>> + if ( !IVRS_MAPPINGS_DEVTAB(ivrs_mappings) ) >>> This is still going to crash with a NULL pointer deference in the case >>> described by the comment. (Then again, it may not crash, and hit >>> userspace at the 64M mark.) >>> >>> You absolutely need to check ivrs_mappings being non NULL before using >>> IVRS_MAPPINGS_DEVTAB(), or perhaps roll the check into the macro. >> I can only repeat what I've said in reply to your respective v6 remark: >> We won't come here for an IOMMU which didn't have its ivrs_mappings >> successfully allocated. > > Right, but to a first approximation, I don't care. I can picture > exactly what Coverity will say about this, in that radix_tree_lookup() > may return NULL, and it is used here unconditionally where in most other > contexts, the pointer gets checked before use. Except that, as per your stats below, it's not anywhere near "most". >> You also seem to be mixing up this and the >> device table allocation - the comment refers to the latter, while your >> NULL deref concern is about the former. (If you go through the code >> you'll find that we have numerous other places utilizing the fact that >> get_ivrs_mappings() can't fail in cases like the one above.) > > The existing code being terrible isn't a reasonable justification for > adding to the mess. > > It appears we have: > > 1x assert not null > 14x blind use > 3x check > > which isn't a great statement about the quality of the code. If any of the "blind" uses were indeed on a path where this could in theory be NULL, I'd agree. The patch we're discussing here definitely doesn't fall into this category. > Seeing as we are pushed to the deadline for 4.13, begrudgingly A-by > (preferably with the _callback() suffix), but I'm still not happy with > the overall quality of the code. At least it isn't getting > substantially worse as a consequence here. I appreciate the ack, but I think I'd prefer to not make use of it if at all possible under these conditions. Instead I'd like us to reach some common ground here. Seeing that we're past the deadline already, Jürgen's release ack will now be needed anyway. Jürgen - would you be fine with settling on this taking a few more days, and then still allow in this series? Or is trying to soon find a resolution here pointless as you'd rather not see this go in anymore? As to what (if anything) to change - I'd be fine with adding an assertion, but I don't think that would buy us much (considering non-debug builds). What I'm not happy about is adding checks just for the sake of doing so. Applying the underlying thinking of "don't trust ourselves" to the entire code base would imo result in severe crippling of the sources (nevertheless I agree that there are cases, when connections are less obvious, where adding extra checks is actually useful). As to the stats you provide and your implication on code quality: What's wrong with code e.g. utilizing the knowledge that once it holds a struct amd_iommu in its hands, it can rely on there being a respective IVRS mappings entry? The cases where the return value of get_ivrs_mappings() gets checked are - to determine whether the mapping needs allocating (alloc_ivrs_mappings()), - to determine whether there's an IOMMU for a device in the first place (find_iommu_for_device()), - redundant verification after an IOMMU has already been determined for a device (amd_iommu_add_device()). I.e. the first two are justified, and to arrange for a consistent code base the 3rd one should be considered to drop again (I think this is an instance I added recently, not having realized (yet) that the implication is being utilized everywhere else. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 07.10.19 12:03, Jan Beulich wrote: > I appreciate the ack, but I think I'd prefer to not make use of it > if at all possible under these conditions. Instead I'd like us to > reach some common ground here. Seeing that we're past the deadline > already, Jürgen's release ack will now be needed anyway. Jürgen - > would you be fine with settling on this taking a few more days, > and then still allow in this series? Or is trying to soon find a > resolution here pointless as you'd rather not see this go in > anymore? As it isn't a completely trivial patch series I'd like to know what the gain would be: is it just a "nice to have", does it solve a theoretical (not to be expected) problem, or do you think it will be needed to be backported if I say "no"? Juergen _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 07.10.2019 12:19, Jürgen Groß wrote: > On 07.10.19 12:03, Jan Beulich wrote: >> I appreciate the ack, but I think I'd prefer to not make use of it >> if at all possible under these conditions. Instead I'd like us to >> reach some common ground here. Seeing that we're past the deadline >> already, Jürgen's release ack will now be needed anyway. Jürgen - >> would you be fine with settling on this taking a few more days, >> and then still allow in this series? Or is trying to soon find a >> resolution here pointless as you'd rather not see this go in >> anymore? > > As it isn't a completely trivial patch series I'd like to know what > the gain would be: is it just a "nice to have", does it solve a > theoretical (not to be expected) problem, or do you think it will > be needed to be backported if I say "no"? The 3rd patch in this series is what is really wanted, to close a previously just theoretical but (I think) now in principle possible gap with device table initialization, potentially allowing untranslated DMA or interrupt requests to make it through when not so intended. If this was to be backported, I think I'd try re-basing patches 2 and 3 onto a tree without patch 1, but doing so for master doesn't look (to me) to be a very reasonable step, seeing that patch 1 had been there first. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 07.10.19 12:49, Jan Beulich wrote: > On 07.10.2019 12:19, Jürgen Groß wrote: >> On 07.10.19 12:03, Jan Beulich wrote: >>> I appreciate the ack, but I think I'd prefer to not make use of it >>> if at all possible under these conditions. Instead I'd like us to >>> reach some common ground here. Seeing that we're past the deadline >>> already, Jürgen's release ack will now be needed anyway. Jürgen - >>> would you be fine with settling on this taking a few more days, >>> and then still allow in this series? Or is trying to soon find a >>> resolution here pointless as you'd rather not see this go in >>> anymore? >> >> As it isn't a completely trivial patch series I'd like to know what >> the gain would be: is it just a "nice to have", does it solve a >> theoretical (not to be expected) problem, or do you think it will >> be needed to be backported if I say "no"? > > The 3rd patch in this series is what is really wanted, to close > a previously just theoretical but (I think) now in principle > possible gap with device table initialization, potentially > allowing untranslated DMA or interrupt requests to make it > through when not so intended. If this was to be backported, I > think I'd try re-basing patches 2 and 3 onto a tree without > patch 1, but doing so for master doesn't look (to me) to be a > very reasonable step, seeing that patch 1 had been there first. Okay, thanks for the explanation. Needing some more days is fine for me, so trying to find a solution soon absolutely makes sense. :-) Juergen _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
The command ring buffer doesn't need clearing up front in any event. Subsequently we'll also want to avoid clearing the device tables. While playing with functions signatures replace undue use of fixed width types at the same time, and extend this to deallocate_buffer() as well. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v7: New. --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -994,12 +994,12 @@ static unsigned int __init dt_alloc_size IOMMU_DEV_TABLE_ENTRY_SIZE); } -static void __init deallocate_buffer(void *buf, uint32_t sz) +static void __init deallocate_buffer(void *buf, unsigned long sz) { - int order = 0; if ( buf ) { - order = get_order_from_bytes(sz); + unsigned int order = get_order_from_bytes(sz); + __free_amd_iommu_tables(buf, order); } } @@ -1012,10 +1012,11 @@ static void __init deallocate_ring_buffe ring_buf->tail = 0; } -static void * __init allocate_buffer(uint32_t alloc_size, const char *name) +static void *__init allocate_buffer(unsigned long alloc_size, + const char *name, bool clear) { - void * buffer; - int order = get_order_from_bytes(alloc_size); + void *buffer; + unsigned int order = get_order_from_bytes(alloc_size); buffer = __alloc_amd_iommu_tables(order); @@ -1025,13 +1026,16 @@ static void * __init allocate_buffer(uin return NULL; } - memset(buffer, 0, PAGE_SIZE * (1UL << order)); + if ( clear ) + memset(buffer, 0, PAGE_SIZE << order); + return buffer; } -static void * __init allocate_ring_buffer(struct ring_buffer *ring_buf, - uint32_t entry_size, - uint64_t entries, const char *name) +static void *__init allocate_ring_buffer(struct ring_buffer *ring_buf, + unsigned int entry_size, + unsigned long entries, + const char *name, bool clear) { ring_buf->head = 0; ring_buf->tail = 0; @@ -1041,7 +1045,8 @@ static void * __init allocate_ring_buffe ring_buf->alloc_size = PAGE_SIZE << get_order_from_bytes(entries * entry_size); ring_buf->entries = ring_buf->alloc_size / entry_size; - ring_buf->buffer = allocate_buffer(ring_buf->alloc_size, name); + ring_buf->buffer = allocate_buffer(ring_buf->alloc_size, name, clear); + return ring_buf->buffer; } @@ -1050,21 +1055,23 @@ static void * __init allocate_cmd_buffer /* allocate 'command buffer' in power of 2 increments of 4K */ return allocate_ring_buffer(&iommu->cmd_buffer, sizeof(cmd_entry_t), IOMMU_CMD_BUFFER_DEFAULT_ENTRIES, - "Command Buffer"); + "Command Buffer", false); } static void * __init allocate_event_log(struct amd_iommu *iommu) { /* allocate 'event log' in power of 2 increments of 4K */ return allocate_ring_buffer(&iommu->event_log, sizeof(event_entry_t), - IOMMU_EVENT_LOG_DEFAULT_ENTRIES, "Event Log"); + IOMMU_EVENT_LOG_DEFAULT_ENTRIES, "Event Log", + true); } static void * __init allocate_ppr_log(struct amd_iommu *iommu) { /* allocate 'ppr log' in power of 2 increments of 4K */ return allocate_ring_buffer(&iommu->ppr_log, sizeof(ppr_entry_t), - IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); + IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log", + true); } /* @@ -1257,7 +1264,7 @@ static int __init amd_iommu_setup_device { /* allocate 'device table' on a 4K boundary */ dt = IVRS_MAPPINGS_DEVTAB(ivrs_mappings) = - allocate_buffer(dt_alloc_size(), "Device Table"); + allocate_buffer(dt_alloc_size(), "Device Table", true); } if ( !dt ) return -ENOMEM; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 26/09/2019 15:29, Jan Beulich wrote: > The command ring buffer doesn't need clearing up front in any event. > Subsequently we'll also want to avoid clearing the device tables. > > While playing with functions signatures replace undue use of fixed width > types at the same time, and extend this to deallocate_buffer() as well. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > v7: New. > > --- a/xen/drivers/passthrough/amd/iommu_init.c > +++ b/xen/drivers/passthrough/amd/iommu_init.c > @@ -994,12 +994,12 @@ static unsigned int __init dt_alloc_size > IOMMU_DEV_TABLE_ENTRY_SIZE); > } > > -static void __init deallocate_buffer(void *buf, uint32_t sz) > +static void __init deallocate_buffer(void *buf, unsigned long sz) Probably best to use size_t here, being both shorter, and guaranteed not to require modification in the future. > { > - int order = 0; > if ( buf ) > { > - order = get_order_from_bytes(sz); > + unsigned int order = get_order_from_bytes(sz); > + > __free_amd_iommu_tables(buf, order); > } > } How about simply if ( buf ) __free_amd_iommu_tables(buf, get_order_from_bytes(sz)); which drops the order variable entirely? Ideally with both of these modifications, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> > @@ -1050,21 +1055,23 @@ static void * __init allocate_cmd_buffer > /* allocate 'command buffer' in power of 2 increments of 4K */ > return allocate_ring_buffer(&iommu->cmd_buffer, sizeof(cmd_entry_t), > IOMMU_CMD_BUFFER_DEFAULT_ENTRIES, > - "Command Buffer"); > + "Command Buffer", false); > } > > static void * __init allocate_event_log(struct amd_iommu *iommu) > { > /* allocate 'event log' in power of 2 increments of 4K */ > return allocate_ring_buffer(&iommu->event_log, sizeof(event_entry_t), > - IOMMU_EVENT_LOG_DEFAULT_ENTRIES, "Event Log"); > + IOMMU_EVENT_LOG_DEFAULT_ENTRIES, "Event Log", > + true); Well - this means I've got yet another AMD IOMMU bugfix hiding somewhere in a branch. I could have sworn I posted it. ~Andrew _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 04.10.2019 15:26, Andrew Cooper wrote: > On 26/09/2019 15:29, Jan Beulich wrote: >> The command ring buffer doesn't need clearing up front in any event. >> Subsequently we'll also want to avoid clearing the device tables. >> >> While playing with functions signatures replace undue use of fixed width >> types at the same time, and extend this to deallocate_buffer() as well. >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> --- >> v7: New. >> >> --- a/xen/drivers/passthrough/amd/iommu_init.c >> +++ b/xen/drivers/passthrough/amd/iommu_init.c >> @@ -994,12 +994,12 @@ static unsigned int __init dt_alloc_size >> IOMMU_DEV_TABLE_ENTRY_SIZE); >> } >> >> -static void __init deallocate_buffer(void *buf, uint32_t sz) >> +static void __init deallocate_buffer(void *buf, unsigned long sz) > > Probably best to use size_t here, being both shorter, and guaranteed not > to require modification in the future. I'd prefer not to without other related code also getting switched from unsigned long to size_t. >> { >> - int order = 0; >> if ( buf ) >> { >> - order = get_order_from_bytes(sz); >> + unsigned int order = get_order_from_bytes(sz); >> + >> __free_amd_iommu_tables(buf, order); >> } >> } > > How about simply > > if ( buf ) > __free_amd_iommu_tables(buf, get_order_from_bytes(sz)); > > which drops the order variable entirely? Fine with me; I did actually consider doing so, but then decided against to stay closer to what the code looked like before. > Ideally with both of these modifications, Reviewed-by: Andrew Cooper > <andrew.cooper3@citrix.com> Thanks. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
Make sure we don't leave any DTEs unexpected requests through which would be passed through untranslated. Set V and IV right away (with all other fields left as zero), relying on the V and/or IV bits getting cleared only by amd_iommu_set_root_page_table() and amd_iommu_set_intremap_table() under special pass-through circumstances. Switch back to initial settings in amd_iommu_disable_domain_device(). Take the liberty and also make the latter function static, constifying its first parameter at the same time, at this occasion. Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Paul Durrant <paul.durrant@citrix.com> --- v7: Avoid writing the DT twice during initial allocation. v6: New. --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -1262,12 +1262,28 @@ static int __init amd_iommu_setup_device if ( !dt ) { + unsigned int size = dt_alloc_size(); + /* allocate 'device table' on a 4K boundary */ dt = IVRS_MAPPINGS_DEVTAB(ivrs_mappings) = - allocate_buffer(dt_alloc_size(), "Device Table", true); + allocate_buffer(size, "Device Table", false); + if ( !dt ) + return -ENOMEM; + + /* + * Prefill every DTE such that all kinds of requests will get aborted. + * Besides the two bits set to true below this builds upon + * IOMMU_DEV_TABLE_SYS_MGT_DMA_ABORTED, + * IOMMU_DEV_TABLE_IO_CONTROL_ABORTED, as well as + * IOMMU_DEV_TABLE_INT_CONTROL_ABORTED all being zero, and us also + * wanting at least TV, GV, I, and EX set to false. + */ + for ( bdf = 0, size /= sizeof(*dt); bdf < size; ++bdf ) + dt[bdf] = (struct amd_iommu_dte){ + .v = true, + .iv = true, + }; } - if ( !dt ) - return -ENOMEM; /* Add device table entries */ for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -267,9 +267,9 @@ static void __hwdom_init amd_iommu_hwdom setup_hwdom_pci_devices(d, amd_iommu_add_device); } -void amd_iommu_disable_domain_device(struct domain *domain, - struct amd_iommu *iommu, - u8 devfn, struct pci_dev *pdev) +static void amd_iommu_disable_domain_device(const struct domain *domain, + struct amd_iommu *iommu, + uint8_t devfn, struct pci_dev *pdev) { struct amd_iommu_dte *table, *dte; unsigned long flags; @@ -284,9 +284,21 @@ void amd_iommu_disable_domain_device(str spin_lock_irqsave(&iommu->lock, flags); if ( dte->tv || dte->v ) { + /* See the comment in amd_iommu_setup_device_table(). */ + dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_ABORTED; + smp_wmb(); + dte->iv = true; dte->tv = false; - dte->v = false; + dte->gv = false; dte->i = false; + dte->ex = false; + dte->sa = false; + dte->se = false; + dte->sd = false; + dte->sys_mgt = IOMMU_DEV_TABLE_SYS_MGT_DMA_ABORTED; + dte->ioctl = IOMMU_DEV_TABLE_IO_CONTROL_ABORTED; + smp_wmb(); + dte->v = true; amd_iommu_flush_device(iommu, req_id); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
On 26/09/2019 15:29, Jan Beulich wrote: > Make sure we don't leave any DTEs unexpected requests through which > would be passed through untranslated. Set V and IV right away (with > all other fields left as zero), relying on the V and/or IV bits > getting cleared only by amd_iommu_set_root_page_table() and > amd_iommu_set_intremap_table() under special pass-through circumstances. > Switch back to initial settings in amd_iommu_disable_domain_device(). > > Take the liberty and also make the latter function static, constifying > its first parameter at the same time, at this occasion. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > Reviewed-by: Paul Durrant <paul.durrant@citrix.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel
© 2016 - 2024 Red Hat, Inc.