Today blkfront trusts the backend to send only sane response data.
In order to avoid privilege escalations or crashes in case of a malicious
backend, verify the data to be within expected limits. In particular, make
sure that a response always references an outstanding request.
Introduce a new state of the ring, BLKIF_STATE_ERROR, which will be
switched to in case an inconsistency is detected.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
drivers/block/xen-blkfront.c | 62 +++++++++++++++++++++++++++---------
1 file changed, 47 insertions(+), 15 deletions(-)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index c6a05de4f15f..aa0f159829b4 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -80,6 +80,7 @@ enum blkif_state {
BLKIF_STATE_DISCONNECTED,
BLKIF_STATE_CONNECTED,
BLKIF_STATE_SUSPENDED,
+ BLKIF_STATE_ERROR,
};
struct grant {
@@ -89,6 +90,7 @@ struct grant {
};
enum blk_req_status {
+ REQ_PROCESSING,
REQ_WAITING,
REQ_DONE,
REQ_ERROR,
@@ -543,7 +545,7 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
id = get_id_from_freelist(rinfo);
rinfo->shadow[id].request = req;
- rinfo->shadow[id].status = REQ_WAITING;
+ rinfo->shadow[id].status = REQ_PROCESSING;
rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
rinfo->shadow[id].req.u.rw.id = id;
@@ -572,6 +574,7 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf
/* Copy the request to the ring page. */
*final_ring_req = *ring_req;
+ rinfo->shadow[id].status = REQ_WAITING;
return 0;
}
@@ -847,8 +850,11 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
/* Copy request(s) to the ring page. */
*final_ring_req = *ring_req;
- if (unlikely(require_extra_req))
+ rinfo->shadow[id].status = REQ_WAITING;
+ if (unlikely(require_extra_req)) {
*final_extra_ring_req = *extra_ring_req;
+ rinfo->shadow[extra_id].status = REQ_WAITING;
+ }
if (new_persistent_gnts)
gnttab_free_grant_references(setup.gref_head);
@@ -1420,8 +1426,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp)
static int blkif_get_final_status(enum blk_req_status s1,
enum blk_req_status s2)
{
- BUG_ON(s1 == REQ_WAITING);
- BUG_ON(s2 == REQ_WAITING);
+ BUG_ON(s1 < REQ_DONE);
+ BUG_ON(s2 < REQ_DONE);
if (s1 == REQ_ERROR || s2 == REQ_ERROR)
return BLKIF_RSP_ERROR;
@@ -1454,7 +1460,7 @@ static bool blkif_completion(unsigned long *id,
s->status = blkif_rsp_to_req_status(bret->status);
/* Wait the second response if not yet here. */
- if (s2->status == REQ_WAITING)
+ if (s2->status < REQ_DONE)
return false;
bret->status = blkif_get_final_status(s->status,
@@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
spin_lock_irqsave(&rinfo->ring_lock, flags);
again:
rp = rinfo->ring.sring->rsp_prod;
+ if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) {
+ pr_alert("%s: illegal number of responses %u\n",
+ info->gd->disk_name, rp - rinfo->ring.rsp_cons);
+ goto err;
+ }
rmb(); /* Ensure we see queued responses up to 'rp'. */
for (i = rinfo->ring.rsp_cons; i != rp; i++) {
unsigned long id;
+ unsigned int op;
RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
id = bret.id;
@@ -1588,14 +1600,28 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
* look in get_id_from_freelist.
*/
if (id >= BLK_RING_SIZE(info)) {
- WARN(1, "%s: response to %s has incorrect id (%ld)\n",
- info->gd->disk_name, op_name(bret.operation), id);
- /* We can't safely get the 'struct request' as
- * the id is busted. */
- continue;
+ pr_alert("%s: response has incorrect id (%ld)\n",
+ info->gd->disk_name, id);
+ goto err;
}
+ if (rinfo->shadow[id].status != REQ_WAITING) {
+ pr_alert("%s: response references no pending request\n",
+ info->gd->disk_name);
+ goto err;
+ }
+
+ rinfo->shadow[id].status = REQ_PROCESSING;
req = rinfo->shadow[id].request;
+ op = rinfo->shadow[id].req.operation;
+ if (op == BLKIF_OP_INDIRECT)
+ op = rinfo->shadow[id].req.u.indirect.indirect_op;
+ if (bret.operation != op) {
+ pr_alert("%s: response has wrong operation (%u instead of %u)\n",
+ info->gd->disk_name, bret.operation, op);
+ goto err;
+ }
+
if (bret.operation != BLKIF_OP_DISCARD) {
/*
* We may need to wait for an extra response if the
@@ -1620,7 +1646,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
case BLKIF_OP_DISCARD:
if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
- printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+
+ pr_warn_ratelimited("blkfront: %s: %s op failed\n",
info->gd->disk_name, op_name(bret.operation));
blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
@@ -1632,13 +1659,13 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
- printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+ pr_warn_ratelimited("blkfront: %s: %s op failed\n",
info->gd->disk_name, op_name(bret.operation));
blkif_req(req)->error = BLK_STS_NOTSUPP;
}
if (unlikely(bret.status == BLKIF_RSP_ERROR &&
rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
- printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
+ pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
info->gd->disk_name, op_name(bret.operation));
blkif_req(req)->error = BLK_STS_NOTSUPP;
}
@@ -1653,8 +1680,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
if (unlikely(bret.status != BLKIF_RSP_OKAY))
- dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
- "request: %x\n", bret.status);
+ dev_dbg_ratelimited(&info->xbdev->dev,
+ "Bad return from blkdev data request: %x\n", bret.status);
break;
default:
@@ -1680,6 +1707,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return IRQ_HANDLED;
+
+ err:
+ info->connected = BLKIF_STATE_ERROR;
+ pr_alert("%s disabled for further use\n", info->gd->disk_name);
+ return IRQ_HANDLED;
}
--
2.26.2
On 13.05.2021 12:02, Juergen Gross wrote: > @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) > spin_lock_irqsave(&rinfo->ring_lock, flags); > again: > rp = rinfo->ring.sring->rsp_prod; > + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { > + pr_alert("%s: illegal number of responses %u\n", > + info->gd->disk_name, rp - rinfo->ring.rsp_cons); > + goto err; > + } > rmb(); /* Ensure we see queued responses up to 'rp'. */ I think you want to insert after the barrier. > @@ -1680,6 +1707,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) > spin_unlock_irqrestore(&rinfo->ring_lock, flags); > > return IRQ_HANDLED; > + > + err: > + info->connected = BLKIF_STATE_ERROR; > + pr_alert("%s disabled for further use\n", info->gd->disk_name); > + return IRQ_HANDLED; > } Am I understanding that a suspend (and then resume) can be used to recover from error state? If so - is this intentional? If so in turn, would it make sense to spell this out in the description? Jan
On 17.05.21 16:11, Jan Beulich wrote: > On 13.05.2021 12:02, Juergen Gross wrote: >> @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >> spin_lock_irqsave(&rinfo->ring_lock, flags); >> again: >> rp = rinfo->ring.sring->rsp_prod; >> + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { >> + pr_alert("%s: illegal number of responses %u\n", >> + info->gd->disk_name, rp - rinfo->ring.rsp_cons); >> + goto err; >> + } >> rmb(); /* Ensure we see queued responses up to 'rp'. */ > > I think you want to insert after the barrier. Why? The relevant variable which is checked is "rp". The result of the check is in no way depending on the responses themselves. And any change of rsp_cons is protected by ring_lock, so there is no possibility of reading an old value here. > >> @@ -1680,6 +1707,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >> spin_unlock_irqrestore(&rinfo->ring_lock, flags); >> >> return IRQ_HANDLED; >> + >> + err: >> + info->connected = BLKIF_STATE_ERROR; >> + pr_alert("%s disabled for further use\n", info->gd->disk_name); >> + return IRQ_HANDLED; >> } > > Am I understanding that a suspend (and then resume) can be used to > recover from error state? If so - is this intentional? If so in turn, > would it make sense to spell this out in the description? I'd call it a nice side effect rather than intention. I can add a remark to the commit message if you want. Juergen
On 17.05.2021 16:23, Juergen Gross wrote: > On 17.05.21 16:11, Jan Beulich wrote: >> On 13.05.2021 12:02, Juergen Gross wrote: >>> @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>> spin_lock_irqsave(&rinfo->ring_lock, flags); >>> again: >>> rp = rinfo->ring.sring->rsp_prod; >>> + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { >>> + pr_alert("%s: illegal number of responses %u\n", >>> + info->gd->disk_name, rp - rinfo->ring.rsp_cons); >>> + goto err; >>> + } >>> rmb(); /* Ensure we see queued responses up to 'rp'. */ >> >> I think you want to insert after the barrier. > > Why? The relevant variable which is checked is "rp". The result of the > check is in no way depending on the responses themselves. And any change > of rsp_cons is protected by ring_lock, so there is no possibility of > reading an old value here. But this is a standard double read situation: You might check a value and then (via a separate read) use a different one past the barrier. Jan
On 17.05.21 17:12, Jan Beulich wrote: > On 17.05.2021 16:23, Juergen Gross wrote: >> On 17.05.21 16:11, Jan Beulich wrote: >>> On 13.05.2021 12:02, Juergen Gross wrote: >>>> @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>>> spin_lock_irqsave(&rinfo->ring_lock, flags); >>>> again: >>>> rp = rinfo->ring.sring->rsp_prod; >>>> + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { >>>> + pr_alert("%s: illegal number of responses %u\n", >>>> + info->gd->disk_name, rp - rinfo->ring.rsp_cons); >>>> + goto err; >>>> + } >>>> rmb(); /* Ensure we see queued responses up to 'rp'. */ >>> >>> I think you want to insert after the barrier. >> >> Why? The relevant variable which is checked is "rp". The result of the >> check is in no way depending on the responses themselves. And any change >> of rsp_cons is protected by ring_lock, so there is no possibility of >> reading an old value here. > > But this is a standard double read situation: You might check a value > and then (via a separate read) use a different one past the barrier. Yes and no. rsp_cons should never be written by the other side, and additionally it would be read multiple times anyway. So if the other side is writing it, the write could always happen after the test and before the loop is started. This is no real issue here as the frontend would very soon stumble over an illegal response (either no request pending, or some other inconsistency). The test is meant to have a more detailed error message in case it hits. In the end it doesn't really matter, so I can change it. I just wanted to point out that IMO both variants are equally valid. Juergen
On 17.05.2021 17:22, Juergen Gross wrote: > On 17.05.21 17:12, Jan Beulich wrote: >> On 17.05.2021 16:23, Juergen Gross wrote: >>> On 17.05.21 16:11, Jan Beulich wrote: >>>> On 13.05.2021 12:02, Juergen Gross wrote: >>>>> @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>>>> spin_lock_irqsave(&rinfo->ring_lock, flags); >>>>> again: >>>>> rp = rinfo->ring.sring->rsp_prod; >>>>> + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { >>>>> + pr_alert("%s: illegal number of responses %u\n", >>>>> + info->gd->disk_name, rp - rinfo->ring.rsp_cons); >>>>> + goto err; >>>>> + } >>>>> rmb(); /* Ensure we see queued responses up to 'rp'. */ >>>> >>>> I think you want to insert after the barrier. >>> >>> Why? The relevant variable which is checked is "rp". The result of the >>> check is in no way depending on the responses themselves. And any change >>> of rsp_cons is protected by ring_lock, so there is no possibility of >>> reading an old value here. >> >> But this is a standard double read situation: You might check a value >> and then (via a separate read) use a different one past the barrier. > > Yes and no. > > rsp_cons should never be written by the other side, and additionally > it would be read multiple times anyway. But I'm talking about rsp_prod, as that's what rp gets loaded from. Jan > So if the other side is writing it, the write could always happen after > the test and before the loop is started. This is no real issue here as > the frontend would very soon stumble over an illegal response (either > no request pending, or some other inconsistency). The test is meant to > have a more detailed error message in case it hits. > > In the end it doesn't really matter, so I can change it. I just wanted > to point out that IMO both variants are equally valid. > > > Juergen >
On 17.05.21 17:33, Jan Beulich wrote: > On 17.05.2021 17:22, Juergen Gross wrote: >> On 17.05.21 17:12, Jan Beulich wrote: >>> On 17.05.2021 16:23, Juergen Gross wrote: >>>> On 17.05.21 16:11, Jan Beulich wrote: >>>>> On 13.05.2021 12:02, Juergen Gross wrote: >>>>>> @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>>>>> spin_lock_irqsave(&rinfo->ring_lock, flags); >>>>>> again: >>>>>> rp = rinfo->ring.sring->rsp_prod; >>>>>> + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { >>>>>> + pr_alert("%s: illegal number of responses %u\n", >>>>>> + info->gd->disk_name, rp - rinfo->ring.rsp_cons); >>>>>> + goto err; >>>>>> + } >>>>>> rmb(); /* Ensure we see queued responses up to 'rp'. */ >>>>> >>>>> I think you want to insert after the barrier. >>>> >>>> Why? The relevant variable which is checked is "rp". The result of the >>>> check is in no way depending on the responses themselves. And any change >>>> of rsp_cons is protected by ring_lock, so there is no possibility of >>>> reading an old value here. >>> >>> But this is a standard double read situation: You might check a value >>> and then (via a separate read) use a different one past the barrier. >> >> Yes and no. >> >> rsp_cons should never be written by the other side, and additionally >> it would be read multiple times anyway. > > But I'm talking about rsp_prod, as that's what rp gets loaded from. Oh, now I get your problem. But shouldn't that better be solved by using READ_ONCE() for reading rp instead? Juergen
On 08.07.2021 07:47, Juergen Gross wrote: > On 17.05.21 17:33, Jan Beulich wrote: >> On 17.05.2021 17:22, Juergen Gross wrote: >>> On 17.05.21 17:12, Jan Beulich wrote: >>>> On 17.05.2021 16:23, Juergen Gross wrote: >>>>> On 17.05.21 16:11, Jan Beulich wrote: >>>>>> On 13.05.2021 12:02, Juergen Gross wrote: >>>>>>> @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>>>>>> spin_lock_irqsave(&rinfo->ring_lock, flags); >>>>>>> again: >>>>>>> rp = rinfo->ring.sring->rsp_prod; >>>>>>> + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { >>>>>>> + pr_alert("%s: illegal number of responses %u\n", >>>>>>> + info->gd->disk_name, rp - rinfo->ring.rsp_cons); >>>>>>> + goto err; >>>>>>> + } >>>>>>> rmb(); /* Ensure we see queued responses up to 'rp'. */ >>>>>> >>>>>> I think you want to insert after the barrier. >>>>> >>>>> Why? The relevant variable which is checked is "rp". The result of the >>>>> check is in no way depending on the responses themselves. And any change >>>>> of rsp_cons is protected by ring_lock, so there is no possibility of >>>>> reading an old value here. >>>> >>>> But this is a standard double read situation: You might check a value >>>> and then (via a separate read) use a different one past the barrier. >>> >>> Yes and no. >>> >>> rsp_cons should never be written by the other side, and additionally >>> it would be read multiple times anyway. >> >> But I'm talking about rsp_prod, as that's what rp gets loaded from. > > Oh, now I get your problem. > > But shouldn't that better be solved by using READ_ONCE() for reading rp > instead? Not sure - the rmb() is needed anyway aiui, and hence you could as well move your code addition. Jan
On 08.07.21 08:37, Jan Beulich wrote: > On 08.07.2021 07:47, Juergen Gross wrote: >> On 17.05.21 17:33, Jan Beulich wrote: >>> On 17.05.2021 17:22, Juergen Gross wrote: >>>> On 17.05.21 17:12, Jan Beulich wrote: >>>>> On 17.05.2021 16:23, Juergen Gross wrote: >>>>>> On 17.05.21 16:11, Jan Beulich wrote: >>>>>>> On 13.05.2021 12:02, Juergen Gross wrote: >>>>>>>> @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>>>>>>> spin_lock_irqsave(&rinfo->ring_lock, flags); >>>>>>>> again: >>>>>>>> rp = rinfo->ring.sring->rsp_prod; >>>>>>>> + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { >>>>>>>> + pr_alert("%s: illegal number of responses %u\n", >>>>>>>> + info->gd->disk_name, rp - rinfo->ring.rsp_cons); >>>>>>>> + goto err; >>>>>>>> + } >>>>>>>> rmb(); /* Ensure we see queued responses up to 'rp'. */ >>>>>>> >>>>>>> I think you want to insert after the barrier. >>>>>> >>>>>> Why? The relevant variable which is checked is "rp". The result of the >>>>>> check is in no way depending on the responses themselves. And any change >>>>>> of rsp_cons is protected by ring_lock, so there is no possibility of >>>>>> reading an old value here. >>>>> >>>>> But this is a standard double read situation: You might check a value >>>>> and then (via a separate read) use a different one past the barrier. >>>> >>>> Yes and no. >>>> >>>> rsp_cons should never be written by the other side, and additionally >>>> it would be read multiple times anyway. >>> >>> But I'm talking about rsp_prod, as that's what rp gets loaded from. >> >> Oh, now I get your problem. >> >> But shouldn't that better be solved by using READ_ONCE() for reading rp >> instead? > > Not sure - the rmb() is needed anyway aiui, and hence you could as well > move your code addition. Sure. My question was rather: does the rmb() really eliminate the possibility of a double read introduced by the compiler? If yes, moving the code is the correct solution. Juergen
On 08.07.2021 08:40, Juergen Gross wrote: > On 08.07.21 08:37, Jan Beulich wrote: >> On 08.07.2021 07:47, Juergen Gross wrote: >>> On 17.05.21 17:33, Jan Beulich wrote: >>>> On 17.05.2021 17:22, Juergen Gross wrote: >>>>> On 17.05.21 17:12, Jan Beulich wrote: >>>>>> On 17.05.2021 16:23, Juergen Gross wrote: >>>>>>> On 17.05.21 16:11, Jan Beulich wrote: >>>>>>>> On 13.05.2021 12:02, Juergen Gross wrote: >>>>>>>>> @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>>>>>>>> spin_lock_irqsave(&rinfo->ring_lock, flags); >>>>>>>>> again: >>>>>>>>> rp = rinfo->ring.sring->rsp_prod; >>>>>>>>> + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { >>>>>>>>> + pr_alert("%s: illegal number of responses %u\n", >>>>>>>>> + info->gd->disk_name, rp - rinfo->ring.rsp_cons); >>>>>>>>> + goto err; >>>>>>>>> + } >>>>>>>>> rmb(); /* Ensure we see queued responses up to 'rp'. */ >>>>>>>> >>>>>>>> I think you want to insert after the barrier. >>>>>>> >>>>>>> Why? The relevant variable which is checked is "rp". The result of the >>>>>>> check is in no way depending on the responses themselves. And any change >>>>>>> of rsp_cons is protected by ring_lock, so there is no possibility of >>>>>>> reading an old value here. >>>>>> >>>>>> But this is a standard double read situation: You might check a value >>>>>> and then (via a separate read) use a different one past the barrier. >>>>> >>>>> Yes and no. >>>>> >>>>> rsp_cons should never be written by the other side, and additionally >>>>> it would be read multiple times anyway. >>>> >>>> But I'm talking about rsp_prod, as that's what rp gets loaded from. >>> >>> Oh, now I get your problem. >>> >>> But shouldn't that better be solved by using READ_ONCE() for reading rp >>> instead? >> >> Not sure - the rmb() is needed anyway aiui, and hence you could as well >> move your code addition. > > Sure. 
> > My question was rather: does the rmb() really eliminate the possibility > of a double read introduced by the compiler? If yes, moving the code is > the correct solution. It doesn't eliminate the possibility of a double read, but (leaving aside split accesses) that's not what you care about here. What you need is a single stable value to operate on. No matter how many (non-split) reads the compiler may issue to fill "rp", the final read's value will be used in the subsequent calculation. Or at least that's been my understanding; thinking about it the compiler might issue multiple reads into distinct registers ahead of the barrier, and use different registers for different subsequent operations. While this would look like intentionally inefficient code generation to me, you may indeed want to play safe and use ACCESS_ONCE() _and_ the barrier. I guess there are more places then which would want similar treatment, and it's not a problem that this change introduces ... Jan
On 08.07.21 08:52, Jan Beulich wrote: > On 08.07.2021 08:40, Juergen Gross wrote: >> On 08.07.21 08:37, Jan Beulich wrote: >>> On 08.07.2021 07:47, Juergen Gross wrote: >>>> On 17.05.21 17:33, Jan Beulich wrote: >>>>> On 17.05.2021 17:22, Juergen Gross wrote: >>>>>> On 17.05.21 17:12, Jan Beulich wrote: >>>>>>> On 17.05.2021 16:23, Juergen Gross wrote: >>>>>>>> On 17.05.21 16:11, Jan Beulich wrote: >>>>>>>>> On 13.05.2021 12:02, Juergen Gross wrote: >>>>>>>>>> @@ -1574,10 +1580,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>>>>>>>>> spin_lock_irqsave(&rinfo->ring_lock, flags); >>>>>>>>>> again: >>>>>>>>>> rp = rinfo->ring.sring->rsp_prod; >>>>>>>>>> + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { >>>>>>>>>> + pr_alert("%s: illegal number of responses %u\n", >>>>>>>>>> + info->gd->disk_name, rp - rinfo->ring.rsp_cons); >>>>>>>>>> + goto err; >>>>>>>>>> + } >>>>>>>>>> rmb(); /* Ensure we see queued responses up to 'rp'. */ >>>>>>>>> >>>>>>>>> I think you want to insert after the barrier. >>>>>>>> >>>>>>>> Why? The relevant variable which is checked is "rp". The result of the >>>>>>>> check is in no way depending on the responses themselves. And any change >>>>>>>> of rsp_cons is protected by ring_lock, so there is no possibility of >>>>>>>> reading an old value here. >>>>>>> >>>>>>> But this is a standard double read situation: You might check a value >>>>>>> and then (via a separate read) use a different one past the barrier. >>>>>> >>>>>> Yes and no. >>>>>> >>>>>> rsp_cons should never be written by the other side, and additionally >>>>>> it would be read multiple times anyway. >>>>> >>>>> But I'm talking about rsp_prod, as that's what rp gets loaded from. >>>> >>>> Oh, now I get your problem. >>>> >>>> But shouldn't that better be solved by using READ_ONCE() for reading rp >>>> instead? >>> >>> Not sure - the rmb() is needed anyway aiui, and hence you could as well >>> move your code addition. >> >> Sure. 
>> >> My question was rather: does the rmb() really eliminate the possibility >> of a double read introduced by the compiler? If yes, moving the code is >> the correct solution. > > It doesn't eliminate the possibility of a double read, but (leaving > aside split accesses) that's not what you care about here. What you > need is a single stable value to operate on. No matter how many > (non-split) reads the compiler may issue to fill "rp", the final > read's value will be used in the subsequent calculation. Or at > least that's been my understanding; thinking about it the compiler > might issue multiple reads into distinct registers ahead of the > barrier, and use different registers for different subsequent > operations. While this would look like intentionally inefficient > code generation to me, you may indeed want to play safe and use > ACCESS_ONCE() _and_ the barrier. I guess there are more places then > which would want similar treatment, and it's not a problem that > this change introduces ... Nevertheless I think I can change it right away. It will also help against load tearing. Juergen
© 2016 - 2024 Red Hat, Inc.