[PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx

Ojaswin Mujoo posted 11 patches 4 months, 3 weeks ago
[PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Ojaswin Mujoo 4 months, 3 weeks ago
Implement atomic write support to help fuzz atomic writes
with fsx.

Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
---
 ltp/fsx.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 110 insertions(+), 5 deletions(-)

diff --git a/ltp/fsx.c b/ltp/fsx.c
index 163b9453..bdb87ca9 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -40,6 +40,7 @@
 #include <liburing.h>
 #endif
 #include <sys/syscall.h>
+#include "statx.h"
 
 #ifndef MAP_FILE
 # define MAP_FILE 0
@@ -49,6 +50,10 @@
 #define RWF_DONTCACHE	0x80
 #endif
 
+#ifndef RWF_ATOMIC
+#define RWF_ATOMIC	0x40
+#endif
+
 #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
 
 /* Operation flags (bitmask) */
@@ -110,6 +115,7 @@ enum {
 	OP_READ_DONTCACHE,
 	OP_WRITE,
 	OP_WRITE_DONTCACHE,
+	OP_WRITE_ATOMIC,
 	OP_MAPREAD,
 	OP_MAPWRITE,
 	OP_MAX_LITE,
@@ -200,6 +206,11 @@ int	uring = 0;
 int	mark_nr = 0;
 int	dontcache_io = 1;
 int	hugepages = 0;                  /* -h flag */
+int	do_atomic_writes = 1;		/* -a flag disables */
+
+/* Used for atomic writes */
+int awu_min = 0;
+int awu_max = 0;
 
 /* Stores info needed to periodically collapse hugepages */
 struct hugepages_collapse_info {
@@ -288,6 +299,7 @@ static const char *op_names[] = {
 	[OP_READ_DONTCACHE] = "read_dontcache",
 	[OP_WRITE] = "write",
 	[OP_WRITE_DONTCACHE] = "write_dontcache",
+	[OP_WRITE_ATOMIC] = "write_atomic",
 	[OP_MAPREAD] = "mapread",
 	[OP_MAPWRITE] = "mapwrite",
 	[OP_TRUNCATE] = "truncate",
@@ -422,6 +434,7 @@ logdump(void)
 				prt("\t***RRRR***");
 			break;
 		case OP_WRITE_DONTCACHE:
+		case OP_WRITE_ATOMIC:
 		case OP_WRITE:
 			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
 			    lp->args[0], lp->args[0] + lp->args[1] - 1,
@@ -1073,6 +1086,25 @@ update_file_size(unsigned offset, unsigned size)
 	file_size = offset + size;
 }
 
+static int is_power_of_2(unsigned n) {
+	return ((n & (n - 1)) == 0);
+}
+
+/*
+ * Round down n to the nearest power of 2.
+ * If n is already a power of 2, return n.
+ */
+static int rounddown_pow_of_2(int n) {
+	int i = 0;
+
+	if (is_power_of_2(n))
+		return n;
+
+	for (; (1 << i) < n; i++);
+
+	return 1 << (i - 1);
+}
+
 void
 dowrite(unsigned offset, unsigned size, int flags)
 {
@@ -1081,6 +1113,27 @@ dowrite(unsigned offset, unsigned size, int flags)
 	offset -= offset % writebdy;
 	if (o_direct)
 		size -= size % writebdy;
+	if (flags & RWF_ATOMIC) {
+		/* atomic write len must be between awu_min and awu_max */
+		if (size < awu_min)
+			size = awu_min;
+		if (size > awu_max)
+			size = awu_max;
+
+		/* atomic writes need power-of-2 sizes */
+		size = rounddown_pow_of_2(size);
+
+		/* atomic writes need naturally aligned offsets */
+		offset -= offset % size;
+
+		/* Skip the write if we are crossing max filesize */
+		if ((offset + size) > maxfilelen) {
+			if (!quiet && testcalls > simulatedopcount)
+				prt("skipping atomic write past maxfilelen\n");
+			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
+			return;
+		}
+	}
 	if (size == 0) {
 		if (!quiet && testcalls > simulatedopcount && !o_direct)
 			prt("skipping zero size write\n");
@@ -1088,7 +1141,10 @@ dowrite(unsigned offset, unsigned size, int flags)
 		return;
 	}
 
-	log4(OP_WRITE, offset, size, FL_NONE);
+	if (flags & RWF_ATOMIC)
+		log4(OP_WRITE_ATOMIC, offset, size, FL_NONE);
+	else
+		log4(OP_WRITE, offset, size, FL_NONE);
 
 	gendata(original_buf, good_buf, offset, size);
 	if (offset + size > file_size) {
@@ -1108,8 +1164,9 @@ dowrite(unsigned offset, unsigned size, int flags)
 		       (monitorstart == -1 ||
 			(offset + size > monitorstart &&
 			(monitorend == -1 || offset <= monitorend))))))
-		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d\n", testcalls,
-		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0);
+		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d atomic_wr=%d\n", testcalls,
+		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0,
+		    (flags & RWF_ATOMIC) != 0);
 	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
 	if (iret != size) {
 		if (iret == -1)
@@ -1785,6 +1842,36 @@ do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
 }
 #endif
 
+int test_atomic_writes(void) {
+	int ret;
+	struct statx stx;
+
+	if (o_direct != O_DIRECT) {
+		fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
+				"disabling!\n");
+		return 0;
+	}
+
+	ret = xfstests_statx(AT_FDCWD, fname, 0, STATX_WRITE_ATOMIC, &stx);
+	if (ret < 0) {
+		fprintf(stderr, "main: Statx failed with %d."
+			" Failed to determine atomic write limits, "
+			" disabling!\n", ret);
+		return 0;
+	}
+
+	if (stx.stx_attributes & STATX_ATTR_WRITE_ATOMIC &&
+	    stx.stx_atomic_write_unit_min > 0) {
+		awu_min = stx.stx_atomic_write_unit_min;
+		awu_max = stx.stx_atomic_write_unit_max;
+		return 1;
+	}
+
+	fprintf(stderr, "main: IO Stack does not support "
+			"atomic writes, disabling!\n");
+	return 0;
+}
+
 #ifdef HAVE_COPY_FILE_RANGE
 int
 test_copy_range(void)
@@ -2356,6 +2443,12 @@ have_op:
 			goto out;
 		}
 		break;
+	case OP_WRITE_ATOMIC:
+		if (!do_atomic_writes) {
+			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
+			goto out;
+		}
+		break;
 	}
 
 	switch (op) {
@@ -2385,6 +2478,11 @@ have_op:
 			dowrite(offset, size, 0);
 		break;
 
+	case OP_WRITE_ATOMIC:
+		TRIM_OFF_LEN(offset, size, maxfilelen);
+		dowrite(offset, size, RWF_ATOMIC);
+		break;
+
 	case OP_MAPREAD:
 		TRIM_OFF_LEN(offset, size, file_size);
 		domapread(offset, size);
@@ -2511,13 +2609,14 @@ void
 usage(void)
 {
 	fprintf(stdout, "usage: %s",
-		"fsx [-dfhknqxyzBEFHIJKLORWXZ0]\n\
+		"fsx [-adfhknqxyzBEFHIJKLORWXZ0]\n\
 	   [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid]\n\
 	   [-l flen] [-m start:end] [-o oplen] [-p progressinterval]\n\
 	   [-r readbdy] [-s style] [-t truncbdy] [-w writebdy]\n\
 	   [-A|-U] [-D startingop] [-N numops] [-P dirpath] [-S seed]\n\
 	   [--replay-ops=opsfile] [--record-ops[=opsfile]] [--duration=seconds]\n\
 	   ... fname\n\
+	-a: disable atomic writes\n\
 	-b opnum: beginning operation number (default 1)\n\
 	-c P: 1 in P chance of file close+open at each op (default infinity)\n\
 	-d: debug output for all operations\n\
@@ -3059,9 +3158,13 @@ main(int argc, char **argv)
 	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
 
 	while ((ch = getopt_long(argc, argv,
-				 "0b:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
+				 "0ab:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
 				 longopts, NULL)) != EOF)
 		switch (ch) {
+		case 'a':
+			prt("main(): Atomic writes disabled\n");
+			do_atomic_writes = 0;
+			break;
 		case 'b':
 			simulatedopcount = getnum(optarg, &endp);
 			if (!quiet)
@@ -3475,6 +3578,8 @@ main(int argc, char **argv)
 		exchange_range_calls = test_exchange_range();
 	if (dontcache_io)
 		dontcache_io = test_dontcache_io();
+	if (do_atomic_writes)
+		do_atomic_writes = test_atomic_writes();
 
 	while (keep_running())
 		if (!test())
-- 
2.49.0
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Zorro Lang 4 months, 1 week ago
On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> Implement atomic write support to help fuzz atomic writes
> with fsx.
> 
> Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: John Garry <john.g.garry@oracle.com>
> Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> ---

Hmm... this patch causes more regular fsx test cases to fail on old kernels
(e.g. g/760, g/617, g/263 ...) unless "FSX_AVOID=-a" is set. Is there a way
to disable "atomic write" automatically if it's not supported by the current
system?

Thanks,
Zorro

>  ltp/fsx.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 110 insertions(+), 5 deletions(-)
> 
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index 163b9453..bdb87ca9 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
> @@ -40,6 +40,7 @@
>  #include <liburing.h>
>  #endif
>  #include <sys/syscall.h>
> +#include "statx.h"
>  
>  #ifndef MAP_FILE
>  # define MAP_FILE 0
> @@ -49,6 +50,10 @@
>  #define RWF_DONTCACHE	0x80
>  #endif
>  
> +#ifndef RWF_ATOMIC
> +#define RWF_ATOMIC	0x40
> +#endif
> +
>  #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
>  
>  /* Operation flags (bitmask) */
> @@ -110,6 +115,7 @@ enum {
>  	OP_READ_DONTCACHE,
>  	OP_WRITE,
>  	OP_WRITE_DONTCACHE,
> +	OP_WRITE_ATOMIC,
>  	OP_MAPREAD,
>  	OP_MAPWRITE,
>  	OP_MAX_LITE,
> @@ -200,6 +206,11 @@ int	uring = 0;
>  int	mark_nr = 0;
>  int	dontcache_io = 1;
>  int	hugepages = 0;                  /* -h flag */
> +int	do_atomic_writes = 1;		/* -a flag disables */
> +
> +/* User for atomic writes */
> +int awu_min = 0;
> +int awu_max = 0;
>  
>  /* Stores info needed to periodically collapse hugepages */
>  struct hugepages_collapse_info {
> @@ -288,6 +299,7 @@ static const char *op_names[] = {
>  	[OP_READ_DONTCACHE] = "read_dontcache",
>  	[OP_WRITE] = "write",
>  	[OP_WRITE_DONTCACHE] = "write_dontcache",
> +	[OP_WRITE_ATOMIC] = "write_atomic",
>  	[OP_MAPREAD] = "mapread",
>  	[OP_MAPWRITE] = "mapwrite",
>  	[OP_TRUNCATE] = "truncate",
> @@ -422,6 +434,7 @@ logdump(void)
>  				prt("\t***RRRR***");
>  			break;
>  		case OP_WRITE_DONTCACHE:
> +		case OP_WRITE_ATOMIC:
>  		case OP_WRITE:
>  			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
>  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
> @@ -1073,6 +1086,25 @@ update_file_size(unsigned offset, unsigned size)
>  	file_size = offset + size;
>  }
>  
> +static int is_power_of_2(unsigned n) {
> +	return ((n & (n - 1)) == 0);
> +}
> +
> +/*
> + * Round down n to nearest power of 2.
> + * If n is already a power of 2, return n;
> + */
> +static int rounddown_pow_of_2(int n) {
> +	int i = 0;
> +
> +	if (is_power_of_2(n))
> +		return n;
> +
> +	for (; (1 << i) < n; i++);
> +
> +	return 1 << (i - 1);
> +}
> +
>  void
>  dowrite(unsigned offset, unsigned size, int flags)
>  {
> @@ -1081,6 +1113,27 @@ dowrite(unsigned offset, unsigned size, int flags)
>  	offset -= offset % writebdy;
>  	if (o_direct)
>  		size -= size % writebdy;
> +	if (flags & RWF_ATOMIC) {
> +		/* atomic write len must be between awu_min and awu_max */
> +		if (size < awu_min)
> +			size = awu_min;
> +		if (size > awu_max)
> +			size = awu_max;
> +
> +		/* atomic writes need power-of-2 sizes */
> +		size = rounddown_pow_of_2(size);
> +
> +		/* atomic writes need naturally aligned offsets */
> +		offset -= offset % size;
> +
> +		/* Skip the write if we are crossing max filesize */
> +		if ((offset + size) > maxfilelen) {
> +			if (!quiet && testcalls > simulatedopcount)
> +				prt("skipping atomic write past maxfilelen\n");
> +			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
> +			return;
> +		}
> +	}
>  	if (size == 0) {
>  		if (!quiet && testcalls > simulatedopcount && !o_direct)
>  			prt("skipping zero size write\n");
> @@ -1088,7 +1141,10 @@ dowrite(unsigned offset, unsigned size, int flags)
>  		return;
>  	}
>  
> -	log4(OP_WRITE, offset, size, FL_NONE);
> +	if (flags & RWF_ATOMIC)
> +		log4(OP_WRITE_ATOMIC, offset, size, FL_NONE);
> +	else
> +		log4(OP_WRITE, offset, size, FL_NONE);
>  
>  	gendata(original_buf, good_buf, offset, size);
>  	if (offset + size > file_size) {
> @@ -1108,8 +1164,9 @@ dowrite(unsigned offset, unsigned size, int flags)
>  		       (monitorstart == -1 ||
>  			(offset + size > monitorstart &&
>  			(monitorend == -1 || offset <= monitorend))))))
> -		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d\n", testcalls,
> -		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0);
> +		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d atomic_wr=%d\n", testcalls,
> +		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0,
> +		    (flags & RWF_ATOMIC) != 0);
>  	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
>  	if (iret != size) {
>  		if (iret == -1)
> @@ -1785,6 +1842,36 @@ do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
>  }
>  #endif
>  
> +int test_atomic_writes(void) {
> +	int ret;
> +	struct statx stx;
> +
> +	if (o_direct != O_DIRECT) {
> +		fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> +				"disabling!\n");
> +		return 0;
> +	}
> +
> +	ret = xfstests_statx(AT_FDCWD, fname, 0, STATX_WRITE_ATOMIC, &stx);
> +	if (ret < 0) {
> +		fprintf(stderr, "main: Statx failed with %d."
> +			" Failed to determine atomic write limits, "
> +			" disabling!\n", ret);
> +		return 0;
> +	}
> +
> +	if (stx.stx_attributes & STATX_ATTR_WRITE_ATOMIC &&
> +	    stx.stx_atomic_write_unit_min > 0) {
> +		awu_min = stx.stx_atomic_write_unit_min;
> +		awu_max = stx.stx_atomic_write_unit_max;
> +		return 1;
> +	}
> +
> +	fprintf(stderr, "main: IO Stack does not support "
> +			"atomic writes, disabling!\n");
> +	return 0;
> +}
> +
>  #ifdef HAVE_COPY_FILE_RANGE
>  int
>  test_copy_range(void)
> @@ -2356,6 +2443,12 @@ have_op:
>  			goto out;
>  		}
>  		break;
> +	case OP_WRITE_ATOMIC:
> +		if (!do_atomic_writes) {
> +			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
> +			goto out;
> +		}
> +		break;
>  	}
>  
>  	switch (op) {
> @@ -2385,6 +2478,11 @@ have_op:
>  			dowrite(offset, size, 0);
>  		break;
>  
> +	case OP_WRITE_ATOMIC:
> +		TRIM_OFF_LEN(offset, size, maxfilelen);
> +		dowrite(offset, size, RWF_ATOMIC);
> +		break;
> +
>  	case OP_MAPREAD:
>  		TRIM_OFF_LEN(offset, size, file_size);
>  		domapread(offset, size);
> @@ -2511,13 +2609,14 @@ void
>  usage(void)
>  {
>  	fprintf(stdout, "usage: %s",
> -		"fsx [-dfhknqxyzBEFHIJKLORWXZ0]\n\
> +		"fsx [-adfhknqxyzBEFHIJKLORWXZ0]\n\
>  	   [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid]\n\
>  	   [-l flen] [-m start:end] [-o oplen] [-p progressinterval]\n\
>  	   [-r readbdy] [-s style] [-t truncbdy] [-w writebdy]\n\
>  	   [-A|-U] [-D startingop] [-N numops] [-P dirpath] [-S seed]\n\
>  	   [--replay-ops=opsfile] [--record-ops[=opsfile]] [--duration=seconds]\n\
>  	   ... fname\n\
> +	-a: disable atomic writes\n\
>  	-b opnum: beginning operation number (default 1)\n\
>  	-c P: 1 in P chance of file close+open at each op (default infinity)\n\
>  	-d: debug output for all operations\n\
> @@ -3059,9 +3158,13 @@ main(int argc, char **argv)
>  	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
>  
>  	while ((ch = getopt_long(argc, argv,
> -				 "0b:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> +				 "0ab:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
>  				 longopts, NULL)) != EOF)
>  		switch (ch) {
> +		case 'a':
> +			prt("main(): Atomic writes disabled\n");
> +			do_atomic_writes = 0;
> +			break;
>  		case 'b':
>  			simulatedopcount = getnum(optarg, &endp);
>  			if (!quiet)
> @@ -3475,6 +3578,8 @@ main(int argc, char **argv)
>  		exchange_range_calls = test_exchange_range();
>  	if (dontcache_io)
>  		dontcache_io = test_dontcache_io();
> +	if (do_atomic_writes)
> +		do_atomic_writes = test_atomic_writes();
>  
>  	while (keep_running())
>  		if (!test())
> -- 
> 2.49.0
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Ojaswin Mujoo 4 months, 1 week ago
On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > Implement atomic write support to help fuzz atomic writes
> > with fsx.
> > 
> > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > ---
> 
> Hmm... this patch causes more regular fsx test cases fail on old kernel,
> (e.g. g/760, g/617, g/263 ...) except set "FSX_AVOID=-a". Is there a way
> to disable "atomic write" automatically if it's not supported by current
> system?

Hi Zorro, 
Sorry for being late, I've been on vacation this week.

Yes, by design we should automatically disable atomic writes when they
are not supported by the stack, but it seems the issue is that when we do
disable them we print some extra messages to stdout/stderr, which show up
in the xfstests output and cause the failure.

I can think of 2 ways around this:

1. Don't print anything and just silently drop atomic writes if stack
doesn't support them.

2. Make atomic writes a default-off instead of a default-on feature, but
this loses a bit of coverage, as existing tests won't get atomic write
testing free of cost any more.

Regards,
ojaswin

> Thanks,
> Zorro
> 
> >  ltp/fsx.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 110 insertions(+), 5 deletions(-)
> > 
> > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > index 163b9453..bdb87ca9 100644
> > --- a/ltp/fsx.c
> > +++ b/ltp/fsx.c
> > @@ -40,6 +40,7 @@
> >  #include <liburing.h>
> >  #endif
> >  #include <sys/syscall.h>
> > +#include "statx.h"
> >  
> >  #ifndef MAP_FILE
> >  # define MAP_FILE 0
> > @@ -49,6 +50,10 @@
> >  #define RWF_DONTCACHE	0x80
> >  #endif
> >  
> > +#ifndef RWF_ATOMIC
> > +#define RWF_ATOMIC	0x40
> > +#endif
> > +
> >  #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
> >  
> >  /* Operation flags (bitmask) */
> > @@ -110,6 +115,7 @@ enum {
> >  	OP_READ_DONTCACHE,
> >  	OP_WRITE,
> >  	OP_WRITE_DONTCACHE,
> > +	OP_WRITE_ATOMIC,
> >  	OP_MAPREAD,
> >  	OP_MAPWRITE,
> >  	OP_MAX_LITE,
> > @@ -200,6 +206,11 @@ int	uring = 0;
> >  int	mark_nr = 0;
> >  int	dontcache_io = 1;
> >  int	hugepages = 0;                  /* -h flag */
> > +int	do_atomic_writes = 1;		/* -a flag disables */
> > +
> > +/* User for atomic writes */
> > +int awu_min = 0;
> > +int awu_max = 0;
> >  
> >  /* Stores info needed to periodically collapse hugepages */
> >  struct hugepages_collapse_info {
> > @@ -288,6 +299,7 @@ static const char *op_names[] = {
> >  	[OP_READ_DONTCACHE] = "read_dontcache",
> >  	[OP_WRITE] = "write",
> >  	[OP_WRITE_DONTCACHE] = "write_dontcache",
> > +	[OP_WRITE_ATOMIC] = "write_atomic",
> >  	[OP_MAPREAD] = "mapread",
> >  	[OP_MAPWRITE] = "mapwrite",
> >  	[OP_TRUNCATE] = "truncate",
> > @@ -422,6 +434,7 @@ logdump(void)
> >  				prt("\t***RRRR***");
> >  			break;
> >  		case OP_WRITE_DONTCACHE:
> > +		case OP_WRITE_ATOMIC:
> >  		case OP_WRITE:
> >  			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
> >  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
> > @@ -1073,6 +1086,25 @@ update_file_size(unsigned offset, unsigned size)
> >  	file_size = offset + size;
> >  }
> >  
> > +static int is_power_of_2(unsigned n) {
> > +	return ((n & (n - 1)) == 0);
> > +}
> > +
> > +/*
> > + * Round down n to nearest power of 2.
> > + * If n is already a power of 2, return n;
> > + */
> > +static int rounddown_pow_of_2(int n) {
> > +	int i = 0;
> > +
> > +	if (is_power_of_2(n))
> > +		return n;
> > +
> > +	for (; (1 << i) < n; i++);
> > +
> > +	return 1 << (i - 1);
> > +}
> > +
> >  void
> >  dowrite(unsigned offset, unsigned size, int flags)
> >  {
> > @@ -1081,6 +1113,27 @@ dowrite(unsigned offset, unsigned size, int flags)
> >  	offset -= offset % writebdy;
> >  	if (o_direct)
> >  		size -= size % writebdy;
> > +	if (flags & RWF_ATOMIC) {
> > +		/* atomic write len must be between awu_min and awu_max */
> > +		if (size < awu_min)
> > +			size = awu_min;
> > +		if (size > awu_max)
> > +			size = awu_max;
> > +
> > +		/* atomic writes need power-of-2 sizes */
> > +		size = rounddown_pow_of_2(size);
> > +
> > +		/* atomic writes need naturally aligned offsets */
> > +		offset -= offset % size;
> > +
> > +		/* Skip the write if we are crossing max filesize */
> > +		if ((offset + size) > maxfilelen) {
> > +			if (!quiet && testcalls > simulatedopcount)
> > +				prt("skipping atomic write past maxfilelen\n");
> > +			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
> > +			return;
> > +		}
> > +	}
> >  	if (size == 0) {
> >  		if (!quiet && testcalls > simulatedopcount && !o_direct)
> >  			prt("skipping zero size write\n");
> > @@ -1088,7 +1141,10 @@ dowrite(unsigned offset, unsigned size, int flags)
> >  		return;
> >  	}
> >  
> > -	log4(OP_WRITE, offset, size, FL_NONE);
> > +	if (flags & RWF_ATOMIC)
> > +		log4(OP_WRITE_ATOMIC, offset, size, FL_NONE);
> > +	else
> > +		log4(OP_WRITE, offset, size, FL_NONE);
> >  
> >  	gendata(original_buf, good_buf, offset, size);
> >  	if (offset + size > file_size) {
> > @@ -1108,8 +1164,9 @@ dowrite(unsigned offset, unsigned size, int flags)
> >  		       (monitorstart == -1 ||
> >  			(offset + size > monitorstart &&
> >  			(monitorend == -1 || offset <= monitorend))))))
> > -		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d\n", testcalls,
> > -		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0);
> > +		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d atomic_wr=%d\n", testcalls,
> > +		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0,
> > +		    (flags & RWF_ATOMIC) != 0);
> >  	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
> >  	if (iret != size) {
> >  		if (iret == -1)
> > @@ -1785,6 +1842,36 @@ do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
> >  }
> >  #endif
> >  
> > +int test_atomic_writes(void) {
> > +	int ret;
> > +	struct statx stx;
> > +
> > +	if (o_direct != O_DIRECT) {
> > +		fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > +				"disabling!\n");
> > +		return 0;
> > +	}
> > +
> > +	ret = xfstests_statx(AT_FDCWD, fname, 0, STATX_WRITE_ATOMIC, &stx);
> > +	if (ret < 0) {
> > +		fprintf(stderr, "main: Statx failed with %d."
> > +			" Failed to determine atomic write limits, "
> > +			" disabling!\n", ret);
> > +		return 0;
> > +	}
> > +
> > +	if (stx.stx_attributes & STATX_ATTR_WRITE_ATOMIC &&
> > +	    stx.stx_atomic_write_unit_min > 0) {
> > +		awu_min = stx.stx_atomic_write_unit_min;
> > +		awu_max = stx.stx_atomic_write_unit_max;
> > +		return 1;
> > +	}
> > +
> > +	fprintf(stderr, "main: IO Stack does not support "
> > +			"atomic writes, disabling!\n");
> > +	return 0;
> > +}
> > +
> >  #ifdef HAVE_COPY_FILE_RANGE
> >  int
> >  test_copy_range(void)
> > @@ -2356,6 +2443,12 @@ have_op:
> >  			goto out;
> >  		}
> >  		break;
> > +	case OP_WRITE_ATOMIC:
> > +		if (!do_atomic_writes) {
> > +			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
> > +			goto out;
> > +		}
> > +		break;
> >  	}
> >  
> >  	switch (op) {
> > @@ -2385,6 +2478,11 @@ have_op:
> >  			dowrite(offset, size, 0);
> >  		break;
> >  
> > +	case OP_WRITE_ATOMIC:
> > +		TRIM_OFF_LEN(offset, size, maxfilelen);
> > +		dowrite(offset, size, RWF_ATOMIC);
> > +		break;
> > +
> >  	case OP_MAPREAD:
> >  		TRIM_OFF_LEN(offset, size, file_size);
> >  		domapread(offset, size);
> > @@ -2511,13 +2609,14 @@ void
> >  usage(void)
> >  {
> >  	fprintf(stdout, "usage: %s",
> > -		"fsx [-dfhknqxyzBEFHIJKLORWXZ0]\n\
> > +		"fsx [-adfhknqxyzBEFHIJKLORWXZ0]\n\
> >  	   [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid]\n\
> >  	   [-l flen] [-m start:end] [-o oplen] [-p progressinterval]\n\
> >  	   [-r readbdy] [-s style] [-t truncbdy] [-w writebdy]\n\
> >  	   [-A|-U] [-D startingop] [-N numops] [-P dirpath] [-S seed]\n\
> >  	   [--replay-ops=opsfile] [--record-ops[=opsfile]] [--duration=seconds]\n\
> >  	   ... fname\n\
> > +	-a: disable atomic writes\n\
> >  	-b opnum: beginning operation number (default 1)\n\
> >  	-c P: 1 in P chance of file close+open at each op (default infinity)\n\
> >  	-d: debug output for all operations\n\
> > @@ -3059,9 +3158,13 @@ main(int argc, char **argv)
> >  	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
> >  
> >  	while ((ch = getopt_long(argc, argv,
> > -				 "0b:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> > +				 "0ab:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> >  				 longopts, NULL)) != EOF)
> >  		switch (ch) {
> > +		case 'a':
> > +			prt("main(): Atomic writes disabled\n");
> > +			do_atomic_writes = 0;
> > +			break;
> >  		case 'b':
> >  			simulatedopcount = getnum(optarg, &endp);
> >  			if (!quiet)
> > @@ -3475,6 +3578,8 @@ main(int argc, char **argv)
> >  		exchange_range_calls = test_exchange_range();
> >  	if (dontcache_io)
> >  		dontcache_io = test_dontcache_io();
> > +	if (do_atomic_writes)
> > +		do_atomic_writes = test_atomic_writes();
> >  
> >  	while (keep_running())
> >  		if (!test())
> > -- 
> > 2.49.0
> > 
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Zorro Lang 4 months, 1 week ago
On Thu, Oct 02, 2025 at 11:26:45PM +0530, Ojaswin Mujoo wrote:
> On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> > On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > > Implement atomic write support to help fuzz atomic writes
> > > with fsx.
> > > 
> > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > > ---
> > 
> > Hmm... this patch causes more regular fsx test cases fail on old kernel,
> > (e.g. g/760, g/617, g/263 ...) except set "FSX_AVOID=-a". Is there a way
> > to disable "atomic write" automatically if it's not supported by current
> > system?
> 
> Hi Zorro, 
> Sorry for being late, I've been on vacation this week.
> 
> Yes so by design we should be automatically disabling atomic writes when
> they are not supported by the stack but seems like the issue is that
> when we do disable it we print some extra messages to stdout/err which
> show up in the xfstests output causing failure.
> 
> I can think of 2 ways around this:
> 
> 1. Don't print anything and just silently drop atomic writes if stack
> doesn't support them.
> 
> 2. Make atomic writes as a default off instead of default on feature but
> his loses a bit of coverage as existing tests wont get atomic write
> testing free of cost any more.

Hi Ojaswin,

Please have a nice vacation :)

It's not the "extra messages" that cause the failure; those "quiet" failures
can be fixed by:

diff --git a/ltp/fsx.c b/ltp/fsx.c
index bdb87ca90..0a035b37b 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -1847,8 +1847,9 @@ int test_atomic_writes(void) {
        struct statx stx;
 
        if (o_direct != O_DIRECT) {
-               fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
-                               "disabling!\n");
+               if (!quiet)
+                       fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
+                                       "disabling!\n");
                return 0;
        }
 
@@ -1867,8 +1868,9 @@ int test_atomic_writes(void) {
                return 1;
        }
 
-       fprintf(stderr, "main: IO Stack does not support "
-                       "atomic writes, disabling!\n");
+       if (!quiet)
+               fprintf(stderr, "main: IO Stack does not support "
+                               "atomic writes, disabling!\n");
        return 0;
 }

But I hit more read or write failures, e.g. [1]; this failure can't be
reproduced with FSX_AVOID=-a. Is it an atomic write bug or an unexpected
test failure?

Thanks,
Zorro

[1]
--- /dev/fd/63	2025-09-26 19:20:35.426617312 -0400
+++ generic/263.out.bad	2025-09-26 19:20:35.116617862 -0400
@@ -1,3 +1,337 @@
 QA output created by 263
 fsx -N 10000 -o 8192 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z
-fsx -N 10000 -o 128000 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z
+Seed set to 1
+main: filesystem does not support exchange range, disabling!
+skipping zero length punch hole
+truncating to largest ever: 0x50e00
+fallocating to largest ever: 0x74d54
+fallocating to largest ever: 0x759e7
+zero_range to largest ever: 0x78242
+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
+OFFSET      GOOD    BAD     RANGE
+0x1c000     0x0000  0xcdcd  0x0
+operation# (mod 256) for the bad data may be 205
+0x1c001     0x0000  0xcdcd  0x1
+operation# (mod 256) for the bad data may be 205
+0x1c002     0x0000  0xcdcd  0x2
+operation# (mod 256) for the bad data may be 205
+0x1c003     0x0000  0xcdcd  0x3
+operation# (mod 256) for the bad data may be 205
+0x1c004     0x0000  0xcdcd  0x4
+operation# (mod 256) for the bad data may be 205
+0x1c005     0x0000  0xcdcd  0x5
+operation# (mod 256) for the bad data may be 205
+0x1c006     0x0000  0xcdcd  0x6
+operation# (mod 256) for the bad data may be 205
+0x1c007     0x0000  0xcdcd  0x7
+operation# (mod 256) for the bad data may be 205
+0x1c008     0x0000  0xcdcd  0x8
+operation# (mod 256) for the bad data may be 205
+0x1c009     0x0000  0xcdcd  0x9
+operation# (mod 256) for the bad data may be 205
+0x1c00a     0x0000  0xcdcd  0xa
+operation# (mod 256) for the bad data may be 205
+0x1c00b     0x0000  0xcdcd  0xb
+operation# (mod 256) for the bad data may be 205
+0x1c00c     0x0000  0xcdcd  0xc
+operation# (mod 256) for the bad data may be 205
+0x1c00d     0x0000  0xcdcd  0xd
+operation# (mod 256) for the bad data may be 205
+0x1c00e     0x0000  0xcdcd  0xe
+operation# (mod 256) for the bad data may be 205
+0x1c00f     0x0000  0xcdcd  0xf
+operation# (mod 256) for the bad data may be 205
+LOG DUMP (290 total operations):
+1(  1 mod 256): SKIPPED (no operation)
+2(  2 mod 256): WRITE    0x20400 thru 0x209ff	(0x600 bytes) HOLE	***WWWW
+3(  3 mod 256): MAPWRITE 0x69200 thru 0x6968f	(0x490 bytes)
+4(  4 mod 256): SKIPPED (no operation)
+5(  5 mod 256): SKIPPED (no operation)
+6(  6 mod 256): WRITE    0x57000 thru 0x57fff	(0x1000 bytes)
+7(  7 mod 256): TRUNCATE DOWN	from 0x69690 to 0x50e00
+8(  8 mod 256): WRITE    0x3ac00 thru 0x3b5ff	(0xa00 bytes)
+9(  9 mod 256): SKIPPED (no operation)
+10( 10 mod 256): SKIPPED (no operation)
+11( 11 mod 256): FALLOC   0x13346 thru 0x13898	(0x552 bytes) INTERIOR
+12( 12 mod 256): SKIPPED (no operation)
+13( 13 mod 256): TRUNCATE DOWN	from 0x50e00 to 0x21600
+14( 14 mod 256): SKIPPED (no operation)
+15( 15 mod 256): WRITE    0x4de00 thru 0x4edff	(0x1000 bytes) HOLE
+16( 16 mod 256): MAPREAD  0x34000 thru 0x35b18	(0x1b19 bytes)
+17( 17 mod 256): INSERT 0x18000 thru 0x18fff	(0x1000 bytes)
+18( 18 mod 256): SKIPPED (no operation)
+19( 19 mod 256): ZERO     0x423c thru 0x4d76	(0xb3b bytes)
+20( 20 mod 256): SKIPPED (no operation)
+21( 21 mod 256): SKIPPED (no operation)
+22( 22 mod 256): MAPWRITE 0x15c00 thru 0x16fcc	(0x13cd bytes)
+23( 23 mod 256): COLLAPSE 0x39000 thru 0x39fff	(0x1000 bytes)
+24( 24 mod 256): SKIPPED (no operation)
+25( 25 mod 256): FALLOC   0xc95d thru 0xd6bc	(0xd5f bytes) INTERIOR
+26( 26 mod 256): SKIPPED (no operation)
+27( 27 mod 256): DEDUPE 0x4c000 thru 0x4cfff	(0x1000 bytes) to 0x0 thru 0xfff
+28( 28 mod 256): SKIPPED (no operation)
+29( 29 mod 256): SKIPPED (no operation)
+30( 30 mod 256): WRITE    0x66000 thru 0x66fff	(0x1000 bytes) HOLE
+31( 31 mod 256): SKIPPED (no operation)
+32( 32 mod 256): WRITE    0x66600 thru 0x679ff	(0x1400 bytes) EXTEND
+33( 33 mod 256): FALLOC   0x1eb35 thru 0x1fd32	(0x11fd bytes) INTERIOR
+34( 34 mod 256): PUNCH    0x51265 thru 0x525e0	(0x137c bytes)
+35( 35 mod 256): MAPWRITE 0x63400 thru 0x63c56	(0x857 bytes)
+36( 36 mod 256): SKIPPED (no operation)
+37( 37 mod 256): SKIPPED (no operation)
+38( 38 mod 256): MAPREAD  0x4d000 thru 0x4e8b1	(0x18b2 bytes)
+39( 39 mod 256): ZERO     0x2b418 thru 0x2b96c	(0x555 bytes)
+40( 40 mod 256): READ     0x39000 thru 0x39fff	(0x1000 bytes)
+41( 41 mod 256): READ     0x2b000 thru 0x2bfff	(0x1000 bytes)
+42( 42 mod 256): WRITE    0x29000 thru 0x299ff	(0xa00 bytes)
+43( 43 mod 256): SKIPPED (no operation)
+44( 44 mod 256): SKIPPED (no operation)
+45( 45 mod 256): WRITE    0x17000 thru 0x17fff	(0x1000 bytes)
+46( 46 mod 256): WRITE    0x2f800 thru 0x313ff	(0x1c00 bytes)
+47( 47 mod 256): SKIPPED (no operation)
+48( 48 mod 256): PUNCH    0x4d698 thru 0x4f5b1	(0x1f1a bytes)
+49( 49 mod 256): SKIPPED (no operation)
+50( 50 mod 256): ZERO     0x4e2de thru 0x4fd40	(0x1a63 bytes)
+51( 51 mod 256): WRITE    0x7000 thru 0x7fff	(0x1000 bytes)
+52( 52 mod 256): ZERO     0x20849 thru 0x22574	(0x1d2c bytes)
+53( 53 mod 256): MAPREAD  0x18000 thru 0x180f5	(0xf6 bytes)
+54( 54 mod 256): WRITE    0x3d200 thru 0x3e7ff	(0x1600 bytes)
+55( 55 mod 256): SKIPPED (no operation)
+56( 56 mod 256): READ     0x4a000 thru 0x4afff	(0x1000 bytes)
+57( 57 mod 256): WRITE    0xc000 thru 0xcfff	(0x1000 bytes)
+58( 58 mod 256): WRITE    0x3dc00 thru 0x3f7ff	(0x1c00 bytes)
+59( 59 mod 256): TRUNCATE DOWN	from 0x67a00 to 0x50800
+60( 60 mod 256): TRUNCATE DOWN	from 0x50800 to 0x16a00	******WWWW
+61( 61 mod 256): WRITE    0x2aa00 thru 0x2c7ff	(0x1e00 bytes) HOLE	***WWWW
+62( 62 mod 256): WRITE    0x18000 thru 0x18fff	(0x1000 bytes)
+63( 63 mod 256): PUNCH    0x21b7f thru 0x223db	(0x85d bytes)
+64( 64 mod 256): SKIPPED (no operation)
+65( 65 mod 256): SKIPPED (no operation)
+66( 66 mod 256): ZERO     0x1a7f4 thru 0x1b577	(0xd84 bytes)
+67( 67 mod 256): PUNCH    0x2b961 thru 0x2bc67	(0x307 bytes)
+68( 68 mod 256): READ     0x18000 thru 0x18fff	(0x1000 bytes)
+69( 69 mod 256): WRITE    0x21200 thru 0x229ff	(0x1800 bytes)
+70( 70 mod 256): MAPWRITE 0x22400 thru 0x22b7a	(0x77b bytes)
+71( 71 mod 256): MAPWRITE 0x38e00 thru 0x3a8a6	(0x1aa7 bytes)
+72( 72 mod 256): FALLOC   0x74270 thru 0x74d54	(0xae4 bytes) EXTENDING
+73( 73 mod 256): WRITE    0x12000 thru 0x12fff	(0x1000 bytes)
+74( 74 mod 256): TRUNCATE DOWN	from 0x74d54 to 0x4e000
+75( 75 mod 256): SKIPPED (no operation)
+76( 76 mod 256): TRUNCATE UP	from 0x4e000 to 0x70e00
+77( 77 mod 256): COPY 0x3c000 thru 0x3cfff	(0x1000 bytes) to 0x43600 thru 0x445ff
+78( 78 mod 256): ZERO     0x6323d thru 0x64af5	(0x18b9 bytes)
+79( 79 mod 256): MAPREAD  0x51000 thru 0x5119f	(0x1a0 bytes)
+80( 80 mod 256): MAPREAD  0x6d000 thru 0x6e285	(0x1286 bytes)
+81( 81 mod 256): WRITE    0x9000 thru 0x9fff	(0x1000 bytes)
+82( 82 mod 256): FALLOC   0x19973 thru 0x1a711	(0xd9e bytes) INTERIOR
+83( 83 mod 256): COPY 0x20000 thru 0x20fff	(0x1000 bytes) to 0xe00 thru 0x1dff
+84( 84 mod 256): SKIPPED (no operation)
+85( 85 mod 256): SKIPPED (no operation)
+86( 86 mod 256): COPY 0xc000 thru 0xcfff	(0x1000 bytes) to 0x36e00 thru 0x37dff
+87( 87 mod 256): WRITE    0x18000 thru 0x18fff	(0x1000 bytes)
+88( 88 mod 256): FALLOC   0x57797 thru 0x5818a	(0x9f3 bytes) INTERIOR
+89( 89 mod 256): WRITE    0x70200 thru 0x71bff	(0x1a00 bytes) EXTEND
+90( 90 mod 256): PUNCH    0x22525 thru 0x2408f	(0x1b6b bytes)
+91( 91 mod 256): ZERO     0x710bf thru 0x729b6	(0x18f8 bytes)
+92( 92 mod 256): DEDUPE 0x14000 thru 0x14fff	(0x1000 bytes) to 0x8000 thru 0x8fff
+93( 93 mod 256): WRITE    0x6b800 thru 0x6bbff	(0x400 bytes)
+94( 94 mod 256): ZERO     0x5caa5 thru 0x5e066	(0x15c2 bytes)
+95( 95 mod 256): MAPWRITE 0x3d000 thru 0x3d7ef	(0x7f0 bytes)
+96( 96 mod 256): READ     0x30000 thru 0x30fff	(0x1000 bytes)
+97( 97 mod 256): PUNCH    0x1e513 thru 0x204d1	(0x1fbf bytes)
+98( 98 mod 256): FALLOC   0x67904 thru 0x690c7	(0x17c3 bytes) INTERIOR
+99( 99 mod 256): MAPREAD  0x44000 thru 0x44e08	(0xe09 bytes)
+100(100 mod 256): WRITE    0x1200 thru 0x1dff	(0xc00 bytes)
+101(101 mod 256): ZERO     0x6e649 thru 0x70552	(0x1f0a bytes)
+102(102 mod 256): SKIPPED (no operation)
+103(103 mod 256): SKIPPED (no operation)
+104(104 mod 256): WRITE    0x2b800 thru 0x2bfff	(0x800 bytes)
+105(105 mod 256): READ     0x64000 thru 0x64fff	(0x1000 bytes)
+106(106 mod 256): SKIPPED (no operation)
+107(107 mod 256): SKIPPED (no operation)
+108(108 mod 256): SKIPPED (no operation)
+109(109 mod 256): WRITE    0x6e400 thru 0x6ebff	(0x800 bytes)
+110(110 mod 256): PUNCH    0x504ed thru 0x51d71	(0x1885 bytes)
+111(111 mod 256): MAPWRITE 0x19800 thru 0x1ac57	(0x1458 bytes)
+112(112 mod 256): WRITE    0x51800 thru 0x521ff	(0xa00 bytes)
+113(113 mod 256): ZERO     0x205b9 thru 0x20d56	(0x79e bytes)
+114(114 mod 256): SKIPPED (no operation)
+115(115 mod 256): SKIPPED (no operation)
+116(116 mod 256): COLLAPSE 0x39000 thru 0x39fff	(0x1000 bytes)
+117(117 mod 256): SKIPPED (no operation)
+118(118 mod 256): MAPWRITE 0x74000 thru 0x75217	(0x1218 bytes)
+119(119 mod 256): COPY 0x6000 thru 0x6fff	(0x1000 bytes) to 0x29000 thru 0x29fff
+120(120 mod 256): SKIPPED (no operation)
+121(121 mod 256): COLLAPSE 0x1a000 thru 0x1afff	(0x1000 bytes)
+122(122 mod 256): SKIPPED (no operation)
+123(123 mod 256): WRITE    0x1b600 thru 0x1bfff	(0xa00 bytes)
+124(124 mod 256): INSERT 0x70000 thru 0x70fff	(0x1000 bytes)
+125(125 mod 256): FALLOC   0x21210 thru 0x2293d	(0x172d bytes) INTERIOR
+126(126 mod 256): COPY 0x21000 thru 0x21fff	(0x1000 bytes) to 0x23000 thru 0x23fff
+127(127 mod 256): MAPWRITE 0x33200 thru 0x342a3	(0x10a4 bytes)
+128(128 mod 256): TRUNCATE DOWN	from 0x75218 to 0x49c00
+129(129 mod 256): WRITE    0x11200 thru 0x12bff	(0x1a00 bytes)
+130(130 mod 256): TRUNCATE DOWN	from 0x49c00 to 0x32800
+131(131 mod 256): DEDUPE 0xa000 thru 0xafff	(0x1000 bytes) to 0x2b000 thru 0x2bfff
+132(132 mod 256): SKIPPED (no operation)
+133(133 mod 256): FALLOC   0x56e33 thru 0x57d67	(0xf34 bytes) PAST_EOF
+134(134 mod 256): MAPREAD  0xc000 thru 0xdedd	(0x1ede bytes)
+135(135 mod 256): READ     0x21000 thru 0x21fff	(0x1000 bytes)
+136(136 mod 256): FALLOC   0x34071 thru 0x34b27	(0xab6 bytes) EXTENDING
+137(137 mod 256): ZERO     0x4db33 thru 0x4f0da	(0x15a8 bytes)
+138(138 mod 256): FALLOC   0xf70b thru 0x10eda	(0x17cf bytes) INTERIOR
+139(139 mod 256): PUNCH    0xdba7 thru 0xf1c7	(0x1621 bytes)
+140(140 mod 256): SKIPPED (no operation)
+141(141 mod 256): MAPWRITE 0x25800 thru 0x27422	(0x1c23 bytes)
+142(142 mod 256): READ     0x1f000 thru 0x1ffff	(0x1000 bytes)
+143(143 mod 256): TRUNCATE UP	from 0x34b27 to 0x45e00
+144(144 mod 256): PUNCH    0x3ba91 thru 0x3ccf2	(0x1262 bytes)
+145(145 mod 256): COLLAPSE 0x16000 thru 0x16fff	(0x1000 bytes)
+146(146 mod 256): COLLAPSE 0x17000 thru 0x17fff	(0x1000 bytes)
+147(147 mod 256): WRITE    0x2b200 thru 0x2c7ff	(0x1600 bytes)
+148(148 mod 256): ZERO     0x285f2 thru 0x292fa	(0xd09 bytes)
+149(149 mod 256): CLONE 0x9000 thru 0x9fff	(0x1000 bytes) to 0x2000 thru 0x2fff
+150(150 mod 256): ZERO     0x7030e thru 0x71879	(0x156c bytes)
+151(151 mod 256): SKIPPED (no operation)
+152(152 mod 256): WRITE    0x6000 thru 0x6fff	(0x1000 bytes)
+153(153 mod 256): SKIPPED (no operation)
+154(154 mod 256): FALLOC   0x5669 thru 0x6fcb	(0x1962 bytes) INTERIOR
+155(155 mod 256): WRITE    0x76600 thru 0x76bff	(0x600 bytes) HOLE
+156(156 mod 256): ZERO     0x4c77 thru 0x5f94	(0x131e bytes)
+157(157 mod 256): MAPREAD  0x50000 thru 0x512c3	(0x12c4 bytes)
+158(158 mod 256): DEDUPE 0x5000 thru 0x5fff	(0x1000 bytes) to 0x62000 thru 0x62fff
+159(159 mod 256): COLLAPSE 0x4c000 thru 0x4cfff	(0x1000 bytes)
+160(160 mod 256): FALLOC   0x6dbe1 thru 0x6f58f	(0x19ae bytes) INTERIOR
+161(161 mod 256): TRUNCATE DOWN	from 0x75c00 to 0x4a00	******WWWW
+162(162 mod 256): SKIPPED (no operation)
+163(163 mod 256): ZERO     0x2034f thru 0x21e10	(0x1ac2 bytes)
+164(164 mod 256): MAPREAD  0x1a000 thru 0x1bd44	(0x1d45 bytes)
+165(165 mod 256): PUNCH    0x104d9 thru 0x10d5c	(0x884 bytes)
+166(166 mod 256): TRUNCATE UP	from 0x21e11 to 0x4e200
+167(167 mod 256): WRITE    0x75800 thru 0x759ff	(0x200 bytes) HOLE
+168(168 mod 256): MAPWRITE 0x36c00 thru 0x389ee	(0x1def bytes)
+169(169 mod 256): WRITE    0x6da00 thru 0x6edff	(0x1400 bytes)
+170(170 mod 256): ZERO     0x592e2 thru 0x5a0af	(0xdce bytes)
+171(171 mod 256): SKIPPED (no operation)
+172(172 mod 256): SKIPPED (no operation)
+173(173 mod 256): SKIPPED (no operation)
+174(174 mod 256): WRITE    0x30000 thru 0x30fff	(0x1000 bytes)
+175(175 mod 256): SKIPPED (no operation)
+176(176 mod 256): ZERO     0x41a29 thru 0x42580	(0xb58 bytes)
+177(177 mod 256): SKIPPED (no operation)
+178(178 mod 256): SKIPPED (no operation)
+179(179 mod 256): MAPWRITE 0x47600 thru 0x4901a	(0x1a1b bytes)
+180(180 mod 256): ZERO     0x6acd0 thru 0x6b5f3	(0x924 bytes)
+181(181 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes)
+182(182 mod 256): SKIPPED (no operation)
+183(183 mod 256): SKIPPED (no operation)
+184(184 mod 256): MAPREAD  0x38000 thru 0x3854c	(0x54d bytes)
+185(185 mod 256): READ     0x7000 thru 0x7fff	(0x1000 bytes)
+186(186 mod 256): SKIPPED (no operation)
+187(187 mod 256): SKIPPED (no operation)
+188(188 mod 256): MAPREAD  0x18000 thru 0x1805b	(0x5c bytes)
+189(189 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x49800
+190(190 mod 256): ZERO     0x51bfe thru 0x5369f	(0x1aa2 bytes)
+191(191 mod 256): FALLOC   0x74f96 thru 0x759e7	(0xa51 bytes) EXTENDING
+192(192 mod 256): WRITE    0x35000 thru 0x35fff	(0x1000 bytes)
+193(193 mod 256): WRITE    0x45000 thru 0x451ff	(0x200 bytes)
+194(194 mod 256): SKIPPED (no operation)
+195(195 mod 256): MAPREAD  0x57000 thru 0x58600	(0x1601 bytes)
+196(196 mod 256): SKIPPED (no operation)
+197(197 mod 256): FALLOC   0x50aa4 thru 0x5181a	(0xd76 bytes) INTERIOR
+198(198 mod 256): WRITE    0x2ec00 thru 0x307ff	(0x1c00 bytes)
+199(199 mod 256): TRUNCATE DOWN	from 0x759e7 to 0x32600
+200(200 mod 256): DEDUPE 0x1b000 thru 0x1bfff	(0x1000 bytes) to 0x18000 thru 0x18fff
+201(201 mod 256): WRITE    0x38200 thru 0x389ff	(0x800 bytes) HOLE
+202(202 mod 256): READ     0x30000 thru 0x30fff	(0x1000 bytes)
+203(203 mod 256): WRITE    0x3dc00 thru 0x3f5ff	(0x1a00 bytes) HOLE
+204(204 mod 256): SKIPPED (no operation)
+205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
+206(206 mod 256): WRITE    0x46800 thru 0x475ff	(0xe00 bytes) HOLE
+207(207 mod 256): SKIPPED (no operation)
+208(208 mod 256): MAPREAD  0x1000 thru 0x1658	(0x659 bytes)
+209(209 mod 256): SKIPPED (no operation)
+210(210 mod 256): SKIPPED (no operation)
+211(211 mod 256): TRUNCATE DOWN	from 0x47600 to 0x22200
+212(212 mod 256): SKIPPED (no operation)
+213(213 mod 256): FALLOC   0x69700 thru 0x6b675	(0x1f75 bytes) EXTENDING
+214(214 mod 256): TRUNCATE DOWN	from 0x6b675 to 0x3f400
+215(215 mod 256): READ     0x24000 thru 0x24fff	(0x1000 bytes)
+216(216 mod 256): WRITE    0xea00 thru 0x105ff	(0x1c00 bytes)
+217(217 mod 256): FALLOC   0xc67b thru 0xde4b	(0x17d0 bytes) INTERIOR
+218(218 mod 256): SKIPPED (no operation)
+219(219 mod 256): MAPWRITE 0x3e200 thru 0x3f9fe	(0x17ff bytes)
+220(220 mod 256): TRUNCATE DOWN	from 0x3f9ff to 0xe200	******WWWW
+221(221 mod 256): PUNCH    0x7e84 thru 0x92c8	(0x1445 bytes)
+222(222 mod 256): FALLOC   0x1e61b thru 0x1e747	(0x12c bytes) EXTENDING
+223(223 mod 256): INSERT 0xd000 thru 0xdfff	(0x1000 bytes)
+224(224 mod 256): TRUNCATE DOWN	from 0x1f747 to 0xe000	******WWWW
+225(225 mod 256): CLONE 0xa000 thru 0xafff	(0x1000 bytes) to 0x2f000 thru 0x2ffff
+226(226 mod 256): CLONE 0x26000 thru 0x26fff	(0x1000 bytes) to 0x1b000 thru 0x1bfff
+227(227 mod 256): SKIPPED (no operation)
+228(228 mod 256): ZERO     0x2c6f7 thru 0x2e4be	(0x1dc8 bytes)
+229(229 mod 256): WRITE    0x6d400 thru 0x6ddff	(0xa00 bytes) HOLE
+230(230 mod 256): ZERO     0x1873 thru 0x1c7a	(0x408 bytes)
+231(231 mod 256): MAPWRITE 0x44400 thru 0x447d5	(0x3d6 bytes)
+232(232 mod 256): WRITE    0x4b000 thru 0x4bfff	(0x1000 bytes)
+233(233 mod 256): ZERO     0x6ed0 thru 0x82c8	(0x13f9 bytes)
+234(234 mod 256): SKIPPED (no operation)
+235(235 mod 256): TRUNCATE DOWN	from 0x6de00 to 0x4c600
+236(236 mod 256): WRITE    0x19a00 thru 0x1a3ff	(0xa00 bytes)
+237(237 mod 256): READ     0x38000 thru 0x38fff	(0x1000 bytes)
+238(238 mod 256): TRUNCATE UP	from 0x4c600 to 0x52a00
+239(239 mod 256): WRITE    0x77a00 thru 0x78dff	(0x1400 bytes) HOLE
+240(240 mod 256): TRUNCATE DOWN	from 0x78e00 to 0x5f600
+241(241 mod 256): PUNCH    0x30dcd thru 0x32305	(0x1539 bytes)
+242(242 mod 256): SKIPPED (no operation)
+243(243 mod 256): ZERO     0x76374 thru 0x78241	(0x1ece bytes)
+244(244 mod 256): SKIPPED (no operation)
+245(245 mod 256): FALLOC   0x695c5 thru 0x6a2e6	(0xd21 bytes) INTERIOR
+246(246 mod 256): MAPWRITE 0x5ac00 thru 0x5b185	(0x586 bytes)
+247(247 mod 256): WRITE    0x31200 thru 0x313ff	(0x200 bytes)
+248(248 mod 256): SKIPPED (no operation)
+249(249 mod 256): TRUNCATE DOWN	from 0x78242 to 0xf200	******WWWW
+250(250 mod 256): FALLOC   0x65000 thru 0x66f26	(0x1f26 bytes) PAST_EOF
+251(251 mod 256): WRITE    0x45400 thru 0x467ff	(0x1400 bytes) HOLE	***WWWW
+252(252 mod 256): SKIPPED (no operation)
+253(253 mod 256): SKIPPED (no operation)
+254(254 mod 256): MAPWRITE 0x4be00 thru 0x4daee	(0x1cef bytes)
+255(255 mod 256): MAPREAD  0xc000 thru 0xcae9	(0xaea bytes)
+256(  0 mod 256): READ     0x3e000 thru 0x3efff	(0x1000 bytes)
+257(  1 mod 256): SKIPPED (no operation)
+258(  2 mod 256): INSERT 0x45000 thru 0x45fff	(0x1000 bytes)
+259(  3 mod 256): ZERO     0x1d7d5 thru 0x1f399	(0x1bc5 bytes)	******ZZZZ
+260(  4 mod 256): TRUNCATE DOWN	from 0x4eaef to 0x11200	******WWWW
+261(  5 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes) HOLE	***WWWW
+262(  6 mod 256): WRITE    0x2200 thru 0x31ff	(0x1000 bytes)
+263(  7 mod 256): WRITE    0x15000 thru 0x15fff	(0x1000 bytes)
+264(  8 mod 256): WRITE    0x2e400 thru 0x2e7ff	(0x400 bytes)
+265(  9 mod 256): COPY 0xd000 thru 0xdfff	(0x1000 bytes) to 0x1d800 thru 0x1e7ff	******EEEE
+266( 10 mod 256): CLONE 0x2a000 thru 0x2afff	(0x1000 bytes) to 0x21000 thru 0x21fff
+267( 11 mod 256): MAPREAD  0x31000 thru 0x31d0a	(0xd0b bytes)
+268( 12 mod 256): SKIPPED (no operation)
+269( 13 mod 256): WRITE    0x25000 thru 0x25fff	(0x1000 bytes)
+270( 14 mod 256): SKIPPED (no operation)
+271( 15 mod 256): MAPREAD  0x30000 thru 0x30577	(0x578 bytes)
+272( 16 mod 256): PUNCH    0x1a267 thru 0x1c093	(0x1e2d bytes)
+273( 17 mod 256): MAPREAD  0x1f000 thru 0x1f9c9	(0x9ca bytes)
+274( 18 mod 256): WRITE    0x40800 thru 0x40dff	(0x600 bytes)
+275( 19 mod 256): SKIPPED (no operation)
+276( 20 mod 256): MAPWRITE 0x20600 thru 0x22115	(0x1b16 bytes)
+277( 21 mod 256): MAPWRITE 0x3d000 thru 0x3ee5a	(0x1e5b bytes)
+278( 22 mod 256): WRITE    0x2ee00 thru 0x2efff	(0x200 bytes)
+279( 23 mod 256): WRITE    0x76200 thru 0x769ff	(0x800 bytes) HOLE
+280( 24 mod 256): SKIPPED (no operation)
+281( 25 mod 256): SKIPPED (no operation)
+282( 26 mod 256): MAPREAD  0xa000 thru 0xa5e7	(0x5e8 bytes)
+283( 27 mod 256): SKIPPED (no operation)
+284( 28 mod 256): SKIPPED (no operation)
+285( 29 mod 256): SKIPPED (no operation)
+286( 30 mod 256): SKIPPED (no operation)
+287( 31 mod 256): COLLAPSE 0x11000 thru 0x11fff	(0x1000 bytes)
+288( 32 mod 256): COPY 0x5d000 thru 0x5dfff	(0x1000 bytes) to 0x4ca00 thru 0x4d9ff
+289( 33 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x1e400
+290( 34 mod 256): MAPREAD  0x1c000 thru 0x1d802	(0x1803 bytes)	***RRRR***
+Log of operations saved to "/mnt/xfstests/test/junk.fsxops"; replay with --replay-ops
+Correct content saved for comparison
+(maybe hexdump "/mnt/xfstests/test/junk" vs "/mnt/xfstests/test/junk.fsxgood")

Thanks,
Zorro

> 
> Regards,
> ojaswin
> 
> > Thanks,
> > Zorro
> > 
> > >  ltp/fsx.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++---
> > >  1 file changed, 110 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > > index 163b9453..bdb87ca9 100644
> > > --- a/ltp/fsx.c
> > > +++ b/ltp/fsx.c
> > > @@ -40,6 +40,7 @@
> > >  #include <liburing.h>
> > >  #endif
> > >  #include <sys/syscall.h>
> > > +#include "statx.h"
> > >  
> > >  #ifndef MAP_FILE
> > >  # define MAP_FILE 0
> > > @@ -49,6 +50,10 @@
> > >  #define RWF_DONTCACHE	0x80
> > >  #endif
> > >  
> > > +#ifndef RWF_ATOMIC
> > > +#define RWF_ATOMIC	0x40
> > > +#endif
> > > +
> > >  #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
> > >  
> > >  /* Operation flags (bitmask) */
> > > @@ -110,6 +115,7 @@ enum {
> > >  	OP_READ_DONTCACHE,
> > >  	OP_WRITE,
> > >  	OP_WRITE_DONTCACHE,
> > > +	OP_WRITE_ATOMIC,
> > >  	OP_MAPREAD,
> > >  	OP_MAPWRITE,
> > >  	OP_MAX_LITE,
> > > @@ -200,6 +206,11 @@ int	uring = 0;
> > >  int	mark_nr = 0;
> > >  int	dontcache_io = 1;
> > >  int	hugepages = 0;                  /* -h flag */
> > > +int	do_atomic_writes = 1;		/* -a flag disables */
> > > +
> > > +/* User for atomic writes */
> > > +int awu_min = 0;
> > > +int awu_max = 0;
> > >  
> > >  /* Stores info needed to periodically collapse hugepages */
> > >  struct hugepages_collapse_info {
> > > @@ -288,6 +299,7 @@ static const char *op_names[] = {
> > >  	[OP_READ_DONTCACHE] = "read_dontcache",
> > >  	[OP_WRITE] = "write",
> > >  	[OP_WRITE_DONTCACHE] = "write_dontcache",
> > > +	[OP_WRITE_ATOMIC] = "write_atomic",
> > >  	[OP_MAPREAD] = "mapread",
> > >  	[OP_MAPWRITE] = "mapwrite",
> > >  	[OP_TRUNCATE] = "truncate",
> > > @@ -422,6 +434,7 @@ logdump(void)
> > >  				prt("\t***RRRR***");
> > >  			break;
> > >  		case OP_WRITE_DONTCACHE:
> > > +		case OP_WRITE_ATOMIC:
> > >  		case OP_WRITE:
> > >  			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
> > >  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
> > > @@ -1073,6 +1086,25 @@ update_file_size(unsigned offset, unsigned size)
> > >  	file_size = offset + size;
> > >  }
> > >  
> > > +static int is_power_of_2(unsigned n) {
> > > +	return ((n & (n - 1)) == 0);
> > > +}
> > > +
> > > +/*
> > > + * Round down n to nearest power of 2.
> > > + * If n is already a power of 2, return n;
> > > + */
> > > +static int rounddown_pow_of_2(int n) {
> > > +	int i = 0;
> > > +
> > > +	if (is_power_of_2(n))
> > > +		return n;
> > > +
> > > +	for (; (1 << i) < n; i++);
> > > +
> > > +	return 1 << (i - 1);
> > > +}
> > > +
> > >  void
> > >  dowrite(unsigned offset, unsigned size, int flags)
> > >  {
> > > @@ -1081,6 +1113,27 @@ dowrite(unsigned offset, unsigned size, int flags)
> > >  	offset -= offset % writebdy;
> > >  	if (o_direct)
> > >  		size -= size % writebdy;
> > > +	if (flags & RWF_ATOMIC) {
> > > +		/* atomic write len must be between awu_min and awu_max */
> > > +		if (size < awu_min)
> > > +			size = awu_min;
> > > +		if (size > awu_max)
> > > +			size = awu_max;
> > > +
> > > +		/* atomic writes need power-of-2 sizes */
> > > +		size = rounddown_pow_of_2(size);
> > > +
> > > +		/* atomic writes need naturally aligned offsets */
> > > +		offset -= offset % size;
> > > +
> > > +		/* Skip the write if we are crossing max filesize */
> > > +		if ((offset + size) > maxfilelen) {
> > > +			if (!quiet && testcalls > simulatedopcount)
> > > +				prt("skipping atomic write past maxfilelen\n");
> > > +			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
> > > +			return;
> > > +		}
> > > +	}
> > >  	if (size == 0) {
> > >  		if (!quiet && testcalls > simulatedopcount && !o_direct)
> > >  			prt("skipping zero size write\n");
> > > @@ -1088,7 +1141,10 @@ dowrite(unsigned offset, unsigned size, int flags)
> > >  		return;
> > >  	}
> > >  
> > > -	log4(OP_WRITE, offset, size, FL_NONE);
> > > +	if (flags & RWF_ATOMIC)
> > > +		log4(OP_WRITE_ATOMIC, offset, size, FL_NONE);
> > > +	else
> > > +		log4(OP_WRITE, offset, size, FL_NONE);
> > >  
> > >  	gendata(original_buf, good_buf, offset, size);
> > >  	if (offset + size > file_size) {
> > > @@ -1108,8 +1164,9 @@ dowrite(unsigned offset, unsigned size, int flags)
> > >  		       (monitorstart == -1 ||
> > >  			(offset + size > monitorstart &&
> > >  			(monitorend == -1 || offset <= monitorend))))))
> > > -		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d\n", testcalls,
> > > -		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0);
> > > +		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d atomic_wr=%d\n", testcalls,
> > > +		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0,
> > > +		    (flags & RWF_ATOMIC) != 0);
> > >  	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
> > >  	if (iret != size) {
> > >  		if (iret == -1)
> > > @@ -1785,6 +1842,36 @@ do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
> > >  }
> > >  #endif
> > >  
> > > +int test_atomic_writes(void) {
> > > +	int ret;
> > > +	struct statx stx;
> > > +
> > > +	if (o_direct != O_DIRECT) {
> > > +		fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > +				"disabling!\n");
> > > +		return 0;
> > > +	}
> > > +
> > > +	ret = xfstests_statx(AT_FDCWD, fname, 0, STATX_WRITE_ATOMIC, &stx);
> > > +	if (ret < 0) {
> > > +		fprintf(stderr, "main: Statx failed with %d."
> > > +			" Failed to determine atomic write limits, "
> > > +			" disabling!\n", ret);
> > > +		return 0;
> > > +	}
> > > +
> > > +	if (stx.stx_attributes & STATX_ATTR_WRITE_ATOMIC &&
> > > +	    stx.stx_atomic_write_unit_min > 0) {
> > > +		awu_min = stx.stx_atomic_write_unit_min;
> > > +		awu_max = stx.stx_atomic_write_unit_max;
> > > +		return 1;
> > > +	}
> > > +
> > > +	fprintf(stderr, "main: IO Stack does not support "
> > > +			"atomic writes, disabling!\n");
> > > +	return 0;
> > > +}
> > > +
> > >  #ifdef HAVE_COPY_FILE_RANGE
> > >  int
> > >  test_copy_range(void)
> > > @@ -2356,6 +2443,12 @@ have_op:
> > >  			goto out;
> > >  		}
> > >  		break;
> > > +	case OP_WRITE_ATOMIC:
> > > +		if (!do_atomic_writes) {
> > > +			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
> > > +			goto out;
> > > +		}
> > > +		break;
> > >  	}
> > >  
> > >  	switch (op) {
> > > @@ -2385,6 +2478,11 @@ have_op:
> > >  			dowrite(offset, size, 0);
> > >  		break;
> > >  
> > > +	case OP_WRITE_ATOMIC:
> > > +		TRIM_OFF_LEN(offset, size, maxfilelen);
> > > +		dowrite(offset, size, RWF_ATOMIC);
> > > +		break;
> > > +
> > >  	case OP_MAPREAD:
> > >  		TRIM_OFF_LEN(offset, size, file_size);
> > >  		domapread(offset, size);
> > > @@ -2511,13 +2609,14 @@ void
> > >  usage(void)
> > >  {
> > >  	fprintf(stdout, "usage: %s",
> > > -		"fsx [-dfhknqxyzBEFHIJKLORWXZ0]\n\
> > > +		"fsx [-adfhknqxyzBEFHIJKLORWXZ0]\n\
> > >  	   [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid]\n\
> > >  	   [-l flen] [-m start:end] [-o oplen] [-p progressinterval]\n\
> > >  	   [-r readbdy] [-s style] [-t truncbdy] [-w writebdy]\n\
> > >  	   [-A|-U] [-D startingop] [-N numops] [-P dirpath] [-S seed]\n\
> > >  	   [--replay-ops=opsfile] [--record-ops[=opsfile]] [--duration=seconds]\n\
> > >  	   ... fname\n\
> > > +	-a: disable atomic writes\n\
> > >  	-b opnum: beginning operation number (default 1)\n\
> > >  	-c P: 1 in P chance of file close+open at each op (default infinity)\n\
> > >  	-d: debug output for all operations\n\
> > > @@ -3059,9 +3158,13 @@ main(int argc, char **argv)
> > >  	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
> > >  
> > >  	while ((ch = getopt_long(argc, argv,
> > > -				 "0b:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> > > +				 "0ab:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> > >  				 longopts, NULL)) != EOF)
> > >  		switch (ch) {
> > > +		case 'a':
> > > +			prt("main(): Atomic writes disabled\n");
> > > +			do_atomic_writes = 0;
> > > +			break;
> > >  		case 'b':
> > >  			simulatedopcount = getnum(optarg, &endp);
> > >  			if (!quiet)
> > > @@ -3475,6 +3578,8 @@ main(int argc, char **argv)
> > >  		exchange_range_calls = test_exchange_range();
> > >  	if (dontcache_io)
> > >  		dontcache_io = test_dontcache_io();
> > > +	if (do_atomic_writes)
> > > +		do_atomic_writes = test_atomic_writes();
> > >  
> > >  	while (keep_running())
> > >  		if (!test())
> > > -- 
> > > 2.49.0
> > > 
> > 
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Ojaswin Mujoo 4 months ago
On Sat, Oct 04, 2025 at 01:19:32AM +0800, Zorro Lang wrote:
> On Thu, Oct 02, 2025 at 11:26:45PM +0530, Ojaswin Mujoo wrote:
> > On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> > > On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > > > Implement atomic write support to help fuzz atomic writes
> > > > with fsx.
> > > > 
> > > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > > > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > > > ---
> > > 
> > > Hmm... this patch causes more regular fsx test cases to fail on old kernels
> > > (e.g. g/760, g/617, g/263 ...) unless "FSX_AVOID=-a" is set. Is there a way
> > > to disable "atomic write" automatically if it's not supported by the current
> > > system?
> > 
> > Hi Zorro, 
> > Sorry for being late, I've been on vacation this week.
> > 
> > Yes so by design we should be automatically disabling atomic writes when
> > they are not supported by the stack but seems like the issue is that
> > when we do disable it we print some extra messages to stdout/err which
> > show up in the xfstests output causing failure.
> > 
> > I can think of 2 ways around this:
> > 
> > 1. Don't print anything and just silently drop atomic writes if stack
> > doesn't support them.
> > 
> > 2. Make atomic writes a default-off instead of a default-on feature, but
> > this loses a bit of coverage as existing tests won't get atomic write
> > testing free of cost any more.
> 
> Hi Ojaswin,
> 
> Please have a nice vacation :)
> 
> It's not only the "extra messages" that cause failures; those "quiet" failures can be fixed
> by:

Oh okay got it.

> 
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index bdb87ca90..0a035b37b 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
> @@ -1847,8 +1847,9 @@ int test_atomic_writes(void) {
>         struct statx stx;
>  
>         if (o_direct != O_DIRECT) {
> -               fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> -                               "disabling!\n");
> +               if (!quiet)
> +                       fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> +                                       "disabling!\n");
>                 return 0;
>         }
>  
> @@ -1867,8 +1868,9 @@ int test_atomic_writes(void) {
>                 return 1;
>         }
>  
> -       fprintf(stderr, "main: IO Stack does not support "
> -                       "atomic writes, disabling!\n");
> +       if (!quiet)
> +               fprintf(stderr, "main: IO Stack does not support "
> +                               "atomic writes, disabling!\n");
>         return 0;
>  }

> 
> But I hit more read or write failures, e.g. [1]. This failure can't be
> reproduced with FSX_AVOID=-a. Is it an atomic write bug or an unexpected
> test failure?
> 
> Thanks,
> Zorro
> 

<...>

> +244(244 mod 256): SKIPPED (no operation)
> +245(245 mod 256): FALLOC   0x695c5 thru 0x6a2e6	(0xd21 bytes) INTERIOR
> +246(246 mod 256): MAPWRITE 0x5ac00 thru 0x5b185	(0x586 bytes)
> +247(247 mod 256): WRITE    0x31200 thru 0x313ff	(0x200 bytes)
> +248(248 mod 256): SKIPPED (no operation)
> +249(249 mod 256): TRUNCATE DOWN	from 0x78242 to 0xf200	******WWWW
> +250(250 mod 256): FALLOC   0x65000 thru 0x66f26	(0x1f26 bytes) PAST_EOF
> +251(251 mod 256): WRITE    0x45400 thru 0x467ff	(0x1400 bytes) HOLE	***WWWW
> +252(252 mod 256): SKIPPED (no operation)
> +253(253 mod 256): SKIPPED (no operation)
> +254(254 mod 256): MAPWRITE 0x4be00 thru 0x4daee	(0x1cef bytes)
> +255(255 mod 256): MAPREAD  0xc000 thru 0xcae9	(0xaea bytes)
> +256(  0 mod 256): READ     0x3e000 thru 0x3efff	(0x1000 bytes)
> +257(  1 mod 256): SKIPPED (no operation)
> +258(  2 mod 256): INSERT 0x45000 thru 0x45fff	(0x1000 bytes)
> +259(  3 mod 256): ZERO     0x1d7d5 thru 0x1f399	(0x1bc5 bytes)	******ZZZZ
> +260(  4 mod 256): TRUNCATE DOWN	from 0x4eaef to 0x11200	******WWWW
> +261(  5 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes) HOLE	***WWWW
> +262(  6 mod 256): WRITE    0x2200 thru 0x31ff	(0x1000 bytes)
> +263(  7 mod 256): WRITE    0x15000 thru 0x15fff	(0x1000 bytes)
> +264(  8 mod 256): WRITE    0x2e400 thru 0x2e7ff	(0x400 bytes)
> +265(  9 mod 256): COPY 0xd000 thru 0xdfff	(0x1000 bytes) to 0x1d800 thru 0x1e7ff	******EEEE
> +266( 10 mod 256): CLONE 0x2a000 thru 0x2afff	(0x1000 bytes) to 0x21000 thru 0x21fff
> +267( 11 mod 256): MAPREAD  0x31000 thru 0x31d0a	(0xd0b bytes)
> +268( 12 mod 256): SKIPPED (no operation)
> +269( 13 mod 256): WRITE    0x25000 thru 0x25fff	(0x1000 bytes)
> +270( 14 mod 256): SKIPPED (no operation)
> +271( 15 mod 256): MAPREAD  0x30000 thru 0x30577	(0x578 bytes)
> +272( 16 mod 256): PUNCH    0x1a267 thru 0x1c093	(0x1e2d bytes)
> +273( 17 mod 256): MAPREAD  0x1f000 thru 0x1f9c9	(0x9ca bytes)
> +274( 18 mod 256): WRITE    0x40800 thru 0x40dff	(0x600 bytes)
> +275( 19 mod 256): SKIPPED (no operation)
> +276( 20 mod 256): MAPWRITE 0x20600 thru 0x22115	(0x1b16 bytes)
> +277( 21 mod 256): MAPWRITE 0x3d000 thru 0x3ee5a	(0x1e5b bytes)
> +278( 22 mod 256): WRITE    0x2ee00 thru 0x2efff	(0x200 bytes)
> +279( 23 mod 256): WRITE    0x76200 thru 0x769ff	(0x800 bytes) HOLE
> +280( 24 mod 256): SKIPPED (no operation)
> +281( 25 mod 256): SKIPPED (no operation)
> +282( 26 mod 256): MAPREAD  0xa000 thru 0xa5e7	(0x5e8 bytes)
> +283( 27 mod 256): SKIPPED (no operation)
> +284( 28 mod 256): SKIPPED (no operation)
> +285( 29 mod 256): SKIPPED (no operation)
> +286( 30 mod 256): SKIPPED (no operation)
> +287( 31 mod 256): COLLAPSE 0x11000 thru 0x11fff	(0x1000 bytes)
> +288( 32 mod 256): COPY 0x5d000 thru 0x5dfff	(0x1000 bytes) to 0x4ca00 thru 0x4d9ff
> +289( 33 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x1e400
> +290( 34 mod 256): MAPREAD  0x1c000 thru 0x1d802	(0x1803 bytes)	***RRRR***
> +Log of operations saved to "/mnt/xfstests/test/junk.fsxops"; replay with --replay-ops
> +Correct content saved for comparison
> +(maybe hexdump "/mnt/xfstests/test/junk" vs "/mnt/xfstests/test/junk.fsxgood")
> 
> Thanks,
> Zorro

Hi Zorro, just to confirm: is this on an older kernel that doesn't support
RWF_ATOMIC, or on a kernel that does support it?

Regards,
ojaswin
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Zorro Lang 4 months ago
On Sun, Oct 05, 2025 at 06:27:24PM +0530, Ojaswin Mujoo wrote:
> On Sat, Oct 04, 2025 at 01:19:32AM +0800, Zorro Lang wrote:
> > On Thu, Oct 02, 2025 at 11:26:45PM +0530, Ojaswin Mujoo wrote:
> > > On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> > > > On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > > > > Implement atomic write support to help fuzz atomic writes
> > > > > with fsx.
> > > > > 
> > > > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > > > > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > > > > ---
> > > > 
> > > > Hmm... this patch causes more regular fsx test cases fail on old kernel,
> > > > (e.g. g/760, g/617, g/263 ...) except set "FSX_AVOID=-a". Is there a way
> > > > to disable "atomic write" automatically if it's not supported by current
> > > > system?
> > > 
> > > Hi Zorro, 
> > > Sorry for being late, I've been on vacation this week.
> > > 
> > > Yes so by design we should be automatically disabling atomic writes when
> > > they are not supported by the stack but seems like the issue is that
> > > when we do disable it we print some extra messages to stdout/err which
> > > show up in the xfstests output causing failure.
> > > 
> > > I can think of 2 ways around this:
> > > 
> > > 1. Don't print anything and just silently drop atomic writes if stack
> > > doesn't support them.
> > > 
> > > 2. Make atomic writes as a default off instead of default on feature but
> > > his loses a bit of coverage as existing tests wont get atomic write
> > > testing free of cost any more.
> > 
> > Hi Ojaswin,
> > 
> > Please have a nice vacation :)
> > 
> > It's not the "extra messages" cause failure, those "quiet" failures can be fixed
> > by:
> 
> Oh okay got it.
> 
> > 
> > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > index bdb87ca90..0a035b37b 100644
> > --- a/ltp/fsx.c
> > +++ b/ltp/fsx.c
> > @@ -1847,8 +1847,9 @@ int test_atomic_writes(void) {
> >         struct statx stx;
> >  
> >         if (o_direct != O_DIRECT) {
> > -               fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > -                               "disabling!\n");
> > +               if (!quiet)
> > +                       fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > +                                       "disabling!\n");
> >                 return 0;
> >         }
> >  
> > @@ -1867,8 +1868,9 @@ int test_atomic_writes(void) {
> >                 return 1;
> >         }
> >  
> > -       fprintf(stderr, "main: IO Stack does not support "
> > -                       "atomic writes, disabling!\n");
> > +       if (!quiet)
> > +               fprintf(stderr, "main: IO Stack does not support "
> > +                               "atomic writes, disabling!\n");
> >         return 0;
> >  }
> 
> > 
> > But I hit more read or write failures e.g. [1], this failure can't be
> > reproduced with FSX_AVOID=-a. Is it a atomic write bug or an unexpected
> > test failure?
> > 
> > Thanks,
> > Zorro
> > 
> 
> <...>
> 
> > +244(244 mod 256): SKIPPED (no operation)
> > +245(245 mod 256): FALLOC   0x695c5 thru 0x6a2e6	(0xd21 bytes) INTERIOR
> > +246(246 mod 256): MAPWRITE 0x5ac00 thru 0x5b185	(0x586 bytes)
> > +247(247 mod 256): WRITE    0x31200 thru 0x313ff	(0x200 bytes)
> > +248(248 mod 256): SKIPPED (no operation)
> > +249(249 mod 256): TRUNCATE DOWN	from 0x78242 to 0xf200	******WWWW
> > +250(250 mod 256): FALLOC   0x65000 thru 0x66f26	(0x1f26 bytes) PAST_EOF
> > +251(251 mod 256): WRITE    0x45400 thru 0x467ff	(0x1400 bytes) HOLE	***WWWW
> > +252(252 mod 256): SKIPPED (no operation)
> > +253(253 mod 256): SKIPPED (no operation)
> > +254(254 mod 256): MAPWRITE 0x4be00 thru 0x4daee	(0x1cef bytes)
> > +255(255 mod 256): MAPREAD  0xc000 thru 0xcae9	(0xaea bytes)
> > +256(  0 mod 256): READ     0x3e000 thru 0x3efff	(0x1000 bytes)
> > +257(  1 mod 256): SKIPPED (no operation)
> > +258(  2 mod 256): INSERT 0x45000 thru 0x45fff	(0x1000 bytes)
> > +259(  3 mod 256): ZERO     0x1d7d5 thru 0x1f399	(0x1bc5 bytes)	******ZZZZ
> > +260(  4 mod 256): TRUNCATE DOWN	from 0x4eaef to 0x11200	******WWWW
> > +261(  5 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes) HOLE	***WWWW
> > +262(  6 mod 256): WRITE    0x2200 thru 0x31ff	(0x1000 bytes)
> > +263(  7 mod 256): WRITE    0x15000 thru 0x15fff	(0x1000 bytes)
> > +264(  8 mod 256): WRITE    0x2e400 thru 0x2e7ff	(0x400 bytes)
> > +265(  9 mod 256): COPY 0xd000 thru 0xdfff	(0x1000 bytes) to 0x1d800 thru 0x1e7ff	******EEEE
> > +266( 10 mod 256): CLONE 0x2a000 thru 0x2afff	(0x1000 bytes) to 0x21000 thru 0x21fff
> > +267( 11 mod 256): MAPREAD  0x31000 thru 0x31d0a	(0xd0b bytes)
> > +268( 12 mod 256): SKIPPED (no operation)
> > +269( 13 mod 256): WRITE    0x25000 thru 0x25fff	(0x1000 bytes)
> > +270( 14 mod 256): SKIPPED (no operation)
> > +271( 15 mod 256): MAPREAD  0x30000 thru 0x30577	(0x578 bytes)
> > +272( 16 mod 256): PUNCH    0x1a267 thru 0x1c093	(0x1e2d bytes)
> > +273( 17 mod 256): MAPREAD  0x1f000 thru 0x1f9c9	(0x9ca bytes)
> > +274( 18 mod 256): WRITE    0x40800 thru 0x40dff	(0x600 bytes)
> > +275( 19 mod 256): SKIPPED (no operation)
> > +276( 20 mod 256): MAPWRITE 0x20600 thru 0x22115	(0x1b16 bytes)
> > +277( 21 mod 256): MAPWRITE 0x3d000 thru 0x3ee5a	(0x1e5b bytes)
> > +278( 22 mod 256): WRITE    0x2ee00 thru 0x2efff	(0x200 bytes)
> > +279( 23 mod 256): WRITE    0x76200 thru 0x769ff	(0x800 bytes) HOLE
> > +280( 24 mod 256): SKIPPED (no operation)
> > +281( 25 mod 256): SKIPPED (no operation)
> > +282( 26 mod 256): MAPREAD  0xa000 thru 0xa5e7	(0x5e8 bytes)
> > +283( 27 mod 256): SKIPPED (no operation)
> > +284( 28 mod 256): SKIPPED (no operation)
> > +285( 29 mod 256): SKIPPED (no operation)
> > +286( 30 mod 256): SKIPPED (no operation)
> > +287( 31 mod 256): COLLAPSE 0x11000 thru 0x11fff	(0x1000 bytes)
> > +288( 32 mod 256): COPY 0x5d000 thru 0x5dfff	(0x1000 bytes) to 0x4ca00 thru 0x4d9ff
> > +289( 33 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x1e400
> > +290( 34 mod 256): MAPREAD  0x1c000 thru 0x1d802	(0x1803 bytes)	***RRRR***
> > +Log of operations saved to "/mnt/xfstests/test/junk.fsxops"; replay with --replay-ops
> > +Correct content saved for comparison
> > +(maybe hexdump "/mnt/xfstests/test/junk" vs "/mnt/xfstests/test/junk.fsxgood")
> > 
> > Thanks,
> > Zorro
> 
> Hi Zorro, just to confirm is this on an older kernel that doesnt support
> RWF_ATOMIC or on a kernle that does support it.

I tested on linux 6.16 and current latest linux v6.17+ (will be 6.18-rc1 later).
About the RWF_ATOMIC flag in my system:

# grep -rsn RWF_ATOMIC /usr/include/
/usr/include/bits/uio-ext.h:51:#define RWF_ATOMIC       0x00000040 /* Write is to be issued with torn-write
/usr/include/linux/fs.h:424:#define RWF_ATOMIC  ((__kernel_rwf_t)0x00000040)
/usr/include/linux/fs.h:431:                     RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
/usr/include/xfs/linux.h:236:#ifndef RWF_ATOMIC
/usr/include/xfs/linux.h:237:#define RWF_ATOMIC ((__kernel_rwf_t)0x00000040)

Thanks,
Zorro

> 
> Regards,
> ojaswin
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Ojaswin Mujoo 4 months ago
On Sun, Oct 05, 2025 at 11:39:56PM +0800, Zorro Lang wrote:
> On Sun, Oct 05, 2025 at 06:27:24PM +0530, Ojaswin Mujoo wrote:
> > On Sat, Oct 04, 2025 at 01:19:32AM +0800, Zorro Lang wrote:
> > > On Thu, Oct 02, 2025 at 11:26:45PM +0530, Ojaswin Mujoo wrote:
> > > > On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> > > > > On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > > > > > Implement atomic write support to help fuzz atomic writes
> > > > > > with fsx.
> > > > > > 
> > > > > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > > > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > > > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > > > > > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > > > > > ---
> > > > > 
> > > > > Hmm... this patch causes more regular fsx test cases fail on old kernel,
> > > > > (e.g. g/760, g/617, g/263 ...) except set "FSX_AVOID=-a". Is there a way
> > > > > to disable "atomic write" automatically if it's not supported by current
> > > > > system?
> > > > 
> > > > Hi Zorro, 
> > > > Sorry for being late, I've been on vacation this week.
> > > > 
> > > > Yes so by design we should be automatically disabling atomic writes when
> > > > they are not supported by the stack but seems like the issue is that
> > > > when we do disable it we print some extra messages to stdout/err which
> > > > show up in the xfstests output causing failure.
> > > > 
> > > > I can think of 2 ways around this:
> > > > 
> > > > 1. Don't print anything and just silently drop atomic writes if stack
> > > > doesn't support them.
> > > > 
> > > > 2. Make atomic writes as a default off instead of default on feature but
> > > > his loses a bit of coverage as existing tests wont get atomic write
> > > > testing free of cost any more.
> > > 
> > > Hi Ojaswin,
> > > 
> > > Please have a nice vacation :)
> > > 
> > > It's not the "extra messages" cause failure, those "quiet" failures can be fixed
> > > by:
> > 
> > Oh okay got it.
> > 
> > > 
> > > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > > index bdb87ca90..0a035b37b 100644
> > > --- a/ltp/fsx.c
> > > +++ b/ltp/fsx.c
> > > @@ -1847,8 +1847,9 @@ int test_atomic_writes(void) {
> > >         struct statx stx;
> > >  
> > >         if (o_direct != O_DIRECT) {
> > > -               fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > -                               "disabling!\n");
> > > +               if (!quiet)
> > > +                       fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > +                                       "disabling!\n");
> > >                 return 0;
> > >         }
> > >  
> > > @@ -1867,8 +1868,9 @@ int test_atomic_writes(void) {
> > >                 return 1;
> > >         }
> > >  
> > > -       fprintf(stderr, "main: IO Stack does not support "
> > > -                       "atomic writes, disabling!\n");
> > > +       if (!quiet)
> > > +               fprintf(stderr, "main: IO Stack does not support "
> > > +                               "atomic writes, disabling!\n");
> > >         return 0;
> > >  }
> > 
> > > 
> > > But I hit more read or write failures e.g. [1], this failure can't be
> > > reproduced with FSX_AVOID=-a. Is it a atomic write bug or an unexpected
> > > test failure?
> > > 
> > > Thanks,
> > > Zorro
> > > 
> > 
> > <...>
> > 
> > > +244(244 mod 256): SKIPPED (no operation)
> > > +245(245 mod 256): FALLOC   0x695c5 thru 0x6a2e6	(0xd21 bytes) INTERIOR
> > > +246(246 mod 256): MAPWRITE 0x5ac00 thru 0x5b185	(0x586 bytes)
> > > +247(247 mod 256): WRITE    0x31200 thru 0x313ff	(0x200 bytes)
> > > +248(248 mod 256): SKIPPED (no operation)
> > > +249(249 mod 256): TRUNCATE DOWN	from 0x78242 to 0xf200	******WWWW
> > > +250(250 mod 256): FALLOC   0x65000 thru 0x66f26	(0x1f26 bytes) PAST_EOF
> > > +251(251 mod 256): WRITE    0x45400 thru 0x467ff	(0x1400 bytes) HOLE	***WWWW
> > > +252(252 mod 256): SKIPPED (no operation)
> > > +253(253 mod 256): SKIPPED (no operation)
> > > +254(254 mod 256): MAPWRITE 0x4be00 thru 0x4daee	(0x1cef bytes)
> > > +255(255 mod 256): MAPREAD  0xc000 thru 0xcae9	(0xaea bytes)
> > > +256(  0 mod 256): READ     0x3e000 thru 0x3efff	(0x1000 bytes)
> > > +257(  1 mod 256): SKIPPED (no operation)
> > > +258(  2 mod 256): INSERT 0x45000 thru 0x45fff	(0x1000 bytes)
> > > +259(  3 mod 256): ZERO     0x1d7d5 thru 0x1f399	(0x1bc5 bytes)	******ZZZZ
> > > +260(  4 mod 256): TRUNCATE DOWN	from 0x4eaef to 0x11200	******WWWW
> > > +261(  5 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes) HOLE	***WWWW
> > > +262(  6 mod 256): WRITE    0x2200 thru 0x31ff	(0x1000 bytes)
> > > +263(  7 mod 256): WRITE    0x15000 thru 0x15fff	(0x1000 bytes)
> > > +264(  8 mod 256): WRITE    0x2e400 thru 0x2e7ff	(0x400 bytes)
> > > +265(  9 mod 256): COPY 0xd000 thru 0xdfff	(0x1000 bytes) to 0x1d800 thru 0x1e7ff	******EEEE
> > > +266( 10 mod 256): CLONE 0x2a000 thru 0x2afff	(0x1000 bytes) to 0x21000 thru 0x21fff
> > > +267( 11 mod 256): MAPREAD  0x31000 thru 0x31d0a	(0xd0b bytes)
> > > +268( 12 mod 256): SKIPPED (no operation)
> > > +269( 13 mod 256): WRITE    0x25000 thru 0x25fff	(0x1000 bytes)
> > > +270( 14 mod 256): SKIPPED (no operation)
> > > +271( 15 mod 256): MAPREAD  0x30000 thru 0x30577	(0x578 bytes)
> > > +272( 16 mod 256): PUNCH    0x1a267 thru 0x1c093	(0x1e2d bytes)
> > > +273( 17 mod 256): MAPREAD  0x1f000 thru 0x1f9c9	(0x9ca bytes)
> > > +274( 18 mod 256): WRITE    0x40800 thru 0x40dff	(0x600 bytes)
> > > +275( 19 mod 256): SKIPPED (no operation)
> > > +276( 20 mod 256): MAPWRITE 0x20600 thru 0x22115	(0x1b16 bytes)
> > > +277( 21 mod 256): MAPWRITE 0x3d000 thru 0x3ee5a	(0x1e5b bytes)
> > > +278( 22 mod 256): WRITE    0x2ee00 thru 0x2efff	(0x200 bytes)
> > > +279( 23 mod 256): WRITE    0x76200 thru 0x769ff	(0x800 bytes) HOLE
> > > +280( 24 mod 256): SKIPPED (no operation)
> > > +281( 25 mod 256): SKIPPED (no operation)
> > > +282( 26 mod 256): MAPREAD  0xa000 thru 0xa5e7	(0x5e8 bytes)
> > > +283( 27 mod 256): SKIPPED (no operation)
> > > +284( 28 mod 256): SKIPPED (no operation)
> > > +285( 29 mod 256): SKIPPED (no operation)
> > > +286( 30 mod 256): SKIPPED (no operation)
> > > +287( 31 mod 256): COLLAPSE 0x11000 thru 0x11fff	(0x1000 bytes)
> > > +288( 32 mod 256): COPY 0x5d000 thru 0x5dfff	(0x1000 bytes) to 0x4ca00 thru 0x4d9ff
> > > +289( 33 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x1e400
> > > +290( 34 mod 256): MAPREAD  0x1c000 thru 0x1d802	(0x1803 bytes)	***RRRR***
> > > +Log of operations saved to "/mnt/xfstests/test/junk.fsxops"; replay with --replay-ops
> > > +Correct content saved for comparison
> > > +(maybe hexdump "/mnt/xfstests/test/junk" vs "/mnt/xfstests/test/junk.fsxgood")
> > > 
> > > Thanks,
> > > Zorro
> > 
> > Hi Zorro, just to confirm is this on an older kernel that doesnt support
> > RWF_ATOMIC or on a kernle that does support it.
> 
> I tested on linux 6.16 and current latest linux v6.17+ (will be 6.18-rc1 later).
> About the RWF_ATOMIC flag in my system:
> 
> # grep -rsn RWF_ATOMIC /usr/include/
> /usr/include/bits/uio-ext.h:51:#define RWF_ATOMIC       0x00000040 /* Write is to be issued with torn-write
> /usr/include/linux/fs.h:424:#define RWF_ATOMIC  ((__kernel_rwf_t)0x00000040)
> /usr/include/linux/fs.h:431:                     RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
> /usr/include/xfs/linux.h:236:#ifndef RWF_ATOMIC
> /usr/include/xfs/linux.h:237:#define RWF_ATOMIC ((__kernel_rwf_t)0x00000040)

Hi Zorro, thanks for checking this. So correct me if I'm wrong, but I
understand that you have run this test on an atomic-writes-enabled
kernel where the stack also supports atomic writes.

Looking at the bad data log:

	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
	+OFFSET      GOOD    BAD     RANGE
	+0x1c000     0x0000  0xcdcd  0x0
	+operation# (mod 256) for the bad data may be 205

We see that 0x0000 was expected but we got 0xcdcd. Now the operation
that caused this is indicated to be 205, but looking at that operation:

+205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)

This doesn't even overlap the range that is bad (0x1c000 to 0x1c00f).
In fact, it does seem like an unlikely coincidence that the actual data
in the bad range is 0xcdcd, which is what xfs_io -c "pwrite" writes
by default (fsx writes random data in even offsets and the operation
number in odd ones).

I am able to replicate this, but only on XFS and not on ext4 (at least not
in 20 runs).  I'm trying to better understand if this is a test issue or
not. Will keep you updated.

I'm not sure how this will affect the upcoming release. If you want,
shall I send a small patch to make the atomic writes feature default off
instead of default on until we root-cause this?

Regards,
Ojaswin

> 
> Thanks,
> Zorro
> 
> > 
> > Regards,
> > ojaswin
> > 
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by John Garry 3 months, 2 weeks ago
On 06/10/2025 14:20, Ojaswin Mujoo wrote:
> Hi Zorro, thanks for checking this. So correct me if im wrong but I
> understand that you have run this test on an atomic writes enabled
> kernel where the stack also supports atomic writes.
> 
> Looking at the bad data log:
> 
> 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> 	+OFFSET      GOOD    BAD     RANGE
> 	+0x1c000     0x0000  0xcdcd  0x0
> 	+operation# (mod 256) for the bad data may be 205
> 
> We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> that caused this is indicated to be 205, but looking at that operation:
> 
> +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> 
> This doesn't even overlap the range that is bad. (0x1c000 to 0x1c00f).
> Infact, it does seem like an unlikely coincidence that the actual data
> in the bad range is 0xcdcd which is something xfs_io -c "pwrite" writes
> to default (fsx writes random data in even offsets and operation num in
> odd).
> 
> I am able to replicate this but only on XFS but not on ext4 (atleast not
> in 20 runs).  I'm trying to better understand if this is a test issue or
> not. Will keep you update.


Hi Ojaswin,

Sorry for the very slow response.

Are you still checking this issue?

To replicate, should I just take latest xfs kernel and run this series 
on top of latest xfstests? Is it 100% reproducible?

Thanks,
John
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Ojaswin Mujoo 3 months, 2 weeks ago
On Mon, Oct 20, 2025 at 11:33:40AM +0100, John Garry wrote:
> On 06/10/2025 14:20, Ojaswin Mujoo wrote:
> > Hi Zorro, thanks for checking this. So correct me if im wrong but I
> > understand that you have run this test on an atomic writes enabled
> > kernel where the stack also supports atomic writes.
> > 
> > Looking at the bad data log:
> > 
> > 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> > 	+OFFSET      GOOD    BAD     RANGE
> > 	+0x1c000     0x0000  0xcdcd  0x0
> > 	+operation# (mod 256) for the bad data may be 205
> > 
> > We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> > that caused this is indicated to be 205, but looking at that operation:
> > 
> > +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> > 
> > This doesn't even overlap the range that is bad. (0x1c000 to 0x1c00f).
> > Infact, it does seem like an unlikely coincidence that the actual data
> > in the bad range is 0xcdcd which is something xfs_io -c "pwrite" writes
> > to default (fsx writes random data in even offsets and operation num in
> > odd).
> > 
> > I am able to replicate this but only on XFS but not on ext4 (atleast not
> > in 20 runs).  I'm trying to better understand if this is a test issue or
> > not. Will keep you update.
> 
> 
> Hi Ojaswin,
> 
> Sorry for the very slow response.
> 
> Are you still checking this issue?
> 
> To replicate, should I just take latest xfs kernel and run this series on
> top of latest xfstests? Is it 100% reproducible?
> 
> Thanks,
> John

Hi John,

Yes, I'm looking into it, but I'm now starting to run into some
reflink/CoW-based concepts that are taking time to understand. Let me
share what I have so far:

So the test.sh that I'm using can be found here [1] which just uses an
fsx replay file (which replays all operations) present in the same repo
[2]. If you see the replay file, there are a bunch of random operations
followed by the last 2 commented out operations:

# copy_range 0xd000 0x1000 0x1d800 0x44000   <--- # operations <start> <len> <dest of copy> <filesize (can be ignored)>
# mapread 0x1e000 0x1000 0x1e400 *

The copy_range here is the one which causes (or exposes) the corruption
at 0x1e800 (the end of copy range destination gets corrupted).

To have more control, I commented out these 2 operations and am doing them
by hand in the test.sh file, with xfs_io. I'm also using a non atomic write
device so we only have S/W fallback.

Now some observations:

1. The copy_range operation is actually copying from a hole to a hole,
so we should be reading all 0s. But what I see happening is the following:

  vfs_copy_file_range
   do_splice_direct
    do_splice_direct_actor
     do_splice_read
       # Adds the folio at src offset to the pipe. I confirmed this is all 0x0.
     splice_direct_to_actor
      direct_splice_actor
       do_splice_from
        iter_file_splice_write
         xfs_file_write_iter
          xfs_file_buffered_write
           iomap_file_buffered_write
            iomap_iter
             xfs_buffered_write_iomap_begin
               # Here we correctly see that there is nothing at the
               # destination in data fork, but somehow we find a mapped
               # extent in cow fork which is returned to iomap.
             iomap_write_iter
              __iomap_write_begin
                # Here we notice folio is not uptodate and call
                # iomap_read_folio_range() to read from the cow_fork
                # mapping we found earlier. This results in folio having
                # incorrect data at 0x1e800 offset.

 So it seems like the fsx operations might be corrupting the cow fork state
 somehow leading to stale data exposure. 

2. If we disable atomic writes we don't hit the issue.

3. If I do a -c pread of the destination range before doing the
copy_range operation then I don't see the corruption any more.

I'm now trying to figure out why the mapping returned is not IOMAP_HOLE
as it should be. I don't know the COW path in xfs so there are some gaps
in my understanding. Let me know if you need any other information since
I'm reliably able to replicate on 6.17.0-rc4.

[1]
https://github.com/OjaswinM/fsx-aw-issue/tree/master

[2] https://github.com/OjaswinM/fsx-aw-issue/blob/master/repro.fsxops

regards,
ojaswin
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Brian Foster 3 months, 2 weeks ago
On Tue, Oct 21, 2025 at 03:58:23PM +0530, Ojaswin Mujoo wrote:
> On Mon, Oct 20, 2025 at 11:33:40AM +0100, John Garry wrote:
> > On 06/10/2025 14:20, Ojaswin Mujoo wrote:
> > > Hi Zorro, thanks for checking this. So correct me if im wrong but I
> > > understand that you have run this test on an atomic writes enabled
> > > kernel where the stack also supports atomic writes.
> > > 
> > > Looking at the bad data log:
> > > 
> > > 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> > > 	+OFFSET      GOOD    BAD     RANGE
> > > 	+0x1c000     0x0000  0xcdcd  0x0
> > > 	+operation# (mod 256) for the bad data may be 205
> > > 
> > > We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> > > that caused this is indicated to be 205, but looking at that operation:
> > > 
> > > +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> > > 
> > > This doesn't even overlap the range that is bad. (0x1c000 to 0x1c00f).
> > > Infact, it does seem like an unlikely coincidence that the actual data
> > > in the bad range is 0xcdcd which is something xfs_io -c "pwrite" writes
> > > to default (fsx writes random data in even offsets and operation num in
> > > odd).
> > > 
> > > I am able to replicate this but only on XFS but not on ext4 (atleast not
> > > in 20 runs).  I'm trying to better understand if this is a test issue or
> > > not. Will keep you update.
> > 
> > 
> > Hi Ojaswin,
> > 
> > Sorry for the very slow response.
> > 
> > Are you still checking this issue?
> > 
> > To replicate, should I just take latest xfs kernel and run this series on
> > top of latest xfstests? Is it 100% reproducible?
> > 
> > Thanks,
> > John
> 
> Hi John,
> 
> Yes Im looking into it but I'm now starting to run into some reflink/cow
> based concepts that are taking time to understand. Let me share what I
> have till now:
> 
> So the test.sh that I'm using can be found here [1] which just uses an
> fsx replay file (which replays all operations) present in the same repo
> [2]. If you see the replay file, there are a bunch of random operations
> followed by the last 2 commented out operations:
> 
> # copy_range 0xd000 0x1000 0x1d800 0x44000   <--- # operations <start> <len> <dest of copy> <filesize (can be ignored)>
> # mapread 0x1e000 0x1000 0x1e400 *
> 
> The copy_range here is the one which causes (or exposes) the corruption
> at 0x1e800 (the end of copy range destination gets corrupted).
> 
> To have more control, I commented these 2 operations and am doing it by
> hand in the test.sh file, with xfs_io. I'm also using a non atomic write
> device so we only have S/W fallback.
> 
> Now some observations:
> 
> 1. The copy_range operations is actually copying from a hole to a hole,
> so we should be reading all 0s. But What I see is the following happening:
> 
>   vfs_copy_file_range
>    do_splice_direct
>     do_splice_direct_actor
>      do_splice_read
>        # Adds the folio at src offset to the pipe. I confirmed this is all 0x0.
>      splice_direct_to_actor
>       direct_splice_actor
>        do_splice_from
>         iter_file_splice_write
>          xfs_file_write_iter
>           xfs_file_buffered_write
>            iomap_file_buferred_write
>             iomap_iter
>              xfs_buferred_write_iomap_begin
>                # Here we correctly see that there is noting at the
>                # destination in data fork, but somehow we find a mapped
>                # extent in cow fork which is returned to iomap.
>              iomap_write_iter
>               __iomap_write_begin
>                 # Here we notice folio is not uptodate and call
>                 # iomap_read_folio_range() to read from the cow_fork
>                 # mapping we found earlier. This results in folio having
>                 # incorrect data at 0x1e800 offset.
> 
>  So it seems like the fsx operations might be corrupting the cow fork state
>  somehow leading to stale data exposure. 
> 
> 2. If we disable atomic writes we dont hit the issue.
> 
> 3. If I do a -c pread of the destination range before doing the
> copy_range operation then I don't see the corruption any more.
> 
> I'm now trying to figure out why the mapping returned is not IOMAP_HOLE
> as it should be. I don't know the COW path in xfs so there are some gaps
> in my understanding. Let me know if you need any other information since
> I'm reliably able to replicate on 6.17.0-rc4.
> 

I haven't followed your issue closely, but just on this hole vs. COW
thing, XFS has a bit of a quirk where speculative COW fork preallocation
can expand out over holes in the data fork. If iomap lookup for buffered
write sees COW fork blocks present, it reports those blocks as the
primary mapping even if the data fork happens to be a hole (since
there's no point in allocating blocks to the data fork when we can just
remap).

Again I've no idea if this relates to your issue or what you're
referring to as a hole (i.e. data fork only?), but just pointing it out.
The latest iomap/xfs patches I posted a few days ago kind of dance
around this a bit, but I was somewhat hoping that maybe the cleanups
there would trigger some thoughts on better iomap reporting in that
regard.

Brian

> [1]
> https://github.com/OjaswinM/fsx-aw-issue/tree/master
> 
> [2] https://github.com/OjaswinM/fsx-aw-issue/blob/master/repro.fsxops
> 
> regards,
> ojaswin
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Ojaswin Mujoo 3 months, 2 weeks ago
On Tue, Oct 21, 2025 at 07:30:32AM -0400, Brian Foster wrote:
> On Tue, Oct 21, 2025 at 03:58:23PM +0530, Ojaswin Mujoo wrote:
> > On Mon, Oct 20, 2025 at 11:33:40AM +0100, John Garry wrote:
> > > On 06/10/2025 14:20, Ojaswin Mujoo wrote:
> > > > Hi Zorro, thanks for checking this. So correct me if im wrong but I
> > > > understand that you have run this test on an atomic writes enabled
> > > > kernel where the stack also supports atomic writes.
> > > > 
> > > > Looking at the bad data log:
> > > > 
> > > > 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> > > > 	+OFFSET      GOOD    BAD     RANGE
> > > > 	+0x1c000     0x0000  0xcdcd  0x0
> > > > 	+operation# (mod 256) for the bad data may be 205
> > > > 
> > > > We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> > > > that caused this is indicated to be 205, but looking at that operation:
> > > > 
> > > > +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> > > > 
> > > > This doesn't even overlap the range that is bad. (0x1c000 to 0x1c00f).
> > > > Infact, it does seem like an unlikely coincidence that the actual data
> > > > in the bad range is 0xcdcd which is something xfs_io -c "pwrite" writes
> > > > to default (fsx writes random data in even offsets and operation num in
> > > > odd).
> > > > 
> > > > I am able to replicate this but only on XFS but not on ext4 (atleast not
> > > > in 20 runs).  I'm trying to better understand if this is a test issue or
> > > > not. Will keep you update.
> > > 
> > > 
> > > Hi Ojaswin,
> > > 
> > > Sorry for the very slow response.
> > > 
> > > Are you still checking this issue?
> > > 
> > > To replicate, should I just take latest xfs kernel and run this series on
> > > top of latest xfstests? Is it 100% reproducible?
> > > 
> > > Thanks,
> > > John
> > 
> > Hi John,
> > 
> > Yes Im looking into it but I'm now starting to run into some reflink/cow
> > based concepts that are taking time to understand. Let me share what I
> > have till now:
> > 
> > So the test.sh that I'm using can be found here [1] which just uses an
> > fsx replay file (which replays all operations) present in the same repo
> > [2]. If you see the replay file, there are a bunch of random operations
> > followed by the last 2 commented out operations:
> > 
> > # copy_range 0xd000 0x1000 0x1d800 0x44000   <--- # operations <start> <len> <dest of copy> <filesize (can be ignored)>
> > # mapread 0x1e000 0x1000 0x1e400 *
> > 
> > The copy_range here is the one which causes (or exposes) the corruption
> > at 0x1e800 (the end of copy range destination gets corrupted).
> > 
> > To have more control, I commented these 2 operations and am doing it by
> > hand in the test.sh file, with xfs_io. I'm also using a non atomic write
> > device so we only have S/W fallback.
> > 
> > Now some observations:
> > 
> > 1. The copy_range operations is actually copying from a hole to a hole,
> > so we should be reading all 0s. But What I see is the following happening:
> > 
> >   vfs_copy_file_range
> >    do_splice_direct
> >     do_splice_direct_actor
> >      do_splice_read
> >        # Adds the folio at src offset to the pipe. I confirmed this is all 0x0.
> >      splice_direct_to_actor
> >       direct_splice_actor
> >        do_splice_from
> >         iter_file_splice_write
> >          xfs_file_write_iter
> >           xfs_file_buffered_write
> >            iomap_file_buferred_write
> >             iomap_iter
> >              xfs_buferred_write_iomap_begin
> >                # Here we correctly see that there is noting at the
> >                # destination in data fork, but somehow we find a mapped
> >                # extent in cow fork which is returned to iomap.
> >              iomap_write_iter
> >               __iomap_write_begin
> >                 # Here we notice folio is not uptodate and call
> >                 # iomap_read_folio_range() to read from the cow_fork
> >                 # mapping we found earlier. This results in folio having
> >                 # incorrect data at 0x1e800 offset.
> > 
> >  So it seems like the fsx operations might be corrupting the cow fork state
> >  somehow leading to stale data exposure. 
> > 
> > 2. If we disable atomic writes we dont hit the issue.
> > 
> > 3. If I do a -c pread of the destination range before doing the
> > copy_range operation then I don't see the corruption any more.
> > 
> > I'm now trying to figure out why the mapping returned is not IOMAP_HOLE
> > as it should be. I don't know the COW path in xfs so there are some gaps
> > in my understanding. Let me know if you need any other information since
> > I'm reliably able to replicate on 6.17.0-rc4.
> > 
> 
> I haven't followed your issue closely, but just on this hole vs. COW
> thing, XFS has a bit of a quirk where speculative COW fork preallocation
> can expand out over holes in the data fork. If iomap lookup for buffered
> write sees COW fork blocks present, it reports those blocks as the
> primary mapping even if the data fork happens to be a hole (since
> there's no point in allocating blocks to the data fork when we can just
> remap).
> 
> Again I've no idea if this relates to your issue or what you're
> referring to as a hole (i.e. data fork only?), but just pointing it out.
> The latest iomap/xfs patches I posted a few days ago kind of dance
> around this a bit, but I was somewhat hoping that maybe the cleanups
> there would trigger some thoughts on better iomap reporting in that
> regard.

Hi Brian, thanks for the details, and yes, by "hole" I did mean a hole in
the data fork only. The part that I'm now confused about is: does this sort
of preallocation extent hold any valid data? IIUC it should not, so I
would expect it to trigger iomap_block_needs_zeroing() to write zeroes
to the folio. Instead, what I see in the issue is that we are trying to
do a disk read.

Regards,
ojaswin
> 
> Brian

> 
> > [1]
> > https://github.com/OjaswinM/fsx-aw-issue/tree/master
> > 
> > [2] https://github.com/OjaswinM/fsx-aw-issue/blob/master/repro.fsxops
> > 
> > regards,
> > ojaswin
> > 
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Darrick J. Wong 3 months, 2 weeks ago
On Tue, Oct 21, 2025 at 05:28:32PM +0530, Ojaswin Mujoo wrote:
> On Tue, Oct 21, 2025 at 07:30:32AM -0400, Brian Foster wrote:
> > On Tue, Oct 21, 2025 at 03:58:23PM +0530, Ojaswin Mujoo wrote:
> > > On Mon, Oct 20, 2025 at 11:33:40AM +0100, John Garry wrote:
> > > > On 06/10/2025 14:20, Ojaswin Mujoo wrote:
> > > > > Hi Zorro, thanks for checking this. So correct me if im wrong but I
> > > > > understand that you have run this test on an atomic writes enabled
> > > > > kernel where the stack also supports atomic writes.
> > > > > 
> > > > > Looking at the bad data log:
> > > > > 
> > > > > 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> > > > > 	+OFFSET      GOOD    BAD     RANGE
> > > > > 	+0x1c000     0x0000  0xcdcd  0x0
> > > > > 	+operation# (mod 256) for the bad data may be 205
> > > > > 
> > > > > We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> > > > > that caused this is indicated to be 205, but looking at that operation:
> > > > > 
> > > > > +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> > > > > 
> > > > > This doesn't even overlap the range that is bad. (0x1c000 to 0x1c00f).
> > > > > Infact, it does seem like an unlikely coincidence that the actual data
> > > > > in the bad range is 0xcdcd which is something xfs_io -c "pwrite" writes
> > > > > to default (fsx writes random data in even offsets and operation num in
> > > > > odd).
> > > > > 
> > > > > I am able to replicate this but only on XFS but not on ext4 (atleast not
> > > > > in 20 runs).  I'm trying to better understand if this is a test issue or
> > > > > not. Will keep you update.
> > > > 
> > > > 
> > > > Hi Ojaswin,
> > > > 
> > > > Sorry for the very slow response.
> > > > 
> > > > Are you still checking this issue?
> > > > 
> > > > To replicate, should I just take latest xfs kernel and run this series on
> > > > top of latest xfstests? Is it 100% reproducible?
> > > > 
> > > > Thanks,
> > > > John
> > > 
> > > Hi John,
> > > 
> > > Yes Im looking into it but I'm now starting to run into some reflink/cow
> > > based concepts that are taking time to understand. Let me share what I
> > > have till now:
> > > 
> > > So the test.sh that I'm using can be found here [1] which just uses an
> > > fsx replay file (which replays all operations) present in the same repo
> > > [2]. If you see the replay file, there are a bunch of random operations
> > > followed by the last 2 commented out operations:
> > > 
> > > # copy_range 0xd000 0x1000 0x1d800 0x44000   <--- # operations <start> <len> <dest of copy> <filesize (can be ignored)>
> > > # mapread 0x1e000 0x1000 0x1e400 *
> > > 
> > > The copy_range here is the one which causes (or exposes) the corruption
> > > at 0x1e800 (the end of copy range destination gets corrupted).
> > > 
> > > To have more control, I commented these 2 operations and am doing it by
> > > hand in the test.sh file, with xfs_io. I'm also using a non atomic write
> > > device so we only have S/W fallback.
> > > 
> > > Now some observations:
> > > 
> > > 1. The copy_range operations is actually copying from a hole to a hole,
> > > so we should be reading all 0s. But What I see is the following happening:
> > > 
> > >   vfs_copy_file_range
> > >    do_splice_direct
> > >     do_splice_direct_actor
> > >      do_splice_read
> > >        # Adds the folio at src offset to the pipe. I confirmed this is all 0x0.
> > >      splice_direct_to_actor
> > >       direct_splice_actor
> > >        do_splice_from
> > >         iter_file_splice_write
> > >          xfs_file_write_iter
> > >           xfs_file_buffered_write
> > >            iomap_file_buferred_write
> > >             iomap_iter
> > >              xfs_buferred_write_iomap_begin
> > >                # Here we correctly see that there is noting at the
> > >                # destination in data fork, but somehow we find a mapped
> > >                # extent in cow fork which is returned to iomap.
> > >              iomap_write_iter
> > >               __iomap_write_begin
> > >                 # Here we notice folio is not uptodate and call
> > >                 # iomap_read_folio_range() to read from the cow_fork
> > >                 # mapping we found earlier. This results in folio having
> > >                 # incorrect data at 0x1e800 offset.
> > > 
> > >  So it seems like the fsx operations might be corrupting the cow fork state
> > >  somehow leading to stale data exposure. 
> > > 
> > > 2. If we disable atomic writes we dont hit the issue.
> > > 
> > > 3. If I do a -c pread of the destination range before doing the
> > > copy_range operation then I don't see the corruption any more.

Yeah, I stopped seeing failures after adding -X (verify data after every
operation) to FSX_AVOID.

> > > I'm now trying to figure out why the mapping returned is not IOMAP_HOLE
> > > as it should be. I don't know the COW path in xfs so there are some gaps
> > > in my understanding. Let me know if you need any other information since
> > > I'm reliably able to replicate on 6.17.0-rc4.
> > > 
> > 
> > I haven't followed your issue closely, but just on this hole vs. COW
> > thing, XFS has a bit of a quirk where speculative COW fork preallocation
> > can expand out over holes in the data fork. If iomap lookup for buffered
> > write sees COW fork blocks present, it reports those blocks as the
> > primary mapping even if the data fork happens to be a hole (since
> > there's no point in allocating blocks to the data fork when we can just
> > remap).

That sounds like a bug -- if a sub-fsblock write to an uncached file
range has to read data in from disk, then xfs needs to pass the data
fork mapping to iomap even if it's a read.

Can you capture the ftrace output of the iomap_iter_*map tracepoints?

> > Again I've no idea if this relates to your issue or what you're
> > referring to as a hole (i.e. data fork only?), but just pointing it out.
> > The latest iomap/xfs patches I posted a few days ago kind of dance
> > around this a bit, but I was somewhat hoping that maybe the cleanups
> > there would trigger some thoughts on better iomap reporting in that
> > regard.
> 
> Hi Brian, Thanks for the details and yes by "hole" i did mean hole in
> data fork only. The part that I'm now confused about is does this sort
> of preallocation extent hold any valid data? IIUC it should not, so I

No.  Mappings in the cow fork are not fully written and should never be
used for reads.

> would expect it to trigger iomap_block_needs_zeroing() to write zeroes
> to the folio. Instead, what I see in the issue is that we are trying to
> do disk read.

Hrm.  Part of the problem here might be that iomap_read_folio_range
ignores iomap_iter::srcmap if it's type IOMAP_HOLE (see
iomap_iter_srcmap), even if the filesystem actually *set* the srcmap to
a hole.

FWIW I see a somewhat different failure -- not data corruption, but
pwrite returning failure:

--- /run/fstests/bin/tests/generic/521.out      2025-07-15 14:45:15.100315255 -0700
+++ /var/tmp/fstests/generic/521.out.bad        2025-10-21 10:33:39.032263811 -0700
@@ -1,2 +1,668 @@
 QA output created by 521
+dowrite: write: Input/output error
+LOG DUMP (661 total operations):
+1(  1 mod 256): TRUNCATE UP    from 0x0 to 0x1d000
+2(  2 mod 256): DEDUPE 0x19000 thru 0x1bfff    (0x3000 bytes) to 0x13000 thru 0x15fff
+3(  3 mod 256): SKIPPED (no operation)
+4(  4 mod 256): PUNCH    0x5167 thru 0x12d1c   (0xdbb6 bytes)
+5(  5 mod 256): WRITE    0x79000 thru 0x86fff  (0xe000 bytes) HOLE
+6(  6 mod 256): PUNCH    0x32344 thru 0x36faf  (0x4c6c bytes)
+7(  7 mod 256): READ     0x0 thru 0xfff        (0x1000 bytes)
+8(  8 mod 256): WRITE    0xe000 thru 0x11fff   (0x4000 bytes)
+9(  9 mod 256): PUNCH    0x71324 thru 0x86fff  (0x15cdc bytes)
+10( 10 mod 256): MAPREAD  0x5b000 thru 0x6d218 (0x12219 bytes)
+11( 11 mod 256): COLLAPSE 0x70000 thru 0x79fff (0xa000 bytes)
+12( 12 mod 256): WRITE    0x41000 thru 0x50fff (0x10000 bytes)
+13( 13 mod 256): INSERT 0x39000 thru 0x4dfff   (0x15000 bytes)
+14( 14 mod 256): WRITE    0x34000 thru 0x37fff (0x4000 bytes)
+15( 15 mod 256): MAPREAD  0x55000 thru 0x6ee44 (0x19e45 bytes)
+16( 16 mod 256): READ     0x46000 thru 0x55fff (0x10000 bytes)
+17( 17 mod 256): PUNCH    0x1ccea thru 0x23b2e (0x6e45 bytes)
+18( 18 mod 256): COPY 0x2a000 thru 0x35fff     (0xc000 bytes) to 0x52000 thru 0x5dfff
+19( 19 mod 256): SKIPPED (no operation)
+20( 20 mod 256): WRITE    0x10000 thru 0x1ffff (0x10000 bytes)
<snip>
+645(133 mod 256): READ     0x5000 thru 0x16fff (0x12000 bytes)
+646(134 mod 256): PUNCH    0x3a51d thru 0x41978        (0x745c bytes)
+647(135 mod 256): FALLOC   0x47f4c thru 0x54867        (0xc91b bytes) INTERIOR
+648(136 mod 256): WRITE    0xa000 thru 0x1dfff (0x14000 bytes)
+649(137 mod 256): CLONE 0x83000 thru 0x89fff   (0x7000 bytes) to 0x4b000 thru 0x51fff
+650(138 mod 256): TRUNCATE DOWN        from 0x8bac4 to 0x7e000
+651(139 mod 256): MAPWRITE 0x13000 thru 0x170e6        (0x40e7 bytes)
+652(140 mod 256): XCHG 0x6a000 thru 0x7cfff    (0x13000 bytes) to 0x8000 thru 0x1afff
+653(141 mod 256): XCHG 0x35000 thru 0x3cfff    (0x8000 bytes) to 0x1b000 thru 0x22fff
+654(142 mod 256): CLONE 0x47000 thru 0x60fff   (0x1a000 bytes) to 0x65000 thru 0x7efff
+655(143 mod 256): DEDUPE 0x79000 thru 0x7dfff  (0x5000 bytes) to 0x6e000 thru 0x72fff
+656(144 mod 256): XCHG 0x4d000 thru 0x5ffff    (0x13000 bytes) to 0x8000 thru 0x1afff
+657(145 mod 256): PUNCH    0x7194f thru 0x7efff        (0xd6b1 bytes)
+658(146 mod 256): PUNCH    0x7af7e thru 0x7efff        (0x4082 bytes)
+659(147 mod 256): MAPREAD  0x77000 thru 0x7e55d        (0x755e bytes)
+660(148 mod 256): READ     0x58000 thru 0x64fff        (0xd000 bytes)
+661(149 mod 256): WRITE    0x88000 thru 0x8bfff        (0x4000 bytes) HOLE
+Log of operations saved to "/mnt/junk.fsxops"; replay with --replay-ops
+Correct content saved for comparison
+(maybe hexdump "/mnt/junk" vs "/mnt/junk.fsxgood")

Curiously there are no EIO errors logged in dmesg.

--D

> Regards,
> ojaswin
> > 
> > Brian
> 
> > 
> > > [1]
> > > https://github.com/OjaswinM/fsx-aw-issue/tree/master
> > > 
> > > [2] https://github.com/OjaswinM/fsx-aw-issue/blob/master/repro.fsxops
> > > 
> > > regards,
> > > ojaswin
> > > 
> > 
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Ojaswin Mujoo 3 months, 2 weeks ago
On Tue, Oct 21, 2025 at 10:44:06AM -0700, Darrick J. Wong wrote:
> On Tue, Oct 21, 2025 at 05:28:32PM +0530, Ojaswin Mujoo wrote:
> > On Tue, Oct 21, 2025 at 07:30:32AM -0400, Brian Foster wrote:
> > > On Tue, Oct 21, 2025 at 03:58:23PM +0530, Ojaswin Mujoo wrote:
> > > > On Mon, Oct 20, 2025 at 11:33:40AM +0100, John Garry wrote:
> > > > > On 06/10/2025 14:20, Ojaswin Mujoo wrote:
> > > > > > Hi Zorro, thanks for checking this. So correct me if im wrong but I
> > > > > > understand that you have run this test on an atomic writes enabled
> > > > > > kernel where the stack also supports atomic writes.
> > > > > > 
> > > > > > Looking at the bad data log:
> > > > > > 
> > > > > > 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> > > > > > 	+OFFSET      GOOD    BAD     RANGE
> > > > > > 	+0x1c000     0x0000  0xcdcd  0x0
> > > > > > 	+operation# (mod 256) for the bad data may be 205
> > > > > > 
> > > > > > We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> > > > > > that caused this is indicated to be 205, but looking at that operation:
> > > > > > 
> > > > > > +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> > > > > > 
> > > > > > This doesn't even overlap the range that is bad. (0x1c000 to 0x1c00f).
> > > > > > Infact, it does seem like an unlikely coincidence that the actual data
> > > > > > in the bad range is 0xcdcd which is something xfs_io -c "pwrite" writes
> > > > > > to default (fsx writes random data in even offsets and operation num in
> > > > > > odd).
> > > > > > 
> > > > > > I am able to replicate this but only on XFS but not on ext4 (atleast not
> > > > > > in 20 runs).  I'm trying to better understand if this is a test issue or
> > > > > > not. Will keep you update.
> > > > > 
> > > > > 
> > > > > Hi Ojaswin,
> > > > > 
> > > > > Sorry for the very slow response.
> > > > > 
> > > > > Are you still checking this issue?
> > > > > 
> > > > > To replicate, should I just take latest xfs kernel and run this series on
> > > > > top of latest xfstests? Is it 100% reproducible?
> > > > > 
> > > > > Thanks,
> > > > > John
> > > > 
> > > > Hi John,
> > > > 
> > > > Yes Im looking into it but I'm now starting to run into some reflink/cow
> > > > based concepts that are taking time to understand. Let me share what I
> > > > have till now:
> > > > 
> > > > So the test.sh that I'm using can be found here [1] which just uses an
> > > > fsx replay file (which replays all operations) present in the same repo
> > > > [2]. If you see the replay file, there are a bunch of random operations
> > > > followed by the last 2 commented out operations:
> > > > 
> > > > # copy_range 0xd000 0x1000 0x1d800 0x44000   <--- # operations <start> <len> <dest of copy> <filesize (can be ignored)>
> > > > # mapread 0x1e000 0x1000 0x1e400 *
> > > > 
> > > > The copy_range here is the one which causes (or exposes) the corruption
> > > > at 0x1e800 (the end of copy range destination gets corrupted).
> > > > 
> > > > To have more control, I commented these 2 operations and am doing it by
> > > > hand in the test.sh file, with xfs_io. I'm also using a non atomic write
> > > > device so we only have S/W fallback.
> > > > 
> > > > Now some observations:
> > > > 
> > > > 1. The copy_range operations is actually copying from a hole to a hole,
> > > > so we should be reading all 0s. But What I see is the following happening:
> > > > 
> > > >   vfs_copy_file_range
> > > >    do_splice_direct
> > > >     do_splice_direct_actor
> > > >      do_splice_read
> > > >        # Adds the folio at src offset to the pipe. I confirmed this is all 0x0.
> > > >      splice_direct_to_actor
> > > >       direct_splice_actor
> > > >        do_splice_from
> > > >         iter_file_splice_write
> > > >          xfs_file_write_iter
> > > >           xfs_file_buffered_write
> > > >            iomap_file_buferred_write
> > > >             iomap_iter
> > > >              xfs_buferred_write_iomap_begin
> > > >                # Here we correctly see that there is noting at the
> > > >                # destination in data fork, but somehow we find a mapped
> > > >                # extent in cow fork which is returned to iomap.
> > > >              iomap_write_iter
> > > >               __iomap_write_begin
> > > >                 # Here we notice folio is not uptodate and call
> > > >                 # iomap_read_folio_range() to read from the cow_fork
> > > >                 # mapping we found earlier. This results in folio having
> > > >                 # incorrect data at 0x1e800 offset.
> > > > 
> > > >  So it seems like the fsx operations might be corrupting the cow fork state
> > > >  somehow leading to stale data exposure. 
> > > > 
> > > > 2. If we disable atomic writes we dont hit the issue.
> > > > 
> > > > 3. If I do a -c pread of the destination range before doing the
> > > > copy_range operation then I don't see the corruption any more.
> 
> Yeah, I stopped seeing failures after adding -X (verify data after every
> operation) to FSX_AVOID.
> 
> > > > I'm now trying to figure out why the mapping returned is not IOMAP_HOLE
> > > > as it should be. I don't know the COW path in xfs so there are some gaps
> > > > in my understanding. Let me know if you need any other information since
> > > > I'm reliably able to replicate on 6.17.0-rc4.
> > > > 
> > > 
> > > I haven't followed your issue closely, but just on this hole vs. COW
> > > thing, XFS has a bit of a quirk where speculative COW fork preallocation
> > > can expand out over holes in the data fork. If iomap lookup for buffered
> > > write sees COW fork blocks present, it reports those blocks as the
> > > primary mapping even if the data fork happens to be a hole (since
> > > there's no point in allocating blocks to the data fork when we can just
> > > remap).
> 
> That sounds like a bug -- if a sub-fsblock write to an uncached file
> range has to read data in from disk, then xfs needs to pass the data
> fork mapping to iomap even if it's a read.
> 
> Can you capture the ftrace output of the iomap_iter_*map tracepoints?

Hey Darrick, below are the details:

Command:
	sudo ./xfstests-dev/ltp/fsx -N 10000 -o 8192 -l 500000 -r 4096 -t 512 -w 512 -Z -FKuHzI -g B --replay-ops prep.fsxops /mnt/test/junk
	sudo perf record -e 'iomap:iomap_iter_*map' xfs_io -c 'copy_range -s 0xd000 -d 0x1d800 -l 0x1000 /mnt/test/junk' -c 'pread -v 0x1e7f0 0x20' /mnt/test/junk

	(prep.fsxops is here: https://github.com/OjaswinM/fsx-aw-issue/blob/master/repro.fsxops)

stdout:
	Seed set to 1
	main: filesystem does not support exchange range, disabling!
	truncating to largest ever: 0x50e00
	truncating to largest ever: 0x70e00
	All 136 operations completed A-OK!

	0001e7f0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0001e800:  42 42 42 42 42 42 42 42 42 42 42 42 42 42 42 42  BBBBBBBBBBBBBBBB
	read 16/16 bytes at offset 124927
	16.000000 bytes, 1 ops; 0.0000 sec (339.674 KiB/sec and 21739.1304 ops/sec)
	[ perf record: Woken up 1 times to write data ]
	[ perf record: Captured and wrote 0.003 MB perf.data (1 samples) ]

perf output:
	xfs_io     981 [000]   254.331537: iomap:iomap_iter_dstmap: dev 254:32 ino 0x83 bdev 254:32 addr 0xb8000 offset 0x1d000 length 0x23000 type MAPPED (0x2) flags DIRTY|SHARED (0x6)


perf output if I call -c pread before -c copy_range:

	xfs_io    1098 [000]   911.542373: iomap:iomap_iter_dstmap: dev 254:32 ino 0x83 bdev 254:32 addr 0xffffffffffffffff offset 0x1e000 length 0x1000 type HOLE (0x0) flags DIRTY (0x2)
	xfs_io    1098 [000]   911.542776: iomap:iomap_iter_dstmap: dev 254:32 ino 0x83 bdev 254:32 addr 0xb8000 offset 0x1d000 length 0x23000 type MAPPED (0x2) flags DIRTY|SHARED (0x6)

	(The first event is from -c pread)


Fiemap output before and after, in case it helps:

	$ xfs_io -c 'fiemap -v' -c 'copy_range -s 0xd000 -d 0x1d800 -l 0x1000 /mnt/test/junk' -c 'pread -v 0x1e7f0 0x20' -c 'fiemap -v' /mnt/test/junk

	/mnt/test/junk:
	 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
		 0: [0..7]:          408..415             8   0x0
		 1: [8..31]:         520..543            24   0x0
		 2: [32..167]:       hole               136
		 3: [168..175]:      1408..1415           8   0x0
		 4: [176..367]:      hole               192
		 5: [368..375]:      1608..1615           8   0x0
		 6: [376..535]:      hole               160
		 7: [536..543]:      1048..1055           8   0x1
	0001e7f0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
	0001e800:  42 42 42 42 42 42 42 42 42 42 42 42 42 42 42 42  BBBBBBBBBBBBBBBB
	read 32/32 bytes at offset 124912
	32.000000 bytes, 1 ops; 0.0000 sec (325.521 KiB/sec and 10416.6667 ops/sec)
	/mnt/test/junk:
	 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
		 0: [0..7]:          408..415             8   0x0
		 1: [8..31]:         520..543            24   0x0
		 2: [32..167]:       hole               136
		 3: [168..175]:      1408..1415           8   0x0
		 4: [176..231]:      hole                56
		 5: [232..247]:      1472..1487          16   0x0
		 6: [248..367]:      hole               120
		 7: [368..375]:      1608..1615           8   0x0
		 8: [376..535]:      hole               160
		 9: [536..543]:      1048..1055           8   0x1

> 
> > > Again I've no idea if this relates to your issue or what you're
> > > referring to as a hole (i.e. data fork only?), but just pointing it out.
> > > The latest iomap/xfs patches I posted a few days ago kind of dance
> > > around this a bit, but I was somewhat hoping that maybe the cleanups
> > > there would trigger some thoughts on better iomap reporting in that
> > > regard.
> > 
> > Hi Brian, Thanks for the details and yes by "hole" i did mean hole in
> > data fork only. The part that I'm now confused about is does this sort
> > of preallocation extent hold any valid data? IIUC it should not, so I
> 
> No.  Mappings in the cow fork are not fully written and should never be
> used for reads.
> 
> > would expect it to trigger iomap_block_needs_zeroing() to write zeroes
> > to the folio. Instead, what I see in the issue is that we are trying to
> > do disk read.
> 
> Hrm.  Part of the problem here might be that iomap_read_folio_range
> ignores iomap_iter::srcmap if it's type IOMAP_HOLE (see
> iomap_iter_srcmap), even if the filesystem actually *set* the srcmap to
> a hole.
> 
> FWIW I see a somewhat different failure -- not data corruption, but
> pwrite returning failure:

> 
> --- /run/fstests/bin/tests/generic/521.out      2025-07-15 14:45:15.100315255 -0700
> +++ /var/tmp/fstests/generic/521.out.bad        2025-10-21 10:33:39.032263811 -0700
> @@ -1,2 +1,668 @@
>  QA output created by 521
> +dowrite: write: Input/output error

Hmm, that's strange. Can you try running the above command with the
prep.fsxops that I've shared? You'll need to pull the fsx atomic write
changes in this patch for it to work. I've been running on a non-atomic
write device, btw.

> +LOG DUMP (661 total operations):
> +1(  1 mod 256): TRUNCATE UP    from 0x0 to 0x1d000
> +2(  2 mod 256): DEDUPE 0x19000 thru 0x1bfff    (0x3000 bytes) to 0x13000 thru 0x15fff
> +3(  3 mod 256): SKIPPED (no operation)
> +4(  4 mod 256): PUNCH    0x5167 thru 0x12d1c   (0xdbb6 bytes)
> +5(  5 mod 256): WRITE    0x79000 thru 0x86fff  (0xe000 bytes) HOLE
> +6(  6 mod 256): PUNCH    0x32344 thru 0x36faf  (0x4c6c bytes)
> +7(  7 mod 256): READ     0x0 thru 0xfff        (0x1000 bytes)
> +8(  8 mod 256): WRITE    0xe000 thru 0x11fff   (0x4000 bytes)
> +9(  9 mod 256): PUNCH    0x71324 thru 0x86fff  (0x15cdc bytes)
> +10( 10 mod 256): MAPREAD  0x5b000 thru 0x6d218 (0x12219 bytes)
> +11( 11 mod 256): COLLAPSE 0x70000 thru 0x79fff (0xa000 bytes)
> +12( 12 mod 256): WRITE    0x41000 thru 0x50fff (0x10000 bytes)
> +13( 13 mod 256): INSERT 0x39000 thru 0x4dfff   (0x15000 bytes)
> +14( 14 mod 256): WRITE    0x34000 thru 0x37fff (0x4000 bytes)
> +15( 15 mod 256): MAPREAD  0x55000 thru 0x6ee44 (0x19e45 bytes)
> +16( 16 mod 256): READ     0x46000 thru 0x55fff (0x10000 bytes)
> +17( 17 mod 256): PUNCH    0x1ccea thru 0x23b2e (0x6e45 bytes)
> +18( 18 mod 256): COPY 0x2a000 thru 0x35fff     (0xc000 bytes) to 0x52000 thru 0x5dfff
> +19( 19 mod 256): SKIPPED (no operation)
> +20( 20 mod 256): WRITE    0x10000 thru 0x1ffff (0x10000 bytes)
> <snip>
> +645(133 mod 256): READ     0x5000 thru 0x16fff (0x12000 bytes)
> +646(134 mod 256): PUNCH    0x3a51d thru 0x41978        (0x745c bytes)
> +647(135 mod 256): FALLOC   0x47f4c thru 0x54867        (0xc91b bytes) INTERIOR
> +648(136 mod 256): WRITE    0xa000 thru 0x1dfff (0x14000 bytes)
> +649(137 mod 256): CLONE 0x83000 thru 0x89fff   (0x7000 bytes) to 0x4b000 thru 0x51fff
> +650(138 mod 256): TRUNCATE DOWN        from 0x8bac4 to 0x7e000
> +651(139 mod 256): MAPWRITE 0x13000 thru 0x170e6        (0x40e7 bytes)
> +652(140 mod 256): XCHG 0x6a000 thru 0x7cfff    (0x13000 bytes) to 0x8000 thru 0x1afff
> +653(141 mod 256): XCHG 0x35000 thru 0x3cfff    (0x8000 bytes) to 0x1b000 thru 0x22fff
> +654(142 mod 256): CLONE 0x47000 thru 0x60fff   (0x1a000 bytes) to 0x65000 thru 0x7efff
> +655(143 mod 256): DEDUPE 0x79000 thru 0x7dfff  (0x5000 bytes) to 0x6e000 thru 0x72fff
> +656(144 mod 256): XCHG 0x4d000 thru 0x5ffff    (0x13000 bytes) to 0x8000 thru 0x1afff
> +657(145 mod 256): PUNCH    0x7194f thru 0x7efff        (0xd6b1 bytes)
> +658(146 mod 256): PUNCH    0x7af7e thru 0x7efff        (0x4082 bytes)
> +659(147 mod 256): MAPREAD  0x77000 thru 0x7e55d        (0x755e bytes)
> +660(148 mod 256): READ     0x58000 thru 0x64fff        (0xd000 bytes)
> +661(149 mod 256): WRITE    0x88000 thru 0x8bfff        (0x4000 bytes) HOLE
> +Log of operations saved to "/mnt/junk.fsxops"; replay with --replay-ops
> +Correct content saved for comparison
> +(maybe hexdump "/mnt/junk" vs "/mnt/junk.fsxgood")
> 
> Curiously there are no EIO errors logged in dmesg.
> 
> --D
> 
> > Regards,
> > ojaswin
> > > 
> > > Brian
> > 
> > > 
> > > > [1]
> > > > https://github.com/OjaswinM/fsx-aw-issue/tree/master
> > > > 
> > > > [2] https://github.com/OjaswinM/fsx-aw-issue/blob/master/repro.fsxops
> > > > 
> > > > regards,
> > > > ojaswin
> > > > 
> > > 
> >
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by John Garry 3 months, 2 weeks ago
On 22/10/2025 08:40, Ojaswin Mujoo wrote:
>> FWIW I see a somewhat different failure -- not data corruption, but
>> pwrite returning failure:
>> --- /run/fstests/bin/tests/generic/521.out      2025-07-15 14:45:15.100315255 -0700
>> +++ /var/tmp/fstests/generic/521.out.bad        2025-10-21 10:33:39.032263811 -0700
>> @@ -1,2 +1,668 @@
>>   QA output created by 521
>> +dowrite: write: Input/output error
> Hmm that's strange. Can you try running the above command with
> prep.fsxops that I've shared. You'll need to pull the fsx atomic write
> changes in this patch for it to work. I've been running on non atomic
> write device btw.

JFYI, I can see this issue with v6.18-rc2 and generic/760

I'll investigate...
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Darrick J. Wong 3 months, 2 weeks ago
On Thu, Oct 23, 2025 at 04:44:36PM +0100, John Garry wrote:
> On 22/10/2025 08:40, Ojaswin Mujoo wrote:
> > > FWIW I see a somewhat different failure -- not data corruption, but
> > > pwrite returning failure:
> > > --- /run/fstests/bin/tests/generic/521.out      2025-07-15 14:45:15.100315255 -0700
> > > +++ /var/tmp/fstests/generic/521.out.bad        2025-10-21 10:33:39.032263811 -0700
> > > @@ -1,2 +1,668 @@
> > >   QA output created by 521
> > > +dowrite: write: Input/output error
> > Hmm that's strange. Can you try running the above command with
> > prep.fsxops that I've shared. You'll need to pull the fsx atomic write
> > changes in this patch for it to work. I've been running on non atomic
> > write device btw.
> 
> JFYI, I can see this issue with v6.18-rc2 and generic/760
> 
> I'll investigate...

I figured out why I'm seeing EIO from the ftrace output.  Apparently
xfs_atomic_write_cow_iomap_begin isn't detecting delalloc extents in the
cow fork and converting them to unwritten:

fsx-35984 [003]  1996.389571: xfs_file_direct_write: dev 8:0 ino 0x127 disize 0x88000 pos 0x20000 bytecount 0x10000
fsx-35984 [003]  1996.389572: iomap_dio_rw_begin:   dev 8:0 ino 0x127 size 0x88000 offset 0x20000 length 0x10000 done_before 0x0 flags ATOMIC|DIRECT dio_flags  aio 0
fsx-35984 [003]  1996.389583: iomap_iter:           dev 8:0 ino 0x127 pos 0x20000 length 0x10000 status 0 flags WRITE|DIRECT|ATOMIC (0x211) ops xfs_atomic_write_cow_iomap_ops caller __iomap_dio_rw+0x1cb
fsx-35984 [003]  1996.389583: xfs_iomap_atomic_write_cow: dev 8:0 ino 0x127 pos 0x20000 bytecount 0x10000
fsx-35984 [003]  1996.389584: xfs_iomap_found:      dev 8:0 ino 0x127 disize 0x88000 pos 0x20000 bytecount 0x10000 fork cow startoff 0x20 startblock 0xffffffffe0007 fsbcount 0x10
fsx-35984 [003]  1996.389584: iomap_iter_dstmap:    dev 8:0 ino 0x127 bdev 8:0 addr 0xffffffffffffffff offset 0x20000 length 0x10000 type DELALLOC (0x1) flags DIRTY|SHARED (0x6)
fsx-35984 [003]  1996.389595: console:              Direct I/O collision with buffered writes! File: /junk Comm: fsx
fsx-35984 [003]  1996.391183: iomap_iter:           dev 8:0 ino 0x127 pos 0x20000 length 0x10000 status -5 flags WRITE|DIRECT|ATOMIC (0x211) ops xfs_atomic_write_cow_iomap_ops caller __iomap_dio_rw+0x1cb
fsx-35984 [003]  1996.391184: iomap_dio_complete:   dev 8:0 ino 0x127 size 0x88000 offset 0x20000 flags ATOMIC|DIRECT aio 0 error -5 ret -5

I suspect this needs an xfs_bmapi_convert_delalloc call somewhere around
the end of that function.

--D
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Ojaswin Mujoo 4 months ago
On Mon, Oct 06, 2025 at 06:50:03PM +0530, Ojaswin Mujoo wrote:
> On Sun, Oct 05, 2025 at 11:39:56PM +0800, Zorro Lang wrote:
> > On Sun, Oct 05, 2025 at 06:27:24PM +0530, Ojaswin Mujoo wrote:
> > > On Sat, Oct 04, 2025 at 01:19:32AM +0800, Zorro Lang wrote:
> > > > On Thu, Oct 02, 2025 at 11:26:45PM +0530, Ojaswin Mujoo wrote:
> > > > > On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> > > > > > On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > > > > > > Implement atomic write support to help fuzz atomic writes
> > > > > > > with fsx.
> > > > > > > 
> > > > > > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > > > > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > > > > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > > > > > > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > > > > > > ---
> > > > > > 
> > > > > > Hmm... this patch causes more regular fsx test cases fail on old kernel,
> > > > > > (e.g. g/760, g/617, g/263 ...) except set "FSX_AVOID=-a". Is there a way
> > > > > > to disable "atomic write" automatically if it's not supported by current
> > > > > > system?
> > > > > 
> > > > > Hi Zorro, 
> > > > > Sorry for being late, I've been on vacation this week.
> > > > > 
> > > > > Yes so by design we should be automatically disabling atomic writes when
> > > > > they are not supported by the stack but seems like the issue is that
> > > > > when we do disable it we print some extra messages to stdout/err which
> > > > > show up in the xfstests output causing failure.
> > > > > 
> > > > > I can think of 2 ways around this:
> > > > > 
> > > > > 1. Don't print anything and just silently drop atomic writes if stack
> > > > > doesn't support them.
> > > > > 
> > > > > 2. Make atomic writes as a default off instead of default on feature but
> > > > > his loses a bit of coverage as existing tests wont get atomic write
> > > > > testing free of cost any more.
> > > > 
> > > > Hi Ojaswin,
> > > > 
> > > > Please have a nice vacation :)
> > > > 
> > > > It's not the "extra messages" cause failure, those "quiet" failures can be fixed
> > > > by:
> > > 
> > > Oh okay got it.
> > > 
> > > > 
> > > > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > > > index bdb87ca90..0a035b37b 100644
> > > > --- a/ltp/fsx.c
> > > > +++ b/ltp/fsx.c
> > > > @@ -1847,8 +1847,9 @@ int test_atomic_writes(void) {
> > > >         struct statx stx;
> > > >  
> > > >         if (o_direct != O_DIRECT) {
> > > > -               fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > -                               "disabling!\n");
> > > > +               if (!quiet)
> > > > +                       fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > +                                       "disabling!\n");
> > > >                 return 0;
> > > >         }
> > > >  
> > > > @@ -1867,8 +1868,9 @@ int test_atomic_writes(void) {
> > > >                 return 1;
> > > >         }
> > > >  
> > > > -       fprintf(stderr, "main: IO Stack does not support "
> > > > -                       "atomic writes, disabling!\n");
> > > > +       if (!quiet)
> > > > +               fprintf(stderr, "main: IO Stack does not support "
> > > > +                               "atomic writes, disabling!\n");
> > > >         return 0;
> > > >  }
> > > 
> > > > 
> > > > But I hit more read or write failures e.g. [1], this failure can't be
> > > > reproduced with FSX_AVOID=-a. Is it a atomic write bug or an unexpected
> > > > test failure?
> > > > 
> > > > Thanks,
> > > > Zorro
> > > > 
> > > 
> > > <...>
> > > 
> > > > +244(244 mod 256): SKIPPED (no operation)
> > > > +245(245 mod 256): FALLOC   0x695c5 thru 0x6a2e6	(0xd21 bytes) INTERIOR
> > > > +246(246 mod 256): MAPWRITE 0x5ac00 thru 0x5b185	(0x586 bytes)
> > > > +247(247 mod 256): WRITE    0x31200 thru 0x313ff	(0x200 bytes)
> > > > +248(248 mod 256): SKIPPED (no operation)
> > > > +249(249 mod 256): TRUNCATE DOWN	from 0x78242 to 0xf200	******WWWW
> > > > +250(250 mod 256): FALLOC   0x65000 thru 0x66f26	(0x1f26 bytes) PAST_EOF
> > > > +251(251 mod 256): WRITE    0x45400 thru 0x467ff	(0x1400 bytes) HOLE	***WWWW
> > > > +252(252 mod 256): SKIPPED (no operation)
> > > > +253(253 mod 256): SKIPPED (no operation)
> > > > +254(254 mod 256): MAPWRITE 0x4be00 thru 0x4daee	(0x1cef bytes)
> > > > +255(255 mod 256): MAPREAD  0xc000 thru 0xcae9	(0xaea bytes)
> > > > +256(  0 mod 256): READ     0x3e000 thru 0x3efff	(0x1000 bytes)
> > > > +257(  1 mod 256): SKIPPED (no operation)
> > > > +258(  2 mod 256): INSERT 0x45000 thru 0x45fff	(0x1000 bytes)
> > > > +259(  3 mod 256): ZERO     0x1d7d5 thru 0x1f399	(0x1bc5 bytes)	******ZZZZ
> > > > +260(  4 mod 256): TRUNCATE DOWN	from 0x4eaef to 0x11200	******WWWW
> > > > +261(  5 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes) HOLE	***WWWW
> > > > +262(  6 mod 256): WRITE    0x2200 thru 0x31ff	(0x1000 bytes)
> > > > +263(  7 mod 256): WRITE    0x15000 thru 0x15fff	(0x1000 bytes)
> > > > +264(  8 mod 256): WRITE    0x2e400 thru 0x2e7ff	(0x400 bytes)
> > > > +265(  9 mod 256): COPY 0xd000 thru 0xdfff	(0x1000 bytes) to 0x1d800 thru 0x1e7ff	******EEEE
> > > > +266( 10 mod 256): CLONE 0x2a000 thru 0x2afff	(0x1000 bytes) to 0x21000 thru 0x21fff
> > > > +267( 11 mod 256): MAPREAD  0x31000 thru 0x31d0a	(0xd0b bytes)
> > > > +268( 12 mod 256): SKIPPED (no operation)
> > > > +269( 13 mod 256): WRITE    0x25000 thru 0x25fff	(0x1000 bytes)
> > > > +270( 14 mod 256): SKIPPED (no operation)
> > > > +271( 15 mod 256): MAPREAD  0x30000 thru 0x30577	(0x578 bytes)
> > > > +272( 16 mod 256): PUNCH    0x1a267 thru 0x1c093	(0x1e2d bytes)
> > > > +273( 17 mod 256): MAPREAD  0x1f000 thru 0x1f9c9	(0x9ca bytes)
> > > > +274( 18 mod 256): WRITE    0x40800 thru 0x40dff	(0x600 bytes)
> > > > +275( 19 mod 256): SKIPPED (no operation)
> > > > +276( 20 mod 256): MAPWRITE 0x20600 thru 0x22115	(0x1b16 bytes)
> > > > +277( 21 mod 256): MAPWRITE 0x3d000 thru 0x3ee5a	(0x1e5b bytes)
> > > > +278( 22 mod 256): WRITE    0x2ee00 thru 0x2efff	(0x200 bytes)
> > > > +279( 23 mod 256): WRITE    0x76200 thru 0x769ff	(0x800 bytes) HOLE
> > > > +280( 24 mod 256): SKIPPED (no operation)
> > > > +281( 25 mod 256): SKIPPED (no operation)
> > > > +282( 26 mod 256): MAPREAD  0xa000 thru 0xa5e7	(0x5e8 bytes)
> > > > +283( 27 mod 256): SKIPPED (no operation)
> > > > +284( 28 mod 256): SKIPPED (no operation)
> > > > +285( 29 mod 256): SKIPPED (no operation)
> > > > +286( 30 mod 256): SKIPPED (no operation)
> > > > +287( 31 mod 256): COLLAPSE 0x11000 thru 0x11fff	(0x1000 bytes)
> > > > +288( 32 mod 256): COPY 0x5d000 thru 0x5dfff	(0x1000 bytes) to 0x4ca00 thru 0x4d9ff
> > > > +289( 33 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x1e400
> > > > +290( 34 mod 256): MAPREAD  0x1c000 thru 0x1d802	(0x1803 bytes)	***RRRR***
> > > > +Log of operations saved to "/mnt/xfstests/test/junk.fsxops"; replay with --replay-ops
> > > > +Correct content saved for comparison
> > > > +(maybe hexdump "/mnt/xfstests/test/junk" vs "/mnt/xfstests/test/junk.fsxgood")
> > > > 
> > > > Thanks,
> > > > Zorro
> > > 
> > > Hi Zorro, just to confirm is this on an older kernel that doesnt support
> > > RWF_ATOMIC or on a kernle that does support it.
> > 
> > I tested on linux 6.16 and current latest linux v6.17+ (will be 6.18-rc1 later).
> > About the RWF_ATOMIC flag in my system:
> > 
> > # grep -rsn RWF_ATOMIC /usr/include/
> > /usr/include/bits/uio-ext.h:51:#define RWF_ATOMIC       0x00000040 /* Write is to be issued with torn-write
> > /usr/include/linux/fs.h:424:#define RWF_ATOMIC  ((__kernel_rwf_t)0x00000040)
> > /usr/include/linux/fs.h:431:                     RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
> > /usr/include/xfs/linux.h:236:#ifndef RWF_ATOMIC
> > /usr/include/xfs/linux.h:237:#define RWF_ATOMIC ((__kernel_rwf_t)0x00000040)
> 
> Hi Zorro, thanks for checking this. So correct me if im wrong but I
> understand that you have run this test on an atomic writes enabled 
> kernel where the stack also supports atomic writes.
> 
> Looking at the bad data log:
> 
> 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> 	+OFFSET      GOOD    BAD     RANGE
> 	+0x1c000     0x0000  0xcdcd  0x0
> 	+operation# (mod 256) for the bad data may be 205
> 
> We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> that caused this is indicated to be 205, but looking at that operation:
> 
> +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> 
> This doesn't even overlap the range that is bad. (0x1c000 to 0x1c00f).
> Infact, it does seem like an unlikely coincidence that the actual data
> in the bad range is 0xcdcd which is something xfs_io -c "pwrite" writes
> to default (fsx writes random data in even offsets and operation num in
> odd).
> 
> I am able to replicate this but only on XFS but not on ext4 (atleast not
> in 20 runs).  I'm trying to better understand if this is a test issue or
> not. Will keep you update.
> 
> I'm not sure how this will affect the upcoming release, if you want
> shall I send a small patch to make the atomic writes feature default off
> instead of default on till we root cause this?
> 
> Regards,
> Ojaswin

Hi Zorro,

So I'm able to narrow down the operations and replicate it via the
following replay file:

# -----
# replay.fsxops
# -----
write_atomic 0x57000 0x1000 0x69690
write_atomic 0x66000 0x1000 0x4de00
write_atomic 0x18000 0x1000 0x2c800
copy_range 0x20000 0x1000 0xe00 0x70e00
write_atomic 0x18000 0x1000 0x70e00
copy_range 0x21000 0x1000 0x23000 0x74218
truncate 0x0 0x11200 0x4daef *
write_atomic 0x43000 0x1000 0x11200 *
write_atomic 0x15000 0x1000 0x44000
copy_range 0xd000 0x1000 0x1d800 0x44000
mapread 0x1c000 0x1803 0x1e400 *


Command: ./ltp/fsx -N 10000 -o 8192 -l 500000 -r 4096 -t 512 -w 512 -Z -FKuHzI --replay-ops replay.fsxops $MNT/junk

$MNT/junk is always opened O_TRUNC and is on an XFS FS where the
disk is non-atomic so all RWF_ATOMIC writes are software emulated.

Here are the logs generated for this run:

Seed set to 1
main: filesystem does not support exchange range, disabling!

READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/test/junk
OFFSET      GOOD    BAD     RANGE
0x1d000     0x0000  0xf322  0x0
operation# (mod 256) for the bad data may be 243
0x1d001     0x0000  0x22f3  0x1
operation# (mod 256) for the bad data may be 243
0x1d002     0x0000  0xf391  0x2
operation# (mod 256) for the bad data may be 243
0x1d003     0x0000  0x91f3  0x3
<... a few more such lines ..>

LOG DUMP (11 total operations):
openat(AT_FDCWD, "/mnt/test/junk.fsxops", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 7
1(  1 mod 256): WRITE    0x57000 thru 0x57fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
2(  2 mod 256): WRITE    0x66000 thru 0x66fff   (0x1000 bytes) HOLE ATOMIC
3(  3 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
4(  4 mod 256): COPY 0x20000 thru 0x20fff       (0x1000 bytes) to 0xe00 thru 0x1dff
5(  5 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
6(  6 mod 256): COPY 0x21000 thru 0x21fff       (0x1000 bytes) to 0x23000 thru 0x23fff
7(  7 mod 256): TRUNCATE DOWN   from 0x67000 to 0x11200 ******WWWW
8(  8 mod 256): WRITE    0x43000 thru 0x43fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
9(  9 mod 256): WRITE    0x15000 thru 0x15fff   (0x1000 bytes) ATOMIC
10( 10 mod 256): COPY 0xd000 thru 0xdfff        (0x1000 bytes) to 0x1d800 thru 0x1e7ff
11( 11 mod 256): MAPREAD  0x1c000 thru 0x1d802  (0x1803 bytes)  ***RRRR***
Log of operations saved to "/mnt/test/junk.fsxops"; replay with --replay-ops
Correct content saved for comparison
(maybe hexdump "/mnt/test/junk" vs "/mnt/test/junk.fsxgood")
+++ exited with 110 +++

We can see that the bad data is detected in the final MAPREAD operation
and the bad offset is at 0x1d000. If we look at the operations dump
above, it's clear that none of the operations should be modifying
0x1d000, so we should have been reading 0s, yet we see some junk data
there in the file:

$ hexdump /mnt/test/junk -s 0x1c000 -n0x1020
001c000 0000 0000 0000 0000 0000 0000 0000 0000
*
001d000 22f3 91f3 7ff3 3af3 39f3 23f3 6df3 c2f3
001d010 c5f3 f6f3 a6f3 1ef3 58f3 40f3 32f3 5ff3
001d020

Another thing to note is that I can't reproduce the above on a scsi-debug
device.  @Darrick, @John, could this be an issue in the kernel?

Regards,
ojaswin
> 
> > 
> > Thanks,
> > Zorro
> > 
> > > 
> > > Regards,
> > > ojaswin
> > > 
> >
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Zorro Lang 3 months, 3 weeks ago
On Tue, Oct 07, 2025 at 03:28:46PM +0530, Ojaswin Mujoo wrote:
> On Mon, Oct 06, 2025 at 06:50:03PM +0530, Ojaswin Mujoo wrote:
> > On Sun, Oct 05, 2025 at 11:39:56PM +0800, Zorro Lang wrote:
> > > On Sun, Oct 05, 2025 at 06:27:24PM +0530, Ojaswin Mujoo wrote:
> > > > On Sat, Oct 04, 2025 at 01:19:32AM +0800, Zorro Lang wrote:
> > > > > On Thu, Oct 02, 2025 at 11:26:45PM +0530, Ojaswin Mujoo wrote:
> > > > > > On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> > > > > > > On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > > > > > > > Implement atomic write support to help fuzz atomic writes
> > > > > > > > with fsx.
> > > > > > > > 
> > > > > > > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > > > > > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > > > > > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > > > > > > > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > > > > > > > ---
> > > > > > > 
> > > > > > > Hmm... this patch causes more regular fsx test cases fail on old kernel,
> > > > > > > (e.g. g/760, g/617, g/263 ...) except set "FSX_AVOID=-a". Is there a way
> > > > > > > to disable "atomic write" automatically if it's not supported by current
> > > > > > > system?
> > > > > > 
> > > > > > Hi Zorro, 
> > > > > > Sorry for being late, I've been on vacation this week.
> > > > > > 
> > > > > > Yes so by design we should be automatically disabling atomic writes when
> > > > > > they are not supported by the stack but seems like the issue is that
> > > > > > when we do disable it we print some extra messages to stdout/err which
> > > > > > show up in the xfstests output causing failure.
> > > > > > 
> > > > > > I can think of 2 ways around this:
> > > > > > 
> > > > > > 1. Don't print anything and just silently drop atomic writes if stack
> > > > > > doesn't support them.
> > > > > > 
> > > > > > 2. Make atomic writes as a default off instead of default on feature but
> > > > > > his loses a bit of coverage as existing tests wont get atomic write
> > > > > > testing free of cost any more.
> > > > > 
> > > > > Hi Ojaswin,
> > > > > 
> > > > > Please have a nice vacation :)
> > > > > 
> > > > > It's not the "extra messages" cause failure, those "quiet" failures can be fixed
> > > > > by:
> > > > 
> > > > Oh okay got it.
> > > > 
> > > > > 
> > > > > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > > > > index bdb87ca90..0a035b37b 100644
> > > > > --- a/ltp/fsx.c
> > > > > +++ b/ltp/fsx.c
> > > > > @@ -1847,8 +1847,9 @@ int test_atomic_writes(void) {
> > > > >         struct statx stx;
> > > > >  
> > > > >         if (o_direct != O_DIRECT) {
> > > > > -               fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > > -                               "disabling!\n");
> > > > > +               if (!quiet)
> > > > > +                       fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > > +                                       "disabling!\n");
> > > > >                 return 0;
> > > > >         }
> > > > >  
> > > > > @@ -1867,8 +1868,9 @@ int test_atomic_writes(void) {
> > > > >                 return 1;
> > > > >         }
> > > > >  
> > > > > -       fprintf(stderr, "main: IO Stack does not support "
> > > > > -                       "atomic writes, disabling!\n");
> > > > > +       if (!quiet)
> > > > > +               fprintf(stderr, "main: IO Stack does not support "
> > > > > +                               "atomic writes, disabling!\n");
> > > > >         return 0;
> > > > >  }
> > > > 
> > > > > 
> > > > > But I hit more read or write failures e.g. [1], this failure can't be
> > > > > reproduced with FSX_AVOID=-a. Is it a atomic write bug or an unexpected
> > > > > test failure?
> > > > > 
> > > > > Thanks,
> > > > > Zorro
> > > > > 
> > > > 
> > > > <...>
> > > > 
> > > > > +244(244 mod 256): SKIPPED (no operation)
> > > > > +245(245 mod 256): FALLOC   0x695c5 thru 0x6a2e6	(0xd21 bytes) INTERIOR
> > > > > +246(246 mod 256): MAPWRITE 0x5ac00 thru 0x5b185	(0x586 bytes)
> > > > > +247(247 mod 256): WRITE    0x31200 thru 0x313ff	(0x200 bytes)
> > > > > +248(248 mod 256): SKIPPED (no operation)
> > > > > +249(249 mod 256): TRUNCATE DOWN	from 0x78242 to 0xf200	******WWWW
> > > > > +250(250 mod 256): FALLOC   0x65000 thru 0x66f26	(0x1f26 bytes) PAST_EOF
> > > > > +251(251 mod 256): WRITE    0x45400 thru 0x467ff	(0x1400 bytes) HOLE	***WWWW
> > > > > +252(252 mod 256): SKIPPED (no operation)
> > > > > +253(253 mod 256): SKIPPED (no operation)
> > > > > +254(254 mod 256): MAPWRITE 0x4be00 thru 0x4daee	(0x1cef bytes)
> > > > > +255(255 mod 256): MAPREAD  0xc000 thru 0xcae9	(0xaea bytes)
> > > > > +256(  0 mod 256): READ     0x3e000 thru 0x3efff	(0x1000 bytes)
> > > > > +257(  1 mod 256): SKIPPED (no operation)
> > > > > +258(  2 mod 256): INSERT 0x45000 thru 0x45fff	(0x1000 bytes)
> > > > > +259(  3 mod 256): ZERO     0x1d7d5 thru 0x1f399	(0x1bc5 bytes)	******ZZZZ
> > > > > +260(  4 mod 256): TRUNCATE DOWN	from 0x4eaef to 0x11200	******WWWW
> > > > > +261(  5 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes) HOLE	***WWWW
> > > > > +262(  6 mod 256): WRITE    0x2200 thru 0x31ff	(0x1000 bytes)
> > > > > +263(  7 mod 256): WRITE    0x15000 thru 0x15fff	(0x1000 bytes)
> > > > > +264(  8 mod 256): WRITE    0x2e400 thru 0x2e7ff	(0x400 bytes)
> > > > > +265(  9 mod 256): COPY 0xd000 thru 0xdfff	(0x1000 bytes) to 0x1d800 thru 0x1e7ff	******EEEE
> > > > > +266( 10 mod 256): CLONE 0x2a000 thru 0x2afff	(0x1000 bytes) to 0x21000 thru 0x21fff
> > > > > +267( 11 mod 256): MAPREAD  0x31000 thru 0x31d0a	(0xd0b bytes)
> > > > > +268( 12 mod 256): SKIPPED (no operation)
> > > > > +269( 13 mod 256): WRITE    0x25000 thru 0x25fff	(0x1000 bytes)
> > > > > +270( 14 mod 256): SKIPPED (no operation)
> > > > > +271( 15 mod 256): MAPREAD  0x30000 thru 0x30577	(0x578 bytes)
> > > > > +272( 16 mod 256): PUNCH    0x1a267 thru 0x1c093	(0x1e2d bytes)
> > > > > +273( 17 mod 256): MAPREAD  0x1f000 thru 0x1f9c9	(0x9ca bytes)
> > > > > +274( 18 mod 256): WRITE    0x40800 thru 0x40dff	(0x600 bytes)
> > > > > +275( 19 mod 256): SKIPPED (no operation)
> > > > > +276( 20 mod 256): MAPWRITE 0x20600 thru 0x22115	(0x1b16 bytes)
> > > > > +277( 21 mod 256): MAPWRITE 0x3d000 thru 0x3ee5a	(0x1e5b bytes)
> > > > > +278( 22 mod 256): WRITE    0x2ee00 thru 0x2efff	(0x200 bytes)
> > > > > +279( 23 mod 256): WRITE    0x76200 thru 0x769ff	(0x800 bytes) HOLE
> > > > > +280( 24 mod 256): SKIPPED (no operation)
> > > > > +281( 25 mod 256): SKIPPED (no operation)
> > > > > +282( 26 mod 256): MAPREAD  0xa000 thru 0xa5e7	(0x5e8 bytes)
> > > > > +283( 27 mod 256): SKIPPED (no operation)
> > > > > +284( 28 mod 256): SKIPPED (no operation)
> > > > > +285( 29 mod 256): SKIPPED (no operation)
> > > > > +286( 30 mod 256): SKIPPED (no operation)
> > > > > +287( 31 mod 256): COLLAPSE 0x11000 thru 0x11fff	(0x1000 bytes)
> > > > > +288( 32 mod 256): COPY 0x5d000 thru 0x5dfff	(0x1000 bytes) to 0x4ca00 thru 0x4d9ff
> > > > > +289( 33 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x1e400
> > > > > +290( 34 mod 256): MAPREAD  0x1c000 thru 0x1d802	(0x1803 bytes)	***RRRR***
> > > > > +Log of operations saved to "/mnt/xfstests/test/junk.fsxops"; replay with --replay-ops
> > > > > +Correct content saved for comparison
> > > > > +(maybe hexdump "/mnt/xfstests/test/junk" vs "/mnt/xfstests/test/junk.fsxgood")
> > > > > 
> > > > > Thanks,
> > > > > Zorro
> > > > 
> > > > Hi Zorro, just to confirm is this on an older kernel that doesnt support
> > > > RWF_ATOMIC or on a kernle that does support it.
> > > 
> > > I tested on linux 6.16 and current latest linux v6.17+ (will be 6.18-rc1 later).
> > > About the RWF_ATOMIC flag in my system:
> > > 
> > > # grep -rsn RWF_ATOMIC /usr/include/
> > > /usr/include/bits/uio-ext.h:51:#define RWF_ATOMIC       0x00000040 /* Write is to be issued with torn-write
> > > /usr/include/linux/fs.h:424:#define RWF_ATOMIC  ((__kernel_rwf_t)0x00000040)
> > > /usr/include/linux/fs.h:431:                     RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
> > > /usr/include/xfs/linux.h:236:#ifndef RWF_ATOMIC
> > > /usr/include/xfs/linux.h:237:#define RWF_ATOMIC ((__kernel_rwf_t)0x00000040)
> > 
> > Hi Zorro, thanks for checking this. So correct me if im wrong but I
> > understand that you have run this test on an atomic writes enabled 
> > kernel where the stack also supports atomic writes.
> > 
> > Looking at the bad data log:
> > 
> > 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> > 	+OFFSET      GOOD    BAD     RANGE
> > 	+0x1c000     0x0000  0xcdcd  0x0
> > 	+operation# (mod 256) for the bad data may be 205
> > 
> > We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> > that caused this is indicated to be 205, but looking at that operation:
> > 
> > +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> > 
> > This doesn't even overlap the range that is bad. (0x1c000 to 0x1c00f).
> > Infact, it does seem like an unlikely coincidence that the actual data
> > in the bad range is 0xcdcd which is something xfs_io -c "pwrite" writes
> > to default (fsx writes random data in even offsets and operation num in
> > odd).
> > 
> > I am able to replicate this but only on XFS but not on ext4 (atleast not
> > in 20 runs).  I'm trying to better understand if this is a test issue or
> > not. Will keep you update.
> > 
> > I'm not sure how this will affect the upcoming release, if you want
> > shall I send a small patch to make the atomic writes feature default off
> > instead of default on till we root cause this?
> > 
> > Regards,
> > Ojaswin
> 
> Hi Zorro,
> 
> So I'm able to narrow down the opoerations and replicate it via the
> following replay file:
> 
> # -----
> # replay.fsxops
> # -----
> write_atomic 0x57000 0x1000 0x69690
> write_atomic 0x66000 0x1000 0x4de00
> write_atomic 0x18000 0x1000 0x2c800
> copy_range 0x20000 0x1000 0xe00 0x70e00
> write_atomic 0x18000 0x1000 0x70e00
> copy_range 0x21000 0x1000 0x23000 0x74218
> truncate 0x0 0x11200 0x4daef *
> write_atomic 0x43000 0x1000 0x11200 *
> write_atomic 0x15000 0x1000 0x44000
> copy_range 0xd000 0x1000 0x1d800 0x44000
> mapread 0x1c000 0x1803 0x1e400 *
> 
> 
> Command: ./ltp/fsx -N 10000 -o 8192 -l 500000 -r 4096 -t 512 -w 512 -Z -FKuHzI --replay-ops replay.fsxops $MNT/junk
> 
> $MNT/junk is always opened O_TRUNC and is an on an XFS FS where the
> disk is non-atomic so all RWF_ATOMIC writes are software emulated.
> 
> Here are the logs generated for this run:
> 
> Seed set to 1
> main: filesystem does not support exchange range, disabling!
> 
> READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/test/junk
> OFFSET      GOOD    BAD     RANGE
> 0x1d000     0x0000  0xf322  0x0
> operation# (mod 256) for the bad data may be 243
> 0x1d001     0x0000  0x22f3  0x1
> operation# (mod 256) for the bad data may be 243
> 0x1d002     0x0000  0xf391  0x2
> operation# (mod 256) for the bad data may be 243
> 0x1d003     0x0000  0x91f3  0x3
> <... a few more such lines ..>
> 
> LOG DUMP (11 total operations):
> openat(AT_FDCWD, "/mnt/test/junk.fsxops", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 7
> 1(  1 mod 256): WRITE    0x57000 thru 0x57fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
> 2(  2 mod 256): WRITE    0x66000 thru 0x66fff   (0x1000 bytes) HOLE ATOMIC
> 3(  3 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
> 4(  4 mod 256): COPY 0x20000 thru 0x20fff       (0x1000 bytes) to 0xe00 thru 0x1dff
> 5(  5 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
> 6(  6 mod 256): COPY 0x21000 thru 0x21fff       (0x1000 bytes) to 0x23000 thru 0x23fff
> 7(  7 mod 256): TRUNCATE DOWN   from 0x67000 to 0x11200 ******WWWW
> 8(  8 mod 256): WRITE    0x43000 thru 0x43fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
> 9(  9 mod 256): WRITE    0x15000 thru 0x15fff   (0x1000 bytes) ATOMIC
> 10( 10 mod 256): COPY 0xd000 thru 0xdfff        (0x1000 bytes) to 0x1d800 thru 0x1e7ff
> 11( 11 mod 256): MAPREAD  0x1c000 thru 0x1d802  (0x1803 bytes)  ***RRRR***
> Log of operations saved to "/mnt/test/junk.fsxops"; replay with --replay-ops
> Correct content saved for comparison
> (maybe hexdump "/mnt/test/junk" vs "/mnt/test/junk.fsxgood")
> +++ exited with 110 +++
> 
> We can see that the bad data is detected in the final MAPREAD operation
> and and bad offset is at 0x1d000. If we look at the operations dump
> above its clear that none of the operations should be modifying the
> 0x1d000 so we should have been reading 0s but yet we see some junk data
> there in the file:
> 
> $ hexdump /mnt/test/junk -s 0x1c000 -n0x1020
> 001c000 0000 0000 0000 0000 0000 0000 0000 0000
> *
> 001d000 22f3 91f3 7ff3 3af3 39f3 23f3 6df3 c2f3
> 001d010 c5f3 f6f3 a6f3 1ef3 58f3 40f3 32f3 5ff3
> 001d020
> 
> Another thing to not is that I can't reproduce the above on scsi-debug
> device.  @Darrick, @John, could this be an issue in kernel?

Hi Ojaswin,

If we can be sure this is a kernel bug, rather than an fstests (patch) issue, I think we
can merge this patchset to expose this bug. Does this make sense to you and others?

Thanks,
Zorro

> 
> Regards,
> ojaswin
> > 
> > > 
> > > Thanks,
> > > Zorro
> > > 
> > > > 
> > > > Regards,
> > > > ojaswin
> > > > 
> > > 
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Darrick J. Wong 3 months, 3 weeks ago
On Sat, Oct 18, 2025 at 12:01:22AM +0800, Zorro Lang wrote:
> On Tue, Oct 07, 2025 at 03:28:46PM +0530, Ojaswin Mujoo wrote:
> > On Mon, Oct 06, 2025 at 06:50:03PM +0530, Ojaswin Mujoo wrote:
> > > On Sun, Oct 05, 2025 at 11:39:56PM +0800, Zorro Lang wrote:
> > > > On Sun, Oct 05, 2025 at 06:27:24PM +0530, Ojaswin Mujoo wrote:
> > > > > On Sat, Oct 04, 2025 at 01:19:32AM +0800, Zorro Lang wrote:
> > > > > > On Thu, Oct 02, 2025 at 11:26:45PM +0530, Ojaswin Mujoo wrote:
> > > > > > > On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> > > > > > > > On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > > > > > > > > Implement atomic write support to help fuzz atomic writes
> > > > > > > > > with fsx.
> > > > > > > > > 
> > > > > > > > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > > > > > > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > > > > > > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > > > > > > > > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > > > > > > > > ---
> > > > > > > > 
> > > > > > > > Hmm... this patch causes more regular fsx test cases fail on old kernel,
> > > > > > > > (e.g. g/760, g/617, g/263 ...) except set "FSX_AVOID=-a". Is there a way
> > > > > > > > to disable "atomic write" automatically if it's not supported by current
> > > > > > > > system?
> > > > > > > 
> > > > > > > Hi Zorro, 
> > > > > > > Sorry for being late, I've been on vacation this week.
> > > > > > > 
> > > > > > > Yes so by design we should be automatically disabling atomic writes when
> > > > > > > they are not supported by the stack but seems like the issue is that
> > > > > > > when we do disable it we print some extra messages to stdout/err which
> > > > > > > show up in the xfstests output causing failure.
> > > > > > > 
> > > > > > > I can think of 2 ways around this:
> > > > > > > 
> > > > > > > 1. Don't print anything and just silently drop atomic writes if stack
> > > > > > > doesn't support them.
> > > > > > > 
> > > > > > > 2. Make atomic writes as a default off instead of default on feature but
> > > > > > > his loses a bit of coverage as existing tests wont get atomic write
> > > > > > > testing free of cost any more.
> > > > > > 
> > > > > > Hi Ojaswin,
> > > > > > 
> > > > > > Please have a nice vacation :)
> > > > > > 
> > > > > > It's not the "extra messages" cause failure, those "quiet" failures can be fixed
> > > > > > by:
> > > > > 
> > > > > Oh okay got it.
> > > > > 
> > > > > > 
> > > > > > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > > > > > index bdb87ca90..0a035b37b 100644
> > > > > > --- a/ltp/fsx.c
> > > > > > +++ b/ltp/fsx.c
> > > > > > @@ -1847,8 +1847,9 @@ int test_atomic_writes(void) {
> > > > > >         struct statx stx;
> > > > > >  
> > > > > >         if (o_direct != O_DIRECT) {
> > > > > > -               fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > > > -                               "disabling!\n");
> > > > > > +               if (!quiet)
> > > > > > +                       fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > > > +                                       "disabling!\n");
> > > > > >                 return 0;
> > > > > >         }
> > > > > >  
> > > > > > @@ -1867,8 +1868,9 @@ int test_atomic_writes(void) {
> > > > > >                 return 1;
> > > > > >         }
> > > > > >  
> > > > > > -       fprintf(stderr, "main: IO Stack does not support "
> > > > > > -                       "atomic writes, disabling!\n");
> > > > > > +       if (!quiet)
> > > > > > +               fprintf(stderr, "main: IO Stack does not support "
> > > > > > +                               "atomic writes, disabling!\n");
> > > > > >         return 0;
> > > > > >  }
> > > > > 
> > > > > > 
> > > > > > But I hit more read or write failures e.g. [1], this failure can't be
> > > > > > reproduced with FSX_AVOID=-a. Is it a atomic write bug or an unexpected
> > > > > > test failure?
> > > > > > 
> > > > > > Thanks,
> > > > > > Zorro
> > > > > > 
> > > > > 
> > > > > <...>
> > > > > 
> > > > > > +244(244 mod 256): SKIPPED (no operation)
> > > > > > +245(245 mod 256): FALLOC   0x695c5 thru 0x6a2e6	(0xd21 bytes) INTERIOR
> > > > > > +246(246 mod 256): MAPWRITE 0x5ac00 thru 0x5b185	(0x586 bytes)
> > > > > > +247(247 mod 256): WRITE    0x31200 thru 0x313ff	(0x200 bytes)
> > > > > > +248(248 mod 256): SKIPPED (no operation)
> > > > > > +249(249 mod 256): TRUNCATE DOWN	from 0x78242 to 0xf200	******WWWW
> > > > > > +250(250 mod 256): FALLOC   0x65000 thru 0x66f26	(0x1f26 bytes) PAST_EOF
> > > > > > +251(251 mod 256): WRITE    0x45400 thru 0x467ff	(0x1400 bytes) HOLE	***WWWW
> > > > > > +252(252 mod 256): SKIPPED (no operation)
> > > > > > +253(253 mod 256): SKIPPED (no operation)
> > > > > > +254(254 mod 256): MAPWRITE 0x4be00 thru 0x4daee	(0x1cef bytes)
> > > > > > +255(255 mod 256): MAPREAD  0xc000 thru 0xcae9	(0xaea bytes)
> > > > > > +256(  0 mod 256): READ     0x3e000 thru 0x3efff	(0x1000 bytes)
> > > > > > +257(  1 mod 256): SKIPPED (no operation)
> > > > > > +258(  2 mod 256): INSERT 0x45000 thru 0x45fff	(0x1000 bytes)
> > > > > > +259(  3 mod 256): ZERO     0x1d7d5 thru 0x1f399	(0x1bc5 bytes)	******ZZZZ
> > > > > > +260(  4 mod 256): TRUNCATE DOWN	from 0x4eaef to 0x11200	******WWWW
> > > > > > +261(  5 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes) HOLE	***WWWW
> > > > > > +262(  6 mod 256): WRITE    0x2200 thru 0x31ff	(0x1000 bytes)
> > > > > > +263(  7 mod 256): WRITE    0x15000 thru 0x15fff	(0x1000 bytes)
> > > > > > +264(  8 mod 256): WRITE    0x2e400 thru 0x2e7ff	(0x400 bytes)
> > > > > > +265(  9 mod 256): COPY 0xd000 thru 0xdfff	(0x1000 bytes) to 0x1d800 thru 0x1e7ff	******EEEE
> > > > > > +266( 10 mod 256): CLONE 0x2a000 thru 0x2afff	(0x1000 bytes) to 0x21000 thru 0x21fff
> > > > > > +267( 11 mod 256): MAPREAD  0x31000 thru 0x31d0a	(0xd0b bytes)
> > > > > > +268( 12 mod 256): SKIPPED (no operation)
> > > > > > +269( 13 mod 256): WRITE    0x25000 thru 0x25fff	(0x1000 bytes)
> > > > > > +270( 14 mod 256): SKIPPED (no operation)
> > > > > > +271( 15 mod 256): MAPREAD  0x30000 thru 0x30577	(0x578 bytes)
> > > > > > +272( 16 mod 256): PUNCH    0x1a267 thru 0x1c093	(0x1e2d bytes)
> > > > > > +273( 17 mod 256): MAPREAD  0x1f000 thru 0x1f9c9	(0x9ca bytes)
> > > > > > +274( 18 mod 256): WRITE    0x40800 thru 0x40dff	(0x600 bytes)
> > > > > > +275( 19 mod 256): SKIPPED (no operation)
> > > > > > +276( 20 mod 256): MAPWRITE 0x20600 thru 0x22115	(0x1b16 bytes)
> > > > > > +277( 21 mod 256): MAPWRITE 0x3d000 thru 0x3ee5a	(0x1e5b bytes)
> > > > > > +278( 22 mod 256): WRITE    0x2ee00 thru 0x2efff	(0x200 bytes)
> > > > > > +279( 23 mod 256): WRITE    0x76200 thru 0x769ff	(0x800 bytes) HOLE
> > > > > > +280( 24 mod 256): SKIPPED (no operation)
> > > > > > +281( 25 mod 256): SKIPPED (no operation)
> > > > > > +282( 26 mod 256): MAPREAD  0xa000 thru 0xa5e7	(0x5e8 bytes)
> > > > > > +283( 27 mod 256): SKIPPED (no operation)
> > > > > > +284( 28 mod 256): SKIPPED (no operation)
> > > > > > +285( 29 mod 256): SKIPPED (no operation)
> > > > > > +286( 30 mod 256): SKIPPED (no operation)
> > > > > > +287( 31 mod 256): COLLAPSE 0x11000 thru 0x11fff	(0x1000 bytes)
> > > > > > +288( 32 mod 256): COPY 0x5d000 thru 0x5dfff	(0x1000 bytes) to 0x4ca00 thru 0x4d9ff
> > > > > > +289( 33 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x1e400
> > > > > > +290( 34 mod 256): MAPREAD  0x1c000 thru 0x1d802	(0x1803 bytes)	***RRRR***
> > > > > > +Log of operations saved to "/mnt/xfstests/test/junk.fsxops"; replay with --replay-ops
> > > > > > +Correct content saved for comparison
> > > > > > +(maybe hexdump "/mnt/xfstests/test/junk" vs "/mnt/xfstests/test/junk.fsxgood")
> > > > > > 
> > > > > > Thanks,
> > > > > > Zorro
> > > > > 
> > > > > Hi Zorro, just to confirm: is this on an older kernel that doesn't support
> > > > > RWF_ATOMIC, or on a kernel that does support it?
> > > > 
> > > > I tested on linux 6.16 and current latest linux v6.17+ (will be 6.18-rc1 later).
> > > > About the RWF_ATOMIC flag in my system:
> > > > 
> > > > # grep -rsn RWF_ATOMIC /usr/include/
> > > > /usr/include/bits/uio-ext.h:51:#define RWF_ATOMIC       0x00000040 /* Write is to be issued with torn-write
> > > > /usr/include/linux/fs.h:424:#define RWF_ATOMIC  ((__kernel_rwf_t)0x00000040)
> > > > /usr/include/linux/fs.h:431:                     RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
> > > > /usr/include/xfs/linux.h:236:#ifndef RWF_ATOMIC
> > > > /usr/include/xfs/linux.h:237:#define RWF_ATOMIC ((__kernel_rwf_t)0x00000040)
> > > 
> > > Hi Zorro, thanks for checking this. So, correct me if I'm wrong, but I
> > > understand that you have run this test on an atomic-writes-enabled
> > > kernel where the stack also supports atomic writes.
> > > 
> > > Looking at the bad data log:
> > > 
> > > 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> > > 	+OFFSET      GOOD    BAD     RANGE
> > > 	+0x1c000     0x0000  0xcdcd  0x0
> > > 	+operation# (mod 256) for the bad data may be 205
> > > 
> > > We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> > > that caused this is indicated to be 205, but looking at that operation:
> > > 
> > > +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> > > 
> > > This doesn't even overlap the range that is bad (0x1c000 to 0x1c00f).
> > > In fact, it does seem like an unlikely coincidence that the actual data
> > > in the bad range is 0xcdcd, which is what xfs_io -c "pwrite" writes
> > > by default (fsx writes random data at even offsets and the operation
> > > number at odd ones).
> > > 
> > > I am able to replicate this, but only on XFS and not on ext4 (at least not
> > > in 20 runs).  I'm trying to better understand if this is a test issue or
> > > not. Will keep you updated.
> > > 
> > > I'm not sure how this will affect the upcoming release, if you want
> > > shall I send a small patch to make the atomic writes feature default off
> > > instead of default on till we root cause this?
> > > 
> > > Regards,
> > > Ojaswin
> > 
> > Hi Zorro,
> > 
> > So I'm able to narrow down the operations and replicate it via the
> > following replay file:
> > 
> > # -----
> > # replay.fsxops
> > # -----
> > write_atomic 0x57000 0x1000 0x69690
> > write_atomic 0x66000 0x1000 0x4de00
> > write_atomic 0x18000 0x1000 0x2c800
> > copy_range 0x20000 0x1000 0xe00 0x70e00
> > write_atomic 0x18000 0x1000 0x70e00
> > copy_range 0x21000 0x1000 0x23000 0x74218
> > truncate 0x0 0x11200 0x4daef *
> > write_atomic 0x43000 0x1000 0x11200 *
> > write_atomic 0x15000 0x1000 0x44000
> > copy_range 0xd000 0x1000 0x1d800 0x44000
> > mapread 0x1c000 0x1803 0x1e400 *
> > 
> > 
> > Command: ./ltp/fsx -N 10000 -o 8192 -l 500000 -r 4096 -t 512 -w 512 -Z -FKuHzI --replay-ops replay.fsxops $MNT/junk
> > 
> > $MNT/junk is always opened with O_TRUNC and is on an XFS FS where the
> > disk is non-atomic, so all RWF_ATOMIC writes are software emulated.
> > 
> > Here are the logs generated for this run:
> > 
> > Seed set to 1
> > main: filesystem does not support exchange range, disabling!
> > 
> > READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/test/junk
> > OFFSET      GOOD    BAD     RANGE
> > 0x1d000     0x0000  0xf322  0x0
> > operation# (mod 256) for the bad data may be 243
> > 0x1d001     0x0000  0x22f3  0x1
> > operation# (mod 256) for the bad data may be 243
> > 0x1d002     0x0000  0xf391  0x2
> > operation# (mod 256) for the bad data may be 243
> > 0x1d003     0x0000  0x91f3  0x3
> > <... a few more such lines ..>
> > 
> > LOG DUMP (11 total operations):
> > openat(AT_FDCWD, "/mnt/test/junk.fsxops", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 7
> > 1(  1 mod 256): WRITE    0x57000 thru 0x57fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
> > 2(  2 mod 256): WRITE    0x66000 thru 0x66fff   (0x1000 bytes) HOLE ATOMIC
> > 3(  3 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
> > 4(  4 mod 256): COPY 0x20000 thru 0x20fff       (0x1000 bytes) to 0xe00 thru 0x1dff
> > 5(  5 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
> > 6(  6 mod 256): COPY 0x21000 thru 0x21fff       (0x1000 bytes) to 0x23000 thru 0x23fff
> > 7(  7 mod 256): TRUNCATE DOWN   from 0x67000 to 0x11200 ******WWWW
> > 8(  8 mod 256): WRITE    0x43000 thru 0x43fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
> > 9(  9 mod 256): WRITE    0x15000 thru 0x15fff   (0x1000 bytes) ATOMIC
> > 10( 10 mod 256): COPY 0xd000 thru 0xdfff        (0x1000 bytes) to 0x1d800 thru 0x1e7ff
> > 11( 11 mod 256): MAPREAD  0x1c000 thru 0x1d802  (0x1803 bytes)  ***RRRR***
> > Log of operations saved to "/mnt/test/junk.fsxops"; replay with --replay-ops
> > Correct content saved for comparison
> > (maybe hexdump "/mnt/test/junk" vs "/mnt/test/junk.fsxgood")
> > +++ exited with 110 +++
> > 
> > We can see that the bad data is detected in the final MAPREAD operation
> > and the bad offset is at 0x1d000. If we look at the operations dump
> > above, it's clear that none of the operations should be modifying
> > 0x1d000, so we should have been reading 0s, yet we see some junk data
> > there in the file:
> > 
> > $ hexdump /mnt/test/junk -s 0x1c000 -n0x1020
> > 001c000 0000 0000 0000 0000 0000 0000 0000 0000
> > *
> > 001d000 22f3 91f3 7ff3 3af3 39f3 23f3 6df3 c2f3
> > 001d010 c5f3 f6f3 a6f3 1ef3 58f3 40f3 32f3 5ff3
> > 001d020
> > 
> > Another thing to note is that I can't reproduce the above on a scsi-debug
> > device.  @Darrick, @John, could this be an issue in the kernel?
> 
> Hi Ojaswin,
> 
> If we can be sure this is a kernel bug rather than an fstests (patch) issue, I think we
> can merge this patchset to expose this bug. Does this make sense to you and others?

Looks like a kernel bug to me...

--D

> Thanks,
> Zorro
> 
> > 
> > Regards,
> > ojaswin
> > > 
> > > > 
> > > > Thanks,
> > > > Zorro
> > > > 
> > > > > 
> > > > > Regards,
> > > > > ojaswin
> > > > > 
> > > > 
> > 
> 
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Zorro Lang 3 months, 3 weeks ago
On Fri, Oct 17, 2025 at 09:27:57AM -0700, Darrick J. Wong wrote:
> On Sat, Oct 18, 2025 at 12:01:22AM +0800, Zorro Lang wrote:
> > On Tue, Oct 07, 2025 at 03:28:46PM +0530, Ojaswin Mujoo wrote:
> > > On Mon, Oct 06, 2025 at 06:50:03PM +0530, Ojaswin Mujoo wrote:
> > > > On Sun, Oct 05, 2025 at 11:39:56PM +0800, Zorro Lang wrote:
> > > > > On Sun, Oct 05, 2025 at 06:27:24PM +0530, Ojaswin Mujoo wrote:
> > > > > > On Sat, Oct 04, 2025 at 01:19:32AM +0800, Zorro Lang wrote:
> > > > > > > On Thu, Oct 02, 2025 at 11:26:45PM +0530, Ojaswin Mujoo wrote:
> > > > > > > > On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> > > > > > > > > On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > > > > > > > > > Implement atomic write support to help fuzz atomic writes
> > > > > > > > > > with fsx.
> > > > > > > > > > 
> > > > > > > > > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > > > > > > > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > > > > > > > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > > > > > > > > > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > > > > > > > > > ---
> > > > > > > > > 
> > > > > > > > > Hmm... this patch causes more regular fsx test cases fail on old kernel,
> > > > > > > > > (e.g. g/760, g/617, g/263 ...) except set "FSX_AVOID=-a". Is there a way
> > > > > > > > > to disable "atomic write" automatically if it's not supported by current
> > > > > > > > > system?
> > > > > > > > 
> > > > > > > > Hi Zorro, 
> > > > > > > > Sorry for being late, I've been on vacation this week.
> > > > > > > > 
> > > > > > > > Yes so by design we should be automatically disabling atomic writes when
> > > > > > > > they are not supported by the stack but seems like the issue is that
> > > > > > > > when we do disable it we print some extra messages to stdout/err which
> > > > > > > > show up in the xfstests output causing failure.
> > > > > > > > 
> > > > > > > > I can think of 2 ways around this:
> > > > > > > > 
> > > > > > > > 1. Don't print anything and just silently drop atomic writes if stack
> > > > > > > > doesn't support them.
> > > > > > > > 
> > > > > > > > > 2. Make atomic writes a default-off instead of default-on feature, but
> > > > > > > > > this loses a bit of coverage as existing tests won't get atomic write
> > > > > > > > > testing free of cost any more.
> > > > > > > 
> > > > > > > Hi Ojaswin,
> > > > > > > 
> > > > > > > Please have a nice vacation :)
> > > > > > > 
> > > > > > > It's not the "extra messages" cause failure, those "quiet" failures can be fixed
> > > > > > > by:
> > > > > > 
> > > > > > Oh okay got it.
> > > > > > 
> > > > > > > 
> > > > > > > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > > > > > > index bdb87ca90..0a035b37b 100644
> > > > > > > --- a/ltp/fsx.c
> > > > > > > +++ b/ltp/fsx.c
> > > > > > > @@ -1847,8 +1847,9 @@ int test_atomic_writes(void) {
> > > > > > >         struct statx stx;
> > > > > > >  
> > > > > > >         if (o_direct != O_DIRECT) {
> > > > > > > -               fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > > > > -                               "disabling!\n");
> > > > > > > +               if (!quiet)
> > > > > > > +                       fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > > > > +                                       "disabling!\n");
> > > > > > >                 return 0;
> > > > > > >         }
> > > > > > >  
> > > > > > > @@ -1867,8 +1868,9 @@ int test_atomic_writes(void) {
> > > > > > >                 return 1;
> > > > > > >         }
> > > > > > >  
> > > > > > > -       fprintf(stderr, "main: IO Stack does not support "
> > > > > > > -                       "atomic writes, disabling!\n");
> > > > > > > +       if (!quiet)
> > > > > > > +               fprintf(stderr, "main: IO Stack does not support "
> > > > > > > +                               "atomic writes, disabling!\n");
> > > > > > >         return 0;
> > > > > > >  }
> > > > > > 
> > > > > > > 
> > > > > > > > But I hit more read or write failures, e.g. [1]; this failure can't be
> > > > > > > > reproduced with FSX_AVOID=-a. Is it an atomic write bug or an unexpected
> > > > > > > > test failure?
> > > > > > > 
> > > > > > > Thanks,
> > > > > > > Zorro
> > > > > > > 
> > > > > > 
> > > > > > <...>
> > > > > > 
> > > > > > > +244(244 mod 256): SKIPPED (no operation)
> > > > > > > +245(245 mod 256): FALLOC   0x695c5 thru 0x6a2e6	(0xd21 bytes) INTERIOR
> > > > > > > +246(246 mod 256): MAPWRITE 0x5ac00 thru 0x5b185	(0x586 bytes)
> > > > > > > +247(247 mod 256): WRITE    0x31200 thru 0x313ff	(0x200 bytes)
> > > > > > > +248(248 mod 256): SKIPPED (no operation)
> > > > > > > +249(249 mod 256): TRUNCATE DOWN	from 0x78242 to 0xf200	******WWWW
> > > > > > > +250(250 mod 256): FALLOC   0x65000 thru 0x66f26	(0x1f26 bytes) PAST_EOF
> > > > > > > +251(251 mod 256): WRITE    0x45400 thru 0x467ff	(0x1400 bytes) HOLE	***WWWW
> > > > > > > +252(252 mod 256): SKIPPED (no operation)
> > > > > > > +253(253 mod 256): SKIPPED (no operation)
> > > > > > > +254(254 mod 256): MAPWRITE 0x4be00 thru 0x4daee	(0x1cef bytes)
> > > > > > > +255(255 mod 256): MAPREAD  0xc000 thru 0xcae9	(0xaea bytes)
> > > > > > > +256(  0 mod 256): READ     0x3e000 thru 0x3efff	(0x1000 bytes)
> > > > > > > +257(  1 mod 256): SKIPPED (no operation)
> > > > > > > +258(  2 mod 256): INSERT 0x45000 thru 0x45fff	(0x1000 bytes)
> > > > > > > +259(  3 mod 256): ZERO     0x1d7d5 thru 0x1f399	(0x1bc5 bytes)	******ZZZZ
> > > > > > > +260(  4 mod 256): TRUNCATE DOWN	from 0x4eaef to 0x11200	******WWWW
> > > > > > > +261(  5 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes) HOLE	***WWWW
> > > > > > > +262(  6 mod 256): WRITE    0x2200 thru 0x31ff	(0x1000 bytes)
> > > > > > > +263(  7 mod 256): WRITE    0x15000 thru 0x15fff	(0x1000 bytes)
> > > > > > > +264(  8 mod 256): WRITE    0x2e400 thru 0x2e7ff	(0x400 bytes)
> > > > > > > +265(  9 mod 256): COPY 0xd000 thru 0xdfff	(0x1000 bytes) to 0x1d800 thru 0x1e7ff	******EEEE
> > > > > > > +266( 10 mod 256): CLONE 0x2a000 thru 0x2afff	(0x1000 bytes) to 0x21000 thru 0x21fff
> > > > > > > +267( 11 mod 256): MAPREAD  0x31000 thru 0x31d0a	(0xd0b bytes)
> > > > > > > +268( 12 mod 256): SKIPPED (no operation)
> > > > > > > +269( 13 mod 256): WRITE    0x25000 thru 0x25fff	(0x1000 bytes)
> > > > > > > +270( 14 mod 256): SKIPPED (no operation)
> > > > > > > +271( 15 mod 256): MAPREAD  0x30000 thru 0x30577	(0x578 bytes)
> > > > > > > +272( 16 mod 256): PUNCH    0x1a267 thru 0x1c093	(0x1e2d bytes)
> > > > > > > +273( 17 mod 256): MAPREAD  0x1f000 thru 0x1f9c9	(0x9ca bytes)
> > > > > > > +274( 18 mod 256): WRITE    0x40800 thru 0x40dff	(0x600 bytes)
> > > > > > > +275( 19 mod 256): SKIPPED (no operation)
> > > > > > > +276( 20 mod 256): MAPWRITE 0x20600 thru 0x22115	(0x1b16 bytes)
> > > > > > > +277( 21 mod 256): MAPWRITE 0x3d000 thru 0x3ee5a	(0x1e5b bytes)
> > > > > > > +278( 22 mod 256): WRITE    0x2ee00 thru 0x2efff	(0x200 bytes)
> > > > > > > +279( 23 mod 256): WRITE    0x76200 thru 0x769ff	(0x800 bytes) HOLE
> > > > > > > +280( 24 mod 256): SKIPPED (no operation)
> > > > > > > +281( 25 mod 256): SKIPPED (no operation)
> > > > > > > +282( 26 mod 256): MAPREAD  0xa000 thru 0xa5e7	(0x5e8 bytes)
> > > > > > > +283( 27 mod 256): SKIPPED (no operation)
> > > > > > > +284( 28 mod 256): SKIPPED (no operation)
> > > > > > > +285( 29 mod 256): SKIPPED (no operation)
> > > > > > > +286( 30 mod 256): SKIPPED (no operation)
> > > > > > > +287( 31 mod 256): COLLAPSE 0x11000 thru 0x11fff	(0x1000 bytes)
> > > > > > > +288( 32 mod 256): COPY 0x5d000 thru 0x5dfff	(0x1000 bytes) to 0x4ca00 thru 0x4d9ff
> > > > > > > +289( 33 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x1e400
> > > > > > > +290( 34 mod 256): MAPREAD  0x1c000 thru 0x1d802	(0x1803 bytes)	***RRRR***
> > > > > > > +Log of operations saved to "/mnt/xfstests/test/junk.fsxops"; replay with --replay-ops
> > > > > > > +Correct content saved for comparison
> > > > > > > +(maybe hexdump "/mnt/xfstests/test/junk" vs "/mnt/xfstests/test/junk.fsxgood")
> > > > > > > 
> > > > > > > Thanks,
> > > > > > > Zorro
> > > > > > 
> > > > > > Hi Zorro, just to confirm: is this on an older kernel that doesn't support
> > > > > > RWF_ATOMIC, or on a kernel that does support it?
> > > > > 
> > > > > I tested on linux 6.16 and current latest linux v6.17+ (will be 6.18-rc1 later).
> > > > > About the RWF_ATOMIC flag in my system:
> > > > > 
> > > > > # grep -rsn RWF_ATOMIC /usr/include/
> > > > > /usr/include/bits/uio-ext.h:51:#define RWF_ATOMIC       0x00000040 /* Write is to be issued with torn-write
> > > > > /usr/include/linux/fs.h:424:#define RWF_ATOMIC  ((__kernel_rwf_t)0x00000040)
> > > > > /usr/include/linux/fs.h:431:                     RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
> > > > > /usr/include/xfs/linux.h:236:#ifndef RWF_ATOMIC
> > > > > /usr/include/xfs/linux.h:237:#define RWF_ATOMIC ((__kernel_rwf_t)0x00000040)
> > > > 
> > > > Hi Zorro, thanks for checking this. So correct me if im wrong but I
> > > > understand that you have run this test on an atomic writes enabled 
> > > > kernel where the stack also supports atomic writes.
> > > > 
> > > > Looking at the bad data log:
> > > > 
> > > > 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> > > > 	+OFFSET      GOOD    BAD     RANGE
> > > > 	+0x1c000     0x0000  0xcdcd  0x0
> > > > 	+operation# (mod 256) for the bad data may be 205
> > > > 
> > > > We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> > > > that caused this is indicated to be 205, but looking at that operation:
> > > > 
> > > > +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> > > > 
> > > > This doesn't even overlap the range that is bad. (0x1c000 to 0x1c00f).
> > > > Infact, it does seem like an unlikely coincidence that the actual data
> > > > in the bad range is 0xcdcd which is something xfs_io -c "pwrite" writes
> > > > to default (fsx writes random data in even offsets and operation num in
> > > > odd).
> > > > 
> > > > I am able to replicate this, but only on XFS and not on ext4 (at least not
> > > > in 20 runs).  I'm trying to better understand if this is a test issue or
> > > > not. Will keep you updated.
> > > > 
> > > > I'm not sure how this will affect the upcoming release, if you want
> > > > shall I send a small patch to make the atomic writes feature default off
> > > > instead of default on till we root cause this?
> > > > 
> > > > Regards,
> > > > Ojaswin
> > > 
> > > Hi Zorro,
> > > 
> > > So I'm able to narrow down the operations and replicate it via the
> > > following replay file:
> > > 
> > > # -----
> > > # replay.fsxops
> > > # -----
> > > write_atomic 0x57000 0x1000 0x69690
> > > write_atomic 0x66000 0x1000 0x4de00
> > > write_atomic 0x18000 0x1000 0x2c800
> > > copy_range 0x20000 0x1000 0xe00 0x70e00
> > > write_atomic 0x18000 0x1000 0x70e00
> > > copy_range 0x21000 0x1000 0x23000 0x74218
> > > truncate 0x0 0x11200 0x4daef *
> > > write_atomic 0x43000 0x1000 0x11200 *
> > > write_atomic 0x15000 0x1000 0x44000
> > > copy_range 0xd000 0x1000 0x1d800 0x44000
> > > mapread 0x1c000 0x1803 0x1e400 *
> > > 
> > > 
> > > Command: ./ltp/fsx -N 10000 -o 8192 -l 500000 -r 4096 -t 512 -w 512 -Z -FKuHzI --replay-ops replay.fsxops $MNT/junk
> > > 
> > > $MNT/junk is always opened O_TRUNC and is an on an XFS FS where the
> > > disk is non-atomic so all RWF_ATOMIC writes are software emulated.
> > > 
> > > Here are the logs generated for this run:
> > > 
> > > Seed set to 1
> > > main: filesystem does not support exchange range, disabling!
> > > 
> > > READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/test/junk
> > > OFFSET      GOOD    BAD     RANGE
> > > 0x1d000     0x0000  0xf322  0x0
> > > operation# (mod 256) for the bad data may be 243
> > > 0x1d001     0x0000  0x22f3  0x1
> > > operation# (mod 256) for the bad data may be 243
> > > 0x1d002     0x0000  0xf391  0x2
> > > operation# (mod 256) for the bad data may be 243
> > > 0x1d003     0x0000  0x91f3  0x3
> > > <... a few more such lines ..>
> > > 
> > > LOG DUMP (11 total operations):
> > > openat(AT_FDCWD, "/mnt/test/junk.fsxops", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 7
> > > 1(  1 mod 256): WRITE    0x57000 thru 0x57fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
> > > 2(  2 mod 256): WRITE    0x66000 thru 0x66fff   (0x1000 bytes) HOLE ATOMIC
> > > 3(  3 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
> > > 4(  4 mod 256): COPY 0x20000 thru 0x20fff       (0x1000 bytes) to 0xe00 thru 0x1dff
> > > 5(  5 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
> > > 6(  6 mod 256): COPY 0x21000 thru 0x21fff       (0x1000 bytes) to 0x23000 thru 0x23fff
> > > 7(  7 mod 256): TRUNCATE DOWN   from 0x67000 to 0x11200 ******WWWW
> > > 8(  8 mod 256): WRITE    0x43000 thru 0x43fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
> > > 9(  9 mod 256): WRITE    0x15000 thru 0x15fff   (0x1000 bytes) ATOMIC
> > > 10( 10 mod 256): COPY 0xd000 thru 0xdfff        (0x1000 bytes) to 0x1d800 thru 0x1e7ff
> > > 11( 11 mod 256): MAPREAD  0x1c000 thru 0x1d802  (0x1803 bytes)  ***RRRR***
> > > Log of operations saved to "/mnt/test/junk.fsxops"; replay with --replay-ops
> > > Correct content saved for comparison
> > > (maybe hexdump "/mnt/test/junk" vs "/mnt/test/junk.fsxgood")
> > > +++ exited with 110 +++
> > > 
> > > We can see that the bad data is detected in the final MAPREAD operation
> > > and the bad offset is at 0x1d000. If we look at the operations dump
> > > above, it's clear that none of the operations should be modifying
> > > 0x1d000, so we should have been reading 0s, yet we see some junk data
> > > there in the file:
> > > 
> > > $ hexdump /mnt/test/junk -s 0x1c000 -n0x1020
> > > 001c000 0000 0000 0000 0000 0000 0000 0000 0000
> > > *
> > > 001d000 22f3 91f3 7ff3 3af3 39f3 23f3 6df3 c2f3
> > > 001d010 c5f3 f6f3 a6f3 1ef3 58f3 40f3 32f3 5ff3
> > > 001d020
> > > 
> > > Another thing to note is that I can't reproduce the above on a scsi-debug
> > > device.  @Darrick, @John, could this be an issue in the kernel?
> > 
> > Hi Ojaswin,
> > 
> > If we can be sure this's a kernel bug, rather than a fstests (patch) issue, I think we
> > can merge this patchset to expose this bug. Does this make sense to you and others?
> 
> Looks like a kernel bug to me...

Thanks Darrick! If I merge this patchset, fstests users might hit some unexpected
fsx test failures in their regular regression tests. I don't want to cause panic, but if
no one minds that, I'll merge it. Or do you want to disable the atomic write fsx test by
default for now?

Thanks,
Zorro

> 
> --D
> 
> > Thanks,
> > Zorro
> > 
> > > 
> > > Regards,
> > > ojaswin
> > > > 
> > > > > 
> > > > > Thanks,
> > > > > Zorro
> > > > > 
> > > > > > 
> > > > > > Regards,
> > > > > > ojaswin
> > > > > > 
> > > > > 
> > > 
> > 
> > 
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Darrick J. Wong 3 months, 3 weeks ago
On Sat, Oct 18, 2025 at 02:47:24AM +0800, Zorro Lang wrote:
> On Fri, Oct 17, 2025 at 09:27:57AM -0700, Darrick J. Wong wrote:
> > On Sat, Oct 18, 2025 at 12:01:22AM +0800, Zorro Lang wrote:
> > > On Tue, Oct 07, 2025 at 03:28:46PM +0530, Ojaswin Mujoo wrote:
> > > > On Mon, Oct 06, 2025 at 06:50:03PM +0530, Ojaswin Mujoo wrote:
> > > > > On Sun, Oct 05, 2025 at 11:39:56PM +0800, Zorro Lang wrote:
> > > > > > On Sun, Oct 05, 2025 at 06:27:24PM +0530, Ojaswin Mujoo wrote:
> > > > > > > On Sat, Oct 04, 2025 at 01:19:32AM +0800, Zorro Lang wrote:
> > > > > > > > On Thu, Oct 02, 2025 at 11:26:45PM +0530, Ojaswin Mujoo wrote:
> > > > > > > > > On Sun, Sep 28, 2025 at 09:19:24PM +0800, Zorro Lang wrote:
> > > > > > > > > > On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> > > > > > > > > > > Implement atomic write support to help fuzz atomic writes
> > > > > > > > > > > with fsx.
> > > > > > > > > > > 
> > > > > > > > > > > Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> > > > > > > > > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > > > > > > > > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > > > > > > > > > > Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> > > > > > > > > > > ---
> > > > > > > > > > 
> > > > > > > > > > Hmm... this patch causes more regular fsx test cases fail on old kernel,
> > > > > > > > > > (e.g. g/760, g/617, g/263 ...) except set "FSX_AVOID=-a". Is there a way
> > > > > > > > > > to disable "atomic write" automatically if it's not supported by current
> > > > > > > > > > system?
> > > > > > > > > 
> > > > > > > > > Hi Zorro, 
> > > > > > > > > Sorry for being late, I've been on vacation this week.
> > > > > > > > > 
> > > > > > > > > Yes so by design we should be automatically disabling atomic writes when
> > > > > > > > > they are not supported by the stack but seems like the issue is that
> > > > > > > > > when we do disable it we print some extra messages to stdout/err which
> > > > > > > > > show up in the xfstests output causing failure.
> > > > > > > > > 
> > > > > > > > > I can think of 2 ways around this:
> > > > > > > > > 
> > > > > > > > > 1. Don't print anything and just silently drop atomic writes if stack
> > > > > > > > > doesn't support them.
> > > > > > > > > 
> > > > > > > > > > 2. Make atomic writes a default-off instead of default-on feature, but
> > > > > > > > > > this loses a bit of coverage as existing tests won't get atomic write
> > > > > > > > > > testing free of cost any more.
> > > > > > > > 
> > > > > > > > Hi Ojaswin,
> > > > > > > > 
> > > > > > > > Please have a nice vacation :)
> > > > > > > > 
> > > > > > > > It's not the "extra messages" cause failure, those "quiet" failures can be fixed
> > > > > > > > by:
> > > > > > > 
> > > > > > > Oh okay got it.
> > > > > > > 
> > > > > > > > 
> > > > > > > > diff --git a/ltp/fsx.c b/ltp/fsx.c
> > > > > > > > index bdb87ca90..0a035b37b 100644
> > > > > > > > --- a/ltp/fsx.c
> > > > > > > > +++ b/ltp/fsx.c
> > > > > > > > @@ -1847,8 +1847,9 @@ int test_atomic_writes(void) {
> > > > > > > >         struct statx stx;
> > > > > > > >  
> > > > > > > >         if (o_direct != O_DIRECT) {
> > > > > > > > -               fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > > > > > -                               "disabling!\n");
> > > > > > > > +               if (!quiet)
> > > > > > > > +                       fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> > > > > > > > +                                       "disabling!\n");
> > > > > > > >                 return 0;
> > > > > > > >         }
> > > > > > > >  
> > > > > > > > @@ -1867,8 +1868,9 @@ int test_atomic_writes(void) {
> > > > > > > >                 return 1;
> > > > > > > >         }
> > > > > > > >  
> > > > > > > > -       fprintf(stderr, "main: IO Stack does not support "
> > > > > > > > -                       "atomic writes, disabling!\n");
> > > > > > > > +       if (!quiet)
> > > > > > > > +               fprintf(stderr, "main: IO Stack does not support "
> > > > > > > > +                               "atomic writes, disabling!\n");
> > > > > > > >         return 0;
> > > > > > > >  }
> > > > > > > 
> > > > > > > > 
> > > > > > > > But I hit more read or write failures e.g. [1], this failure can't be
> > > > > > > > reproduced with FSX_AVOID=-a. Is it a atomic write bug or an unexpected
> > > > > > > > test failure?
> > > > > > > > 
> > > > > > > > Thanks,
> > > > > > > > Zorro
> > > > > > > > 
> > > > > > > 
> > > > > > > <...>
> > > > > > > 
> > > > > > > > +244(244 mod 256): SKIPPED (no operation)
> > > > > > > > +245(245 mod 256): FALLOC   0x695c5 thru 0x6a2e6	(0xd21 bytes) INTERIOR
> > > > > > > > +246(246 mod 256): MAPWRITE 0x5ac00 thru 0x5b185	(0x586 bytes)
> > > > > > > > +247(247 mod 256): WRITE    0x31200 thru 0x313ff	(0x200 bytes)
> > > > > > > > +248(248 mod 256): SKIPPED (no operation)
> > > > > > > > +249(249 mod 256): TRUNCATE DOWN	from 0x78242 to 0xf200	******WWWW
> > > > > > > > +250(250 mod 256): FALLOC   0x65000 thru 0x66f26	(0x1f26 bytes) PAST_EOF
> > > > > > > > +251(251 mod 256): WRITE    0x45400 thru 0x467ff	(0x1400 bytes) HOLE	***WWWW
> > > > > > > > +252(252 mod 256): SKIPPED (no operation)
> > > > > > > > +253(253 mod 256): SKIPPED (no operation)
> > > > > > > > +254(254 mod 256): MAPWRITE 0x4be00 thru 0x4daee	(0x1cef bytes)
> > > > > > > > +255(255 mod 256): MAPREAD  0xc000 thru 0xcae9	(0xaea bytes)
> > > > > > > > +256(  0 mod 256): READ     0x3e000 thru 0x3efff	(0x1000 bytes)
> > > > > > > > +257(  1 mod 256): SKIPPED (no operation)
> > > > > > > > +258(  2 mod 256): INSERT 0x45000 thru 0x45fff	(0x1000 bytes)
> > > > > > > > +259(  3 mod 256): ZERO     0x1d7d5 thru 0x1f399	(0x1bc5 bytes)	******ZZZZ
> > > > > > > > +260(  4 mod 256): TRUNCATE DOWN	from 0x4eaef to 0x11200	******WWWW
> > > > > > > > +261(  5 mod 256): WRITE    0x43000 thru 0x43fff	(0x1000 bytes) HOLE	***WWWW
> > > > > > > > +262(  6 mod 256): WRITE    0x2200 thru 0x31ff	(0x1000 bytes)
> > > > > > > > +263(  7 mod 256): WRITE    0x15000 thru 0x15fff	(0x1000 bytes)
> > > > > > > > +264(  8 mod 256): WRITE    0x2e400 thru 0x2e7ff	(0x400 bytes)
> > > > > > > > +265(  9 mod 256): COPY 0xd000 thru 0xdfff	(0x1000 bytes) to 0x1d800 thru 0x1e7ff	******EEEE
> > > > > > > > +266( 10 mod 256): CLONE 0x2a000 thru 0x2afff	(0x1000 bytes) to 0x21000 thru 0x21fff
> > > > > > > > +267( 11 mod 256): MAPREAD  0x31000 thru 0x31d0a	(0xd0b bytes)
> > > > > > > > +268( 12 mod 256): SKIPPED (no operation)
> > > > > > > > +269( 13 mod 256): WRITE    0x25000 thru 0x25fff	(0x1000 bytes)
> > > > > > > > +270( 14 mod 256): SKIPPED (no operation)
> > > > > > > > +271( 15 mod 256): MAPREAD  0x30000 thru 0x30577	(0x578 bytes)
> > > > > > > > +272( 16 mod 256): PUNCH    0x1a267 thru 0x1c093	(0x1e2d bytes)
> > > > > > > > +273( 17 mod 256): MAPREAD  0x1f000 thru 0x1f9c9	(0x9ca bytes)
> > > > > > > > +274( 18 mod 256): WRITE    0x40800 thru 0x40dff	(0x600 bytes)
> > > > > > > > +275( 19 mod 256): SKIPPED (no operation)
> > > > > > > > +276( 20 mod 256): MAPWRITE 0x20600 thru 0x22115	(0x1b16 bytes)
> > > > > > > > +277( 21 mod 256): MAPWRITE 0x3d000 thru 0x3ee5a	(0x1e5b bytes)
> > > > > > > > +278( 22 mod 256): WRITE    0x2ee00 thru 0x2efff	(0x200 bytes)
> > > > > > > > +279( 23 mod 256): WRITE    0x76200 thru 0x769ff	(0x800 bytes) HOLE
> > > > > > > > +280( 24 mod 256): SKIPPED (no operation)
> > > > > > > > +281( 25 mod 256): SKIPPED (no operation)
> > > > > > > > +282( 26 mod 256): MAPREAD  0xa000 thru 0xa5e7	(0x5e8 bytes)
> > > > > > > > +283( 27 mod 256): SKIPPED (no operation)
> > > > > > > > +284( 28 mod 256): SKIPPED (no operation)
> > > > > > > > +285( 29 mod 256): SKIPPED (no operation)
> > > > > > > > +286( 30 mod 256): SKIPPED (no operation)
> > > > > > > > +287( 31 mod 256): COLLAPSE 0x11000 thru 0x11fff	(0x1000 bytes)
> > > > > > > > +288( 32 mod 256): COPY 0x5d000 thru 0x5dfff	(0x1000 bytes) to 0x4ca00 thru 0x4d9ff
> > > > > > > > +289( 33 mod 256): TRUNCATE DOWN	from 0x75a00 to 0x1e400
> > > > > > > > +290( 34 mod 256): MAPREAD  0x1c000 thru 0x1d802	(0x1803 bytes)	***RRRR***
> > > > > > > > +Log of operations saved to "/mnt/xfstests/test/junk.fsxops"; replay with --replay-ops
> > > > > > > > +Correct content saved for comparison
> > > > > > > > +(maybe hexdump "/mnt/xfstests/test/junk" vs "/mnt/xfstests/test/junk.fsxgood")
> > > > > > > > 
> > > > > > > > Thanks,
> > > > > > > > Zorro
> > > > > > > 
> > > > > > > Hi Zorro, just to confirm: is this on an older kernel that doesn't support
> > > > > > > RWF_ATOMIC, or on a kernel that does support it?
> > > > > > 
> > > > > > I tested on linux 6.16 and current latest linux v6.17+ (will be 6.18-rc1 later).
> > > > > > About the RWF_ATOMIC flag in my system:
> > > > > > 
> > > > > > # grep -rsn RWF_ATOMIC /usr/include/
> > > > > > /usr/include/bits/uio-ext.h:51:#define RWF_ATOMIC       0x00000040 /* Write is to be issued with torn-write
> > > > > > /usr/include/linux/fs.h:424:#define RWF_ATOMIC  ((__kernel_rwf_t)0x00000040)
> > > > > > /usr/include/linux/fs.h:431:                     RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
> > > > > > /usr/include/xfs/linux.h:236:#ifndef RWF_ATOMIC
> > > > > > /usr/include/xfs/linux.h:237:#define RWF_ATOMIC ((__kernel_rwf_t)0x00000040)
> > > > > 
> > > > > Hi Zorro, thanks for checking this. So correct me if im wrong but I
> > > > > understand that you have run this test on an atomic writes enabled 
> > > > > kernel where the stack also supports atomic writes.
> > > > > 
> > > > > Looking at the bad data log:
> > > > > 
> > > > > 	+READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/xfstests/test/junk
> > > > > 	+OFFSET      GOOD    BAD     RANGE
> > > > > 	+0x1c000     0x0000  0xcdcd  0x0
> > > > > 	+operation# (mod 256) for the bad data may be 205
> > > > > 
> > > > > We see that 0x0000 was expected but we got 0xcdcd. Now the operation
> > > > > that caused this is indicated to be 205, but looking at that operation:
> > > > > 
> > > > > +205(205 mod 256): ZERO     0x6dbe6 thru 0x6e6aa	(0xac5 bytes)
> > > > > 
> > > > > This doesn't even overlap the range that is bad (0x1c000 to 0x1c00f).
> > > > > In fact, it does seem like an unlikely coincidence that the actual data
> > > > > in the bad range is 0xcdcd, which is what xfs_io -c "pwrite" writes
> > > > > by default (fsx writes random data at even offsets and the operation
> > > > > number at odd ones).
> > > > > 
> > > > > I am able to replicate this, but only on XFS and not on ext4 (at least not
> > > > > in 20 runs).  I'm trying to better understand if this is a test issue or
> > > > > not. Will keep you updated.
> > > > > 
> > > > > I'm not sure how this will affect the upcoming release. If you want,
> > > > > shall I send a small patch to make the atomic writes feature default off
> > > > > instead of default on until we root-cause this?
> > > > > 
> > > > > Regards,
> > > > > Ojaswin
> > > > 
> > > > Hi Zorro,
> > > > 
> > > > So I'm able to narrow down the operations and replicate it via the
> > > > following replay file:
> > > > 
> > > > # -----
> > > > # replay.fsxops
> > > > # -----
> > > > write_atomic 0x57000 0x1000 0x69690
> > > > write_atomic 0x66000 0x1000 0x4de00
> > > > write_atomic 0x18000 0x1000 0x2c800
> > > > copy_range 0x20000 0x1000 0xe00 0x70e00
> > > > write_atomic 0x18000 0x1000 0x70e00
> > > > copy_range 0x21000 0x1000 0x23000 0x74218
> > > > truncate 0x0 0x11200 0x4daef *
> > > > write_atomic 0x43000 0x1000 0x11200 *
> > > > write_atomic 0x15000 0x1000 0x44000
> > > > copy_range 0xd000 0x1000 0x1d800 0x44000
> > > > mapread 0x1c000 0x1803 0x1e400 *
> > > > 
> > > > 
> > > > Command: ./ltp/fsx -N 10000 -o 8192 -l 500000 -r 4096 -t 512 -w 512 -Z -FKuHzI --replay-ops replay.fsxops $MNT/junk
> > > > 
> > > > $MNT/junk is always opened O_TRUNC and is on an XFS FS where the
> > > > disk is non-atomic, so all RWF_ATOMIC writes are software emulated.
> > > > 
> > > > Here are the logs generated for this run:
> > > > 
> > > > Seed set to 1
> > > > main: filesystem does not support exchange range, disabling!
> > > > 
> > > > READ BAD DATA: offset = 0x1c000, size = 0x1803, fname = /mnt/test/junk
> > > > OFFSET      GOOD    BAD     RANGE
> > > > 0x1d000     0x0000  0xf322  0x0
> > > > operation# (mod 256) for the bad data may be 243
> > > > 0x1d001     0x0000  0x22f3  0x1
> > > > operation# (mod 256) for the bad data may be 243
> > > > 0x1d002     0x0000  0xf391  0x2
> > > > operation# (mod 256) for the bad data may be 243
> > > > 0x1d003     0x0000  0x91f3  0x3
> > > > <... a few more such lines ..>
> > > > 
> > > > LOG DUMP (11 total operations):
> > > > openat(AT_FDCWD, "/mnt/test/junk.fsxops", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 7
> > > > 1(  1 mod 256): WRITE    0x57000 thru 0x57fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
> > > > 2(  2 mod 256): WRITE    0x66000 thru 0x66fff   (0x1000 bytes) HOLE ATOMIC
> > > > 3(  3 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
> > > > 4(  4 mod 256): COPY 0x20000 thru 0x20fff       (0x1000 bytes) to 0xe00 thru 0x1dff
> > > > 5(  5 mod 256): WRITE    0x18000 thru 0x18fff   (0x1000 bytes) ATOMIC
> > > > 6(  6 mod 256): COPY 0x21000 thru 0x21fff       (0x1000 bytes) to 0x23000 thru 0x23fff
> > > > 7(  7 mod 256): TRUNCATE DOWN   from 0x67000 to 0x11200 ******WWWW
> > > > 8(  8 mod 256): WRITE    0x43000 thru 0x43fff   (0x1000 bytes) HOLE     ***WWWW ATOMIC
> > > > 9(  9 mod 256): WRITE    0x15000 thru 0x15fff   (0x1000 bytes) ATOMIC
> > > > 10( 10 mod 256): COPY 0xd000 thru 0xdfff        (0x1000 bytes) to 0x1d800 thru 0x1e7ff
> > > > 11( 11 mod 256): MAPREAD  0x1c000 thru 0x1d802  (0x1803 bytes)  ***RRRR***
> > > > Log of operations saved to "/mnt/test/junk.fsxops"; replay with --replay-ops
> > > > Correct content saved for comparison
> > > > (maybe hexdump "/mnt/test/junk" vs "/mnt/test/junk.fsxgood")
> > > > +++ exited with 110 +++
> > > > 
> > > > We can see that the bad data is detected in the final MAPREAD operation
> > > > and the bad offset is at 0x1d000. If we look at the operations dump
> > > > above, it's clear that none of the operations should be modifying
> > > > 0x1d000, so we should have been reading 0s, yet we see some junk data
> > > > there in the file:
> > > > 
> > > > $ hexdump /mnt/test/junk -s 0x1c000 -n0x1020
> > > > 001c000 0000 0000 0000 0000 0000 0000 0000 0000
> > > > *
> > > > 001d000 22f3 91f3 7ff3 3af3 39f3 23f3 6df3 c2f3
> > > > 001d010 c5f3 f6f3 a6f3 1ef3 58f3 40f3 32f3 5ff3
> > > > 001d020
> > > > 
> > > > Another thing to note is that I can't reproduce the above on a scsi-debug
> > > > device.  @Darrick, @John, could this be an issue in the kernel?
> > > 
> > > Hi Ojaswin,
> > > 
> > > If we can be sure this is a kernel bug, rather than an fstests (patch) issue, I think we
> > > can merge this patchset to expose this bug. Does this make sense to you and others?
> > 
> > Looks like a kernel bug to me...
> 
> Thanks Darrick! If I merge this patchset, fstests users might hit some unexpected
> fsx test failures in their regular regression tests. I don't want to cause panic; if no
> one minds that, I'll merge it. Or do you want to disable the atomic write fsx test by
> default for now?

Nah, leave it enabled, or else nobody will be testing it.

--D

> Thanks,
> Zorro
> 
> > 
> > --D
> > 
> > > Thanks,
> > > Zorro
> > > 
> > > > 
> > > > Regards,
> > > > ojaswin
> > > > > 
> > > > > > 
> > > > > > Thanks,
> > > > > > Zorro
> > > > > > 
> > > > > > > 
> > > > > > > Regards,
> > > > > > > ojaswin
> > > > > > > 
> > > > > > 
> > > > 
> > > 
> > > 
> > 
> 
>
Re: [PATCH v7 04/12] ltp/fsx.c: Add atomic writes support to fsx
Posted by Zorro Lang 4 months, 1 week ago
On Fri, Sep 19, 2025 at 12:17:57PM +0530, Ojaswin Mujoo wrote:
> Implement atomic write support to help fuzz atomic writes
> with fsx.
> 
> Suggested-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: John Garry <john.g.garry@oracle.com>
> Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
> ---
>  ltp/fsx.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 110 insertions(+), 5 deletions(-)
> 
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index 163b9453..bdb87ca9 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
> @@ -40,6 +40,7 @@
>  #include <liburing.h>
>  #endif
>  #include <sys/syscall.h>
> +#include "statx.h"
>  
>  #ifndef MAP_FILE
>  # define MAP_FILE 0
> @@ -49,6 +50,10 @@
>  #define RWF_DONTCACHE	0x80
>  #endif
>  
> +#ifndef RWF_ATOMIC
> +#define RWF_ATOMIC	0x40
> +#endif
> +
>  #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
>  
>  /* Operation flags (bitmask) */
> @@ -110,6 +115,7 @@ enum {
>  	OP_READ_DONTCACHE,
>  	OP_WRITE,
>  	OP_WRITE_DONTCACHE,
> +	OP_WRITE_ATOMIC,
>  	OP_MAPREAD,
>  	OP_MAPWRITE,
>  	OP_MAX_LITE,
> @@ -200,6 +206,11 @@ int	uring = 0;
>  int	mark_nr = 0;
>  int	dontcache_io = 1;
>  int	hugepages = 0;                  /* -h flag */
> +int	do_atomic_writes = 1;		/* -a flag disables */
> +
> +/* Used for atomic writes */
> +int awu_min = 0;
> +int awu_max = 0;
>  
>  /* Stores info needed to periodically collapse hugepages */
>  struct hugepages_collapse_info {
> @@ -288,6 +299,7 @@ static const char *op_names[] = {
>  	[OP_READ_DONTCACHE] = "read_dontcache",
>  	[OP_WRITE] = "write",
>  	[OP_WRITE_DONTCACHE] = "write_dontcache",
> +	[OP_WRITE_ATOMIC] = "write_atomic",
>  	[OP_MAPREAD] = "mapread",
>  	[OP_MAPWRITE] = "mapwrite",
>  	[OP_TRUNCATE] = "truncate",
> @@ -422,6 +434,7 @@ logdump(void)
>  				prt("\t***RRRR***");
>  			break;
>  		case OP_WRITE_DONTCACHE:
> +		case OP_WRITE_ATOMIC:
>  		case OP_WRITE:
>  			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
>  			    lp->args[0], lp->args[0] + lp->args[1] - 1,
> @@ -1073,6 +1086,25 @@ update_file_size(unsigned offset, unsigned size)
>  	file_size = offset + size;
>  }
>  
> +static int is_power_of_2(unsigned n) {
> +	return ((n & (n - 1)) == 0);
> +}
> +
> +/*
> + * Round down n to nearest power of 2.
> + * If n is already a power of 2, return n;
> + */
> +static int rounddown_pow_of_2(int n) {
> +	int i = 0;
> +
> +	if (is_power_of_2(n))
> +		return n;
> +
> +	for (; (1 << i) < n; i++);
> +
> +	return 1 << (i - 1);
> +}
> +
>  void
>  dowrite(unsigned offset, unsigned size, int flags)
>  {
> @@ -1081,6 +1113,27 @@ dowrite(unsigned offset, unsigned size, int flags)
>  	offset -= offset % writebdy;
>  	if (o_direct)
>  		size -= size % writebdy;
> +	if (flags & RWF_ATOMIC) {
> +		/* atomic write len must be between awu_min and awu_max */
> +		if (size < awu_min)
> +			size = awu_min;
> +		if (size > awu_max)
> +			size = awu_max;
> +
> +		/* atomic writes need power-of-2 sizes */
> +		size = rounddown_pow_of_2(size);
> +
> +		/* atomic writes need naturally aligned offsets */
> +		offset -= offset % size;
> +
> +		/* Skip the write if we are crossing max filesize */
> +		if ((offset + size) > maxfilelen) {
> +			if (!quiet && testcalls > simulatedopcount)
> +				prt("skipping atomic write past maxfilelen\n");
> +			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
> +			return;
> +		}
> +	}
>  	if (size == 0) {
>  		if (!quiet && testcalls > simulatedopcount && !o_direct)
>  			prt("skipping zero size write\n");
> @@ -1088,7 +1141,10 @@ dowrite(unsigned offset, unsigned size, int flags)
>  		return;
>  	}
>  
> -	log4(OP_WRITE, offset, size, FL_NONE);
> +	if (flags & RWF_ATOMIC)
> +		log4(OP_WRITE_ATOMIC, offset, size, FL_NONE);
> +	else
> +		log4(OP_WRITE, offset, size, FL_NONE);
>  
>  	gendata(original_buf, good_buf, offset, size);
>  	if (offset + size > file_size) {
> @@ -1108,8 +1164,9 @@ dowrite(unsigned offset, unsigned size, int flags)
>  		       (monitorstart == -1 ||
>  			(offset + size > monitorstart &&
>  			(monitorend == -1 || offset <= monitorend))))))
> -		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d\n", testcalls,
> -		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0);
> +		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\tdontcache=%d atomic_wr=%d\n", testcalls,
> +		    offset, offset + size - 1, size, (flags & RWF_DONTCACHE) != 0,
> +		    (flags & RWF_ATOMIC) != 0);
>  	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
>  	if (iret != size) {
>  		if (iret == -1)
> @@ -1785,6 +1842,36 @@ do_dedupe_range(unsigned offset, unsigned length, unsigned dest)
>  }
>  #endif
>  
> +int test_atomic_writes(void) {
> +	int ret;
> +	struct statx stx;
> +
> +	if (o_direct != O_DIRECT) {
> +		fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
> +				"disabling!\n");

This patch causes some fsx test cases to fail (e.g. g/127, g/231) as:

--- /dev/fd/63	2025-09-26 17:25:48.413212392 -0400
+++ generic/127.out.bad	2025-09-26 17:25:48.177803644 -0400
@@ -1,7 +1,13 @@
 QA output created by 127
+main: atomic writes need O_DIRECT (-Z), disabling!
 All 100000 operations completed A-OK!
+main: atomic writes need O_DIRECT (-Z), disabling!
 All 100000 operations completed A-OK!
+main: atomic writes need O_DIRECT (-Z), disabling!
 All 100000 operations completed A-OK!
+main: atomic writes need O_DIRECT (-Z), disabling!
 All 100000 operations completed A-OK!
+main: atomic writes need O_DIRECT (-Z), disabling!
 All 100000 operations completed A-OK!
+main: atomic writes need O_DIRECT (-Z), disabling!
 All 100000 operations completed A-OK!

That's because the "-q" (quiet) option isn't handled here. If you have no
objection, I'll change this part to:

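  if (o_direct != O_DIRECT) {
  	if (!quiet)
  		fprintf(stderr, "main: atomic writes need O_DIRECT (-Z), "
  				"disabling!\n");
  	return 0;
  }

(the quiet check must only gate the message; atomic writes still have to be
disabled when O_DIRECT is not in use)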

And ...

> +		return 0;
> +	}
> +
> +	ret = xfstests_statx(AT_FDCWD, fname, 0, STATX_WRITE_ATOMIC, &stx);
> +	if (ret < 0) {
> +		fprintf(stderr, "main: Statx failed with %d."
> +			" Failed to determine atomic write limits, "
> +			" disabling!\n", ret);
> +		return 0;
> +	}
> +
> +	if (stx.stx_attributes & STATX_ATTR_WRITE_ATOMIC &&
> +	    stx.stx_atomic_write_unit_min > 0) {
> +		awu_min = stx.stx_atomic_write_unit_min;
> +		awu_max = stx.stx_atomic_write_unit_max;
> +		return 1;
> +	}
> +
> +	fprintf(stderr, "main: IO Stack does not support "
> +			"atomic writes, disabling!\n");

... change this line to:

if (!quiet)
	fprintf(stderr, "main: IO Stack does not support "
			"atomic writes, disabling!\n");

When I merge it.

Thanks,
Zorro

> +	return 0;
> +}
> +
>  #ifdef HAVE_COPY_FILE_RANGE
>  int
>  test_copy_range(void)
> @@ -2356,6 +2443,12 @@ have_op:
>  			goto out;
>  		}
>  		break;
> +	case OP_WRITE_ATOMIC:
> +		if (!do_atomic_writes) {
> +			log4(OP_WRITE_ATOMIC, offset, size, FL_SKIPPED);
> +			goto out;
> +		}
> +		break;
>  	}
>  
>  	switch (op) {
> @@ -2385,6 +2478,11 @@ have_op:
>  			dowrite(offset, size, 0);
>  		break;
>  
> +	case OP_WRITE_ATOMIC:
> +		TRIM_OFF_LEN(offset, size, maxfilelen);
> +		dowrite(offset, size, RWF_ATOMIC);
> +		break;
> +
>  	case OP_MAPREAD:
>  		TRIM_OFF_LEN(offset, size, file_size);
>  		domapread(offset, size);
> @@ -2511,13 +2609,14 @@ void
>  usage(void)
>  {
>  	fprintf(stdout, "usage: %s",
> -		"fsx [-dfhknqxyzBEFHIJKLORWXZ0]\n\
> +		"fsx [-adfhknqxyzBEFHIJKLORWXZ0]\n\
>  	   [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid]\n\
>  	   [-l flen] [-m start:end] [-o oplen] [-p progressinterval]\n\
>  	   [-r readbdy] [-s style] [-t truncbdy] [-w writebdy]\n\
>  	   [-A|-U] [-D startingop] [-N numops] [-P dirpath] [-S seed]\n\
>  	   [--replay-ops=opsfile] [--record-ops[=opsfile]] [--duration=seconds]\n\
>  	   ... fname\n\
> +	-a: disable atomic writes\n\
>  	-b opnum: beginning operation number (default 1)\n\
>  	-c P: 1 in P chance of file close+open at each op (default infinity)\n\
>  	-d: debug output for all operations\n\
> @@ -3059,9 +3158,13 @@ main(int argc, char **argv)
>  	setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
>  
>  	while ((ch = getopt_long(argc, argv,
> -				 "0b:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> +				 "0ab:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
>  				 longopts, NULL)) != EOF)
>  		switch (ch) {
> +		case 'a':
> +			prt("main(): Atomic writes disabled\n");
> +			do_atomic_writes = 0;
> +			break;
>  		case 'b':
>  			simulatedopcount = getnum(optarg, &endp);
>  			if (!quiet)
> @@ -3475,6 +3578,8 @@ main(int argc, char **argv)
>  		exchange_range_calls = test_exchange_range();
>  	if (dontcache_io)
>  		dontcache_io = test_dontcache_io();
> +	if (do_atomic_writes)
> +		do_atomic_writes = test_atomic_writes();
>  
>  	while (keep_running())
>  		if (!test())
> -- 
> 2.49.0
>