[PATCH] qga: skip bind mounts in fs list

Jean-Louis Dupond posted 1 patch 1 month, 3 weeks ago
qga/commands-linux.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
[PATCH] qga: skip bind mounts in fs list
Posted by Jean-Louis Dupond 1 month, 3 weeks ago
The filesystem list in build_fs_mount_list should skip bind mounts.
This because we end up in locking situations when doing fsFreeze. Like
mentioned in [1] and [2].

Next to that, the build_fs_mount_list call did a fallback via
build_fs_mount_list_from_mtab if mountinfo did not exist.
There it skipped bind mounts, but this is broken for newer OS.
This as mounts does not return the path of the bind mount but the
underlying dev/partition, so S_ISDIR will never return true in
dev_major_minor call.

This patch simply checks the existing devmajor:devminor tuple in the
mounts, and if it already exists, this means we have the same devices
mounted again, a bind mount. So skip this.

Same approach is used in open-vm-tools [3].

[1]: https://gitlab.com/qemu-project/qemu/-/issues/592
[2]: https://gitlab.com/qemu-project/qemu/-/issues/520
[3]: https://github.com/vmware/open-vm-tools/commit/d58847b497e212737007958c945af1df22a8ab58

Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
---
 qga/commands-linux.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/qga/commands-linux.c b/qga/commands-linux.c
index 51d5e3d927..426b040ab8 100644
--- a/qga/commands-linux.c
+++ b/qga/commands-linux.c
@@ -59,6 +59,22 @@ static int dev_major_minor(const char *devpath,
     return -1;
 }
 
+/*
+ * Check if we already have the devmajor:devminor in the mounts
+ * If thats the case return true.
+ */
+static bool dev_exists(FsMountList *mounts, unsigned int devmajor, unsigned int devminor)
+{
+    FsMount *mount;
+
+    QTAILQ_FOREACH(mount, mounts, next) {
+        if (mount->devmajor == devmajor && mount->devminor == devminor) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
 {
     struct mntent *ment;
@@ -89,6 +105,10 @@ static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
             /* Skip bind mounts */
             continue;
         }
+        if (dev_exists(mounts, devmajor, devminor)) {
+            /* Skip already existing devices (bind mounts) */
+            continue;
+        }
 
         mount = g_new0(FsMount, 1);
         mount->dirname = g_strdup(ment->mnt_dir);
@@ -172,6 +192,11 @@ bool build_fs_mount_list(FsMountList *mounts, Error **errp)
             }
         }
 
+        if (dev_exists(mounts, devmajor, devminor)) {
+            /* Skip already existing devices (bind mounts) */
+            continue;
+        }
+
         mount = g_new0(FsMount, 1);
         mount->dirname = g_strdup(line + dir_s);
         mount->devtype = g_strdup(dash + type_s);
-- 
2.46.2
Re: [PATCH] qga: skip bind mounts in fs list
Posted by Jean-Louis Dupond 1 month, 2 weeks ago
On 2/10/2024 12:06, Jean-Louis Dupond wrote:
> The filesystem list in build_fs_mount_list should skip bind mounts.
> This because we end up in locking situations when doing fsFreeze. Like
> mentioned in [1] and [2].
>
> Next to that, the build_fs_mount_list call did a fallback via
> build_fs_mount_list_from_mtab if mountinfo did not exist.
> There it skipped bind mounts, but this is broken for newer OS.
> This as mounts does not return the path of the bind mount but the
> underlying dev/partition, so S_ISDIR will never return true in
> dev_major_minor call.
>
> This patch simply checks the existing devmajor:devminor tuple in the
> mounts, and if it already exists, this means we have the same devices
> mounted again, a bind mount. So skip this.
>
> Same approach is used in open-vm-tools [3].
>
> [1]: https://gitlab.com/qemu-project/qemu/-/issues/592
> [2]: https://gitlab.com/qemu-project/qemu/-/issues/520
> [3]: https://github.com/vmware/open-vm-tools/commit/d58847b497e212737007958c945af1df22a8ab58
>
> Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
> ---
>   qga/commands-linux.c | 25 +++++++++++++++++++++++++
>   1 file changed, 25 insertions(+)
>
> diff --git a/qga/commands-linux.c b/qga/commands-linux.c
> index 51d5e3d927..426b040ab8 100644
> --- a/qga/commands-linux.c
> +++ b/qga/commands-linux.c
> @@ -59,6 +59,22 @@ static int dev_major_minor(const char *devpath,
>       return -1;
>   }
>   
> +/*
> + * Check if we already have the devmajor:devminor in the mounts
> + * If thats the case return true.
> + */
> +static bool dev_exists(FsMountList *mounts, unsigned int devmajor, unsigned int devminor)
> +{
> +    FsMount *mount;
> +
> +    QTAILQ_FOREACH(mount, mounts, next) {
> +        if (mount->devmajor == devmajor && mount->devminor == devminor) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
>   static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
>   {
>       struct mntent *ment;
> @@ -89,6 +105,10 @@ static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
>               /* Skip bind mounts */
>               continue;
>           }
> +        if (dev_exists(mounts, devmajor, devminor)) {
> +            /* Skip already existing devices (bind mounts) */
> +            continue;
> +        }
>   
>           mount = g_new0(FsMount, 1);
>           mount->dirname = g_strdup(ment->mnt_dir);
> @@ -172,6 +192,11 @@ bool build_fs_mount_list(FsMountList *mounts, Error **errp)
>               }
>           }
>   
> +        if (dev_exists(mounts, devmajor, devminor)) {
> +            /* Skip already existing devices (bind mounts) */
> +            continue;
> +        }
> +
>           mount = g_new0(FsMount, 1);
>           mount->dirname = g_strdup(line + dir_s);
>           mount->devtype = g_strdup(dash + type_s);


Ping + add kkostiuk@redhat.com as I missed him in the initial mail.
Re: [PATCH] qga: skip bind mounts in fs list
Posted by Jean-Louis Dupond 4 weeks, 1 day ago
On 9/10/2024 10:34, Jean-Louis Dupond wrote:
> On 2/10/2024 12:06, Jean-Louis Dupond wrote:
>> The filesystem list in build_fs_mount_list should skip bind mounts.
>> This because we end up in locking situations when doing fsFreeze. Like
>> mentioned in [1] and [2].
>>
>> Next to that, the build_fs_mount_list call did a fallback via
>> build_fs_mount_list_from_mtab if mountinfo did not exist.
>> There it skipped bind mounts, but this is broken for newer OS.
>> This as mounts does not return the path of the bind mount but the
>> underlying dev/partition, so S_ISDIR will never return true in
>> dev_major_minor call.
>>
>> This patch simply checks the existing devmajor:devminor tuple in the
>> mounts, and if it already exists, this means we have the same devices
>> mounted again, a bind mount. So skip this.
>>
>> Same approach is used in open-vm-tools [3].
>>
>> [1]: https://gitlab.com/qemu-project/qemu/-/issues/592
>> [2]: https://gitlab.com/qemu-project/qemu/-/issues/520
>> [3]: 
>> https://github.com/vmware/open-vm-tools/commit/d58847b497e212737007958c945af1df22a8ab58
>>
>> Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
>> ---
>>   qga/commands-linux.c | 25 +++++++++++++++++++++++++
>>   1 file changed, 25 insertions(+)
>>
>> diff --git a/qga/commands-linux.c b/qga/commands-linux.c
>> index 51d5e3d927..426b040ab8 100644
>> --- a/qga/commands-linux.c
>> +++ b/qga/commands-linux.c
>> @@ -59,6 +59,22 @@ static int dev_major_minor(const char *devpath,
>>       return -1;
>>   }
>>   +/*
>> + * Check if we already have the devmajor:devminor in the mounts
>> + * If thats the case return true.
>> + */
>> +static bool dev_exists(FsMountList *mounts, unsigned int devmajor, 
>> unsigned int devminor)
>> +{
>> +    FsMount *mount;
>> +
>> +    QTAILQ_FOREACH(mount, mounts, next) {
>> +        if (mount->devmajor == devmajor && mount->devminor == 
>> devminor) {
>> +            return true;
>> +        }
>> +    }
>> +    return false;
>> +}
>> +
>>   static bool build_fs_mount_list_from_mtab(FsMountList *mounts, 
>> Error **errp)
>>   {
>>       struct mntent *ment;
>> @@ -89,6 +105,10 @@ static bool 
>> build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
>>               /* Skip bind mounts */
>>               continue;
>>           }
>> +        if (dev_exists(mounts, devmajor, devminor)) {
>> +            /* Skip already existing devices (bind mounts) */
>> +            continue;
>> +        }
>>             mount = g_new0(FsMount, 1);
>>           mount->dirname = g_strdup(ment->mnt_dir);
>> @@ -172,6 +192,11 @@ bool build_fs_mount_list(FsMountList *mounts, 
>> Error **errp)
>>               }
>>           }
>>   +        if (dev_exists(mounts, devmajor, devminor)) {
>> +            /* Skip already existing devices (bind mounts) */
>> +            continue;
>> +        }
>> +
>>           mount = g_new0(FsMount, 1);
>>           mount->dirname = g_strdup(line + dir_s);
>>           mount->devtype = g_strdup(dash + type_s);
>
>
> Ping + add kkostiuk@redhat.com as I missed him in the initial mail.
>

Any chance on a review or getting it merged?
Think it's a good (of course ;)) improvement.


Re: [PATCH] qga: skip bind mounts in fs list
Posted by Konstantin Kostiuk 3 weeks, 5 days ago
Hi Jean-Louis,

Thanks for your patch. I hope next week, I will test and review this patch.

Just a question, did you have a chance to test that this patch fix kernel
crash?

Best Regards,
Konstantin Kostiuk.


On Fri, Oct 25, 2024 at 1:06 PM Jean-Louis Dupond <jean-louis@dupond.be>
wrote:

> On 9/10/2024 10:34, Jean-Louis Dupond wrote:
> > On 2/10/2024 12:06, Jean-Louis Dupond wrote:
> >> The filesystem list in build_fs_mount_list should skip bind mounts.
> >> This because we end up in locking situations when doing fsFreeze. Like
> >> mentioned in [1] and [2].
> >>
> >> Next to that, the build_fs_mount_list call did a fallback via
> >> build_fs_mount_list_from_mtab if mountinfo did not exist.
> >> There it skipped bind mounts, but this is broken for newer OS.
> >> This as mounts does not return the path of the bind mount but the
> >> underlying dev/partition, so S_ISDIR will never return true in
> >> dev_major_minor call.
> >>
> >> This patch simply checks the existing devmajor:devminor tuple in the
> >> mounts, and if it already exists, this means we have the same devices
> >> mounted again, a bind mount. So skip this.
> >>
> >> Same approach is used in open-vm-tools [3].
> >>
> >> [1]: https://gitlab.com/qemu-project/qemu/-/issues/592
> >> [2]: https://gitlab.com/qemu-project/qemu/-/issues/520
> >> [3]:
> >>
> https://github.com/vmware/open-vm-tools/commit/d58847b497e212737007958c945af1df22a8ab58
> >>
> >> Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
> >> ---
> >>   qga/commands-linux.c | 25 +++++++++++++++++++++++++
> >>   1 file changed, 25 insertions(+)
> >>
> >> diff --git a/qga/commands-linux.c b/qga/commands-linux.c
> >> index 51d5e3d927..426b040ab8 100644
> >> --- a/qga/commands-linux.c
> >> +++ b/qga/commands-linux.c
> >> @@ -59,6 +59,22 @@ static int dev_major_minor(const char *devpath,
> >>       return -1;
> >>   }
> >>   +/*
> >> + * Check if we already have the devmajor:devminor in the mounts
> >> + * If thats the case return true.
> >> + */
> >> +static bool dev_exists(FsMountList *mounts, unsigned int devmajor,
> >> unsigned int devminor)
> >> +{
> >> +    FsMount *mount;
> >> +
> >> +    QTAILQ_FOREACH(mount, mounts, next) {
> >> +        if (mount->devmajor == devmajor && mount->devminor ==
> >> devminor) {
> >> +            return true;
> >> +        }
> >> +    }
> >> +    return false;
> >> +}
> >> +
> >>   static bool build_fs_mount_list_from_mtab(FsMountList *mounts,
> >> Error **errp)
> >>   {
> >>       struct mntent *ment;
> >> @@ -89,6 +105,10 @@ static bool
> >> build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
> >>               /* Skip bind mounts */
> >>               continue;
> >>           }
> >> +        if (dev_exists(mounts, devmajor, devminor)) {
> >> +            /* Skip already existing devices (bind mounts) */
> >> +            continue;
> >> +        }
> >>             mount = g_new0(FsMount, 1);
> >>           mount->dirname = g_strdup(ment->mnt_dir);
> >> @@ -172,6 +192,11 @@ bool build_fs_mount_list(FsMountList *mounts,
> >> Error **errp)
> >>               }
> >>           }
> >>   +        if (dev_exists(mounts, devmajor, devminor)) {
> >> +            /* Skip already existing devices (bind mounts) */
> >> +            continue;
> >> +        }
> >> +
> >>           mount = g_new0(FsMount, 1);
> >>           mount->dirname = g_strdup(line + dir_s);
> >>           mount->devtype = g_strdup(dash + type_s);
> >
> >
> > Ping + add kkostiuk@redhat.com as I missed him in the initial mail.
> >
>
> Any chance on a review or getting it merged?
> Think it's a good (of course ;)) improvement.
>
>
Re: [PATCH] qga: skip bind mounts in fs list
Posted by Jean-Louis Dupond 3 weeks, 5 days ago
Hi Konstantin,

Thanks for your response.
What I observed was when running CloudLinux is that with a /tmp on a 
loop device, is that the underlying fs was first freezed, and then the 
/tmp was getting a freeze call.
But this was hanging, because it couldn't freeze as the underlying fs 
was already freezed.

So the whole system became unresponsive expect if you send a unfreeze 
via sysrq.

Build a qemu-ga with this patch included, and then it worked fine.
Because it skips the bind mounts and therefor made sure that the loop 
device was first freezed and afterwards the underlying fs.
Which works fine :)

I did not see real kernel crashes, so that was not debugged/tested.

Thanks
Jean-Louis

On 28/10/2024 11:57, Konstantin Kostiuk wrote:
> Hi Jean-Louis,
>
> Thanks for your patch. I hope next week, I will test and review this 
> patch.
>
> Just a question, did you have a chance to test that this patch fix 
> kernel crash?
>
> Best Regards,
> Konstantin Kostiuk.
>
>
> On Fri, Oct 25, 2024 at 1:06 PM Jean-Louis Dupond 
> <jean-louis@dupond.be> wrote:
>
>     On 9/10/2024 10:34, Jean-Louis Dupond wrote:
>     > On 2/10/2024 12:06, Jean-Louis Dupond wrote:
>     >> The filesystem list in build_fs_mount_list should skip bind mounts.
>     >> This because we end up in locking situations when doing
>     fsFreeze. Like
>     >> mentioned in [1] and [2].
>     >>
>     >> Next to that, the build_fs_mount_list call did a fallback via
>     >> build_fs_mount_list_from_mtab if mountinfo did not exist.
>     >> There it skipped bind mounts, but this is broken for newer OS.
>     >> This as mounts does not return the path of the bind mount but the
>     >> underlying dev/partition, so S_ISDIR will never return true in
>     >> dev_major_minor call.
>     >>
>     >> This patch simply checks the existing devmajor:devminor tuple
>     in the
>     >> mounts, and if it already exists, this means we have the same
>     devices
>     >> mounted again, a bind mount. So skip this.
>     >>
>     >> Same approach is used in open-vm-tools [3].
>     >>
>     >> [1]: https://gitlab.com/qemu-project/qemu/-/issues/592
>     >> [2]: https://gitlab.com/qemu-project/qemu/-/issues/520
>     >> [3]:
>     >>
>     https://github.com/vmware/open-vm-tools/commit/d58847b497e212737007958c945af1df22a8ab58
>     >>
>     >> Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
>     >> ---
>     >>   qga/commands-linux.c | 25 +++++++++++++++++++++++++
>     >>   1 file changed, 25 insertions(+)
>     >>
>     >> diff --git a/qga/commands-linux.c b/qga/commands-linux.c
>     >> index 51d5e3d927..426b040ab8 100644
>     >> --- a/qga/commands-linux.c
>     >> +++ b/qga/commands-linux.c
>     >> @@ -59,6 +59,22 @@ static int dev_major_minor(const char *devpath,
>     >>       return -1;
>     >>   }
>     >>   +/*
>     >> + * Check if we already have the devmajor:devminor in the mounts
>     >> + * If thats the case return true.
>     >> + */
>     >> +static bool dev_exists(FsMountList *mounts, unsigned int
>     devmajor,
>     >> unsigned int devminor)
>     >> +{
>     >> +    FsMount *mount;
>     >> +
>     >> +    QTAILQ_FOREACH(mount, mounts, next) {
>     >> +        if (mount->devmajor == devmajor && mount->devminor ==
>     >> devminor) {
>     >> +            return true;
>     >> +        }
>     >> +    }
>     >> +    return false;
>     >> +}
>     >> +
>     >>   static bool build_fs_mount_list_from_mtab(FsMountList *mounts,
>     >> Error **errp)
>     >>   {
>     >>       struct mntent *ment;
>     >> @@ -89,6 +105,10 @@ static bool
>     >> build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
>     >>               /* Skip bind mounts */
>     >>               continue;
>     >>           }
>     >> +        if (dev_exists(mounts, devmajor, devminor)) {
>     >> +            /* Skip already existing devices (bind mounts) */
>     >> +            continue;
>     >> +        }
>     >>             mount = g_new0(FsMount, 1);
>     >>           mount->dirname = g_strdup(ment->mnt_dir);
>     >> @@ -172,6 +192,11 @@ bool build_fs_mount_list(FsMountList *mounts,
>     >> Error **errp)
>     >>               }
>     >>           }
>     >>   +        if (dev_exists(mounts, devmajor, devminor)) {
>     >> +            /* Skip already existing devices (bind mounts) */
>     >> +            continue;
>     >> +        }
>     >> +
>     >>           mount = g_new0(FsMount, 1);
>     >>           mount->dirname = g_strdup(line + dir_s);
>     >>           mount->devtype = g_strdup(dash + type_s);
>     >
>     >
>     > Ping + add kkostiuk@redhat.com as I missed him in the initial mail.
>     >
>
>     Any chance on a review or getting it merged?
>     Think it's a good (of course ;)) improvement.
>