[PATCH 21/22] Implement shmat(2) and shmdt(2)

Karim Taha posted 22 patches 2 years, 5 months ago
Only 21 patches received!
[PATCH 21/22] Implement shmat(2) and shmdt(2)
Posted by Karim Taha 2 years, 5 months ago
From: Stacey Son <sson@FreeBSD.org>

Signed-off-by: Stacey Son <sson@FreeBSD.org>
Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com>
---
 bsd-user/bsd-mem.h            | 72 +++++++++++++++++++++++++++++++++++
 bsd-user/freebsd/os-syscall.c |  8 ++++
 2 files changed, 80 insertions(+)

diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h
index 221ad76d8c..f737b94885 100644
--- a/bsd-user/bsd-mem.h
+++ b/bsd-user/bsd-mem.h
@@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long shmid, abi_long cmd,
     return ret;
 }
 
+/* shmat(2) */
+static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int shmflg)
+{
+    abi_ulong raddr;
+    abi_long ret;
+    void *host_raddr;
+    struct shmid_ds shm_info;
+    int i;
+
+    /* Find out the length of the shared memory segment. */
+    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
+    if (is_error(ret)) {
+        /* Can't get the length */
+        return ret;
+    }
+
+    mmap_lock();
+
+    if (shmaddr) {
+        host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
+    } else {
+        abi_ulong mmap_start;
+
+        mmap_start = mmap_find_vma(0, shm_info.shm_segsz);
+
+        if (mmap_start == -1) {
+            errno = ENOMEM;
+            host_raddr = (void *)-1;
+        } else {
+            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
+                shmflg); /* | SHM_REMAP XXX WHY? */
+        }
+    }
+
+    if (host_raddr == (void *)-1) {
+        mmap_unlock();
+        return get_errno((long)host_raddr);
+    }
+    raddr = h2g((unsigned long)host_raddr);
+
+    page_set_flags(raddr, raddr + shm_info.shm_segsz,
+        PAGE_VALID | PAGE_READ | ((shmflg & SHM_RDONLY) ? 0 : PAGE_WRITE));
+
+    for (i = 0; i < N_BSD_SHM_REGIONS; i++) {
+        if (bsd_shm_regions[i].start == 0) {
+            bsd_shm_regions[i].start = raddr;
+            bsd_shm_regions[i].size = shm_info.shm_segsz;
+            break;
+        }
+    }
+
+    mmap_unlock();
+    return raddr;
+}
+
+/* shmdt(2) */
+static inline abi_long do_bsd_shmdt(abi_ulong shmaddr)
+{
+    int i;
+
+    for (i = 0; i < N_BSD_SHM_REGIONS; ++i) {
+        if (bsd_shm_regions[i].start == shmaddr) {
+            bsd_shm_regions[i].start = 0;
+            page_set_flags(shmaddr,
+                shmaddr + bsd_shm_regions[i].size, 0);
+            break;
+        }
+    }
+
+    return get_errno(shmdt(g2h_untagged(shmaddr)));
+}
+
 #endif /* BSD_USER_BSD_MEM_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 9681c65ce9..f76bc1eb38 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -559,6 +559,14 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1,
         ret = do_bsd_shmctl(arg1, arg2, arg3);
         break;
 
+    case TARGET_FREEBSD_NR_shmat: /* shmat(2) */
+        ret = do_bsd_shmat(arg1, arg2, arg3);
+        break;
+
+    case TARGET_FREEBSD_NR_shmdt: /* shmdt(2) */
+        ret = do_bsd_shmdt(arg1);
+        break;
+
         /*
          * Misc
          */
-- 
2.40.0
Re: [PATCH 21/22] Implement shmat(2) and shmdt(2)
Posted by Richard Henderson 2 years, 5 months ago
On 8/19/23 02:48, Karim Taha wrote:
> From: Stacey Son <sson@FreeBSD.org>
> 
> Signed-off-by: Stacey Son <sson@FreeBSD.org>
> Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com>
> ---
>   bsd-user/bsd-mem.h            | 72 +++++++++++++++++++++++++++++++++++
>   bsd-user/freebsd/os-syscall.c |  8 ++++
>   2 files changed, 80 insertions(+)
> 
> diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h
> index 221ad76d8c..f737b94885 100644
> --- a/bsd-user/bsd-mem.h
> +++ b/bsd-user/bsd-mem.h
> @@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long shmid, abi_long cmd,
>       return ret;
>   }
>   
> +/* shmat(2) */
> +static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int shmflg)
> +{
> +    abi_ulong raddr;
> +    abi_long ret;
> +    void *host_raddr;
> +    struct shmid_ds shm_info;
> +    int i;
> +
> +    /* Find out the length of the shared memory segment. */
> +    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
> +    if (is_error(ret)) {
> +        /* Can't get the length */
> +        return ret;
> +    }
> +
> +    mmap_lock();
> +
> +    if (shmaddr) {
> +        host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);

Missing

     if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
         return -TARGET_EINVAL;
     }

> +    } else {
> +        abi_ulong mmap_start;
> +
> +        mmap_start = mmap_find_vma(0, shm_info.shm_segsz);
> +
> +        if (mmap_start == -1) {
> +            errno = ENOMEM;
> +            host_raddr = (void *)-1;
> +        } else {
> +            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
> +                shmflg); /* | SHM_REMAP XXX WHY? */

With reserved_va, the entire guest address space is mapped PROT_NONE so that it is 
reserved, so that the kernel does not use it for something else.  You need the SHM_REMAP 
to replace the reservation mapping.

> +/* shmdt(2) */
> +static inline abi_long do_bsd_shmdt(abi_ulong shmaddr)
> +{
> +    int i;
> +
> +    for (i = 0; i < N_BSD_SHM_REGIONS; ++i) {
> +        if (bsd_shm_regions[i].start == shmaddr) {
> +            bsd_shm_regions[i].start = 0;
> +            page_set_flags(shmaddr,
> +                shmaddr + bsd_shm_regions[i].size, 0);
> +            break;
> +        }
> +    }
> +
> +    return get_errno(shmdt(g2h_untagged(shmaddr)));
> +}

Hmm, bug with linux-user as well, because here we should re-establish the reserved_va 
reservation.

Also, we should not be using a fixed sized array.  Nothing good happens when the array 
fills up.


r~
Re: [PATCH 21/22] Implement shmat(2) and shmdt(2)
Posted by Warner Losh 2 years, 5 months ago
On Sun, Aug 20, 2023 at 9:30 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 8/19/23 02:48, Karim Taha wrote:
> > From: Stacey Son <sson@FreeBSD.org>
> >
> > Signed-off-by: Stacey Son <sson@FreeBSD.org>
> > Signed-off-by: Karim Taha <kariem.taha2.7@gmail.com>
> > ---
> >   bsd-user/bsd-mem.h            | 72 +++++++++++++++++++++++++++++++++++
> >   bsd-user/freebsd/os-syscall.c |  8 ++++
> >   2 files changed, 80 insertions(+)
> >
> > diff --git a/bsd-user/bsd-mem.h b/bsd-user/bsd-mem.h
> > index 221ad76d8c..f737b94885 100644
> > --- a/bsd-user/bsd-mem.h
> > +++ b/bsd-user/bsd-mem.h
> > @@ -335,4 +335,76 @@ static inline abi_long do_bsd_shmctl(abi_long
> shmid, abi_long cmd,
> >       return ret;
> >   }
> >
> > +/* shmat(2) */
> > +static inline abi_long do_bsd_shmat(int shmid, abi_ulong shmaddr, int
> shmflg)
> > +{
> > +    abi_ulong raddr;
> > +    abi_long ret;
> > +    void *host_raddr;
> > +    struct shmid_ds shm_info;
> > +    int i;
> > +
> > +    /* Find out the length of the shared memory segment. */
> > +    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
> > +    if (is_error(ret)) {
> > +        /* Can't get the length */
> > +        return ret;
> > +    }
> > +
> > +    mmap_lock();
> > +
> > +    if (shmaddr) {
> > +        host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr),
> shmflg);
>
> Missing
>
>      if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
>          return -TARGET_EINVAL;
>      }
>
> > +    } else {
> > +        abi_ulong mmap_start;
> > +
> > +        mmap_start = mmap_find_vma(0, shm_info.shm_segsz);
> > +
> > +        if (mmap_start == -1) {
> > +            errno = ENOMEM;
> > +            host_raddr = (void *)-1;
> > +        } else {
> > +            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
> > +                shmflg); /* | SHM_REMAP XXX WHY? */
>
> With reserved_va, the entire guest address space is mapped PROT_NONE so
> that it is
> reserved, so that the kernel does not use it for something else.  You need
> the SHM_REMAP
> to replace the reservation mapping.
>
> > +/* shmdt(2) */
> > +static inline abi_long do_bsd_shmdt(abi_ulong shmaddr)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < N_BSD_SHM_REGIONS; ++i) {
> > +        if (bsd_shm_regions[i].start == shmaddr) {
> > +            bsd_shm_regions[i].start = 0;
> > +            page_set_flags(shmaddr,
> > +                shmaddr + bsd_shm_regions[i].size, 0);
> > +            break;
> > +        }
> > +    }
> > +
> > +    return get_errno(shmdt(g2h_untagged(shmaddr)));
> > +}
>
> Hmm, bug with linux-user as well, because here we should re-establish the
> reserved_va
> reservation.
>

... of the shared memory region we just detached? Right?


> Also, we should not be using a fixed sized array.  Nothing good happens
> when the array
> fills up.
>

File this as https://github.com/qemu-bsd-user/qemu-bsd-user/issues/47 so we
don't forget.
It's good enough for the moment since the programs we've seen have a very
limited number
of segments... but longer term, it should be dynamic.

Warner
Re: [PATCH 21/22] Implement shmat(2) and shmdt(2)
Posted by Richard Henderson 2 years, 5 months ago
On 8/22/23 11:03, Warner Losh wrote:
>     Hmm, bug with linux-user as well, because here we should re-establish the reserved_va
>     reservation.
> 
> 
> ... of the shared memory region we just detached? Right?

Correct.

On a related note, on FreeBSD is there any practical difference between

     PROT_NONE, MAP_ANON
and
     PROT_NONE, MAP_GUARD

for large memory regions?

I ask since FreeBSD doesn't have MAP_NORESERVE, which Linux uses to avoid allocation of 
gigabytes.


r~
Re: [PATCH 21/22] Implement shmat(2) and shmdt(2)
Posted by Warner Losh 2 years, 5 months ago
On Tue, Aug 22, 2023 at 12:11 PM Richard Henderson <
richard.henderson@linaro.org> wrote:

> On 8/22/23 11:03, Warner Losh wrote:
> >     Hmm, bug with linux-user as well, because here we should
> re-establish the reserved_va
> >     reservation.
> >
> >
> > ... of the shared memory region we just detached? Right?
>
> Correct.
>
> On a related note, on FreeBSD is there any practical difference between
>
>      PROT_NONE, MAP_ANON
> and
>      PROT_NONE, MAP_GUARD
>
> for large memory regions?
>

They do different things. MAP_ANON maps the memory without a backing
device. This means it allocates the VA space right away, but lazily
allocates
the backing pages as the pages are dirtied.

MAP_GUARD creates the VA mapping, but never maps any pages to those
pages (well, until it's remapped). Any read/write/exec access to MAP_GUARD
pages results in a SIGSEGV.


> I ask since FreeBSD doesn't have MAP_NORESERVE, which Linux uses to avoid
> allocation of
> gigabytes
>

Yea. It sounds like MAP_NORESERVE is what FreeBSD's default behavior is: We
don't
allocate backing store in the swap areas until there's memory pressure. You
can safely
allocate GB of space with MAP_ANON and get similar behavior to the
MAP_NORESERVE.

MAP_GUARD could be used if you wanted to reserve the VA space, but didn't
want to assign
anything to the VA space until later.

As a practical matter, they both consume about the same resources until the
MAP_ANON
region starts to get populated with data...

With PROT_NONE, I think they would have the same effect. If it is to be a
backing store for
something like malloc, then MAP_ANON would be best. If you are replacing it
with a lot of
things, like a mix of files, devices and/or anon memory, then MAP_GUARD and
replace it
with MAP_FIXED later. Most likely you'll want MAP_GUARD to reserve the
area, and then
MAP_FIXED to use it for mmap'd memory, shared memory, executable pages, etc.

Does that tell you what you need to know?

Warner


> r~
>
Re: [PATCH 21/22] Implement shmat(2) and shmdt(2)
Posted by Richard Henderson 2 years, 5 months ago
On 8/22/23 12:54, Warner Losh wrote:
> As a practical matter, they both consume about the same resources until the MAP_ANON
> region starts to get populated with data...
> 
> With PROT_NONE, I think they would have the same effect. If it is to be a backing store for
> something like malloc, then MAP_ANON would be best. If you are replacing it with a lot of
> things, like a mix of files, devices and/or anon memory, then MAP_GUARD and replace it
> with MAP_FIXED later. Most likely you'll want MAP_GUARD to reserve the area, and then
> MAP_FIXED to use it for mmap'd memory, shared memory, executable pages, etc.
> 
> Does that tell you what you need to know?

Yes.

The reserved_va area is replaced with a mix of files, anon, etc, based on whatever the 
guest requires.  So it might be reasonable to adjust bsd-user/mmap.c to use MAP_GUARD for 
managing the reserved_va area instead of MAP_ANON.  No rush, of course.


r~