[PATCH 4/4] LoongArch: Use atomic operation with set_pte and pte_clear function

Bibo Mao posted 4 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH 4/4] LoongArch: Use atomic operation with set_pte and pte_clear function
Posted by Bibo Mao 1 month, 2 weeks ago
For kernel space area on LoongArch system, both two consecutive page
table entries should be enabled with PAGE_GLOBAL bit. So with function
set_pte() and pte_clear(), pte buddy entry is checked and set besides
its own pte entry. However it is not atomic operation to set both two
pte entries, there is problem with test_vmalloc test case.

With previous patch, all page table entries are set with PAGE_GLOBAL
bit at beginning. Only its own pte entry need update with function
set_pte() and pte_clear(), nothing to do with buddy pte entry.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
---
 arch/loongarch/include/asm/pgtable.h | 44 ++++++++++------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 22e3a8f96213..4be3f0dbecda 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -325,40 +325,26 @@ extern void paging_init(void);
 static inline void set_pte(pte_t *ptep, pte_t pteval)
 {
 	WRITE_ONCE(*ptep, pteval);
+}
 
-	if (pte_val(pteval) & _PAGE_GLOBAL) {
-		pte_t *buddy = ptep_buddy(ptep);
-		/*
-		 * Make sure the buddy is global too (if it's !none,
-		 * it better already be global)
-		 */
-		if (pte_none(ptep_get(buddy))) {
-#ifdef CONFIG_SMP
-			/*
-			 * For SMP, multiple CPUs can race, so we need
-			 * to do this atomically.
-			 */
-			__asm__ __volatile__(
-			__AMOR "$zero, %[global], %[buddy] \n"
-			: [buddy] "+ZB" (buddy->pte)
-			: [global] "r" (_PAGE_GLOBAL)
-			: "memory");
-
-			DBAR(0b11000); /* o_wrw = 0b11000 */
-#else /* !CONFIG_SMP */
-			WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL));
-#endif /* CONFIG_SMP */
-		}
-	}
+static inline unsigned long __ptep_get_and_clear(pte_t *ptep)
+{
+	return atomic64_fetch_and(_PAGE_GLOBAL, (atomic64_t *)&pte_val(*ptep));
 }
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	/* Preserve global status for the pair */
-	if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL)
-		set_pte(ptep, __pte(_PAGE_GLOBAL));
-	else
-		set_pte(ptep, __pte(0));
+	__ptep_get_and_clear(ptep);
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+					unsigned long addr, pte_t *ptep)
+{
+	unsigned long val;
+
+	val = __ptep_get_and_clear(ptep);
+	return __pte(val);
 }
 
 #define PGD_T_LOG2	(__builtin_ffs(sizeof(pgd_t)) - 1)
-- 
2.39.3
Re: [PATCH 4/4] LoongArch: Use atomic operation with set_pte and pte_clear function
Posted by Huacai Chen 1 month, 2 weeks ago
Hi, Bibo,

On Thu, Oct 10, 2024 at 11:50 AM Bibo Mao <maobibo@loongson.cn> wrote:
>
> For kernel space area on LoongArch system, both two consecutive page
> table entries should be enabled with PAGE_GLOBAL bit. So with function
> set_pte() and pte_clear(), pte buddy entry is checked and set besides
> its own pte entry. However it is not atomic operation to set both two
> pte entries, there is problem with test_vmalloc test case.
>
> With previous patch, all page table entries are set with PAGE_GLOBAL
> bit at beginning. Only its own pte entry need update with function
> set_pte() and pte_clear(), nothing to do with buddy pte entry.
>
> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> ---
>  arch/loongarch/include/asm/pgtable.h | 44 ++++++++++------------------
>  1 file changed, 15 insertions(+), 29 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
> index 22e3a8f96213..4be3f0dbecda 100644
> --- a/arch/loongarch/include/asm/pgtable.h
> +++ b/arch/loongarch/include/asm/pgtable.h
> @@ -325,40 +325,26 @@ extern void paging_init(void);
>  static inline void set_pte(pte_t *ptep, pte_t pteval)
>  {
>         WRITE_ONCE(*ptep, pteval);
> +}
>
> -       if (pte_val(pteval) & _PAGE_GLOBAL) {
> -               pte_t *buddy = ptep_buddy(ptep);
> -               /*
> -                * Make sure the buddy is global too (if it's !none,
> -                * it better already be global)
> -                */
> -               if (pte_none(ptep_get(buddy))) {
> -#ifdef CONFIG_SMP
> -                       /*
> -                        * For SMP, multiple CPUs can race, so we need
> -                        * to do this atomically.
> -                        */
> -                       __asm__ __volatile__(
> -                       __AMOR "$zero, %[global], %[buddy] \n"
> -                       : [buddy] "+ZB" (buddy->pte)
> -                       : [global] "r" (_PAGE_GLOBAL)
> -                       : "memory");
> -
> -                       DBAR(0b11000); /* o_wrw = 0b11000 */
> -#else /* !CONFIG_SMP */
> -                       WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL));
> -#endif /* CONFIG_SMP */
> -               }
> -       }
> +static inline unsigned long __ptep_get_and_clear(pte_t *ptep)
> +{
> +       return atomic64_fetch_and(_PAGE_GLOBAL, (atomic64_t *)&pte_val(*ptep));
>  }
>
>  static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
>  {
> -       /* Preserve global status for the pair */
> -       if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL)
> -               set_pte(ptep, __pte(_PAGE_GLOBAL));
> -       else
> -               set_pte(ptep, __pte(0));
> +       __ptep_get_and_clear(ptep);
With the first patch, a kernel pte always take _PAGE_GLOBAL, so we
don't need an expensive atomic operation, just
"set_pte(pte_val(ptep_get(ptep)) & _PAGE_GLOBAL)" is OK here. And then
we don't need a custom ptep_get_and_clear().


Huacai

> +}
> +
> +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
> +static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
> +                                       unsigned long addr, pte_t *ptep)
> +{
> +       unsigned long val;
> +
> +       val = __ptep_get_and_clear(ptep);
> +       return __pte(val);
>  }
>
>  #define PGD_T_LOG2     (__builtin_ffs(sizeof(pgd_t)) - 1)
> --
> 2.39.3
>
Re: [PATCH 4/4] LoongArch: Use atomic operation with set_pte and pte_clear function
Posted by maobibo 1 month, 2 weeks ago
Huacai,

On 2024/10/12 上午10:16, Huacai Chen wrote:
> Hi, Bibo,
> 
> On Thu, Oct 10, 2024 at 11:50 AM Bibo Mao <maobibo@loongson.cn> wrote:
>>
>> For kernel space area on LoongArch system, both two consecutive page
>> table entries should be enabled with PAGE_GLOBAL bit. So with function
>> set_pte() and pte_clear(), pte buddy entry is checked and set besides
>> its own pte entry. However it is not atomic operation to set both two
>> pte entries, there is problem with test_vmalloc test case.
>>
>> With previous patch, all page table entries are set with PAGE_GLOBAL
>> bit at beginning. Only its own pte entry need update with function
>> set_pte() and pte_clear(), nothing to do with buddy pte entry.
>>
>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>> ---
>>   arch/loongarch/include/asm/pgtable.h | 44 ++++++++++------------------
>>   1 file changed, 15 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
>> index 22e3a8f96213..4be3f0dbecda 100644
>> --- a/arch/loongarch/include/asm/pgtable.h
>> +++ b/arch/loongarch/include/asm/pgtable.h
>> @@ -325,40 +325,26 @@ extern void paging_init(void);
>>   static inline void set_pte(pte_t *ptep, pte_t pteval)
>>   {
>>          WRITE_ONCE(*ptep, pteval);
>> +}
>>
>> -       if (pte_val(pteval) & _PAGE_GLOBAL) {
>> -               pte_t *buddy = ptep_buddy(ptep);
>> -               /*
>> -                * Make sure the buddy is global too (if it's !none,
>> -                * it better already be global)
>> -                */
>> -               if (pte_none(ptep_get(buddy))) {
>> -#ifdef CONFIG_SMP
>> -                       /*
>> -                        * For SMP, multiple CPUs can race, so we need
>> -                        * to do this atomically.
>> -                        */
>> -                       __asm__ __volatile__(
>> -                       __AMOR "$zero, %[global], %[buddy] \n"
>> -                       : [buddy] "+ZB" (buddy->pte)
>> -                       : [global] "r" (_PAGE_GLOBAL)
>> -                       : "memory");
>> -
>> -                       DBAR(0b11000); /* o_wrw = 0b11000 */
>> -#else /* !CONFIG_SMP */
>> -                       WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL));
>> -#endif /* CONFIG_SMP */
>> -               }
>> -       }
>> +static inline unsigned long __ptep_get_and_clear(pte_t *ptep)
>> +{
>> +       return atomic64_fetch_and(_PAGE_GLOBAL, (atomic64_t *)&pte_val(*ptep));
>>   }
>>
>>   static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
>>   {
>> -       /* Preserve global status for the pair */
>> -       if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL)
>> -               set_pte(ptep, __pte(_PAGE_GLOBAL));
>> -       else
>> -               set_pte(ptep, __pte(0));
>> +       __ptep_get_and_clear(ptep);
> With the first patch, a kernel pte always take _PAGE_GLOBAL, so we
> don't need an expensive atomic operation, just
> "set_pte(pte_val(ptep_get(ptep)) & _PAGE_GLOBAL)" is OK here. And then
> we don't need a custom ptep_get_and_clear().
Will use non-atomic method and test again, also will remove customed 
function ptep_get_and_clear().

Regards
Bibo Mao
> 
> 
> Huacai
> 
>> +}
>> +
>> +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
>> +static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
>> +                                       unsigned long addr, pte_t *ptep)
>> +{
>> +       unsigned long val;
>> +
>> +       val = __ptep_get_and_clear(ptep);
>> +       return __pte(val);
>>   }
>>
>>   #define PGD_T_LOG2     (__builtin_ffs(sizeof(pgd_t)) - 1)
>> --
>> 2.39.3
>>