Considering that the pmd entries of a CONT-PMD hugetlb can not span
multiple PMDs, we can change to use the PMD page lock, which can
be much finer grained than the lock in the mm.
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
include/linux/hugetlb.h | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3a96f67..d4803a89 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -892,9 +892,17 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
- VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
+ unsigned long hp_size = huge_page_size(h);
 
- if (huge_page_size(h) == PMD_SIZE) {
+ VM_BUG_ON(hp_size == PAGE_SIZE);
+
+ /*
+ * Considering CONT-PMD size hugetlb, since the CONT-PMD entry
+ * can not span multiple PMDs, then we can use the fine grained
+ * PMD page lock.
+ */
+ if (hp_size == PMD_SIZE ||
+ (hp_size > PMD_SIZE && hp_size < PUD_SIZE)) {
return pmd_lockptr(mm, (pmd_t *) pte);
} else if (huge_page_size(h) < PMD_SIZE) {
unsigned long mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
--
1.8.3.1
On 23.08.22 09:50, Baolin Wang wrote:
> Considering the pmd entries of a CONT-PMD hugetlb can not span on
> multiple PMDs, we can change to use the PMD page lock, which can
> be much finer grain that lock in the mm.
>
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> ---
> include/linux/hugetlb.h | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 3a96f67..d4803a89 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -892,9 +892,17 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
> static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
> struct mm_struct *mm, pte_t *pte)
> {
> - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
> + unsigned long hp_size = huge_page_size(h);
>
> - if (huge_page_size(h) == PMD_SIZE) {
> + VM_BUG_ON(hp_size == PAGE_SIZE);
> +
> + /*
> + * Considering CONT-PMD size hugetlb, since the CONT-PMD entry
> + * can not span multiple PMDs, then we can use the fine grained
> + * PMD page lock.
> + */
> + if (hp_size == PMD_SIZE ||
> + (hp_size > PMD_SIZE && hp_size < PUD_SIZE)) {
> return pmd_lockptr(mm, (pmd_t *) pte);
> } else if (huge_page_size(h) < PMD_SIZE) {
> unsigned long mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
Is there a measurable performance gain? IOW, do we really care?
--
Thanks,
David / dhildenb
On 8/23/2022 4:14 PM, David Hildenbrand wrote:
> On 23.08.22 09:50, Baolin Wang wrote:
>> Considering the pmd entries of a CONT-PMD hugetlb can not span on
>> multiple PMDs, we can change to use the PMD page lock, which can
>> be much finer grain that lock in the mm.
>>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>> include/linux/hugetlb.h | 12 ++++++++++--
>> 1 file changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
>> index 3a96f67..d4803a89 100644
>> --- a/include/linux/hugetlb.h
>> +++ b/include/linux/hugetlb.h
>> @@ -892,9 +892,17 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
>> static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
>> struct mm_struct *mm, pte_t *pte)
>> {
>> - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
>> + unsigned long hp_size = huge_page_size(h);
>>
>> - if (huge_page_size(h) == PMD_SIZE) {
>> + VM_BUG_ON(hp_size == PAGE_SIZE);
>> +
>> + /*
>> + * Considering CONT-PMD size hugetlb, since the CONT-PMD entry
>> + * can not span multiple PMDs, then we can use the fine grained
>> + * PMD page lock.
>> + */
>> + if (hp_size == PMD_SIZE ||
>> + (hp_size > PMD_SIZE && hp_size < PUD_SIZE)) {
>> return pmd_lockptr(mm, (pmd_t *) pte);
>> } else if (huge_page_size(h) < PMD_SIZE) {
>> unsigned long mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
>
> Is there a measurable performance gain? IOW, do we really care?
IMO, it's just a theoretical analysis for now :) Let me think about how
to measure the performance gain.
© 2016 - 2026 Red Hat, Inc.