We already use xxh64() as a first-pass filter for dedupe; when we
skip an entry with the same xxh64 hash, there is no need to do the
memcpy() and sha256(), so relocate the code to avoid them.
Signed-off-by: wangzijie <wangzijie1@honor.com>
---
lib/dedupe.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/lib/dedupe.c b/lib/dedupe.c
index 074cae3..bdd890c 100644
--- a/lib/dedupe.c
+++ b/lib/dedupe.c
@@ -162,18 +162,9 @@ int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
if (!di)
return -ENOMEM;
- di->original_length = e->length;
- erofs_sha256(original_data, window_size, di->prefix_sha256);
-
di->prefix_xxh64 = xxh64(original_data, window_size, 0);
di->hash = erofs_rolling_hash_init(original_data,
window_size, true);
- memcpy(di->extra_data, original_data + window_size,
- e->length - window_size);
- di->pstart = e->pstart;
- di->plen = e->plen;
- di->partial = e->partial;
- di->raw = e->raw;
/* skip the same xxh64 hash */
p = &dedupe_tree[di->hash & (ARRAY_SIZE(dedupe_tree) - 1)];
@@ -183,6 +174,15 @@ int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
return 0;
}
}
+
+ di->original_length = e->length;
+ erofs_sha256(original_data, window_size, di->prefix_sha256);
+ memcpy(di->extra_data, original_data + window_size,
+ e->length - window_size);
+ di->pstart = e->pstart;
+ di->plen = e->plen;
+ di->partial = e->partial;
+ di->raw = e->raw;
di->chain = dedupe_subtree;
dedupe_subtree = di;
list_add_tail(&di->list, p);
--
2.25.1
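
[Editorial note: the following is a small standalone toy, not erofs-utils
code; cheap_hash() and expensive_digest() are stand-ins for xxh64() and
sha256(), and the bucket layout is simplified. It only illustrates the
ordering this patch establishes: compute the cheap filter hash first, look
for an existing entry, and only pay for the expensive digest and the data
copy when nothing matches.]

/* toy_filter_first.c -- standalone illustration, not erofs-utils code.
 *
 * Shows the ordering this patch establishes in z_erofs_dedupe_insert():
 * compute the cheap 64-bit filter hash first and look for an existing
 * entry; only when nothing matches do we pay for the expensive digest
 * and the data copy.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_BUCKETS 64

struct item {
	uint64_t cheap;			/* stand-in for prefix_xxh64 */
	unsigned char digest[8];	/* stand-in for prefix_sha256 */
	char *copy;			/* stand-in for the extra_data memcpy */
	size_t len;
	struct item *next;
};

static struct item *buckets[NR_BUCKETS];

static uint64_t cheap_hash(const char *s, size_t n)
{
	uint64_t h = 0xcbf29ce484222325ULL;	/* FNV-1a, cheap stand-in */

	while (n--)
		h = (h ^ (unsigned char)*s++) * 0x100000001b3ULL;
	return h;
}

static void expensive_digest(const char *s, size_t n, unsigned char out[8])
{
	uint64_t h = cheap_hash(s, n) ^ 0xdeadbeefULL;	/* pretend it's costly */

	memcpy(out, &h, sizeof(h));
}

/* returns 1 if inserted, 0 if an entry with the same cheap hash exists */
static int insert(const char *data, size_t len)
{
	uint64_t cheap = cheap_hash(data, len);		/* 1. cheap filter first */
	struct item *it, **head = &buckets[cheap % NR_BUCKETS];

	for (it = *head; it; it = it->next)
		if (it->cheap == cheap)
			return 0;	/* 2. duplicate: skip the expensive work */

	it = calloc(1, sizeof(*it));	/* 3. only now do the expensive part */
	if (!it)
		return -1;
	it->cheap = cheap;
	it->len = len;
	expensive_digest(data, len, it->digest);
	it->copy = malloc(len);
	if (it->copy)
		memcpy(it->copy, data, len);
	it->next = *head;
	*head = it;
	return 1;
}

int main(void)
{
	int a = insert("hello", 5);
	int b = insert("hello", 5);	/* same cheap hash: no digest, no copy */
	int c = insert("world", 5);

	printf("%d %d %d\n", a, b, c);	/* prints: 1 0 1 */
	return 0;
}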
Hi Zijie,

On 2025/8/15 16:44, wangzijie wrote:
> We already use xxh64() as a first-pass filter for dedupe; when we
> skip an entry with the same xxh64 hash, there is no need to do the
> memcpy() and sha256(), so relocate the code to avoid them.
>
> Signed-off-by: wangzijie <wangzijie1@honor.com>

Thanks for the patch, it makes sense to me since we only keep one
record according to xxh64 (instead of sha256) for now:

Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>

Although I think multi-threaded deduplication is more useful, see:
https://github.com/erofs/erofs-utils/issues/25
but I'm not sure if you're interested in it... ;-)

Thanks,
Gao Xiang
> Hi Zijie,
>
> On 2025/8/15 16:44, wangzijie wrote:
>> We already use xxh64() as a first-pass filter for dedupe; when we
>> skip an entry with the same xxh64 hash, there is no need to do the
>> memcpy() and sha256(), so relocate the code to avoid them.
>>
>> Signed-off-by: wangzijie <wangzijie1@honor.com>
>
> Thanks for the patch, it makes sense to me since we only keep one
> record according to xxh64 (instead of sha256) for now:
>
> Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
>
> Although I think multi-threaded deduplication is more useful, see:
> https://github.com/erofs/erofs-utils/issues/25
> but I'm not sure if you're interested in it... ;-)

Hi Xiang,

Thank you for providing this information. I want to optimize mkfs time
with the dedupe option, which is why I sent this patch. I will find time
to look into Yifan's demo of multi-threaded deduplication and try to help.

> Thanks,
> Gao Xiang
Hi Zijie,

It would be much appreciated if you could help us polish the
multithreading -Ededupe implementation. I will try to rebase the
existing code onto the latest codebase ASAP.

You can find the design decisions behind the multithreading -Ededupe
work in this paper:
https://dl.acm.org/doi/pdf/10.1145/3671016.3671395

Thanks,
Yifan

On 2025/8/15 17:54, wangzijie wrote:
>> Hi Zijie,
>>
>> On 2025/8/15 16:44, wangzijie wrote:
>>> We already use xxh64() as a first-pass filter for dedupe; when we
>>> skip an entry with the same xxh64 hash, there is no need to do the
>>> memcpy() and sha256(), so relocate the code to avoid them.
>>>
>>> Signed-off-by: wangzijie <wangzijie1@honor.com>
>> Thanks for the patch, it makes sense to me since we only keep one
>> record according to xxh64 (instead of sha256) for now:
>>
>> Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
>>
>> Although I think multi-threaded deduplication is more useful, see:
>> https://github.com/erofs/erofs-utils/issues/25
>> but I'm not sure if you're interested in it... ;-)
> Hi Xiang,
> Thank you for providing this information. I want to optimize mkfs time
> with the dedupe option, which is why I sent this patch. I will find time
> to look into Yifan's demo of multi-threaded deduplication and try to help.
>
>> Thanks,
>> Gao Xiang