zblock is a special purpose allocator for storing compressed pages.
It stores an integer number of same-size objects per block. These
blocks consist of several physical pages (2**n, i.e. 1/2/4/8).

With zblock, it is possible to densely arrange objects of various sizes,
resulting in low internal fragmentation. This allocator also tries to
fill incomplete blocks instead of adding new ones, in many cases
providing a compression ratio comparable to zsmalloc's.

In most cases zblock is also superior to zsmalloc with regard to
average performance and worst-case execution times, thus allowing for
better response times and real-time characteristics of the whole system.

High memory and page migration are currently not supported by zblock.

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.se>
Signed-off-by: Igor Belousov <igor.b@beldev.am>
---
v3: https://patchwork.kernel.org/project/linux-mm/patch/20250408125211.1611879-1-vitaly.wool@konsulko.se/

Changes since v3:
- rebased and tested against the latest -mm tree
- the block descriptors table was updated for better compression ratio
  (see the sketch below)
- fixed the bug with wrong SLOT_BITS value
- slot search moved to find_and_claim_block()
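
A quick reviewer note on the descriptor table (illustration only, not
part of the patch): each block_desc[] entry below pairs a slot size with
a slot count and a block order, so the resulting per-slot sizes can be
sanity-checked with a small userspace sketch. The 32-byte header and the
rounding used here are assumptions standing in for ZBLOCK_HEADER_SIZE
and the real SLOT_SIZE() macro:

#include <stdio.h>

#define PAGE_SIZE	4096UL	/* assuming 4K pages */
#define HEADER		32UL	/* stand-in for ZBLOCK_HEADER_SIZE */

/* rough per-slot size of an order-'order' block carved into 'nslots' slots */
static unsigned long approx_slot_size(unsigned long nslots, unsigned int order)
{
	return ((PAGE_SIZE << order) - HEADER) / nslots;
}

int main(void)
{
	/* a few entries mirroring the 4K block_desc[] table in this patch */
	static const struct { unsigned long nslots; unsigned int order; } d[] = {
		{ 63, 0 }, { 32, 0 }, { 29, 2 }, { 13, 1 },
	};

	for (unsigned int i = 0; i < sizeof(d) / sizeof(d[0]); i++)
		printf("%lu slots, order %u -> ~%lu bytes per slot\n",
		       d[i].nslots, d[i].order,
		       approx_slot_size(d[i].nslots, d[i].order));
	return 0;
}

The exact values come from SLOT_SIZE() and ZBLOCK_HEADER_SIZE in
mm/zblock.h and will differ slightly from this estimate.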

Test results (zstd compressor, 8-core Ryzen 9 VM, make bzImage):

- zblock:
  real	6m52.621s
  user	33m41.771s
  sys	6m28.825s
  Zswap:     162328 kB
  Zswapped:  754468 kB
  zswpin     93851
  zswpout    542481
  zswpwb     935

- zsmalloc:
  real	7m4.355s
  user	34m37.538s
  sys	6m22.086s
  zswpin     101243
  zswpout    448217
  zswpwb     640
  Zswap:     175704 kB
  Zswapped:  778692 kB
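
One more illustration-only note before the diff: a zblock handle packs
the page-aligned block address together with the block type and the slot
index in the low PAGE_SIZE bits, mirroring metadata_to_handle() and
handle_to_metadata() below. In the standalone sketch that follows,
SLOT_BITS matches the 4K-page value from zblock.h, while SLOT_MASK, the
sample block address and the type/slot values are made up:

#include <stdio.h>

#define PAGE_SIZE	0x1000UL			/* assuming 4K pages */
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define SLOT_BITS	7				/* 4K-page value from zblock.h */
#define SLOT_MASK	((1UL << SLOT_BITS) - 1)	/* assumed definition */

int main(void)
{
	unsigned long block = 0x12340000UL;	/* page-aligned block address (example) */
	unsigned long block_type = 5, slot = 17;

	/* encode: block type and slot index live in the low PAGE_SIZE bits */
	unsigned long handle = block | (block_type << SLOT_BITS) | slot;

	/* decode, same arithmetic as handle_to_metadata() */
	unsigned long type = (handle & (PAGE_SIZE - 1)) >> SLOT_BITS;
	unsigned long s = handle & SLOT_MASK;
	unsigned long b = handle & PAGE_MASK;

	printf("block=%#lx type=%lu slot=%lu\n", b, type, s);
	return 0;
}

This is also why the descriptor table cannot grow past
PAGE_SIZE >> SLOT_BITS entries (32 for 4K pages, matching the
"max table size 32" comment in zblock.h).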

 Documentation/mm/zblock.rst |  24 ++
 MAINTAINERS                 |   7 +
 mm/Kconfig                  |  12 +
 mm/Makefile                 |   1 +
 mm/zblock.c                 | 443 ++++++++++++++++++++++++++++++++++++
 mm/zblock.h                 | 176 ++++++++++++++
 6 files changed, 663 insertions(+)
 create mode 100644 Documentation/mm/zblock.rst
 create mode 100644 mm/zblock.c
 create mode 100644 mm/zblock.h

diff --git a/Documentation/mm/zblock.rst b/Documentation/mm/zblock.rst
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/Documentation/mm/zblock.rst
@@ -XXX,XX +XXX,XX @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======
+zblock
+======
+
...
+L:	linux-mm@kvack.org
+S:	Maintained
+F:	Documentation/mm/zblock.rst
+F:	mm/zblock.[ch]
+
 ZD1211RW WIRELESS DRIVER
 L:	linux-wireless@vger.kernel.org
 S:	Orphan
diff --git a/mm/Kconfig b/mm/Kconfig
index XXXXXXX..XXXXXXX 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -XXX,XX +XXX,XX @@ config ZSWAP_ZPOOL_DEFAULT
 	default "zsmalloc" if ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
 	default ""
 
+config ZBLOCK
+	tristate "Fast compression allocator with high density"
+	depends on ZPOOL
+	help
+	  A special purpose allocator for storing compressed pages.
+	  It stores integer number of same size compressed objects per
+	  its block. These blocks consist of several physical pages
+	  (2**n, i. e. 1/2/4/8).
+
+	  With zblock, it is possible to densely arrange objects of
+	  various sizes resulting in low internal fragmentation.
+
 config ZSMALLOC
 	tristate
 	prompt "N:1 compression allocator (zsmalloc)" if (ZSWAP || ZRAM)
diff --git a/mm/Makefile b/mm/Makefile
index XXXXXXX..XXXXXXX 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o
 obj-$(CONFIG_PAGE_OWNER) += page_owner.o
 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
 obj-$(CONFIG_ZPOOL) += zpool.o
+obj-$(CONFIG_ZBLOCK) += zblock.o
 obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
 obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
 obj-$(CONFIG_CMA) += cma.o
diff --git a/mm/zblock.c b/mm/zblock.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/mm/zblock.c
...
+#include <linux/zpool.h>
+#include "zblock.h"
+
+static struct rb_root block_desc_tree = RB_ROOT;
+
+/* Encode handle of a particular slot in the pool using metadata */
+static inline unsigned long metadata_to_handle(struct zblock_block *block,
+				unsigned int block_type, unsigned int slot)
+{
+	return (unsigned long)(block) | (block_type << SLOT_BITS) | slot;
+}
+
+/* Return block, block type and slot in the pool corresponding to handle */
+static inline struct zblock_block *handle_to_metadata(unsigned long handle,
+				unsigned int *block_type, unsigned int *slot)
+{
+	*block_type = (handle & (PAGE_SIZE - 1)) >> SLOT_BITS;
+	*slot = handle & SLOT_MASK;
+	return (struct zblock_block *)(handle & PAGE_MASK);
+}
+
+/*
+ * Find a block with at least one free slot and claim it.
+ * We make sure that the first block, if exists, will always work.
+ */
+static inline struct zblock_block *find_and_claim_block(struct block_list *b,
+				int block_type, unsigned long *handle)
+{
+	struct list_head *l = &b->active_list;
+	unsigned int slot;
+
+	if (!list_empty(l)) {
+		struct zblock_block *z = list_first_entry(l, typeof(*z), link);
+
+		if (--z->free_slots == 0)
+			list_move(&z->link, &b->full_list);
+		/*
+		 * There is a slot in the block and we just made sure it would
+		 * remain.
+		 * Find that slot and set the busy bit.
+		 */
+		for (slot = find_first_zero_bit(z->slot_info,
+				block_desc[block_type].slots_per_block);
+		     slot < block_desc[block_type].slots_per_block;
+		     slot = find_next_zero_bit(z->slot_info,
+				block_desc[block_type].slots_per_block,
+				slot)) {
+			if (!test_and_set_bit(slot, z->slot_info))
+				break;
+			barrier();
+		}
+
+		WARN_ON(slot >= block_desc[block_type].slots_per_block);
+		*handle = metadata_to_handle(z, block_type, slot);
+		return z;
+	}
+	return NULL;
+}
+
+/*
+ * allocate new block and add it to corresponding block list
+ */
+static struct zblock_block *alloc_block(struct zblock_pool *pool,
...
+	memset(&block->slot_info, 0, sizeof(block->slot_info));
+	set_bit(0, block->slot_info);
+	*handle = metadata_to_handle(block, block_type, 0);
+
+	spin_lock(&block_list->lock);
+	list_add(&block->link, &block_list->active_list);
+	block_list->block_count++;
+	spin_unlock(&block_list->lock);
+	return block;
+}
+
...
+
+	/* init each block list */
+	for (i = 0; i < ARRAY_SIZE(block_desc); i++) {
+		block_list = &pool->block_lists[i];
+		spin_lock_init(&block_list->lock);
+		INIT_LIST_HEAD(&block_list->full_list);
+		INIT_LIST_HEAD(&block_list->active_list);
+		block_list->block_count = 0;
+	}
+	return pool;
+}
+
...
+ */
+static int zblock_alloc(struct zblock_pool *pool, size_t size, gfp_t gfp,
+			unsigned long *handle)
+{
+	int block_type = -1;
+	struct zblock_block *block;
+	struct block_list *block_list;
+
+	if (!size)
+		return -EINVAL;
...
+			block_type = block_node->block_idx + 1;
+			break;
+			}
+		}
+	}
+	if (WARN_ON(block_type < 0))
+		return -EINVAL;
+	if (block_type >= ARRAY_SIZE(block_desc))
+		return -ENOSPC;
+
+	block_list = &pool->block_lists[block_type];
+
+	spin_lock(&block_list->lock);
+	block = find_and_claim_block(block_list, block_type, handle);
+	spin_unlock(&block_list->lock);
+	if (block)
+		return 0;
+
+	/* not found block with free slots try to allocate new empty block */
+	block = alloc_block(pool, block_type, gfp & ~(__GFP_MOVABLE | __GFP_HIGHMEM), handle);
+	return block ? 0 : -ENOMEM;
+}
+
+/**
+ * zblock_free() - frees the allocation associated with the given handle
+ * @pool:	pool in which the allocation resided
...
+
+	spin_lock(&block_list->lock);
+	/* if all slots in block are empty delete whole block */
+	if (++block->free_slots == block_desc[block_type].slots_per_block) {
+		block_list->block_count--;
+		list_del(&block->link);
+		spin_unlock(&block_list->lock);
+		free_pages((unsigned long)block, block_desc[block_type].order);
+		return;
+	} else if (block->free_slots == 1)
+		list_move_tail(&block->link, &block_list->active_list);
+	clear_bit(slot, block->slot_info);
+	spin_unlock(&block_list->lock);
+}
+
+/**
+ * zblock_map() - maps the allocation associated with the given handle
+ * @pool:	pool in which the allocation resides
...
+static void zblock_unmap(struct zblock_pool *pool, unsigned long handle)
+{
+}
+
+/**
+ * zblock_write() - write to the memory area defined by handle
+ * @pool:	pool in which the allocation resides
+ * @handle:	handle associated with the allocation
+ * @handle_mem: pointer to source memory block
+ * @mem_len:	length of the memory block to write
+ */
+static void zblock_write(struct zblock_pool *pool, unsigned long handle,
+			 void *handle_mem, size_t mem_len)
+{
+	unsigned int block_type, slot;
+	struct zblock_block *block;
+	unsigned long offs;
+	void *p;
+
+	block = handle_to_metadata(handle, &block_type, &slot);
+	offs = ZBLOCK_HEADER_SIZE + slot * block_desc[block_type].slot_size;
+	p = (void *)block + offs;
+	memcpy(p, handle_mem, mem_len);
+}
443 | +/** | ||
474 | + * zblock_get_total_pages() - gets the zblock pool size in pages | 444 | + * zblock_get_total_pages() - gets the zblock pool size in pages |
475 | + * @pool: pool being queried | 445 | + * @pool: pool being queried |
476 | + * | 446 | + * |
477 | + * Returns: size in bytes of the given pool. | 447 | + * Returns: size in bytes of the given pool. |
478 | + */ | 448 | + */ |
...
+static void zblock_zpool_free(void *pool, unsigned long handle)
+{
+	zblock_free(pool, handle);
+}
+
+static void *zblock_zpool_read_begin(void *pool, unsigned long handle,
+				void *local_copy)
+{
+	return zblock_map(pool, handle);
+}
+
+static void zblock_zpool_obj_write(void *pool, unsigned long handle,
+				void *handle_mem, size_t mem_len)
+{
+	zblock_write(pool, handle, handle_mem, mem_len);
+}
+
+static void zblock_zpool_read_end(void *pool, unsigned long handle,
+				void *handle_mem)
+{
+	zblock_unmap(pool, handle);
+}
+
+static u64 zblock_zpool_total_pages(void *pool)
+{
+	return zblock_get_total_pages(pool);
+}
+
+static struct zpool_driver zblock_zpool_driver = {
+	.type = "zblock",
+	.owner = THIS_MODULE,
+	.create = zblock_zpool_create,
+	.destroy = zblock_zpool_destroy,
+	.malloc = zblock_zpool_malloc,
+	.free = zblock_zpool_free,
+	.obj_read_begin = zblock_zpool_read_begin,
+	.obj_read_end = zblock_zpool_read_end,
+	.obj_write = zblock_zpool_obj_write,
+	.total_pages = zblock_zpool_total_pages,
+};
+
+MODULE_ALIAS("zpool-zblock");
+
+static void delete_rbtree(void)
...
+	struct rb_node **new = &block_desc_tree.rb_node, *parent = NULL;
+
+	if (!block_node) {
+		delete_rbtree();
+		return -ENOMEM;
+	}
+	if (i > 0 && block_desc[i].slot_size <= block_desc[i-1].slot_size) {
+		pr_err("%s: block descriptors not in ascending order\n",
+			__func__);
+		delete_rbtree();
+		return -EINVAL;
+	}
+	block_node->this_slot_size = block_desc[i].slot_size;
+	block_node->block_idx = i;
+	if (i == ARRAY_SIZE(block_desc) - 1)
+		block_node->next_slot_size = PAGE_SIZE;
...
+#define SLOT_FREE 0
+#define BIT_SLOT_OCCUPIED 0
+#define BIT_SLOT_MAPPED 1
+
+#if PAGE_SIZE == 0x1000
+/* max 128 slots per block, max table size 32 */
+#define SLOT_BITS 7
+#elif PAGE_SIZE == 0x4000
+/* max 256 slots per block, max table size 64 */
+#define SLOT_BITS 8
+#else
+#error Unsupported PAGE_SIZE
+#endif
+
...
+	unsigned int next_slot_size;
+	unsigned int block_idx;
+};
+
+static const struct block_desc block_desc[] = {
+#if PAGE_SIZE == 0x1000
+	{ SLOT_SIZE(63, 0), 63, 0 },
+	{ SLOT_SIZE(32, 0), 32, 0 },
+	{ SLOT_SIZE(21, 0), 21, 0 },
+	{ SLOT_SIZE(15, 0), 15, 0 },
+	{ SLOT_SIZE(12, 0), 12, 0 },
+	{ SLOT_SIZE(10, 0), 10, 0 },
+	{ SLOT_SIZE(9, 0), 9, 0 },
+	{ SLOT_SIZE(8, 0), 8, 0 },
+	{ SLOT_SIZE(29, 2), 29, 2 },
+	{ SLOT_SIZE(13, 1), 13, 1 },
+	{ SLOT_SIZE(6, 0), 6, 0 },
+	{ SLOT_SIZE(11, 1), 11, 1 },
+	{ SLOT_SIZE(5, 0), 5, 0 },
+	{ SLOT_SIZE(9, 1), 9, 1 },
+	{ SLOT_SIZE(8, 1), 8, 1 },
+	{ SLOT_SIZE(29, 3), 29, 3 },
+	{ SLOT_SIZE(13, 2), 13, 2 },
+	{ SLOT_SIZE(12, 2), 12, 2 },
+	{ SLOT_SIZE(11, 2), 11, 2 },
+	{ SLOT_SIZE(10, 2), 10, 2 },
+	{ SLOT_SIZE(9, 2), 9, 2 },
+	{ SLOT_SIZE(17, 3), 17, 3 },
+	{ SLOT_SIZE(8, 2), 8, 2 },
+	{ SLOT_SIZE(15, 3), 15, 3 },
+	{ SLOT_SIZE(14, 3), 14, 3 },
+	{ SLOT_SIZE(13, 3), 13, 3 },
+	{ SLOT_SIZE(6, 2), 6, 2 },
+	{ SLOT_SIZE(11, 3), 11, 3 },
+	{ SLOT_SIZE(10, 3), 10, 3 },
+	{ SLOT_SIZE(9, 3), 9, 3 },
+	{ SLOT_SIZE(4, 2), 4, 2 },
+#elif PAGE_SIZE == 0x4000
+	{ SLOT_SIZE(255, 0), 255, 0 },
+	{ SLOT_SIZE(185, 0), 185, 0 },
+	{ SLOT_SIZE(145, 0), 145, 0 },
+	{ SLOT_SIZE(113, 0), 113, 0 },
+	{ SLOT_SIZE(92, 0), 92, 0 },
+	{ SLOT_SIZE(75, 0), 75, 0 },
+	{ SLOT_SIZE(60, 0), 60, 0 },
+	{ SLOT_SIZE(51, 0), 51, 0 },
+	{ SLOT_SIZE(43, 0), 43, 0 },
+	{ SLOT_SIZE(37, 0), 37, 0 },
+	{ SLOT_SIZE(32, 0), 32, 0 },
+	{ SLOT_SIZE(27, 0), 27, 0 },
+	{ SLOT_SIZE(23, 0), 23, 0 },
+	{ SLOT_SIZE(19, 0), 19, 0 },
+	{ SLOT_SIZE(17, 0), 17, 0 },
+	{ SLOT_SIZE(15, 0), 15, 0 },
+	{ SLOT_SIZE(13, 0), 13, 0 },
+	{ SLOT_SIZE(11, 0), 11, 0 },
+	{ SLOT_SIZE(10, 0), 10, 0 },
+	{ SLOT_SIZE(9, 0), 9, 0 },
+	{ SLOT_SIZE(8, 0), 8, 0 },
+	{ SLOT_SIZE(15, 1), 15, 1 },
+	{ SLOT_SIZE(14, 1), 14, 1 },
+	{ SLOT_SIZE(13, 1), 13, 1 },
+	{ SLOT_SIZE(12, 1), 12, 1 },
+	{ SLOT_SIZE(11, 1), 11, 1 },
+	{ SLOT_SIZE(10, 1), 10, 1 },
+	{ SLOT_SIZE(9, 1), 9, 1 },
+	{ SLOT_SIZE(8, 1), 8, 1 },
...
+	{ SLOT_SIZE(12, 2), 12, 2 },
+	{ SLOT_SIZE(11, 2), 11, 2 },
+	{ SLOT_SIZE(10, 2), 10, 2 },
+	{ SLOT_SIZE(9, 2), 9, 2 },
+	{ SLOT_SIZE(8, 2), 8, 2 },
+	{ SLOT_SIZE(7, 2), 7, 2 },
+	{ SLOT_SIZE(6, 2), 6, 2 },
+	{ SLOT_SIZE(5, 2), 5, 2 },
+#endif /* PAGE_SIZE */
+};
+
+/**
+ * struct block_list - stores metadata of particular list
+ * lock:		protects the list of blocks
+ * active_list:		linked list of active (non-full) blocks
+ * full_list:		linked list of full blocks
+ * block_count:		total number of blocks in the list
+ */
+struct block_list {
+	spinlock_t lock;
+	struct list_head active_list;
+	struct list_head full_list;
+	unsigned long block_count;
+};
+
+/**
+ * struct zblock_pool - stores metadata for each zblock pool
...
+};
+
+
+#endif
-- 
2.49.0