1 | The whole shebang can also be found at: | ||
---|---|---|---|
2 | https://web.git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git/log/?h=work.freeze | ||
3 | |||
4 | I know nothing about power or hibernation. I've tested it as best as I | ||
5 | could. Works for me (TM). | ||
6 | |||
7 | I need to catch some actual sleep now... | ||
8 | |||
9 | --- | ||
10 | |||
11 | Now all the pieces are in place to actually allow the power subsystem to | 1 | Now all the pieces are in place to actually allow the power subsystem to |
12 | freeze/thaw filesystems during suspend/resume. Filesystems are only | 2 | freeze/thaw filesystems during suspend/resume. Filesystems are only |
13 | frozen and thawed if the power subsystem does actually own the freeze. | 3 | frozen and thawed if the power subsystem does actually own the freeze. |
14 | 4 | ||
15 | Otherwise it risks thawing filesystems it didn't own. This could be | 5 | Otherwise it risks thawing filesystems it didn't own. This could be |
... | ... | ||
32 | with it before making our lives even harder (and uglier) than we have | 22 | with it before making our lives even harder (and uglier) than we have |
33 | to. | 23 | to. |
34 | 24 | ||
35 | Signed-off-by: Christian Brauner <brauner@kernel.org> | 25 | Signed-off-by: Christian Brauner <brauner@kernel.org> |
36 | --- | 26 | --- |
37 | Christian Brauner (3): | 27 | Changes in v2: |
28 | - Drop all patches that remove TASK_FREEZABLE. | ||
29 | - Expand commit messages a bit. | ||
30 | - Link to v1: https://lore.kernel.org/r/20250401-work-freeze-v1-0-d000611d4ab0@kernel.org | ||
31 | |||
32 | --- | ||
33 | Christian Brauner (4): | ||
38 | fs: add owner of freeze/thaw | 34 | fs: add owner of freeze/thaw |
39 | fs: allow pagefault based writers to be frozen | 35 | fs: allow all writers to be frozen |
40 | power: freeze filesystems during suspend/resume | 36 | power: freeze filesystems during suspend/resume |
37 | kernfs: add warning about implementing freeze/thaw | ||
41 | 38 | ||
42 | Luis Chamberlain (3): | ||
43 | ext4: replace kthread freezing with auto fs freezing | ||
44 | btrfs: replace kthread freezing with auto fs freezing | ||
45 | xfs: replace kthread freezing with auto fs freezing | ||
46 | |||
47 | fs/btrfs/disk-io.c | 4 +-- | ||
48 | fs/btrfs/scrub.c | 2 +- | ||
49 | fs/ext4/mballoc.c | 2 +- | ||
50 | fs/ext4/super.c | 3 -- | ||
51 | fs/f2fs/gc.c | 6 ++-- | 39 | fs/f2fs/gc.c | 6 ++-- |
52 | fs/gfs2/super.c | 20 ++++++----- | 40 | fs/gfs2/super.c | 20 ++++++----- |
53 | fs/gfs2/sys.c | 4 +-- | 41 | fs/gfs2/sys.c | 4 +-- |
54 | fs/ioctl.c | 8 ++--- | 42 | fs/ioctl.c | 8 ++--- |
43 | fs/kernfs/mount.c | 15 +++++++++ | ||
55 | fs/super.c | 82 ++++++++++++++++++++++++++++++++++++--------- | 44 | fs/super.c | 82 ++++++++++++++++++++++++++++++++++++--------- |
56 | fs/xfs/scrub/fscounters.c | 4 +-- | 45 | fs/xfs/scrub/fscounters.c | 4 +-- |
57 | fs/xfs/xfs_discard.c | 2 +- | ||
58 | fs/xfs/xfs_log.c | 3 +- | ||
59 | fs/xfs/xfs_log_cil.c | 2 +- | ||
60 | fs/xfs/xfs_mru_cache.c | 2 +- | ||
61 | fs/xfs/xfs_notify_failure.c | 6 ++-- | 46 | fs/xfs/xfs_notify_failure.c | 6 ++-- |
62 | fs/xfs/xfs_pwork.c | 2 +- | 47 | include/linux/fs.h | 16 +++++---- |
63 | fs/xfs/xfs_super.c | 14 ++++---- | 48 | kernel/power/hibernate.c | 16 ++++++++- |
64 | fs/xfs/xfs_trans_ail.c | 3 -- | 49 | kernel/power/main.c | 31 +++++++++++++++++ |
65 | fs/xfs/xfs_zone_gc.c | 2 -- | 50 | kernel/power/power.h | 4 +++ |
66 | include/linux/fs.h | 16 ++++++--- | 51 | kernel/power/suspend.c | 7 ++++ |
67 | kernel/power/hibernate.c | 13 ++++++- | 52 | 13 files changed, 174 insertions(+), 45 deletions(-) |
68 | kernel/power/suspend.c | 8 +++++ | ||
69 | 22 files changed, 139 insertions(+), 69 deletions(-) | ||
70 | --- | 53 | --- |
71 | base-commit: a68c99192db8060f383a2680333866c0be688ece | 54 | base-commit: 62dfd8d59e2d16873398ede5b1835e302df789b3 |
72 | change-id: 20250401-work-freeze-693b5b5a78e0 | 55 | change-id: 20250401-work-freeze-693b5b5a78e0 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Luis Chamberlain <mcgrof@kernel.org> | ||
2 | 1 | ||
3 | The kernel power management now supports allowing the VFS | ||
4 | to handle filesystem freezing and thawing. Take advantage | ||
5 | of that and remove the kthread freezing. This is needed so that we | ||
6 | really stop IO in flight without races after userspace | ||
7 | has been frozen. Without this we rely on kthread freezing and | ||
8 | its semantics are loose and error prone. | ||
9 | |||
10 | The filesystem therefore is in charge of properly dealing with | ||
11 | quiescing of the filesystem through its callbacks if it thinks | ||
12 | it knows better than how the VFS handles it. | ||
13 | |||
14 | The following Coccinelle rule was used to remove the now superfluous | ||
15 | freezer calls: | ||
16 | |||
17 | make coccicheck MODE=patch SPFLAGS="--in-place --no-show-diff" COCCI=./fs-freeze-cleanup.cocci M=fs/ext4 | ||
18 | |||
19 | virtual patch | ||
20 | |||
21 | @ remove_set_freezable @ | ||
22 | expression time; | ||
23 | statement S, S2; | ||
24 | expression task, current; | ||
25 | @@ | ||
26 | |||
27 | ( | ||
28 | - set_freezable(); | ||
29 | | | ||
30 | - if (try_to_freeze()) | ||
31 | - continue; | ||
32 | | | ||
33 | - try_to_freeze(); | ||
34 | | | ||
35 | - freezable_schedule(); | ||
36 | + schedule(); | ||
37 | | | ||
38 | - freezable_schedule_timeout(time); | ||
39 | + schedule_timeout(time); | ||
40 | | | ||
41 | - if (freezing(task)) { S } | ||
42 | | | ||
43 | - if (freezing(task)) { S } | ||
44 | - else | ||
45 | { S2 } | ||
46 | | | ||
47 | - freezing(current) | ||
48 | ) | ||
49 | |||
50 | @ remove_wq_freezable @ | ||
51 | expression WQ_E, WQ_ARG1, WQ_ARG2, WQ_ARG3, WQ_ARG4; | ||
52 | identifier fs_wq_fn; | ||
53 | @@ | ||
54 | |||
55 | ( | ||
56 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
57 | - WQ_ARG2 | WQ_FREEZABLE, | ||
58 | + WQ_ARG2, | ||
59 | ...); | ||
60 | | | ||
61 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
62 | - WQ_ARG2 | WQ_FREEZABLE | WQ_ARG3, | ||
63 | + WQ_ARG2 | WQ_ARG3, | ||
64 | ...); | ||
65 | | | ||
66 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
67 | - WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE, | ||
68 | + WQ_ARG2 | WQ_ARG3, | ||
69 | ...); | ||
70 | | | ||
71 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
72 | - WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE | WQ_ARG4, | ||
73 | + WQ_ARG2 | WQ_ARG3 | WQ_ARG4, | ||
74 | ...); | ||
75 | | | ||
76 | WQ_E = | ||
77 | - WQ_ARG1 | WQ_FREEZABLE | ||
78 | + WQ_ARG1 | ||
79 | | | ||
80 | WQ_E = | ||
81 | - WQ_ARG1 | WQ_FREEZABLE | WQ_ARG3 | ||
82 | + WQ_ARG1 | WQ_ARG3 | ||
83 | | | ||
84 | fs_wq_fn( | ||
85 | - WQ_FREEZABLE | WQ_ARG2 | WQ_ARG3 | ||
86 | + WQ_ARG2 | WQ_ARG3 | ||
87 | ) | ||
88 | | | ||
89 | fs_wq_fn( | ||
90 | - WQ_FREEZABLE | WQ_ARG2 | ||
91 | + WQ_ARG2 | ||
92 | ) | ||
93 | | | ||
94 | fs_wq_fn( | ||
95 | - WQ_FREEZABLE | ||
96 | + 0 | ||
97 | ) | ||
98 | ) | ||
99 | |||
100 | @ add_auto_flag @ | ||
101 | expression E1; | ||
102 | identifier fs_type; | ||
103 | @@ | ||
104 | |||
105 | struct file_system_type fs_type = { | ||
106 | .fs_flags = E1 | ||
107 | + | FS_AUTOFREEZE | ||
108 | , | ||
109 | }; | ||
110 | |||
111 | Generated-by: Coccinelle SmPL | ||
112 | Signed-off-by: Luis Chamberlain <mcgrof@kernel.org> | ||
113 | Link: https://lore.kernel.org/r/20250326112220.1988619-5-mcgrof@kernel.org | ||
114 | Signed-off-by: Christian Brauner <brauner@kernel.org> | ||
115 | --- | ||
116 | fs/ext4/mballoc.c | 2 +- | ||
117 | fs/ext4/super.c | 3 --- | ||
118 | 2 files changed, 1 insertion(+), 4 deletions(-) | ||
119 | |||
120 | diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/fs/ext4/mballoc.c | ||
123 | +++ b/fs/ext4/mballoc.c | ||
124 | @@ -XXX,XX +XXX,XX @@ static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, | ||
125 | |||
126 | static bool ext4_trim_interrupted(void) | ||
127 | { | ||
128 | - return fatal_signal_pending(current) || freezing(current); | ||
129 | + return fatal_signal_pending(current); | ||
130 | } | ||
131 | |||
132 | static int ext4_try_to_trim_range(struct super_block *sb, | ||
133 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/fs/ext4/super.c | ||
136 | +++ b/fs/ext4/super.c | ||
137 | @@ -XXX,XX +XXX,XX @@ static int ext4_lazyinit_thread(void *arg) | ||
138 | unsigned long next_wakeup, cur; | ||
139 | |||
140 | BUG_ON(NULL == eli); | ||
141 | - set_freezable(); | ||
142 | |||
143 | cont_thread: | ||
144 | while (true) { | ||
145 | @@ -XXX,XX +XXX,XX @@ static int ext4_lazyinit_thread(void *arg) | ||
146 | } | ||
147 | mutex_unlock(&eli->li_list_mtx); | ||
148 | |||
149 | - try_to_freeze(); | ||
150 | - | ||
151 | cur = jiffies; | ||
152 | if (!next_wakeup_initialized || time_after_eq(cur, next_wakeup)) { | ||
153 | cond_resched(); | ||
154 | |||
155 | -- | ||
156 | 2.47.2 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Luis Chamberlain <mcgrof@kernel.org> | ||
2 | 1 | ||
3 | The kernel power management now supports allowing the VFS | ||
4 | to handle filesystem freezing and thawing. Take advantage | ||
5 | of that and remove the kthread freezing. This is needed so that we | ||
6 | really stop IO in flight without races after userspace | ||
7 | has been frozen. Without this we rely on kthread freezing and | ||
8 | its semantics are loose and error prone. | ||
9 | |||
10 | The filesystem therefore is in charge of properly dealing with | ||
11 | quiescing of the filesystem through its callbacks if it thinks | ||
12 | it knows better than how the VFS handles it. | ||
13 | |||
14 | The following Coccinelle rule was used to remove the now superfluous | ||
15 | freezer calls: | ||
16 | |||
17 | make coccicheck MODE=patch SPFLAGS="--in-place --no-show-diff" COCCI=./fs-freeze-cleanup.cocci M=fs/btrfs | ||
18 | |||
19 | virtual patch | ||
20 | |||
21 | @ remove_set_freezable @ | ||
22 | expression time; | ||
23 | statement S, S2; | ||
24 | expression task, current; | ||
25 | @@ | ||
26 | |||
27 | ( | ||
28 | - set_freezable(); | ||
29 | | | ||
30 | - if (try_to_freeze()) | ||
31 | - continue; | ||
32 | | | ||
33 | - try_to_freeze(); | ||
34 | | | ||
35 | - freezable_schedule(); | ||
36 | + schedule(); | ||
37 | | | ||
38 | - freezable_schedule_timeout(time); | ||
39 | + schedule_timeout(time); | ||
40 | | | ||
41 | - if (freezing(task)) { S } | ||
42 | | | ||
43 | - if (freezing(task)) { S } | ||
44 | - else | ||
45 | { S2 } | ||
46 | | | ||
47 | - freezing(current) | ||
48 | ) | ||
49 | |||
50 | @ remove_wq_freezable @ | ||
51 | expression WQ_E, WQ_ARG1, WQ_ARG2, WQ_ARG3, WQ_ARG4; | ||
52 | identifier fs_wq_fn; | ||
53 | @@ | ||
54 | |||
55 | ( | ||
56 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
57 | - WQ_ARG2 | WQ_FREEZABLE, | ||
58 | + WQ_ARG2, | ||
59 | ...); | ||
60 | | | ||
61 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
62 | - WQ_ARG2 | WQ_FREEZABLE | WQ_ARG3, | ||
63 | + WQ_ARG2 | WQ_ARG3, | ||
64 | ...); | ||
65 | | | ||
66 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
67 | - WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE, | ||
68 | + WQ_ARG2 | WQ_ARG3, | ||
69 | ...); | ||
70 | | | ||
71 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
72 | - WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE | WQ_ARG4, | ||
73 | + WQ_ARG2 | WQ_ARG3 | WQ_ARG4, | ||
74 | ...); | ||
75 | | | ||
76 | WQ_E = | ||
77 | - WQ_ARG1 | WQ_FREEZABLE | ||
78 | + WQ_ARG1 | ||
79 | | | ||
80 | WQ_E = | ||
81 | - WQ_ARG1 | WQ_FREEZABLE | WQ_ARG3 | ||
82 | + WQ_ARG1 | WQ_ARG3 | ||
83 | | | ||
84 | fs_wq_fn( | ||
85 | - WQ_FREEZABLE | WQ_ARG2 | WQ_ARG3 | ||
86 | + WQ_ARG2 | WQ_ARG3 | ||
87 | ) | ||
88 | | | ||
89 | fs_wq_fn( | ||
90 | - WQ_FREEZABLE | WQ_ARG2 | ||
91 | + WQ_ARG2 | ||
92 | ) | ||
93 | | | ||
94 | fs_wq_fn( | ||
95 | - WQ_FREEZABLE | ||
96 | + 0 | ||
97 | ) | ||
98 | ) | ||
99 | |||
100 | @ add_auto_flag @ | ||
101 | expression E1; | ||
102 | identifier fs_type; | ||
103 | @@ | ||
104 | |||
105 | struct file_system_type fs_type = { | ||
106 | .fs_flags = E1 | ||
107 | + | FS_AUTOFREEZE | ||
108 | , | ||
109 | }; | ||
110 | |||
111 | Generated-by: Coccinelle SmPL | ||
112 | Signed-off-by: Luis Chamberlain <mcgrof@kernel.org> | ||
113 | Link: https://lore.kernel.org/r/20250326112220.1988619-6-mcgrof@kernel.org | ||
114 | Signed-off-by: Christian Brauner <brauner@kernel.org> | ||
115 | --- | ||
116 | fs/btrfs/disk-io.c | 4 ++-- | ||
117 | fs/btrfs/scrub.c | 2 +- | ||
118 | 2 files changed, 3 insertions(+), 3 deletions(-) | ||
119 | |||
120 | diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c | ||
121 | index XXXXXXX..XXXXXXX 100644 | ||
122 | --- a/fs/btrfs/disk-io.c | ||
123 | +++ b/fs/btrfs/disk-io.c | ||
124 | @@ -XXX,XX +XXX,XX @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) | ||
125 | static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) | ||
126 | { | ||
127 | u32 max_active = fs_info->thread_pool_size; | ||
128 | - unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; | ||
129 | - unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE; | ||
130 | + unsigned int flags = WQ_MEM_RECLAIM | WQ_UNBOUND; | ||
131 | + unsigned int ordered_flags = WQ_MEM_RECLAIM; | ||
132 | |||
133 | fs_info->workers = | ||
134 | btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16); | ||
135 | diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c | ||
136 | index XXXXXXX..XXXXXXX 100644 | ||
137 | --- a/fs/btrfs/scrub.c | ||
138 | +++ b/fs/btrfs/scrub.c | ||
139 | @@ -XXX,XX +XXX,XX @@ static void scrub_workers_put(struct btrfs_fs_info *fs_info) | ||
140 | static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info) | ||
141 | { | ||
142 | struct workqueue_struct *scrub_workers = NULL; | ||
143 | - unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; | ||
144 | + unsigned int flags = WQ_UNBOUND; | ||
145 | int max_active = fs_info->thread_pool_size; | ||
146 | int ret = -ENOMEM; | ||
147 | |||
148 | |||
149 | -- | ||
150 | 2.47.2 | diff view generated by jsdifflib |
1 | For some kernel subsystems it is paramount that they are guaranteed that | 1 | For some kernel subsystems it is paramount that they are guaranteed that |
---|---|---|---|
2 | they are the owner of the freeze to avoid any risk of deadlocks. This is | 2 | they are the owner of the freeze to avoid any risk of deadlocks. This is |
3 | the case for the power subsystem. Enable it to recognize whether it did | 3 | the case for the power subsystem. Enable it to recognize whether it did |
4 | actually freeze the filesystem. | 4 | actually freeze the filesystem. |
5 | 5 | ||
6 | If userspace has 10 filesystems and suspend/hibernate manages to freeze 5 | ||
7 | and then fails on the 6th for whatever odd reason (current or future) | ||
8 | then power needs to undo the freeze of the first 5 filesystems. It can't | ||
9 | just walk the list again because while it's unlikely that a new | ||
10 | filesystem got added in the meantime it still cannot tell which | ||
11 | filesystems the power subsystem actually managed to get a freeze | ||
12 | reference count on that needs to be dropped during thaw. | ||
13 | |||
14 | There are various ways out of this ugliness. For example, record the | ||
15 | filesystems the power subsystem managed to freeze on a temporary list in | ||
16 | the callbacks and then walk that list backwards during thaw to undo the | ||
17 | freezing or make sure that the power subsystem just actually exclusively | ||
18 | freezes things it can freeze and marks such filesystems as being owned | ||
19 | by power for the duration of the suspend or resume cycle. I opted for | ||
20 | the latter as that seemed the clean thing to do even if it means more | ||
21 | code changes. | ||
22 | |||
6 | Signed-off-by: Christian Brauner <brauner@kernel.org> | 23 | Signed-off-by: Christian Brauner <brauner@kernel.org> |
7 | --- | 24 | --- |
8 | fs/f2fs/gc.c | 6 ++-- | 25 | fs/f2fs/gc.c | 6 ++-- |
9 | fs/gfs2/super.c | 20 +++++++------ | 26 | fs/gfs2/super.c | 20 ++++++------ |
10 | fs/gfs2/sys.c | 4 +-- | 27 | fs/gfs2/sys.c | 4 +-- |
11 | fs/ioctl.c | 8 +++--- | 28 | fs/ioctl.c | 8 ++--- |
12 | fs/super.c | 68 +++++++++++++++++++++++++++++++++++++-------- | 29 | fs/super.c | 76 ++++++++++++++++++++++++++++++++++++--------- |
13 | fs/xfs/scrub/fscounters.c | 4 +-- | 30 | fs/xfs/scrub/fscounters.c | 4 +-- |
14 | fs/xfs/xfs_notify_failure.c | 6 ++-- | 31 | fs/xfs/xfs_notify_failure.c | 6 ++-- |
15 | include/linux/fs.h | 13 ++++++--- | 32 | include/linux/fs.h | 13 +++++--- |
16 | 8 files changed, 91 insertions(+), 38 deletions(-) | 33 | 8 files changed, 95 insertions(+), 42 deletions(-) |
17 | 34 | ||
18 | diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c | 35 | diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c |
19 | index XXXXXXX..XXXXXXX 100644 | 36 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/fs/f2fs/gc.c | 37 | --- a/fs/f2fs/gc.c |
21 | +++ b/fs/f2fs/gc.c | 38 | +++ b/fs/f2fs/gc.c |
... | ... | ||
192 | - thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE); | 209 | - thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE); |
193 | + thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE, NULL); | 210 | + thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE, NULL); |
194 | return; | 211 | return; |
195 | } | 212 | } |
196 | 213 | ||
214 | @@ -XXX,XX +XXX,XX @@ static void filesystems_freeze_callback(struct super_block *sb, void *unused) | ||
215 | return; | ||
216 | |||
217 | if (sb->s_op->freeze_super) | ||
218 | - sb->s_op->freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL); | ||
219 | + sb->s_op->freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL, NULL); | ||
220 | else | ||
221 | - freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL); | ||
222 | + freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL, NULL); | ||
223 | |||
224 | deactivate_super(sb); | ||
225 | } | ||
226 | @@ -XXX,XX +XXX,XX @@ static void filesystems_thaw_callback(struct super_block *sb, void *unused) | ||
227 | return; | ||
228 | |||
229 | if (sb->s_op->thaw_super) | ||
230 | - sb->s_op->thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL); | ||
231 | + sb->s_op->thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL, NULL); | ||
232 | else | ||
233 | - thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL); | ||
234 | + thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL, NULL); | ||
235 | |||
236 | deactivate_super(sb); | ||
237 | } | ||
197 | @@ -XXX,XX +XXX,XX @@ static int fs_bdev_freeze(struct block_device *bdev) | 238 | @@ -XXX,XX +XXX,XX @@ static int fs_bdev_freeze(struct block_device *bdev) |
198 | 239 | ||
199 | if (sb->s_op->freeze_super) | 240 | if (sb->s_op->freeze_super) |
200 | error = sb->s_op->freeze_super(sb, | 241 | error = sb->s_op->freeze_super(sb, |
201 | - FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); | 242 | - FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE); |
... | ... | diff view generated by jsdifflib |
1 | Otherwise tasks such as systemd-journald that mmap a file and write to | 1 | During freeze/thaw we need to be able to freeze all writers during |
---|---|---|---|
2 | it will not be frozen after we've frozen the filesystem. | 2 | suspend/hibernate. Otherwise tasks such as systemd-journald that mmap a |
3 | file and write to it will not be frozen after we've already frozen the | ||
4 | filesystem. | ||
5 | |||
6 | This has some risk of not being able to freeze processes in case a | ||
7 | process has acquired SB_FREEZE_PAGEFAULT under mmap_sem or | ||
8 | SB_FREEZE_INTERNAL under some other filesystem specific lock. If the | ||
9 | filesystem is frozen, a task can block on the frozen filesystem with | ||
10 | e.g., mmap_sem held. If some other task then blocks on grabbing that | ||
11 | mmap_sem, hibernation will fail because it is unable to hibernate a task | ||
12 | holding mmap_sem. This could be fixed by making a range of filesystem | ||
13 | related locks use freezable sleeping. That's impractical and not | ||
14 | warranted just for suspend/hibernate. Assume that this is an infrequent | ||
15 | problem and we've given userspace a way to skip filesystem freezing | ||
16 | through a sysfs file. | ||
3 | 17 | ||
4 | Signed-off-by: Christian Brauner <brauner@kernel.org> | 18 | Signed-off-by: Christian Brauner <brauner@kernel.org> |
5 | --- | 19 | --- |
6 | include/linux/fs.h | 3 ++- | 20 | include/linux/fs.h | 3 +-- |
7 | 1 file changed, 2 insertions(+), 1 deletion(-) | 21 | 1 file changed, 1 insertion(+), 2 deletions(-) |
8 | 22 | ||
9 | diff --git a/include/linux/fs.h b/include/linux/fs.h | 23 | diff --git a/include/linux/fs.h b/include/linux/fs.h |
10 | index XXXXXXX..XXXXXXX 100644 | 24 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/include/linux/fs.h | 25 | --- a/include/linux/fs.h |
12 | +++ b/include/linux/fs.h | 26 | +++ b/include/linux/fs.h |
13 | @@ -XXX,XX +XXX,XX @@ static inline void __sb_end_write(struct super_block *sb, int level) | 27 | @@ -XXX,XX +XXX,XX @@ static inline void __sb_end_write(struct super_block *sb, int level) |
28 | |||
14 | static inline void __sb_start_write(struct super_block *sb, int level) | 29 | static inline void __sb_start_write(struct super_block *sb, int level) |
15 | { | 30 | { |
16 | percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, | 31 | - percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, |
17 | - level == SB_FREEZE_WRITE); | 32 | - level == SB_FREEZE_WRITE); |
18 | + (level == SB_FREEZE_WRITE || | 33 | + percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true); |
19 | + level == SB_FREEZE_PAGEFAULT)); | ||
20 | } | 34 | } |
21 | 35 | ||
22 | static inline bool __sb_start_write_trylock(struct super_block *sb, int level) | 36 | static inline bool __sb_start_write_trylock(struct super_block *sb, int level) |
23 | 37 | ||
24 | -- | 38 | -- |
25 | 2.47.2 | 39 | 2.47.2 | diff view generated by jsdifflib |
1 | Now all the pieces are in place to actually allow the power subsystem | 1 | Now all the pieces are in place to actually allow the power subsystem |
---|---|---|---|
2 | to freeze/thaw filesystems during suspend/resume. Filesystems are only | 2 | to freeze/thaw filesystems during suspend/resume. Filesystems are only |
3 | frozen and thawed if the power subsystem does actually own the freeze. | 3 | frozen and thawed if the power subsystem does actually own the freeze. |
4 | |||
5 | Otherwise it risks thawing filesystems it didn't own. This could be | ||
6 | done differently by e.g., keeping the filesystems that were actually | ||
7 | frozen on a list and then unfreezing them from that list. This is | ||
8 | disgustingly unclean though and reeks of an ugly hack. | ||
9 | |||
10 | If the filesystem is already frozen by the time we've frozen all | ||
11 | userspace processes we don't care to freeze it again. That's userspace's | ||
12 | job once the process resumes. We only actually freeze filesystems if we | ||
13 | absolutely have to and we ignore other failures to freeze for now. | ||
14 | 4 | ||
15 | We could bubble up errors and fail suspend/resume if the error isn't | 5 | We could bubble up errors and fail suspend/resume if the error isn't |
16 | EBUSY (aka it's already frozen) but I don't think that this is worth it. | 6 | EBUSY (aka it's already frozen) but I don't think that this is worth it. |
17 | Filesystem freezing during suspend/resume is best-effort. If the user | 7 | Filesystem freezing during suspend/resume is best-effort. If the user |
18 | has 500 ext4 filesystems mounted and 4 fail to freeze for whatever | 8 | has 500 ext4 filesystems mounted and 4 fail to freeze for whatever |
19 | reason then we simply skip them. | 9 | reason then we simply skip them. |
20 | 10 | ||
21 | What we have now is already a big improvement and let's see how we fare | 11 | What we have now is already a big improvement and let's see how we fare |
22 | with it before making our lives even harder (and uglier) than we have | 12 | with it before making our lives even harder (and uglier) than we have |
23 | to. | 13 | to. |
24 | 14 | ||
15 | We add a new sysfs knob /sys/power/freeze_filesystems that will allow | ||
16 | userspace to freeze filesystems during suspend/hibernate. For now it | ||
17 | defaults to off. The thaw logic doesn't require checking whether | ||
18 | freezing is enabled because the power subsystem exclusively owns frozen | ||
19 | filesystems for the duration of suspend/hibernate and is able to skip | ||
20 | filesystems it doesn't need to freeze. | ||
21 | |||
22 | Also it is technically possible that | ||
23 | filesystem_freeze_enabled is true and power freezes the filesystems but | ||
24 | before freezing all processes another process disables | ||
25 | filesystem_freeze_enabled. If power were to place the filesystems_thaw() | ||
26 | call under filesystem_freeze_enabled it would fail to thaw the | ||
27 | filesystems it froze. The exclusive holder mechanism makes it possible to | ||
28 | iterate through the list without any concern making sure that no | ||
29 | filesystems are left frozen. | ||
30 | |||
25 | Signed-off-by: Christian Brauner <brauner@kernel.org> | 31 | Signed-off-by: Christian Brauner <brauner@kernel.org> |
26 | --- | 32 | --- |
27 | fs/super.c | 14 ++++++++++---- | 33 | fs/super.c | 14 ++++++++++---- |
28 | kernel/power/hibernate.c | 13 ++++++++++++- | 34 | kernel/power/hibernate.c | 16 +++++++++++++++- |
29 | kernel/power/suspend.c | 8 ++++++++ | 35 | kernel/power/main.c | 31 +++++++++++++++++++++++++++++++ |
30 | 3 files changed, 30 insertions(+), 5 deletions(-) | 36 | kernel/power/power.h | 4 ++++ |
37 | kernel/power/suspend.c | 7 +++++++ | ||
38 | 5 files changed, 67 insertions(+), 5 deletions(-) | ||
31 | 39 | ||
32 | diff --git a/fs/super.c b/fs/super.c | 40 | diff --git a/fs/super.c b/fs/super.c |
33 | index XXXXXXX..XXXXXXX 100644 | 41 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/fs/super.c | 42 | --- a/fs/super.c |
35 | +++ b/fs/super.c | 43 | +++ b/fs/super.c |
36 | @@ -XXX,XX +XXX,XX @@ static inline bool get_active_super(struct super_block *sb) | 44 | @@ -XXX,XX +XXX,XX @@ static inline bool get_active_super(struct super_block *sb) |
37 | return active; | 45 | return active; |
38 | } | 46 | } |
39 | 47 | ||
40 | +static const void *filesystems_freeze_ptr; | 48 | +static const char *filesystems_freeze_ptr = "filesystems_freeze"; |
41 | + | 49 | + |
42 | static void filesystems_freeze_callback(struct super_block *sb, void *unused) | 50 | static void filesystems_freeze_callback(struct super_block *sb, void *unused) |
43 | { | 51 | { |
44 | if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super) | 52 | if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super) |
45 | @@ -XXX,XX +XXX,XX @@ static void filesystems_freeze_callback(struct super_block *sb, void *unused) | 53 | @@ -XXX,XX +XXX,XX @@ static void filesystems_freeze_callback(struct super_block *sb, void *unused) |
46 | return; | 54 | return; |
47 | 55 | ||
48 | if (sb->s_op->freeze_super) | 56 | if (sb->s_op->freeze_super) |
49 | - sb->s_op->freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL); | 57 | - sb->s_op->freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL, NULL); |
50 | + sb->s_op->freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, | 58 | + sb->s_op->freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, |
51 | + filesystems_freeze_ptr); | 59 | + filesystems_freeze_ptr); |
52 | else | 60 | else |
53 | - freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL); | 61 | - freeze_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL, NULL); |
54 | + freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, | 62 | + freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, |
55 | + filesystems_freeze_ptr); | 63 | + filesystems_freeze_ptr); |
56 | 64 | ||
57 | deactivate_super(sb); | 65 | deactivate_super(sb); |
58 | } | 66 | } |
59 | @@ -XXX,XX +XXX,XX @@ static void filesystems_thaw_callback(struct super_block *sb, void *unused) | 67 | @@ -XXX,XX +XXX,XX @@ static void filesystems_thaw_callback(struct super_block *sb, void *unused) |
60 | return; | 68 | return; |
61 | 69 | ||
62 | if (sb->s_op->thaw_super) | 70 | if (sb->s_op->thaw_super) |
63 | - sb->s_op->thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL); | 71 | - sb->s_op->thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL, NULL); |
64 | + sb->s_op->thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, | 72 | + sb->s_op->thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, |
65 | + filesystems_freeze_ptr); | 73 | + filesystems_freeze_ptr); |
66 | else | 74 | else |
67 | - thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL); | 75 | - thaw_super(sb, FREEZE_MAY_NEST | FREEZE_HOLDER_KERNEL, NULL); |
68 | + thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, | 76 | + thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL, |
69 | + filesystems_freeze_ptr); | 77 | + filesystems_freeze_ptr); |
70 | 78 | ||
71 | deactivate_super(sb); | 79 | deactivate_super(sb); |
72 | } | 80 | } |
... | ... | ||
76 | +++ b/kernel/power/hibernate.c | 84 | +++ b/kernel/power/hibernate.c |
77 | @@ -XXX,XX +XXX,XX @@ int hibernate(void) | 85 | @@ -XXX,XX +XXX,XX @@ int hibernate(void) |
78 | goto Restore; | 86 | goto Restore; |
79 | 87 | ||
80 | ksys_sync_helper(); | 88 | ksys_sync_helper(); |
81 | + filesystems_freeze(); | 89 | + if (filesystem_freeze_enabled) |
90 | + filesystems_freeze(); | ||
82 | 91 | ||
83 | error = freeze_processes(); | 92 | error = freeze_processes(); |
84 | if (error) | 93 | if (error) |
85 | @@ -XXX,XX +XXX,XX @@ int hibernate(void) | 94 | @@ -XXX,XX +XXX,XX @@ int hibernate(void) |
86 | error = load_image_and_restore(); | ||
87 | } | ||
88 | thaw_processes(); | ||
89 | + filesystems_thaw(); | ||
90 | |||
91 | /* Don't bother checking whether freezer_test_done is true */ | 95 | /* Don't bother checking whether freezer_test_done is true */ |
92 | freezer_test_done = false; | 96 | freezer_test_done = false; |
97 | Exit: | ||
98 | + filesystems_thaw(); | ||
99 | pm_notifier_call_chain(PM_POST_HIBERNATION); | ||
100 | Restore: | ||
101 | pm_restore_console(); | ||
93 | @@ -XXX,XX +XXX,XX @@ int hibernate_quiet_exec(int (*func)(void *data), void *data) | 102 | @@ -XXX,XX +XXX,XX @@ int hibernate_quiet_exec(int (*func)(void *data), void *data) |
94 | if (error) | 103 | if (error) |
95 | goto restore; | 104 | goto restore; |
96 | 105 | ||
97 | + filesystems_freeze(); | 106 | + if (filesystem_freeze_enabled) |
107 | + filesystems_freeze(); | ||
98 | + | 108 | + |
99 | error = freeze_processes(); | 109 | error = freeze_processes(); |
100 | if (error) | 110 | if (error) |
101 | goto exit; | 111 | goto exit; |
102 | @@ -XXX,XX +XXX,XX @@ int hibernate_quiet_exec(int (*func)(void *data), void *data) | 112 | @@ -XXX,XX +XXX,XX @@ int hibernate_quiet_exec(int (*func)(void *data), void *data) |
... | ... | ||
109 | restore: | 119 | restore: |
110 | @@ -XXX,XX +XXX,XX @@ static int software_resume(void) | 120 | @@ -XXX,XX +XXX,XX @@ static int software_resume(void) |
111 | if (error) | 121 | if (error) |
112 | goto Restore; | 122 | goto Restore; |
113 | 123 | ||
114 | + filesystems_freeze(); | 124 | + if (filesystem_freeze_enabled) |
125 | + filesystems_freeze(); | ||
115 | + | 126 | + |
116 | pm_pr_dbg("Preparing processes for hibernation restore.\n"); | 127 | pm_pr_dbg("Preparing processes for hibernation restore.\n"); |
117 | error = freeze_processes(); | 128 | error = freeze_processes(); |
118 | - if (error) | 129 | - if (error) |
119 | + if (error) { | 130 | + if (error) { |
... | ... | ||
132 | thaw_processes(); | 143 | thaw_processes(); |
133 | + filesystems_thaw(); | 144 | + filesystems_thaw(); |
134 | Finish: | 145 | Finish: |
135 | pm_notifier_call_chain(PM_POST_RESTORE); | 146 | pm_notifier_call_chain(PM_POST_RESTORE); |
136 | Restore: | 147 | Restore: |
148 | diff --git a/kernel/power/main.c b/kernel/power/main.c | ||
149 | index XXXXXXX..XXXXXXX 100644 | ||
150 | --- a/kernel/power/main.c | ||
151 | +++ b/kernel/power/main.c | ||
152 | @@ -XXX,XX +XXX,XX @@ power_attr(pm_freeze_timeout); | ||
153 | |||
154 | #endif /* CONFIG_FREEZER*/ | ||
155 | |||
156 | +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) | ||
157 | +bool filesystem_freeze_enabled = false; | ||
158 | + | ||
159 | +static ssize_t freeze_filesystems_show(struct kobject *kobj, | ||
160 | + struct kobj_attribute *attr, char *buf) | ||
161 | +{ | ||
162 | + return sysfs_emit(buf, "%d\n", filesystem_freeze_enabled); | ||
163 | +} | ||
164 | + | ||
165 | +static ssize_t freeze_filesystems_store(struct kobject *kobj, | ||
166 | + struct kobj_attribute *attr, | ||
167 | + const char *buf, size_t n) | ||
168 | +{ | ||
169 | + unsigned long val; | ||
170 | + | ||
171 | + if (kstrtoul(buf, 10, &val)) | ||
172 | + return -EINVAL; | ||
173 | + | ||
174 | + if (val > 1) | ||
175 | + return -EINVAL; | ||
176 | + | ||
177 | + filesystem_freeze_enabled = !!val; | ||
178 | + return n; | ||
179 | +} | ||
180 | + | ||
181 | +power_attr(freeze_filesystems); | ||
182 | +#endif /* CONFIG_SUSPEND || CONFIG_HIBERNATION */ | ||
183 | + | ||
184 | static struct attribute * g[] = { | ||
185 | &state_attr.attr, | ||
186 | #ifdef CONFIG_PM_TRACE | ||
187 | @@ -XXX,XX +XXX,XX @@ static struct attribute * g[] = { | ||
188 | #endif | ||
189 | #ifdef CONFIG_FREEZER | ||
190 | &pm_freeze_timeout_attr.attr, | ||
191 | +#endif | ||
192 | +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) | ||
193 | + &freeze_filesystems_attr.attr, | ||
194 | #endif | ||
195 | NULL, | ||
196 | }; | ||
197 | diff --git a/kernel/power/power.h b/kernel/power/power.h | ||
198 | index XXXXXXX..XXXXXXX 100644 | ||
199 | --- a/kernel/power/power.h | ||
200 | +++ b/kernel/power/power.h | ||
201 | @@ -XXX,XX +XXX,XX @@ struct swsusp_info { | ||
202 | unsigned long size; | ||
203 | } __aligned(PAGE_SIZE); | ||
204 | |||
205 | +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) | ||
206 | +extern bool filesystem_freeze_enabled; | ||
207 | +#endif | ||
208 | + | ||
209 | #ifdef CONFIG_HIBERNATION | ||
210 | /* kernel/power/snapshot.c */ | ||
211 | extern void __init hibernate_reserved_size_init(void); | ||
137 | diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c | 212 | diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c |
138 | index XXXXXXX..XXXXXXX 100644 | 213 | index XXXXXXX..XXXXXXX 100644 |
139 | --- a/kernel/power/suspend.c | 214 | --- a/kernel/power/suspend.c |
140 | +++ b/kernel/power/suspend.c | 215 | +++ b/kernel/power/suspend.c |
141 | @@ -XXX,XX +XXX,XX @@ | 216 | @@ -XXX,XX +XXX,XX @@ |
... | ... | ||
148 | 223 | ||
149 | @@ -XXX,XX +XXX,XX @@ static int suspend_prepare(suspend_state_t state) | 224 | @@ -XXX,XX +XXX,XX @@ static int suspend_prepare(suspend_state_t state) |
150 | if (error) | 225 | if (error) |
151 | goto Restore; | 226 | goto Restore; |
152 | 227 | ||
153 | + if (sync_on_suspend_enabled) | 228 | + if (filesystem_freeze_enabled) |
154 | + filesystems_freeze(); | 229 | + filesystems_freeze(); |
155 | trace_suspend_resume(TPS("freeze_processes"), 0, true); | 230 | trace_suspend_resume(TPS("freeze_processes"), 0, true); |
156 | error = suspend_freeze_processes(); | 231 | error = suspend_freeze_processes(); |
157 | trace_suspend_resume(TPS("freeze_processes"), 0, false); | 232 | trace_suspend_resume(TPS("freeze_processes"), 0, false); |
158 | @@ -XXX,XX +XXX,XX @@ int suspend_devices_and_enter(suspend_state_t state) | 233 | @@ -XXX,XX +XXX,XX @@ int suspend_devices_and_enter(suspend_state_t state) |
159 | static void suspend_finish(void) | 234 | static void suspend_finish(void) |
160 | { | 235 | { |
161 | suspend_thaw_processes(); | 236 | suspend_thaw_processes(); |
162 | + if (sync_on_suspend_enabled) | 237 | + filesystems_thaw(); |
163 | + filesystems_thaw(); | ||
164 | pm_notifier_call_chain(PM_POST_SUSPEND); | 238 | pm_notifier_call_chain(PM_POST_SUSPEND); |
165 | pm_restore_console(); | 239 | pm_restore_console(); |
166 | } | 240 | } |
167 | @@ -XXX,XX +XXX,XX @@ static int enter_state(suspend_state_t state) | 241 | @@ -XXX,XX +XXX,XX @@ static int enter_state(suspend_state_t state) |
168 | trace_suspend_resume(TPS("sync_filesystems"), 0, true); | ||
169 | ksys_sync_helper(); | 242 | ksys_sync_helper(); |
170 | trace_suspend_resume(TPS("sync_filesystems"), 0, false); | 243 | trace_suspend_resume(TPS("sync_filesystems"), 0, false); |
171 | + filesystems_freeze(); | ||
172 | } | 244 | } |
245 | + if (filesystem_freeze_enabled) | ||
246 | + filesystems_freeze(); | ||
173 | 247 | ||
174 | pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); | 248 | pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); |
249 | pm_suspend_clear_flags(); | ||
175 | @@ -XXX,XX +XXX,XX @@ static int enter_state(suspend_state_t state) | 250 | @@ -XXX,XX +XXX,XX @@ static int enter_state(suspend_state_t state) |
176 | pm_pr_dbg("Finishing wakeup.\n"); | 251 | pm_pr_dbg("Finishing wakeup.\n"); |
177 | suspend_finish(); | 252 | suspend_finish(); |
178 | Unlock: | 253 | Unlock: |
179 | + if (sync_on_suspend_enabled) | 254 | + filesystems_thaw(); |
180 | + filesystems_thaw(); | ||
181 | mutex_unlock(&system_transition_mutex); | 255 | mutex_unlock(&system_transition_mutex); |
182 | return error; | 256 | return error; |
183 | } | 257 | } |
184 | 258 | ||
185 | -- | 259 | -- |
186 | 2.47.2 | 260 | 2.47.2 | diff view generated by jsdifflib |
1 | From: Luis Chamberlain <mcgrof@kernel.org> | 1 | Sysfs is built on top of kernfs and sysfs provides the power management |
---|---|---|---|
2 | infrastructure to support suspend/hibernate by writing to various files | ||
3 | in /sys/power/. As filesystems may be automatically frozen during | ||
4 | suspend/hibernate, implementing freeze/thaw support for kernfs | |
5 | generically will cause deadlocks as the suspending/hibernation | ||
6 | initiating task will hold a VFS lock that it will then wait upon to be | ||
7 | released. If freeze/thaw for kernfs is needed, talk to the VFS. | |
2 | 8 | ||
3 | The kernel power management now supports allowing the VFS | ||
4 | to handle filesystem freezing and thawing. Take advantage | |
5 | of that and remove the kthread freezing. This is needed so that we | ||
6 | really stop IO in flight without races after userspace | |
7 | has been frozen. Without this we rely on kthread freezing and | ||
8 | its semantics are loose and error prone. | ||
9 | |||
10 | The filesystem therefore is in charge of properly dealing with | ||
11 | quiescing of the filesystem through its callbacks if it thinks | ||
12 | it knows better than how the VFS handles it. | ||
13 | |||
14 | The following Coccinelle rule was used to remove the now superfluous | |
15 | freezer calls: | ||
16 | |||
17 | make coccicheck MODE=patch SPFLAGS="--in-place --no-show-diff" COCCI=./fs-freeze-cleanup.cocci M=fs/xfs | ||
18 | |||
19 | virtual patch | ||
20 | |||
21 | @ remove_set_freezable @ | ||
22 | expression time; | ||
23 | statement S, S2; | ||
24 | expression task, current; | ||
25 | @@ | ||
26 | |||
27 | ( | ||
28 | - set_freezable(); | ||
29 | | | ||
30 | - if (try_to_freeze()) | ||
31 | - continue; | ||
32 | | | ||
33 | - try_to_freeze(); | ||
34 | | | ||
35 | - freezable_schedule(); | ||
36 | + schedule(); | ||
37 | | | ||
38 | - freezable_schedule_timeout(time); | ||
39 | + schedule_timeout(time); | ||
40 | | | ||
41 | - if (freezing(task)) { S } | ||
42 | | | ||
43 | - if (freezing(task)) { S } | ||
44 | - else | ||
45 | { S2 } | ||
46 | | | ||
47 | - freezing(current) | ||
48 | ) | ||
49 | |||
50 | @ remove_wq_freezable @ | ||
51 | expression WQ_E, WQ_ARG1, WQ_ARG2, WQ_ARG3, WQ_ARG4; | ||
52 | identifier fs_wq_fn; | ||
53 | @@ | ||
54 | |||
55 | ( | ||
56 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
57 | - WQ_ARG2 | WQ_FREEZABLE, | ||
58 | + WQ_ARG2, | ||
59 | ...); | ||
60 | | | ||
61 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
62 | - WQ_ARG2 | WQ_FREEZABLE | WQ_ARG3, | ||
63 | + WQ_ARG2 | WQ_ARG3, | ||
64 | ...); | ||
65 | | | ||
66 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
67 | - WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE, | ||
68 | + WQ_ARG2 | WQ_ARG3, | ||
69 | ...); | ||
70 | | | ||
71 | WQ_E = alloc_workqueue(WQ_ARG1, | ||
72 | - WQ_ARG2 | WQ_ARG3 | WQ_FREEZABLE | WQ_ARG4, | ||
73 | + WQ_ARG2 | WQ_ARG3 | WQ_ARG4, | ||
74 | ...); | ||
75 | | | ||
76 | WQ_E = | ||
77 | - WQ_ARG1 | WQ_FREEZABLE | ||
78 | + WQ_ARG1 | ||
79 | | | ||
80 | WQ_E = | ||
81 | - WQ_ARG1 | WQ_FREEZABLE | WQ_ARG3 | ||
82 | + WQ_ARG1 | WQ_ARG3 | ||
83 | | | ||
84 | fs_wq_fn( | ||
85 | - WQ_FREEZABLE | WQ_ARG2 | WQ_ARG3 | ||
86 | + WQ_ARG2 | WQ_ARG3 | ||
87 | ) | ||
88 | | | ||
89 | fs_wq_fn( | ||
90 | - WQ_FREEZABLE | WQ_ARG2 | ||
91 | + WQ_ARG2 | ||
92 | ) | ||
93 | | | ||
94 | fs_wq_fn( | ||
95 | - WQ_FREEZABLE | ||
96 | + 0 | ||
97 | ) | ||
98 | ) | ||
99 | |||
100 | @ add_auto_flag @ | ||
101 | expression E1; | ||
102 | identifier fs_type; | ||
103 | @@ | ||
104 | |||
105 | struct file_system_type fs_type = { | ||
106 | .fs_flags = E1 | ||
107 | + | FS_AUTOFREEZE | ||
108 | , | ||
109 | }; | ||
110 | |||
111 | Generated-by: Coccinelle SmPL | ||
112 | Signed-off-by: Luis Chamberlain <mcgrof@kernel.org> | ||
113 | Link: https://lore.kernel.org/r/20250326112220.1988619-7-mcgrof@kernel.org | ||
114 | Signed-off-by: Christian Brauner <brauner@kernel.org> | 9 | Signed-off-by: Christian Brauner <brauner@kernel.org> |
115 | --- | 10 | --- |
116 | fs/xfs/xfs_discard.c | 2 +- | 11 | fs/kernfs/mount.c | 15 +++++++++++++++ |
117 | fs/xfs/xfs_log.c | 3 +-- | 12 | 1 file changed, 15 insertions(+) |
118 | fs/xfs/xfs_log_cil.c | 2 +- | ||
119 | fs/xfs/xfs_mru_cache.c | 2 +- | ||
120 | fs/xfs/xfs_pwork.c | 2 +- | ||
121 | fs/xfs/xfs_super.c | 14 +++++++------- | ||
122 | fs/xfs/xfs_trans_ail.c | 3 --- | ||
123 | fs/xfs/xfs_zone_gc.c | 2 -- | ||
124 | 8 files changed, 12 insertions(+), 18 deletions(-) | ||
125 | 13 | ||
126 | diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c | 14 | diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c |
127 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
128 | --- a/fs/xfs/xfs_discard.c | 16 | --- a/fs/kernfs/mount.c |
129 | +++ b/fs/xfs/xfs_discard.c | 17 | +++ b/fs/kernfs/mount.c |
130 | @@ -XXX,XX +XXX,XX @@ xfs_trim_gather_extents( | 18 | @@ -XXX,XX +XXX,XX @@ const struct super_operations kernfs_sops = { |
131 | static bool | 19 | |
132 | xfs_trim_should_stop(void) | 20 | .show_options = kernfs_sop_show_options, |
133 | { | 21 | .show_path = kernfs_sop_show_path, |
134 | - return fatal_signal_pending(current) || freezing(current); | 22 | + |
135 | + return fatal_signal_pending(current); | 23 | + /* |
136 | } | 24 | + * sysfs is built on top of kernfs and sysfs provides the power |
137 | 25 | + * management infrastructure to support suspend/hibernate by | |
138 | /* | 26 | + * writing to various files in /sys/power/. As filesystems may |
139 | diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c | 27 | + * be automatically frozen during suspend/hibernate implementing |
140 | index XXXXXXX..XXXXXXX 100644 | 28 | + * freeze/thaw support for kernfs generically will cause |
141 | --- a/fs/xfs/xfs_log.c | 29 | + * deadlocks as the suspending/hibernation initiating task will |
142 | +++ b/fs/xfs/xfs_log.c | 30 | + * hold a VFS lock that it will then wait upon to be released. |
143 | @@ -XXX,XX +XXX,XX @@ xlog_alloc_log( | 31 | + * If freeze/thaw for kernfs is needed talk to the VFS. |
144 | log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ | 32 | + */ |
145 | 33 | + .freeze_fs = NULL, | |
146 | log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s", | 34 | + .unfreeze_fs = NULL, |
147 | - XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | | 35 | + .freeze_super = NULL, |
148 | - WQ_HIGHPRI), | 36 | + .thaw_super = NULL, |
149 | + XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_HIGHPRI), | 37 | }; |
150 | 0, mp->m_super->s_id); | 38 | |
151 | if (!log->l_ioend_workqueue) | 39 | static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, |
152 | goto out_free_iclog; | ||
153 | diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c | ||
154 | index XXXXXXX..XXXXXXX 100644 | ||
155 | --- a/fs/xfs/xfs_log_cil.c | ||
156 | +++ b/fs/xfs/xfs_log_cil.c | ||
157 | @@ -XXX,XX +XXX,XX @@ xlog_cil_init( | ||
158 | * concurrency the log spinlocks will be exposed to. | ||
159 | */ | ||
160 | cil->xc_push_wq = alloc_workqueue("xfs-cil/%s", | ||
161 | - XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND), | ||
162 | + XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_UNBOUND), | ||
163 | 4, log->l_mp->m_super->s_id); | ||
164 | if (!cil->xc_push_wq) | ||
165 | goto out_destroy_cil; | ||
166 | diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c | ||
167 | index XXXXXXX..XXXXXXX 100644 | ||
168 | --- a/fs/xfs/xfs_mru_cache.c | ||
169 | +++ b/fs/xfs/xfs_mru_cache.c | ||
170 | @@ -XXX,XX +XXX,XX @@ int | ||
171 | xfs_mru_cache_init(void) | ||
172 | { | ||
173 | xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", | ||
174 | - XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 1); | ||
175 | + XFS_WQFLAGS(WQ_MEM_RECLAIM), 1); | ||
176 | if (!xfs_mru_reap_wq) | ||
177 | return -ENOMEM; | ||
178 | return 0; | ||
179 | diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c | ||
180 | index XXXXXXX..XXXXXXX 100644 | ||
181 | --- a/fs/xfs/xfs_pwork.c | ||
182 | +++ b/fs/xfs/xfs_pwork.c | ||
183 | @@ -XXX,XX +XXX,XX @@ xfs_pwork_init( | ||
184 | trace_xfs_pwork_init(mp, nr_threads, current->pid); | ||
185 | |||
186 | pctl->wq = alloc_workqueue("%s-%d", | ||
187 | - WQ_UNBOUND | WQ_SYSFS | WQ_FREEZABLE, nr_threads, tag, | ||
188 | + WQ_UNBOUND | WQ_SYSFS, nr_threads, tag, | ||
189 | current->pid); | ||
190 | if (!pctl->wq) | ||
191 | return -ENOMEM; | ||
192 | diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c | ||
193 | index XXXXXXX..XXXXXXX 100644 | ||
194 | --- a/fs/xfs/xfs_super.c | ||
195 | +++ b/fs/xfs/xfs_super.c | ||
196 | @@ -XXX,XX +XXX,XX @@ xfs_init_mount_workqueues( | ||
197 | struct xfs_mount *mp) | ||
198 | { | ||
199 | mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", | ||
200 | - XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), | ||
201 | + XFS_WQFLAGS(WQ_MEM_RECLAIM), | ||
202 | 1, mp->m_super->s_id); | ||
203 | if (!mp->m_buf_workqueue) | ||
204 | goto out; | ||
205 | |||
206 | mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", | ||
207 | - XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), | ||
208 | + XFS_WQFLAGS(WQ_MEM_RECLAIM), | ||
209 | 0, mp->m_super->s_id); | ||
210 | if (!mp->m_unwritten_workqueue) | ||
211 | goto out_destroy_buf; | ||
212 | |||
213 | mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", | ||
214 | - XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), | ||
215 | + XFS_WQFLAGS(WQ_MEM_RECLAIM), | ||
216 | 0, mp->m_super->s_id); | ||
217 | if (!mp->m_reclaim_workqueue) | ||
218 | goto out_destroy_unwritten; | ||
219 | |||
220 | mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s", | ||
221 | - XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM), | ||
222 | + XFS_WQFLAGS(WQ_UNBOUND | WQ_MEM_RECLAIM), | ||
223 | 0, mp->m_super->s_id); | ||
224 | if (!mp->m_blockgc_wq) | ||
225 | goto out_destroy_reclaim; | ||
226 | |||
227 | mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s", | ||
228 | - XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), | ||
229 | + XFS_WQFLAGS(WQ_MEM_RECLAIM), | ||
230 | 1, mp->m_super->s_id); | ||
231 | if (!mp->m_inodegc_wq) | ||
232 | goto out_destroy_blockgc; | ||
233 | |||
234 | mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", | ||
235 | - XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id); | ||
236 | + XFS_WQFLAGS(0), 0, mp->m_super->s_id); | ||
237 | if (!mp->m_sync_workqueue) | ||
238 | goto out_destroy_inodegc; | ||
239 | |||
240 | @@ -XXX,XX +XXX,XX @@ xfs_init_workqueues(void) | ||
241 | * max_active value for this workqueue. | ||
242 | */ | ||
243 | xfs_alloc_wq = alloc_workqueue("xfsalloc", | ||
244 | - XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0); | ||
245 | + XFS_WQFLAGS(WQ_MEM_RECLAIM), 0); | ||
246 | if (!xfs_alloc_wq) | ||
247 | return -ENOMEM; | ||
248 | |||
249 | diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c | ||
250 | index XXXXXXX..XXXXXXX 100644 | ||
251 | --- a/fs/xfs/xfs_trans_ail.c | ||
252 | +++ b/fs/xfs/xfs_trans_ail.c | ||
253 | @@ -XXX,XX +XXX,XX @@ xfsaild( | ||
254 | unsigned int noreclaim_flag; | ||
255 | |||
256 | noreclaim_flag = memalloc_noreclaim_save(); | ||
257 | - set_freezable(); | ||
258 | |||
259 | while (1) { | ||
260 | /* | ||
261 | @@ -XXX,XX +XXX,XX @@ xfsaild( | ||
262 | |||
263 | __set_current_state(TASK_RUNNING); | ||
264 | |||
265 | - try_to_freeze(); | ||
266 | - | ||
267 | tout = xfsaild_push(ailp); | ||
268 | } | ||
269 | |||
270 | diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c | ||
271 | index XXXXXXX..XXXXXXX 100644 | ||
272 | --- a/fs/xfs/xfs_zone_gc.c | ||
273 | +++ b/fs/xfs/xfs_zone_gc.c | ||
274 | @@ -XXX,XX +XXX,XX @@ xfs_zone_gc_handle_work( | ||
275 | } | ||
276 | |||
277 | __set_current_state(TASK_RUNNING); | ||
278 | - try_to_freeze(); | ||
279 | |||
280 | if (reset_list) | ||
281 | xfs_zone_gc_reset_zones(data, reset_list); | ||
282 | @@ -XXX,XX +XXX,XX @@ xfs_zoned_gcd( | ||
283 | unsigned int nofs_flag; | ||
284 | |||
285 | nofs_flag = memalloc_nofs_save(); | ||
286 | - set_freezable(); | ||
287 | |||
288 | for (;;) { | ||
289 | set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE); | ||
290 | 40 | ||
291 | -- | 41 | -- |
292 | 2.47.2 | 42 | 2.47.2 | diff view generated by jsdifflib |