1 结构体示意

此前在虚拟文件系统篇章应该已经列举过一部分结构了, 这里我们继续用一个实例来完成本篇.

这里纠正一下前面的一些概念, 我们编码当中的 sb 指代的是该文件系统的一个实例, 而不是物理意义上的超级块.

类型 表示什么
super_block 一个挂载的文件系统
inode 一个文件或目录
dentry 路径名到 inode 的映射

struct file_system_type.

fs_flags.

标志 作用
FS_REQUIRES_DEV 表示文件系统需要块设备(如 ext4/xfs)
FS_BINARY_MOUNTDATA mount 参数是二进制数据(较少见)
FS_HAS_SUBTYPE 支持子类型,例如 fuse.sshfs
FS_USERNS_MOUNT 允许 user namespace 的 root 挂载
FS_RENAME_DOES_D_MOVE 自己处理 d_move(),VFS 不介入
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// https://elixir.bootlin.com/linux/v4.8/source/include/linux/fs.h#L2020
// Core VFS structure describing a filesystem type.
// It is used to identify, register, mount and unmount the various filesystems.
struct file_system_type {
const char *name; // Filesystem name; this is what the -t argument of mount selects.
int fs_flags; // Bitmask made of the FS_* macros defined right below.
#define FS_REQUIRES_DEV 1
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *); // Callback invoked when the kernel receives a mount request.
void (*kill_sb) (struct super_block *); // Callback invoked on umount; usually serves as the cleanup function.
struct module *owner; // Owning module, used for module reference counting so the filesystem cannot be rmmod'ed while in use.
struct file_system_type * next; // Link the VFS uses to traverse all registered filesystems.
struct hlist_head fs_supers;

// The fields below are not meant for filesystem authors; they do not need to be set by hand.
struct lock_class_key s_lock_key;
struct lock_class_key s_umount_key;
struct lock_class_key s_vfs_rename_key;
struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];

struct lock_class_key i_lock_key;
struct lock_class_key i_mutex_key;
struct lock_class_key i_mutex_dir_key;
};

2 例子

2.1 示例 1

2.1.1 代码

通过该例子可以看到, 当我们 cat 一个文件时, 内核会先调用 open, 再调用 read. 如果该文件的 dentry 不在 dentry cache 当中, 则会在 open 之前先调用其父目录 inode 的 lookup 方法.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/mnt_idmapping.h>
#include <linux/string.h>

/* Magic number written into sb->s_magic by myfs_fill_super(). */
#define MYFS_MAGIC 0x13131313

/* printk() wrapper that prefixes every message with "[myfs] ". */
#define MYFS_INFO(fmt, ...) \
	printk(KERN_INFO "[myfs] " fmt, ##__VA_ARGS__)

/* One static file: its name in the directory and its complete contents. */
struct myfs_file_entry {
	const char *name;
	const char *content;
};

/* Files exposed in the root directory of the filesystem. */
static struct myfs_file_entry file_entries[] = {
	{ .name = "hello", .content = "Hello from myfs!\n" },
	{ .name = "foo",   .content = "This is foo file.\n" },
	{ .name = "bar",   .content = "Content of bar file.\n" },
};

/*
 * Allocate a new in-memory inode on @sb and fill in the fields that every
 * myfs inode shares.
 *
 * @sb:   superblock the inode is allocated on
 * @mode: file type plus permission bits, e.g. S_IFREG | 0444
 *
 * Returns the new inode, or NULL when new_inode() fails.  The caller is
 * expected to set i_op / i_fop / i_private / i_size afterwards.
 */
static struct inode *myfs_make_inode(struct super_block *sb, umode_t mode)
{
	struct inode *inode = new_inode(sb);

	if (!inode) {
		MYFS_INFO("myfs_make_inode: new_inode() failed\n");
		return NULL;
	}

	/*
	 * new_inode() does not pick an inode number, so without this every
	 * myfs inode would report i_ino == 0 (e.g. through stat(2)).
	 */
	inode->i_ino = get_next_ino();
	inode->i_mode = mode;

	/* No parent directory is passed, so ownership comes from the caller. */
	inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);

	MYFS_INFO("myfs_make_inode: inode=%p mode=%o sb=%p\n",
		  inode, mode, sb);

	return inode;
}

/*
 * read() handler for the static files.
 *
 * file->private_data is expected to point at the NUL-terminated content
 * string (myfs_open() copies it from inode->i_private).  Copies at most
 * @len bytes starting at *@offset into @buf and advances *@offset.
 *
 * Returns the number of bytes copied, 0 at end of file, -EINVAL when no
 * content is attached, or -EFAULT when copy_to_user() fails.
 */
static ssize_t myfs_read(struct file *file, char __user *buf,
			 size_t len, loff_t *offset)
{
	const char *content = file->private_data;
	loff_t pos = *offset;
	size_t total;

	if (content == NULL) {
		MYFS_INFO("read: file=%p offset=%lld len=%zu, data=NULL\n",
			  file, pos, len);
		return -EINVAL;
	}

	total = strlen(content);

	MYFS_INFO("read: file=%p offset=%lld len=%zu data_len=%zu\n",
		  file, pos, len, total);

	/* Already at (or past) the end of the content: report EOF. */
	if (pos >= total)
		return 0;

	/* Clamp the request to what is left after the current offset. */
	if (len > total - pos)
		len = total - pos;

	if (copy_to_user(buf, content + pos, len) != 0) {
		MYFS_INFO("read: copy_to_user failed\n");
		return -EFAULT;
	}

	pos += len;
	*offset = pos;
	MYFS_INFO("read: done, new offset=%lld, ret=%zu\n", pos, len);
	return len;
}

/*
 * open() handler for the static files: hands the content pointer stored in
 * inode->i_private to the opened file so that myfs_read() can find it in
 * file->private_data.  Always returns 0.
 */
static int myfs_open(struct inode *inode, struct file *file)
{
	void *content = inode->i_private;

	MYFS_INFO("open: inode=%p file=%p i_private=%p\n",
		  inode, file, content);

	file->private_data = content;
	return 0;
}


/* File operations installed on every static regular file by myfs_lookup(). */
static const struct file_operations myfs_file_ops = {
	.owner	= THIS_MODULE,
	.llseek	= default_llseek,
	.read	= myfs_read,
	.open	= myfs_open,
};

/*
 * ->lookup of the root directory.
 *
 * The VFS calls this when it has to resolve the name of a child of a
 * directory and no dentry for that name exists in the dentry cache yet.
 * Only directories take part in lookup.
 *
 * @dir:    inode of the directory being searched
 * @dentry: freshly allocated dentry carrying the name to resolve
 * @flags:  lookup flags (only logged here)
 *
 * A name found in file_entries gets a new read-only regular-file inode
 * attached to @dentry.  An unknown name is not an error for ->lookup: the
 * dentry is added with a NULL inode (a negative dentry) and the VFS turns
 * that into -ENOENT itself.  Error pointers are reserved for real failures
 * such as running out of memory.
 *
 * Returns NULL on both hit and miss, ERR_PTR(-ENOMEM) if no inode could be
 * allocated.
 */
static struct dentry *myfs_lookup(struct inode *dir,
				  struct dentry *dentry, unsigned int flags)
{
	const char *name = dentry->d_name.name;
	struct inode *inode;
	int i;

	MYFS_INFO("lookup: name='%s' dir_inode=%p flags=0x%x\n",
		  name, dir, flags);

	for (i = 0; i < ARRAY_SIZE(file_entries); i++) {
		if (strcmp(name, file_entries[i].name) != 0)
			continue;

		MYFS_INFO("lookup: hit entry[%d] name='%s'\n",
			  i, file_entries[i].name);

		inode = myfs_make_inode(dir->i_sb, S_IFREG | 0444);
		if (!inode) {
			MYFS_INFO("lookup: myfs_make_inode failed for '%s'\n",
				  file_entries[i].name);
			return ERR_PTR(-ENOMEM);
		}

		inode->i_fop = &myfs_file_ops;
		/* myfs_open() later copies this pointer into file->private_data. */
		inode->i_private = (void *)file_entries[i].content;
		inode->i_size = strlen(file_entries[i].content);

		MYFS_INFO("lookup: d_add dentry=%p inode=%p size=%lld\n",
			  dentry, inode, inode->i_size);

		d_add(dentry, inode);
		return NULL;
	}

	MYFS_INFO("lookup: miss, name='%s' not found\n", name);

	/* Cache the miss as a negative dentry instead of returning an error. */
	d_add(dentry, NULL);
	return NULL;
}

// file: 当前目录打开后的 struct file
// ctx: VFS 提供的目录遍历上下文, 通过 ctx->pos 记录当前已遍历位置
static int myfs_iterate(struct file *file, struct dir_context *ctx)
{
int i;

MYFS_INFO("iterate: file=%p pos=%lld\n", file, ctx->pos);

/* 第一次调用时 ctx->pos == 0,先发 "." 和 ".." */
if (ctx->pos == 0) {
MYFS_INFO("iterate: emit dots (.) and (..)\n");
if (!dir_emit_dots(file, ctx))
return 0;
// dir_emit_dots 会自己更新 ctx->pos 到 2
}

/* ctx->pos >= 2 时,已经发过 "." 和 ".." 了
* 我们用 (ctx->pos - 2) 作为 file_entries 的起始下标
*/
for (i = ctx->pos - 2; i < ARRAY_SIZE(file_entries); i++) {
MYFS_INFO("iterate: emit entry[%d] name='%s'\n",
i, file_entries[i].name);

if (!dir_emit(ctx,
file_entries[i].name,
strlen(file_entries[i].name),
i + 1,
DT_REG)) {
MYFS_INFO("iterate: dir_emit stopped at i=%d\n", i);
break;
}
ctx->pos++;
}

MYFS_INFO("iterate: done, final pos=%lld\n", ctx->pos);
return 0;
}

static const struct inode_operations myfs_dir_inode_ops = {
.lookup = myfs_lookup, // 当 cat /mnt/myfs/foo 时会调用目录 inode 的 lookup
};

static const struct file_operations myfs_dir_ops = {
.owner = THIS_MODULE,
.iterate_shared = myfs_iterate, // 当 ls 时调用, 展示目录内容(普通文件不会触发).
};

static const struct super_operations myfs_super_ops = {
};

static int myfs_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;

MYFS_INFO("fill_super: sb=%p, data=%p, silent=%d\n", sb, data, silent);

sb->s_magic = MYFS_MAGIC;
sb->s_op = &myfs_super_ops;

MYFS_INFO("fill_super: creating root inode\n");

inode = myfs_make_inode(sb, S_IFDIR | 0755);
if (!inode) {
MYFS_INFO("fill_super: myfs_make_inode for root failed\n");
return -ENOMEM;
}

inode->i_op = &myfs_dir_inode_ops;
inode->i_fop = &myfs_dir_ops;

// 创建根 dentry
sb->s_root = d_make_root(inode);
if (!sb->s_root) {
MYFS_INFO("fill_super: d_make_root failed\n");
return -ENOMEM;
}

MYFS_INFO("fill_super: success, root dentry=%p\n", sb->s_root);
return 0;
}

/*
 * ->mount callback of myfs_type: logs the request and delegates to
 * mount_nodev() with myfs_fill_super() as the superblock initialiser.
 * A NULL @dev is logged as "none".
 */
static struct dentry *myfs_mount(struct file_system_type *type,
				 int flags, const char *dev, void *data)
{
	const char *dev_label = dev;

	if (!dev_label)
		dev_label = "none";

	MYFS_INFO("mount: type=%s flags=0x%x dev=%s data=%p\n",
		  type->name, flags, dev_label, data);

	return mount_nodev(type, flags, data, myfs_fill_super);
}

static struct file_system_type myfs_type = {
.owner = THIS_MODULE,
.name = "myfs",
.mount = myfs_mount,
.kill_sb = kill_litter_super,
};

/*
 * Module entry point: registers myfs_type with the VFS.
 * Returns whatever register_filesystem() returns (0 on success).
 */
static int __init myfs_init(void)
{
	int err;

	MYFS_INFO("init: register_filesystem\n");

	err = register_filesystem(&myfs_type);
	if (err == 0) {
		MYFS_INFO("init: register_filesystem success\n");
		return err;
	}

	MYFS_INFO("init: register_filesystem failed, ret=%d\n", err);
	return err;
}

/* Module exit point: unregisters myfs_type and logs the result. */
static void __exit myfs_exit(void)
{
	MYFS_INFO("exit: unregister_filesystem\n");
	MYFS_INFO("exit: unregister_filesystem ret=%d\n",
		  unregister_filesystem(&myfs_type));
}

module_init(myfs_init);
module_exit(myfs_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jvle");
MODULE_DESCRIPTION("simple myfs for learning with debug logs");

以下是我执行 demo 时候的一些输出.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# insmod myfs.ko
[ 1155.011520] [myfs] init: register_filesystem
[ 1155.011609] [myfs] init: register_filesystem success

# mount -t myfs none /mnt/myfs
[ 1164.253981] [myfs] mount: type=myfs flags=0x0 dev=none data=0000000000000000
[ 1164.254171] [myfs] fill_super: sb=00000000a346e5b1, data=0000000000000000, silent=0
[ 1164.254208] [myfs] fill_super: creating root inode
[ 1164.254254] [myfs] myfs_make_inode: inode=00000000ccddb8b1 mode=40755 sb=00000000a346e5b1
[ 1164.254344] [myfs] fill_super: success, root dentry=00000000e6ffdb1a

# cat /mnt/myfs/foo
[ 1188.322120] [myfs] lookup: name='foo' dir_inode=00000000ccddb8b1 flags=0x101
[ 1188.322210] [myfs] lookup: hit entry[1] name='foo'
[ 1188.322262] [myfs] myfs_make_inode: inode=000000002149c4e2 mode=100444 sb=00000000a346e5b1
[ 1188.322296] [myfs] lookup: d_add dentry=00000000ce365910 inode=000000002149c4e2 size=18
[ 1188.322363] [myfs] open: inode=000000002149c4e2 file=000000008d707dd7 i_private=00000000408c391e
[ 1188.323442] [myfs] read: file=000000008d707dd7 offset=0 len=131072 data_len=18
[ 1188.323603] [myfs] read: done, new offset=18, ret=18
[ 1188.323920] [myfs] read: file=000000008d707dd7 offset=18 len=131072 data_len=18

# cat /mnt/myfs/bar
[ 1195.071079] [myfs] lookup: name='bar' dir_inode=00000000ccddb8b1 flags=0x101
[ 1195.071202] [myfs] lookup: hit entry[2] name='bar'
[ 1195.071240] [myfs] myfs_make_inode: inode=0000000077260ece mode=100444 sb=00000000a346e5b1
[ 1195.071256] [myfs] lookup: d_add dentry=00000000781e22fa inode=0000000077260ece size=21
[ 1195.071298] [myfs] open: inode=0000000077260ece file=0000000083c2f9e6 i_private=000000005674d64f
[ 1195.072136] [myfs] read: file=0000000083c2f9e6 offset=0 len=131072 data_len=21
[ 1195.072194] [myfs] read: done, new offset=21, ret=21
[ 1195.072509] [myfs] read: file=0000000083c2f9e6 offset=21 len=131072 data_len=21

# cat /mnt/myfs/bar
[ 1200.059113] [myfs] open: inode=0000000077260ece file=00000000606683ec i_private=000000005674d64f
[ 1200.059981] [myfs] read: file=00000000606683ec offset=0 len=131072 data_len=21
[ 1200.060064] [myfs] read: done, new offset=21, ret=21
[ 1200.060348] [myfs] read: file=00000000606683ec offset=21 len=131072 data_len=21

# ls /mnt/myfs/
[ 1204.289066] [myfs] iterate: file=0000000083c2f9e6 pos=0
[ 1204.289142] [myfs] iterate: emit dots (.) and (..)
[ 1204.289255] [myfs] iterate: emit entry[0] name='hello'
[ 1204.289308] [myfs] iterate: emit entry[1] name='foo'
[ 1204.289320] [myfs] iterate: emit entry[2] name='bar'
[ 1204.289341] [myfs] iterate: done, final pos=5
[ 1204.289403] [myfs] iterate: file=0000000083c2f9e6 pos=5
[ 1204.289426] [myfs] iterate: done, final pos=5
[ 1204.614005] [myfs] iterate: file=0000000083c2f9e6 pos=0
[ 1204.614076] [myfs] iterate: emit dots (.) and (..)
[ 1204.614090] [myfs] iterate: emit entry[0] name='hello'
[ 1204.614105] [myfs] iterate: emit entry[1] name='foo'
[ 1204.614114] [myfs] iterate: emit entry[2] name='bar'
[ 1204.614123] [myfs] iterate: done, final pos=5
[ 1204.614158] [myfs] iterate: file=0000000083c2f9e6 pos=5
[ 1204.614168] [myfs] iterate: done, final pos=5
[ 1204.614634] [myfs] lookup: name='hello' dir_inode=00000000ccddb8b1 flags=0x1
[ 1204.614656] [myfs] lookup: hit entry[0] name='hello'
[ 1204.614680] [myfs] myfs_make_inode: inode=00000000a746913e mode=100444 sb=00000000a346e5b1
[ 1204.614694] [myfs] lookup: d_add dentry=00000000f34d4f02 inode=00000000a746913e size=17

2.1.2 示例 1 分析

当我执行以下的指令的时候.

1
mount -t myfs none /mnt/myfs

此时 VFS 会调用到 myfs_mount.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
static struct dentry *myfs_mount(struct file_system_type *type,
int flags, const char *dev, void *data)
{
// https://elixir.bootlin.com/linux/v4.8/source/include/linux/fs.h#L2056
// mount_nodev is the mount helper for filesystems without a backing block device; tmpfs, proc and sysfs are filesystems of that kind.
// Other helpers exist as well: mount_bdev (filesystems on a block device), mount_pseudo (pseudo filesystems without VFS lookup semantics), mount_single (filesystems with a single superblock instance), mount_fs (the most general one, but filesystem authors normally do not call it directly), mount_subtree (mounts a subdirectory of an existing superblock)...
// mount_nodev takes care of allocating the super_block for us.
// NOTE(review): presumably it also initialises s_type and s_flags - confirm against fs/super.c. s_op is assigned by our own callback (myfs_fill_super sets sb->s_op).
// It then invokes the callback passed as the last argument (myfs_fill_super here)
// and creates/returns the root dentry.
/*
 * extern struct dentry *mount_nodev(struct file_system_type *fs_type,
 * int flags, void *data,
 * int (*fill_super)(struct super_block *, void *, int));
 */
return mount_nodev(type, flags, data, myfs_fill_super);
}

2.2 示例 2

2.2.1 代码

我们此时新增了一个子目录 sub 和其中的文件 sub_hello, 另外在根目录下新增了一个动态文件 info. 可以看到, 我们在初始化的时候只创建了根目录, 之后的子级目录和普通文件在 lookup 中按需创建就好: 每次解析相关文件和目录时, 如果在 dentry cache 中找不到对应的名字, 就会调用其父目录的 lookup. 当然我们也可以一开始就全部初始化完成.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/mnt_idmapping.h>
#include <linux/string.h>
#include <linux/jiffies.h> // 为了 jiffies
#include <linux/sched.h> // 为了 current->pid / current->comm

/* Magic number written into sb->s_magic by myfs_fill_super(). */
#define MYFS_MAGIC 0x13131313

/* printk() wrapper that prefixes every message with "[myfs] ". */
#define MYFS_INFO(fmt, ...) \
	printk(KERN_INFO "[myfs] " fmt, ##__VA_ARGS__)

/* One static file: its name in the directory and its fixed contents. */
struct myfs_file_entry {
	const char *name;
	const char *content;	/* static file content */
};

/* Static files in the root directory. */
static struct myfs_file_entry file_entries[] = {
	{ .name = "hello", .content = "Hello from myfs!\n" },
	{ .name = "foo",   .content = "This is foo file.\n" },
	{ .name = "bar",   .content = "Content of bar file.\n" },
};

/* Static files in the sub/ directory. */
static struct myfs_file_entry sub_entries[] = {
	{ .name = "sub_hello", .content = "Hello from sub directory!\n" },
};

/*
 * Allocate a new in-memory inode on @sb and fill in the fields that every
 * myfs inode shares.
 *
 * @sb:   superblock the inode is allocated on
 * @mode: file type plus permission bits, e.g. S_IFDIR | 0755
 *
 * Returns the new inode, or NULL when new_inode() fails.  The caller is
 * expected to set i_op / i_fop / i_private / i_size afterwards.
 */
static struct inode *myfs_make_inode(struct super_block *sb, umode_t mode)
{
	struct inode *inode = new_inode(sb);

	if (!inode) {
		MYFS_INFO("myfs_make_inode: new_inode() failed\n");
		return NULL;
	}

	/*
	 * new_inode() does not pick an inode number, so without this every
	 * myfs inode would report i_ino == 0 (e.g. through stat(2)).
	 */
	inode->i_ino = get_next_ino();
	inode->i_mode = mode;

	/* No parent directory is passed, so ownership comes from the caller. */
	inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);

	MYFS_INFO("myfs_make_inode: inode=%p mode=%o sb=%p\n",
		  inode, mode, sb);

	return inode;
}

/* ---------- read for the ordinary static files ---------- */

/*
 * file->private_data is expected to point at the NUL-terminated content
 * string (myfs_open() copies it from inode->i_private).  Copies at most
 * @len bytes starting at *@offset into @buf and advances *@offset.
 *
 * Returns the number of bytes copied, 0 at end of file, -EINVAL when no
 * content is attached, or -EFAULT when copy_to_user() fails.
 */
static ssize_t myfs_read(struct file *file, char __user *buf,
			 size_t len, loff_t *offset)
{
	const char *content = file->private_data;
	loff_t pos = *offset;
	size_t total;

	if (content == NULL) {
		MYFS_INFO("read: file=%p offset=%lld len=%zu, data=NULL\n",
			  file, pos, len);
		return -EINVAL;
	}

	total = strlen(content);

	MYFS_INFO("read: file=%p offset=%lld len=%zu data_len=%zu\n",
		  file, pos, len, total);

	/* Already at (or past) the end of the content: report EOF. */
	if (pos >= total)
		return 0;

	/* Clamp the request to what is left after the current offset. */
	if (len > total - pos)
		len = total - pos;

	if (copy_to_user(buf, content + pos, len) != 0) {
		MYFS_INFO("read: copy_to_user failed\n");
		return -EFAULT;
	}

	pos += len;
	*offset = pos;
	MYFS_INFO("read: done, new offset=%lld, ret=%zu\n", pos, len);
	return len;
}

/*
 * open() handler for the static files: hands the content pointer stored in
 * inode->i_private to the opened file so that myfs_read() can find it in
 * file->private_data.  Always returns 0.
 */
static int myfs_open(struct inode *inode, struct file *file)
{
	void *content = inode->i_private;

	MYFS_INFO("open: inode=%p file=%p i_private=%p\n",
		  inode, file, content);

	file->private_data = content;
	return 0;
}

/* File operations installed on every static regular file. */
static const struct file_operations myfs_file_ops = {
	.owner	= THIS_MODULE,
	.llseek	= default_llseek,
	.read	= myfs_read,
	.open	= myfs_open,
};

/* ---------- read for the dynamic file "info": content is built on every read ---------- */

/*
 * Formats the current jiffies value plus the pid and comm of the calling
 * task into a stack buffer, then copies the part selected by *@ppos and
 * @len to @buf and advances *@ppos.
 *
 * Returns the number of bytes copied, 0 once *@ppos is at or past the end
 * of the generated text, or -EFAULT when copy_to_user() fails.
 */
static ssize_t myfs_info_read(struct file *file, char __user *buf,
			      size_t len, loff_t *ppos)
{
	char text[128];
	loff_t pos = *ppos;
	int text_len;

	text_len = scnprintf(text, sizeof(text),
			     "jiffies=%lu\npid=%d\ncomm=%s\n",
			     jiffies, current->pid, current->comm);

	MYFS_INFO("info_read: file=%p offset=%lld len=%zu gen_len=%d\n",
		  file, pos, len, text_len);

	/* Nothing left of the generated text: report EOF. */
	if (pos >= text_len)
		return 0;

	/* Clamp the request to what is left after the current offset. */
	if (len > text_len - pos)
		len = text_len - pos;

	if (copy_to_user(buf, text + pos, len) != 0) {
		MYFS_INFO("info_read: copy_to_user failed\n");
		return -EFAULT;
	}

	*ppos = pos + len;
	return len;
}

/*
 * File operations of the dynamic "info" file.  No ->open is needed because
 * myfs_info_read() does not use file->private_data.
 */
static const struct file_operations myfs_info_file_ops = {
	.owner	= THIS_MODULE,
	.llseek	= default_llseek,
	.read	= myfs_info_read,
};

/* ---------- lookup and iterate of the sub/ directory ---------- */

/*
 * ->lookup of the sub/ directory.
 *
 * @dir:    inode of sub/
 * @dentry: freshly allocated dentry carrying the name to resolve
 * @flags:  lookup flags (only logged here)
 *
 * A name found in sub_entries gets a new read-only regular-file inode
 * attached to @dentry.  An unknown name is not an error for ->lookup: the
 * dentry is added with a NULL inode (a negative dentry) and the VFS turns
 * that into -ENOENT itself.  Error pointers are reserved for real failures
 * such as running out of memory.
 *
 * Returns NULL on both hit and miss, ERR_PTR(-ENOMEM) if no inode could be
 * allocated.
 */
static struct dentry *myfs_sub_lookup(struct inode *dir,
				      struct dentry *dentry,
				      unsigned int flags)
{
	const char *name = dentry->d_name.name;
	struct inode *inode;
	int i;

	MYFS_INFO("sub_lookup: name='%s' dir_inode=%p flags=0x%x\n",
		  name, dir, flags);

	for (i = 0; i < ARRAY_SIZE(sub_entries); i++) {
		if (strcmp(name, sub_entries[i].name) != 0)
			continue;

		MYFS_INFO("sub_lookup: hit entry[%d] name='%s'\n",
			  i, sub_entries[i].name);

		inode = myfs_make_inode(dir->i_sb, S_IFREG | 0444);
		if (!inode) {
			MYFS_INFO("sub_lookup: myfs_make_inode failed for '%s'\n",
				  sub_entries[i].name);
			return ERR_PTR(-ENOMEM);
		}

		inode->i_fop = &myfs_file_ops;
		/* myfs_open() later copies this pointer into file->private_data. */
		inode->i_private = (void *)sub_entries[i].content;
		inode->i_size = strlen(sub_entries[i].content);

		MYFS_INFO("sub_lookup: d_add dentry=%p inode=%p size=%lld\n",
			  dentry, inode, inode->i_size);

		d_add(dentry, inode);
		return NULL;
	}

	MYFS_INFO("sub_lookup: miss, name='%s' not found\n", name);

	/* Cache the miss as a negative dentry instead of returning an error. */
	d_add(dentry, NULL);
	return NULL;
}

static int myfs_sub_iterate(struct file *file, struct dir_context *ctx)
{
int i;

MYFS_INFO("sub_iterate: file=%p pos=%lld\n", file, ctx->pos);

if (ctx->pos == 0) {
MYFS_INFO("sub_iterate: emit dots (.) and (..)\n");
if (!dir_emit_dots(file, ctx))
return 0;
/* 这里 dir_emit_dots 会把 pos 调整到 2 */
}

while (ctx->pos >= 2 &&
ctx->pos < 2 + ARRAY_SIZE(sub_entries)) {

i = ctx->pos - 2;
MYFS_INFO("sub_iterate: emit sub entry[%d] name='%s'\n",
i, sub_entries[i].name);

if (!dir_emit(ctx,
sub_entries[i].name,
strlen(sub_entries[i].name),
i + 1,
DT_REG)) {
MYFS_INFO("sub_iterate: dir_emit stopped at i=%d\n", i);
return 0;
}
ctx->pos++;
}

MYFS_INFO("sub_iterate: done, final pos=%lld\n", ctx->pos);
return 0;
}

static const struct inode_operations myfs_sub_dir_inode_ops = {
.lookup = myfs_sub_lookup,
};

static const struct file_operations myfs_sub_dir_ops = {
.owner = THIS_MODULE,
.iterate_shared = myfs_sub_iterate,
};

/* ---------- lookup of the root directory: hello/foo/bar/info/sub ---------- */

/*
 * ->lookup of the root directory.
 *
 * @dir:    inode of the root directory
 * @dentry: freshly allocated dentry carrying the name to resolve
 * @flags:  lookup flags (only logged here)
 *
 * Resolves, in this order: the static files from file_entries, the dynamic
 * file "info", and the sub-directory "sub".  An unknown name is not an error
 * for ->lookup: the dentry is added with a NULL inode (a negative dentry)
 * and the VFS turns that into -ENOENT itself.  Error pointers are reserved
 * for real failures such as running out of memory.
 *
 * Returns NULL on both hit and miss, ERR_PTR(-ENOMEM) if no inode could be
 * allocated.
 */
static struct dentry *myfs_lookup(struct inode *dir,
				  struct dentry *dentry, unsigned int flags)
{
	const char *name = dentry->d_name.name;
	struct inode *inode;
	int i;

	MYFS_INFO("lookup: name='%s' dir_inode=%p flags=0x%x\n",
		  name, dir, flags);

	/* 1) static files hello / foo / bar */
	for (i = 0; i < ARRAY_SIZE(file_entries); i++) {
		if (strcmp(name, file_entries[i].name) != 0)
			continue;

		MYFS_INFO("lookup: hit static entry[%d] name='%s'\n",
			  i, file_entries[i].name);

		inode = myfs_make_inode(dir->i_sb, S_IFREG | 0444);
		if (!inode) {
			MYFS_INFO("lookup: myfs_make_inode failed for '%s'\n",
				  file_entries[i].name);
			return ERR_PTR(-ENOMEM);
		}

		inode->i_fop = &myfs_file_ops;
		/* myfs_open() later copies this pointer into file->private_data. */
		inode->i_private = (void *)file_entries[i].content;
		inode->i_size = strlen(file_entries[i].content);

		MYFS_INFO("lookup: d_add dentry=%p inode=%p size=%lld\n",
			  dentry, inode, inode->i_size);

		d_add(dentry, inode);
		return NULL;
	}

	/* 2) dynamic file info */
	if (strcmp(name, "info") == 0) {
		MYFS_INFO("lookup: hit dynamic entry 'info'\n");

		inode = myfs_make_inode(dir->i_sb, S_IFREG | 0444);
		if (!inode) {
			MYFS_INFO("lookup: myfs_make_inode failed for 'info'\n");
			return ERR_PTR(-ENOMEM);
		}

		inode->i_fop = &myfs_info_file_ops;
		inode->i_size = 0;	/* dynamic content, no fixed size */

		MYFS_INFO("lookup: d_add dentry=%p inode=%p (info)\n",
			  dentry, inode);

		d_add(dentry, inode);
		return NULL;
	}

	/* 3) sub-directory sub */
	if (strcmp(name, "sub") == 0) {
		MYFS_INFO("lookup: hit sub directory 'sub'\n");

		inode = myfs_make_inode(dir->i_sb, S_IFDIR | 0755);
		if (!inode) {
			MYFS_INFO("lookup: myfs_make_inode failed for 'sub'\n");
			return ERR_PTR(-ENOMEM);
		}

		inode->i_op = &myfs_sub_dir_inode_ops;
		inode->i_fop = &myfs_sub_dir_ops;

		MYFS_INFO("lookup: d_add dentry=%p inode=%p (sub dir)\n",
			  dentry, inode);

		d_add(dentry, inode);
		return NULL;
	}

	/* 4) any other name does not exist */
	MYFS_INFO("lookup: miss, name='%s' not found\n", name);

	/* Cache the miss as a negative dentry instead of returning an error. */
	d_add(dentry, NULL);
	return NULL;
}

/* ---------- 根目录 iterate:ls /mnt/myfs ---------- */

static int myfs_iterate(struct file *file, struct dir_context *ctx)
{
int i;

MYFS_INFO("iterate: file=%p pos=%lld\n", file, ctx->pos);

if (ctx->pos == 0) {
MYFS_INFO("iterate: emit dots (.) and (..)\n");
if (!dir_emit_dots(file, ctx))
return 0;
/* emit dots 后 pos 通常会变成 2 */
}

/* 先发静态文件 hello/foo/bar */
while (ctx->pos >= 2 &&
ctx->pos < 2 + ARRAY_SIZE(file_entries)) {

i = ctx->pos - 2;
MYFS_INFO("iterate: emit static entry[%d] name='%s'\n",
i, file_entries[i].name);

if (!dir_emit(ctx,
file_entries[i].name,
strlen(file_entries[i].name),
i + 1, // 伪 inode 号
DT_REG)) {
MYFS_INFO("iterate: dir_emit stopped at static i=%d\n", i);
return 0;
}
ctx->pos++;
}

/* 然后发动态文件 info */
if (ctx->pos == 2 + ARRAY_SIZE(file_entries)) {
MYFS_INFO("iterate: emit dynamic entry 'info'\n");
if (dir_emit(ctx,
"info",
strlen("info"),
ARRAY_SIZE(file_entries) + 1,
DT_REG)) {
ctx->pos++;
} else {
return 0;
}
}

/* 最后发子目录 sub */
if (ctx->pos == 2 + ARRAY_SIZE(file_entries) + 1) {
MYFS_INFO("iterate: emit sub directory 'sub'\n");
if (dir_emit(ctx,
"sub",
strlen("sub"),
ARRAY_SIZE(file_entries) + 2,
DT_DIR)) {
ctx->pos++;
} else {
return 0;
}
}

MYFS_INFO("iterate: done, final pos=%lld\n", ctx->pos);
return 0;
}

/* ---------- 目录 inode / file 操作表 ---------- */

static const struct inode_operations myfs_dir_inode_ops = {
.lookup = myfs_lookup, // 根目录的 lookup
};

static const struct file_operations myfs_dir_ops = {
.owner = THIS_MODULE,
.iterate_shared = myfs_iterate, // 根目录的 iterate
};

static const struct super_operations myfs_super_ops = {
};

static int myfs_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;

MYFS_INFO("fill_super: sb=%p, data=%p, silent=%d\n", sb, data, silent);

sb->s_magic = MYFS_MAGIC;
sb->s_op = &myfs_super_ops;

MYFS_INFO("fill_super: creating root inode\n");

inode = myfs_make_inode(sb, S_IFDIR | 0755);
if (!inode) {
MYFS_INFO("fill_super: myfs_make_inode for root failed\n");
return -ENOMEM;
}

inode->i_op = &myfs_dir_inode_ops;
inode->i_fop = &myfs_dir_ops;

sb->s_root = d_make_root(inode);
if (!sb->s_root) {
MYFS_INFO("fill_super: d_make_root failed\n");
return -ENOMEM;
}

MYFS_INFO("fill_super: success, root dentry=%p\n", sb->s_root);
return 0;
}

/*
 * ->mount callback of myfs_type: logs the request and delegates to
 * mount_nodev() with myfs_fill_super() as the superblock initialiser.
 * A NULL @dev is logged as "none".
 */
static struct dentry *myfs_mount(struct file_system_type *type,
				 int flags, const char *dev, void *data)
{
	const char *dev_label = dev;

	if (!dev_label)
		dev_label = "none";

	MYFS_INFO("mount: type=%s flags=0x%x dev=%s data=%p\n",
		  type->name, flags, dev_label, data);

	return mount_nodev(type, flags, data, myfs_fill_super);
}

static struct file_system_type myfs_type = {
.owner = THIS_MODULE,
.name = "myfs",
.mount = myfs_mount,
.kill_sb = kill_litter_super,
};

/*
 * Module entry point: registers myfs_type with the VFS.
 * Returns whatever register_filesystem() returns (0 on success).
 */
static int __init myfs_init(void)
{
	int err;

	MYFS_INFO("init: register_filesystem\n");

	err = register_filesystem(&myfs_type);
	if (err == 0) {
		MYFS_INFO("init: register_filesystem success\n");
		return err;
	}

	MYFS_INFO("init: register_filesystem failed, ret=%d\n", err);
	return err;
}

/* Module exit point: unregisters myfs_type and logs the result. */
static void __exit myfs_exit(void)
{
	MYFS_INFO("exit: unregister_filesystem\n");
	MYFS_INFO("exit: unregister_filesystem ret=%d\n",
		  unregister_filesystem(&myfs_type));
}

module_init(myfs_init);
module_exit(myfs_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jvle");
MODULE_DESCRIPTION("simple myfs with root files, dynamic info, and subdir");

2.3 示例 3

之前我们的文件系统都是虚拟的, 不能够持久化, 现在我们想要真实落地一个文件系统.

TODO.

3 References

  1. https://www.cnblogs.com/orange-CC/p/12720341.html