1. tmpfs 内存简介
- 首先它是个文件系统
- 但是它的文件数据完全存放在内存(page cache)里面,不会落到磁盘文件系统上;内存紧张时这些页可以被换出到 swap
2. tmpfs 文件系统的实现
2.1. virtual file system 接口定义
- 一个是 struct file_operations:文件读写的接口
- 一个是 struct inode_operations:inode操作接口
/*
 * Table of function pointers for inode-level operations.
 * This excerpt shows only the create, symlink, mkdir and rmdir hooks.
 */
struct inode_operations {
int (*create) (struct user_namespace *, struct inode *,struct dentry *, umode_t, bool);
int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,const char *);
int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,umode_t);
int (*rmdir) (struct inode *,struct dentry *);
/* ... many more members omitted ... */
};
/*
 * Table of function pointers for operations on an open file.
 * This excerpt shows only the open, llseek, read and write hooks.
 */
struct file_operations {
int (*open) (struct inode *, struct file *);
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
/* ... many more members omitted ... */
};
/*
 * File operations for shmem regular files: read, write, seek, fsync.
 * .mmap and .get_unmapped_area are always set; every other hook is
 * only present when CONFIG_TMPFS is defined.
 */
static const struct file_operations shmem_file_operations = {
.mmap = shmem_mmap,
.get_unmapped_area = shmem_get_unmapped_area,
#ifdef CONFIG_TMPFS
.llseek = shmem_file_llseek,
.read_iter = shmem_file_read_iter,
.write_iter = generic_file_write_iter,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = shmem_fallocate,
#endif
};
/* inode operations for shmem regular files; only getattr/setattr are set here */
static const struct inode_operations shmem_inode_operations = {
.getattr = shmem_getattr,
.setattr = shmem_setattr,
};
/*
 * inode operations for shmem directories.
 * Every hook below is only present when CONFIG_TMPFS is defined.
 */
static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
.create = shmem_create,
.lookup = simple_lookup,
.link = shmem_link,
.unlink = shmem_unlink,
.symlink = shmem_symlink,
.mkdir = shmem_mkdir,
.rmdir = shmem_rmdir,
.mknod = shmem_mknod,
.rename = shmem_rename2,
.tmpfile = shmem_tmpfile,
#endif
};
2.2. 文件创建过程
/*
 * Create a regular file named by @dentry inside directory @dir.
 *
 * Delegates to shmem_mknod() with S_IFREG OR-ed into @mode and a device
 * number of 0 (a directory would carry S_IFDIR instead). @excl is not
 * used here. Returns whatever shmem_mknod() returns.
 *
 * NOTE(review): the inode_operations excerpt earlier in this document
 * declares ->create with a leading struct user_namespace * argument,
 * which this definition lacks -- presumably the two excerpts come from
 * different kernel versions; confirm.
 */
static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		bool excl)
{
	const umode_t file_mode = mode | S_IFREG;

	return shmem_mknod(dir, dentry, file_mode, 0);
}
/*
 * File creation. Allocate an inode, and we're done..
 *
 * Returns 0 on success, -ENOSPC when no inode could be obtained, or the
 * error reported by ACL creation / security initialisation (-EOPNOTSUPP
 * from the security hook is tolerated and treated as success).
 */
static int
shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
	struct inode *inode;
	int error;

	/* Get a fresh inode on the parent directory's superblock. */
	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
	if (!inode)
		return -ENOSPC;

	error = simple_acl_create(dir, inode);
	if (error)
		goto out_iput;

	error = security_inode_init_security(inode, dir,
					     &dentry->d_name,
					     shmem_initxattrs, NULL);
	if (error && error != -EOPNOTSUPP)
		goto out_iput;

	/*
	 * A directory is itself a special file; its size is accounted as
	 * BOGO_DIRENT_SIZE per entry, so grow it for the new child and
	 * refresh the directory's change/modify timestamps.
	 */
	dir->i_size += BOGO_DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = current_time(dir);

	/* Bind the new inode to its dentry. */
	d_instantiate(dentry, inode);
	dget(dentry); /* Extra count - pin the dentry in core */
	return 0;

out_iput:
	/* Drop the inode reference taken above before reporting the error. */
	iput(inode);
	return error;
}
2.3. 文件读写过程
/*
 * read_iter hook for shmem files (abridged excerpt; elided parts are
 * marked with "...").
 *
 * Visible flow: split the file position into a page index and an
 * in-page offset, then loop: look the page up with shmem_getpage()
 * and copy from it into the iterator @to with copy_page_to_iter().
 */
static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct address_space *mapping = inode->i_mapping;
/* ... */
/* Page index of the current position, and byte offset inside that page. */
index = *ppos >> PAGE_SHIFT;
offset = *ppos & ~PAGE_MASK;
for (;;) {
/* ... */
error = shmem_getpage(inode, index, &page, sgp);
if (error) {
/* -EINVAL is not reported as an error: it is cleared before leaving the loop. */
if (error == -EINVAL)
error = 0;
break;
}
/* ... */
/*
* Ok, we have the page, and it's up-to-date, so
* now we can copy it to user space...
*/
ret = copy_page_to_iter(page, offset, nr, to);
/* ... */
}
/* ... */
}
3. tmpfs 内存的内核态管理
3.1. tmpfs 内存限制
3.2. tmpfs 内存统计
- page cache
- shmem
cat /sys/fs/cgroup/memory/test/memory.stat
cache 104890368
rss 12288
rss_huge 0
shmem 104755200
mapped_file 0
dirty 0
writeback 0
pgpgin 25905
pgpgout 284
/*
 * Print memory statistics into seq_file @m (abridged excerpt).
 *
 * Each visible line prints "<key> <value>", where the value is a
 * counter from stat[] multiplied by PAGE_SIZE.
 *
 * NOTE(review): the keys printed here (anon/file/file_mapped/...) do not
 * match the cache/rss/mapped_file keys in the sample memory.stat output
 * quoted in this document -- presumably that sample comes from a
 * different cgroup interface version; confirm.
 */
static int memory_stat_show(struct seq_file *m, void *v)
{
/* ... */
seq_printf(m, "anon %llu\n",
(u64)stat[MEMCG_RSS] * PAGE_SIZE);
seq_printf(m, "file %llu\n",
(u64)stat[MEMCG_CACHE] * PAGE_SIZE);
seq_printf(m, "shmem %llu\n",
(u64)stat[NR_SHMEM] * PAGE_SIZE); /* this is where the shmem figure is reported */
seq_printf(m, "file_mapped %llu\n",
(u64)stat[NR_FILE_MAPPED] * PAGE_SIZE);
seq_printf(m, "file_dirty %llu\n",
(u64)stat[NR_FILE_DIRTY] * PAGE_SIZE);
seq_printf(m, "file_writeback %llu\n",
(u64)stat[NR_WRITEBACK] * PAGE_SIZE);
/* ... */
}
/*
 * Like add_to_page_cache_locked, but error if expected item has gone.
 *
 * Abridged excerpt: the insertion itself (and where 'error' is set and
 * mapping->tree_lock is taken) is elided. Only the accounting on success
 * and the rollback on failure are shown. Returns 'error'.
 */
static int shmem_add_to_page_cache(struct page *page,
struct address_space *mapping,
pgoff_t index, void *expected)
{
/* nr: number of base pages covered by @page, as given by hpage_nr_pages() */
int error, nr = hpage_nr_pages(page);
/* ... */
if (!error) {
/* Success: account nr pages to the mapping and to the node counters. */
mapping->nrpages += nr;
if (PageTransHuge(page))
__inc_node_page_state(page, NR_SHMEM_THPS);
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr); /* this is where NR_SHMEM is increased */
spin_unlock_irq(&mapping->tree_lock);
} else {
/* Failure: detach the page from the mapping and drop nr page references. */
page->mapping = NULL;
spin_unlock_irq(&mapping->tree_lock);
page_ref_sub(page, nr);
}
return error;
}
3.3. tmpfs 内存的分配和回收(page cache回收)
/*
 * Account for and allocate one shmem page (or one huge page) for @inode
 * at page offset @index.
 *
 * @huge is forced to false when CONFIG_TRANSPARENT_HUGE_PAGECACHE is not
 * enabled. Blocks are charged to the inode before the allocation and
 * uncharged again if the allocation fails.
 *
 * Returns the new page, already marked locked and swap-backed, or
 * ERR_PTR(-ENOSPC) when the block accounting is refused, or
 * ERR_PTR(-ENOMEM) when the page allocation fails.
 */
static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
		struct inode *inode,
		pgoff_t index, bool huge)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct page *page;
	int nr;

	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
		huge = false;
	nr = huge ? HPAGE_PMD_NR : 1;

	/* Charge the blocks first; refuse early if the charge is rejected. */
	if (!shmem_inode_acct_block(inode, nr))
		return ERR_PTR(-ENOSPC);

	page = huge ? shmem_alloc_hugepage(gfp, info, index)
		    : shmem_alloc_page(gfp, info, index);
	if (!page) {
		/* Allocation failed: give the charged blocks back. */
		shmem_inode_unacct_blocks(inode, nr);
		return ERR_PTR(-ENOMEM);
	}

	__SetPageLocked(page);
	__SetPageSwapBacked(page);
	return page;
}
回收时通过 pageout() 回写 page,其返回值有以下几种:
- PAGE_KEEP:写 page 失败,page 继续保留
- PAGE_ACTIVATE:表示 page 需要迁移回到活跃 LRU 链表中
- PAGE_SUCCESS:表示 page 已经成功写入存储设备
- PAGE_CLEAN:表示 page 已经是干净的,可以释放