詳解如何在內核中操作某個文件？

作者：土豆居士 2021-03-08 11:47:34

系統 Linux

有粉絲問我如何在內核中操作某個文件？本篇就這個問題給大家詳細介紹一下，希望能夠幫助到你！

[[386034]]

一、問題描述

如何在內核中操作某個文件?

問題

二、操作函數

1. 分析

在用戶態，讀寫文件可以通過read和write這兩個系統調用來完成(C庫函數實際上是對系統調用的封裝)。但是，在內核態沒有這樣的系統調用，我們又該如何讀寫文件呢?

閱讀Linux內核源碼，可以知道陷入內核之后實際執行的是sys_read和sys_write這兩個函數，但是這兩個函數沒有使用EXPORT_SYMBOL導出，也就是說其他模塊不能使用。

在fs/open.c中系統調用具體實現如下(內核版本3.14)：

/*
 * open(2) system-call entry point.
 *
 * ORs O_LARGEFILE into flags when force_o_largefile() is true, then hands
 * the request to do_sys_open() with AT_FDCWD as the directory descriptor.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) 
{ 
 if (force_o_largefile()) 
  flags |= O_LARGEFILE; 
 
 return do_sys_open(AT_FDCWD, filename, flags, mode); 
}

跟蹤do_sys_open()函數，

/*
 * Common implementation behind the open system call.
 *
 * @dfd:      directory descriptor passed through to do_filp_open()
 * @filename: user-space pointer to the path
 * @flags:    open flags
 * @mode:     creation mode, passed to build_open_flags()
 *
 * Returns the new file descriptor on success, or the error value produced
 * by build_open_flags(), getname(), get_unused_fd_flags() or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) 
{ 
 struct open_flags op; 
 /* fd first holds the status of build_open_flags(); non-zero is returned as-is */
 int fd = build_open_flags(flags, mode, &op); 
 struct filename *tmp; 
 
 if (fd) 
  return fd; 
 
 /* obtain a struct filename for the user-supplied path */
 tmp = getname(filename); 
 if (IS_ERR(tmp)) 
  return PTR_ERR(tmp); 
 
 /* from here on fd is the descriptor number (or a negative error) */
 fd = get_unused_fd_flags(flags); 
 if (fd >= 0) { 
  struct file *f = do_filp_open(dfd, tmp, &op); 
  if (IS_ERR(f)) { 
   /* open failed: hand the reserved descriptor back and report the error */
   put_unused_fd(fd); 
   fd = PTR_ERR(f); 
  } else { 
   /* open succeeded: signal fsnotify, then install f under fd */
   fsnotify_open(f); 
   fd_install(fd, f); 
  } 
 } 
 /* paired with getname() above; runs on both success and failure paths */
 putname(tmp); 
 return fd; 
}

就會發現它主要使用了do_filp_open()函數，該函數在fs/namei.c中：

/*
 * Open @pathname relative to @dfd and return the resulting struct file
 * pointer (or an ERR_PTR value).
 *
 * path_openat() is tried up to three times: first with LOOKUP_RCU added to
 * the lookup flags, again with the plain flags if that attempt yields
 * -ECHILD, and finally with LOOKUP_REVAL added if the result is -ESTALE.
 */
struct file *do_filp_open(int dfd, struct filename *pathname, 
  const struct open_flags *op) 
{ 
 struct nameidata nd; 
 int flags = op->lookup_flags; 
 struct file *filp; 
 
 /* first attempt: RCU lookup */
 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); 
 /* -ECHILD from the RCU attempt: retry with the unmodified flags */
 if (unlikely(filp == ERR_PTR(-ECHILD))) 
  filp = path_openat(dfd, pathname, &nd, op, flags); 
 /* -ESTALE: retry once more with LOOKUP_REVAL set */
 if (unlikely(filp == ERR_PTR(-ESTALE))) 
  filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL); 
 return filp; 
}

該函數最終打開了文件，并返回file類型指針。所以我們只需要找到其他調用了do_filp_open()函數的地方，就可找到我們需要的文件操作函數。

而在文件fs/open.c中，filp_open函數調用了file_open_name函數：

/** 
 * filp_open - open file and return file pointer 
 * 
 * @filename: path to open 
 * @flags: open flags as per the open(2) second argument 
 * @mode: mode for the new file if O_CREAT is set, else ignored 
 * 
 * This is the helper to open a file from kernelspace if you really 
 * have to.  But in general you should not do this, so please move 
 * along, nothing to see here.. 
 *
 * Returns whatever file_open_name() returns for the wrapped name.
 */ 
struct file *filp_open(const char *filename, int flags, umode_t mode) 
{ 
 /* wrap the raw C string in an on-stack struct filename */
 struct filename name = {.name = filename}; 
 return file_open_name(&name, flags, mode); 
} 
EXPORT_SYMBOL(filp_open);

函數file_open_name調用了do_filp_open。而filp_open的接口和sys_open函數極為相似，調用參數也和sys_open一樣，并且使用EXPORT_SYMBOL導出了，所以在內核中可以使用filp_open函數打開文件，功能非常類似于應用層的open。

/** 
 * file_open_name - open file and return file pointer 
 * 
 * @name: struct filename containing path to open 
 * @flags: open flags as per the open(2) second argument 
 * @mode: mode for the new file if O_CREAT is set, else ignored 
 * 
 * This is the helper to open a file from kernelspace if you really 
 * have to.  But in general you should not do this, so please move 
 * along, nothing to see here.. 
 *
 * Returns ERR_PTR(err) when build_open_flags() reports a non-zero status,
 * otherwise the result of do_filp_open() relative to AT_FDCWD.
 */ 
struct file *file_open_name(struct filename *name, int flags, umode_t mode) 
{ 
 struct open_flags op; 
 int err = build_open_flags(flags, mode, &op); 
 return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op); 
}

2. 所有操作函數

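使用同樣的方法，找出了一組在內核操作文件的函數，如下：

struct file *filp_open(const char *filename, int flags, umode_t mode);   // 打開文件

ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos);   // 讀文件

ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos);   // 寫文件

int filp_close(struct file *filp, fl_owner_t id);   // 關閉文件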

這些函數的參數非常類似于應用層文件IO函數，open、read、write、close。

3. 用戶空間地址

雖然我們找到了這些函數，但是我們還不能直接使用。

因為在vfs_read和vfs_write函數中，其參數buf指向的是用戶空間的內存地址，如果我們直接使用內核空間的指針，則會返回-EFAULT。

這是因為使用的緩沖區超過了用戶空間的地址范圍。一般系統調用會要求你使用的緩沖區不能在內核區。這個可以用set_fs()、get_fs()來解決。

在include/asm/uaccess.h中，有如下定義：

/* Build an mm_segment_t value whose single member is s. */
#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) 
/* Address limit used for kernel-space access: 0xFFFFFFFF. */
#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) 
/* Address limit used for user-space access: PAGE_OFFSET. */
#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) 
/* get_ds() always evaluates to KERNEL_DS. */
#define get_ds() (KERNEL_DS) 
/* Read the current task's address limit. */
#define get_fs() (current->addr_limit) 
/* Overwrite the current task's address limit with x. */
#define set_fs(x) (current->addr_limit = (x))

如果使用，可以按照如下順序執行：

/* Save the current address limit so it can be restored afterwards. */
mm_segment_t fs = get_fs();
/* Raise the limit to KERNEL_DS so kernel-space buffers are accepted. */
set_fs(KERNEL_DS);
//vfs_write();
//vfs_read();
/* Always restore the saved limit once the I/O is done. */
set_fs(fs);

詳解：系統調用本來是提供給用戶空間的程序訪問的，所以，對傳遞給它的參數(比如上面的buf)，它默認會認為來自用戶空間，在read或write()函數中，為了保護內核空間，一般會用get_fs()得到的值來和USER_DS進行比較，從而防止用戶空間程序“蓄意”破壞內核空間。

而現在要在內核空間使用系統調用，此時傳遞給read或write()的參數地址就是內核空間的地址了，在USER_DS之上(USER_DS ~ KERNEL_DS)，如果不做任何其它處理，在write()函數中，會認為該地址超過了USER_DS范圍，所以會認為是用戶空間的“蓄意破壞”，從而不允許進一步的執行。

為了解決這個問題， set_fs(KERNEL_DS)，將其能訪問的空間限制擴大到KERNEL_DS,這樣就可以在內核順利使用系統調用了!

在VFS的支持下，用戶態進程讀寫任何類型的文件系統都可以使用read和write這兩個系統調用，但是在linux內核中沒有這樣的系統調用我們如何操作文件呢?

我們知道read和write在進入內核態之后，實際執行的是sys_read和sys_write，但是查看內核源代碼，發現這些操作文件的函數都沒有導出(使用EXPORT_SYMBOL導出)，也就是說在內核模塊中是不能使用的，那如何是好?

通過查看sys_open的源碼我們發現，其主要使用了do_filp_open()函數，該函數在fs/namei.c中；而在fs/open.c文件中，filp_open函數也是間接調用了do_filp_open函數，并且接口和sys_open函數極為相似，調用參數也和sys_open一樣，并且使用EXPORT_SYMBOL導出了，所以我們猜想該函數可以打開文件，功能和open一樣。

三、實例

Makefile

# Build file for the out-of-tree "sysopen" example module.
#
# When invoked by the kernel build system (KERNELRELEASE is set) only the
# object list is needed. When invoked directly, re-enter the kernel build
# tree of the running kernel and let it build/clean this directory.
ifneq ($(KERNELRELEASE),)
obj-m := sysopen.o
else
KDIR := /lib/modules/$(shell uname -r)/build
PWD  := $(shell pwd)

.PHONY: all clean

all:
	$(info "1st")
	$(MAKE) -C $(KDIR) M=$(PWD) modules

# Let kbuild remove everything it generated (including hidden .*.cmd files).
clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
endif

sysopen.c

#include <linux/module.h> 
#include <linux/syscalls.h> 
#include <linux/file.h> 
#include <linux/fcntl.h> 
#include <linux/delay.h> 
#include <linux/slab.h> 
#include <linux/uaccess.h> 
 
/* Module metadata: declared licence and author string. */
MODULE_LICENSE("GPL"); 
MODULE_AUTHOR("yikoulinux"); 
 
void test(void) 
{ 
 struct file *file = NULL; 
 mm_segment_t old_fs; 
 loff_t  pos; 
 
 char buf[64]="yikoulinux"; 
 
 printk("test()"); 
 file = filp_open("/home/peng/open/test.txt\n",O_RDWR|O_APPEND|O_CREAT,0644); 
 if(IS_ERR(file)){ 
  return ; 
 } 
 old_fs = get_fs(); 
 set_fs(KERNEL_DS); 
 pos = 0; 
 vfs_write(file,buf,sizeof(buf),&pos); 
 
 pos =0; 
 vfs_read(file, buf, sizeof(buf), &pos); 
 printk("buf:%s\n",buf); 
  
 filp_close(file,NULL); 
 set_fs(old_fs); 
 return; 
} 
 
 
/*
 * Module load handler: logs a banner, runs the file I/O demo in test(),
 * and always reports success (0).
 */
static int hello_init(void)
{
	printk("hello_init \n");

	test();

	return 0;
}
/* Module unload handler: only logs a banner. */
static void hello_exit(void)
{
	printk("hello_exit \n");
}
 
/* Register hello_init/hello_exit as the module's init and exit handlers. */
module_init(hello_init); 
module_exit(hello_exit);