【Linux】Linux文件I/O

xuanxuan

2022-06-10

文件I/O

直接使用系统调用的缺点:

影响系统性能

系统调用比普通函数调用开销大，因为系统调用要进行用户空间和内核空间的切换。

系统调用一次所能读写的数据量大小，受硬件的限制。

解决方案:使用带缓冲功能的标准I/O库，以减少系统调用的次数。

例如: fwrite、fread、fopen、fclose、fseek、fflush

文件系统接口

文件系统——一种把数据组织成文件和目录的存储方式，提供了基于文件的存取接口，并通过文件权限控制访问。

(一系列的接口。)

文件系统缓存

主存(通常是DRAM)的一块区域用来缓存文件系统的内容，包含各种数据和元数据。

标准文件访问方式

直接IO方式

open() + O_DIRECT = 绕过内核缓冲区(页缓存)的直接访问，有效避免了CPU和内存的多余时间开销。要求内存边界对齐：用户缓冲区的地址、每次读写的长度以及文件偏移量都必须是块大小(通常为512字节)的整数倍。

直接IO，绕过内核页缓存，不会出现write成功后数据仍停留在页缓存中、掉电即丢失的情况。但要注意，O_DIRECT本身并不保证数据和元数据已经完全落盘(例如磁盘自身的写缓存)，需要严格持久化时应配合O_SYNC或fsync使用。

注意：直接IO的缺点就是如果访问的数据不在应用程序缓存中，那么每次数据都会直接从磁盘进行加载，这种直接加载会非常慢，通常直接IO跟异步IO结合使用会得到较好的性能。

示例:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

// Number of loop iterations in main(), i.e. how many BUF_LEN-byte blocks are written.
#define TOTAL 10

// Direct I/O has to take hardware characteristics into account.
// The basic unit of a disk is the sector; one sector is taken to be 512 bytes here.
#define BUF_LEN 512

/*
 * Write exactly len bytes from buf to the descriptor fd.
 *
 * write() may transfer fewer bytes than requested or be interrupted by a
 * signal, so the call is repeated until everything has been written.
 *
 * Returns the number of bytes written (len on success, or less only if
 * write() reports 0 bytes of progress), or -3 on error. On error a message
 * is printed to stderr and errno still holds the failure reason.
 */
int writeToFile(int fd,const char* buf,int len) {
    int total = 0;

    /* A negative length would silently become a huge size_t inside write(). */
    if (len < 0) {
        errno = EINVAL;
        fprintf(stderr,"write to %d failed,reason:%s.\n",fd,strerror(EINVAL));
        errno = EINVAL;
        return -3;
    }

    do {
        ssize_t n = write(fd, buf + total, (size_t)(len - total));

        if (n < 0) {
            if (errno == EINTR) {
                continue;   /* interrupted before any data was written: retry */
            }
            /* fprintf may clobber errno; keep it intact for the caller. */
            int saved = errno;
            fprintf(stderr,"write to %d failed,reason:%s.\n",fd,strerror(saved));
            errno = saved;
            return -3;
        }
        if (n == 0) {
            break;          /* no progress possible; avoid spinning forever */
        }
        total += (int)n;
    } while (total < len);

    return total;
}

/*
 * Direct-I/O write demo.
 *
 * Creates (or truncates) ./io_test.txt with O_DIRECT and writes TOTAL blocks
 * of BUF_LEN bytes from a 512-byte-aligned buffer, printing the block number
 * after each successful write.
 *
 * Returns 0 on success, -1 if the buffer cannot be allocated or the file
 * cannot be opened, and -2 if a write fails, is short, or close() fails.
 */
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    const char* filename = "./io_test.txt";
    char* buf = NULL;
    int fd = -1;
    int rc = 0;
    int i = 0;

    /* O_DIRECT needs an aligned user buffer; plain malloc() gives no such guarantee. */
    /* posix_memalign reports failure through its return value, not errno. */
    int ret = posix_memalign((void**)&buf,512,BUF_LEN);
    if (ret != 0) {
        fprintf(stderr, "posix_memalign failed,reason:%s.\n exit\n",strerror(ret));
        return -1;
    }

    /* The whole block is written, so clear it first instead of dumping
     * uninitialized heap bytes into the file after the text. */
    memset(buf, 0, BUF_LEN);
    strcpy(buf,"This is test.\n");

    /* O_CREAT requires the mode argument; without it the permission bits are garbage. */
    fd = open(filename,O_RDWR | O_TRUNC | O_CREAT | O_DIRECT, 0644);
    if (fd < 0) {
        fprintf(stderr, "open %s failed,reason:%s.\n exit\n",filename,strerror(errno));
        free(buf);
        return -1;
    }

    for (i = 0; i < TOTAL; i++) {
        int written = writeToFile(fd, buf, BUF_LEN);

        if (written < 0) {
            /* writeToFile has already logged the failure; errno is reported
             * here again on a best-effort basis. */
            fprintf(stderr,"write to %s failed,reason: %s.\n exit\n",filename,strerror(errno));
            rc = -2;
            break;
        }
        if (written != BUF_LEN) {
            fprintf(stderr,"short write to %s: %d of %d bytes.\n exit\n",filename,written,BUF_LEN);
            rc = -2;
            break;
        }
        printf("%d\n",i+1);
    }

    if (rc == 0) {
        printf("finished.\n");
    }

    free(buf);
    if (close(fd) < 0) {
        fprintf(stderr,"close %s failed,reason:%s.\n",filename,strerror(errno));
        if (rc == 0) {
            rc = -2;
        }
    }
    return rc;

}

直接IO和标准方式进行对比

示例:测试20s内对同一文件的读取次数

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

// Size in bytes of the read buffer; also the amount requested per read() call.
#define BUF_SIZE 512

/*
 * Read-throughput comparison demo.
 *
 * For roughly 20 seconds, repeatedly opens ./open_compare.txt, reads it to
 * end-of-file in BUF_SIZE-byte chunks and closes it again, then prints how
 * many such passes were started. Swap the two open() lines below to compare
 * direct I/O against the standard (page-cached) access mode.
 *
 * Returns 0 normally, -1 if the buffer cannot be allocated or the file
 * cannot be opened.
 */
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    enum { TEST_SECONDS = 20 };

    const char* filename = "./open_compare.txt";
    char* buf = NULL;
    int fd = -1;
    int read_total = 0;
    ssize_t rlen = 0;
    time_t start;
    time_t cur;

    /* Aligned buffer so the same code also works when O_DIRECT is enabled. */
    /* posix_memalign returns the error code directly and does not set errno. */
    int ret = posix_memalign((void**)&buf,512,BUF_SIZE);
    if (ret != 0) {
        fprintf(stderr,"posix_memalign failed.reason:%s\n",strerror(ret));
        return -1;
    }

    start = time(NULL);

    do
    {
        read_total++;
        /* Direct-I/O variant of the test: */
        //fd = open(filename, O_RDWR | O_DIRECT);
        fd = open(filename,O_RDWR);

        if (fd < 0) {
            fprintf(stderr, "open %s failed,reason:%s.\n exit\n", filename, strerror(errno));
            free(buf);
            return -1;
        }

        /* Read until EOF (0) or an error (<0); an error ends only this pass. */
        do
        {
            rlen = read(fd, buf, BUF_SIZE);
            if (rlen < 0) {
                fprintf(stderr, "read to %s failed,reason: %s.\n exit\n", filename, strerror(errno));
            }

        } while (rlen > 0);
        close(fd);

        cur = time(NULL);
    } while ((cur - start) < TEST_SECONDS);

    /* The value printed is the number of open/read/close passes, not a duration. */
    printf("total time:%d\n",read_total);

    free(buf);
    return 0;

}

直接IO

标准方式

(高速页缓存，多次读取速度快。)

O_SYNC

缓存同步

为了保证磁盘系统与缓冲区内容一致，Linux系统提供了sync,fsync,fdatasync三个函数。

函数描述:将缓冲区中已修改的数据同步到磁盘。fsync和fdatasync成功返回0，出错则返回-1并设置errno；sync没有返回值。
#include <unistd.h>

int fsync(int fd);
int fdatasync(int fd);
void sync(void);
说明:

sync——将所有修改过的块缓冲区排入写队列，然后就返回，它并不等待实际写磁盘操作结束。

fsync——将fd对应文件的块缓冲区立即写入磁盘，并等待实际写磁盘操作结束返回。

fdatasync——类似fsync,但只影响文件的数据部分。而除数据外，fsync还会同步更新文件属性。

Linux文件IO流程图

内核中会有一个线程，不断地将高速页缓冲区中的数据写入到物理磁盘中。