IO函数详解

该版本为libc-2.31

fopen

_IO_new_fopen

1
2
3
4
5
FILE *
_IO_new_fopen (const char *filename, const char *mode)
{
return __fopen_internal (filename, mode, 1);
}

调用__fopen_internal函数

__fopen_internal

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
FILE *
__fopen_internal (const char *filename, const char *mode, int is32)
{
struct locked_FILE
{
struct _IO_FILE_plus fp;
#ifdef _IO_MTSAFE_IO //没执行
_IO_lock_t lock;
#endif
struct _IO_wide_data wd;
} *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE)); //first:分配空间

if (new_f == NULL)
return NULL;
#ifdef _IO_MTSAFE_IO //没执行
new_f->fp.file._lock = &new_f->lock;
#endif
_IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps); //second:初始化
_IO_JUMPS (&new_f->fp) = &_IO_file_jumps; //说明所有的_io_file_jump都是同一个vtable
_IO_new_file_init_internal (&new_f->fp); //third:将file链接到_IO_list_all
if (_IO_file_fopen ((FILE *) new_f, filename, mode, is32) != NULL) //forth:打开文件
return __fopen_maybe_mmap (&new_f->fp.file);

_IO_un_link (&new_f->fp);
free (new_f);
return NULL;
}

分配空间部分

1
2
3
4
5
6
7
8
  struct locked_FILE
{
struct _IO_FILE_plus fp;
#ifdef _IO_MTSAFE_IO //没执行
_IO_lock_t lock;
#endif
struct _IO_wide_data wd;
} *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));

主要是locked_FILE结构体,而其中包含了两个结构体一个_IO_FILE_plus,一个_IO_wide_data,而_IO_FILE_plus又包含FILE_IO_jump_t_IO_jump_t即vtable

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
struct _IO_wide_data
{
wchar_t *_IO_read_ptr; /* Current read pointer */ //unsigned short
wchar_t *_IO_read_end; /* End of get area. */
wchar_t *_IO_read_base; /* Start of putback+get area. */
wchar_t *_IO_write_base; /* Start of put area. */
wchar_t *_IO_write_ptr; /* Current put pointer. */
wchar_t *_IO_write_end; /* End of put area. */
wchar_t *_IO_buf_base; /* Start of reserve area. */
wchar_t *_IO_buf_end; /* End of reserve area. */
/* The following fields are used to support backing up and undo. */
wchar_t *_IO_save_base; /* Pointer to start of non-current get area. */
wchar_t *_IO_backup_base; /* Pointer to first valid character of
backup area */
wchar_t *_IO_save_end; /* Pointer to end of non-current get area. */

__mbstate_t _IO_state;
__mbstate_t _IO_last_state;
struct _IO_codecvt _codecvt;

wchar_t _shortbuf[1];

const struct _IO_jump_t *_wide_vtable;
};

_IO_wide_data是用于处理宽字符的输入和输出的,只有在调用 glibc 提供的宽字符输入输出函数(如 fgetwc()fputwc() 等)时,会使用 _IO_wide_data 结构体中的字段来处理宽字符数据的读取和写入。

至于FILE则是在./libio/bits/types/FILE.h:typedef struct _IO_FILE FILE;被赋为_IO_FILE

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
struct _IO_FILE
{
int _flags; /* High-order word is _IO_MAGIC; rest is flags. */

/* The following pointers correspond to the C++ streambuf protocol. */
char *_IO_read_ptr; /* Current read pointer */
char *_IO_read_end; /* End of get area. */
char *_IO_read_base; /* Start of putback+get area. */
char *_IO_write_base; /* Start of put area. */
char *_IO_write_ptr; /* Current put pointer. */
char *_IO_write_end; /* End of put area. */
char *_IO_buf_base; /* Start of reserve area. */
char *_IO_buf_end; /* End of reserve area. */

/* The following fields are used to support backing up and undo. */
char *_IO_save_base; /* Pointer to start of non-current get area. */
char *_IO_backup_base; /* Pointer to first valid character of backup area */
char *_IO_save_end; /* Pointer to end of non-current get area. */

struct _IO_marker *_markers;

struct _IO_FILE *_chain;

int _fileno;
int _flags2;
__off_t _old_offset; /* This used to be _offset but it's too small. */

/* 1+column number of pbase(); 0 is unknown. */
unsigned short _cur_column;
signed char _vtable_offset;
char _shortbuf[1];

_IO_lock_t *_lock;
#ifdef _IO_USE_OLD_IO_FILE
};

初始化部分

1
2
_IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps);    //second:初始化
_IO_JUMPS (&new_f->fp) = &_IO_file_jumps; //说明所有的_io_file_jump都是同一个vtable

主要是其中的_IO_no_init,至于下一个语句就比较好理解,只要是同一种vtable类型,那么vtable就在libc.so段上,不能改,但是能改指向vtable的指针,就不过多缀叙了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
void
_IO_no_init (FILE *fp, int flags, int orientation,
struct _IO_wide_data *wd, const struct _IO_jump_t *jmp)
{
_IO_old_init (fp, flags);
fp->_mode = orientation;
if (orientation >= 0)
{
fp->_wide_data = wd;
fp->_wide_data->_IO_buf_base = NULL;
fp->_wide_data->_IO_buf_end = NULL;
fp->_wide_data->_IO_read_base = NULL;
fp->_wide_data->_IO_read_ptr = NULL;
fp->_wide_data->_IO_read_end = NULL;
fp->_wide_data->_IO_write_base = NULL;
fp->_wide_data->_IO_write_ptr = NULL;
fp->_wide_data->_IO_write_end = NULL;
fp->_wide_data->_IO_save_base = NULL;
fp->_wide_data->_IO_backup_base = NULL;
fp->_wide_data->_IO_save_end = NULL;

fp->_wide_data->_wide_vtable = jmp;
}
else
/* Cause predictable crash when a wide function is called on a byte
stream. */
fp->_wide_data = (struct _IO_wide_data *) -1L;
fp->_freeres_list = NULL;
}

可以看到这个主要部分都被NULL掉了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
void
_IO_old_init (FILE *fp, int flags)
{
fp->_flags = _IO_MAGIC|flags;
fp->_flags2 = 0;
if (stdio_needs_locking)
fp->_flags2 |= _IO_FLAGS2_NEED_LOCK;
fp->_IO_buf_base = NULL;
fp->_IO_buf_end = NULL;
fp->_IO_read_base = NULL;
fp->_IO_read_ptr = NULL;
fp->_IO_read_end = NULL;
fp->_IO_write_base = NULL;
fp->_IO_write_ptr = NULL;
fp->_IO_write_end = NULL;
fp->_chain = NULL; /* Not necessary. */

fp->_IO_save_base = NULL;
fp->_IO_backup_base = NULL;
fp->_IO_save_end = NULL;
fp->_markers = NULL;
fp->_cur_column = 0;
#if _IO_JUMPS_OFFSET
fp->_vtable_offset = 0;
#endif
#ifdef _IO_MTSAFE_IO
if (fp->_lock != NULL)
_IO_lock_init (*fp->_lock);
#endif
}
image-20240301235011497

这张图非常好的描述了初始化后的情况

链接file部分

1
_IO_new_file_init_internal (&new_f->fp);                    //third:将file链接到_IO_list_all

跟进

1
2
3
4
5
6
7
8
9
10
11
12
void
_IO_new_file_init_internal (struct _IO_FILE_plus *fp)
{
/* POSIX.1 allows another file handle to be used to change the position
of our file descriptor. Hence we actually don't know the actual
position before we do the first fseek (and until a following fflush). */
fp->file._offset = _IO_pos_BAD;
fp->file._flags |= CLOSED_FILEBUF_FLAGS;

_IO_link_in (fp);
fp->file._fileno = -1; //设置_fileno -1
}

主要的作用是调用_IO_link_in和设置_fileno为-1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
void
_IO_link_in (struct _IO_FILE_plus *fp)
{
if ((fp->file._flags & _IO_LINKED) == 0)
{
fp->file._flags |= _IO_LINKED;
#ifdef _IO_MTSAFE_IO
..........
#endif
fp->file._chain = (FILE *) _IO_list_all;
_IO_list_all = fp; //通过_IO_list_all来赋值chain
#ifdef _IO_MTSAFE_IO
..........
#endif
}
}
libc_hidden_def (_IO_link_in)

主要是通过_IO_list_all来赋值chain以此链接file

_IO_file_fopen打开文件句柄部分

1
2
3
4
5
6
if (_IO_file_fopen ((FILE *) new_f, filename, mode, is32) != NULL)
return __fopen_maybe_mmap (&new_f->fp.file);

_IO_un_link (&new_f->fp);
free (new_f);
return NULL;

先是调用_IO_file_fopen函数来打开文件,之后又再次调用_IO_un_link函数,之后free掉new_f?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
_IO_FILE *
_IO_new_file_fopen (_IO_FILE *fp, const char *filename, const char *mode,
int is32not64)
{

...
// 检查文件是否以打开,打开则返回
if (_IO_file_is_open (fp))
return 0;
// 设置文件打开模式
switch (*mode)
{
case 'r':
omode = O_RDONLY;
read_write = _IO_NO_WRITES;
break;
...
}
...
// 调用_IO_file_open函数
result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
is32not64);
...
}
libc_hidden_ver (_IO_new_file_fopen, _IO_file_fopen)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
FILE *
_IO_file_open (FILE *fp, const char *filename, int posix_mode, int prot,
int read_write, int is32not64)
{
int fdesc;
if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL))
fdesc = __open_nocancel (filename,
posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
else
fdesc = __open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot); //系统调用open
if (fdesc < 0)
return NULL;
fp->_fileno = fdesc; //将文件描述符设置到fileno上
_IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
/* For append mode, send the file offset to the end of the file. Don't
update the offset cache though, since the file handle is not active. */
if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
== (_IO_IS_APPENDING | _IO_NO_READS))
{
off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
if (new_pos == _IO_pos_BAD && errno != ESPIPE)
{
__close_nocancel (fdesc);
return NULL;
}
}
_IO_link_in ((struct _IO_FILE_plus *) fp); //再次执行_IO_link_in
return fp;
}

函数的主要功能就是执行系统调用open打开文件,并将文件描述符赋值给FILE结构体的_fileno字段,最后再次调用_IO_link_in函数,确保该结构体被链接进入_IO_list_all链表。

执行完_IO_new_file_fopen函数后,FILE结构体为:

image-20240302003003453

该函数执行完后,程序返回FILE结构体指针

小结

看完代码后,可以将fopen整体的流程可以归纳为:

  1. malloc分配内存空间。
  2. _IO_no_init 对file结构体进行null初始化。
  3. _IO_file_init将结构体链接进_IO_list_all链表。
  4. _IO_file_fopen执行系统调用打开文件。

整个流程还是比较简单的,fopen返回之后_IO_list_all链表指向返回的FILE结构体,且FILE结构体的_chain字段指向之前的结构体(没有其他额外打开文件的话,将是指向stderr),同时其他的字段大多都是默认的null值,vtable存储的是__GI__IO_file_jumps函数表

image-20240302003126840

fread

大致的流程如下

image-20240302135632361

_IO_fread

1
2
3
4
5
6
7
8
9
10
11
12
13
14
size_t
_IO_fread (void *buf, size_t size, size_t count, FILE *fp) //buf:缓冲区,fp:要读取的文件
{
size_t bytes_requested = size * count;
size_t bytes_read;
CHECK_FILE (fp, 0);
if (bytes_requested == 0)
return 0;
_IO_acquire_lock (fp);
bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
_IO_release_lock (fp);
return bytes_requested == bytes_read ? count : bytes_read / size;
}
libc_hidden_def (_IO_fread)

主要是调用_IO_sgetn函数

1
2
3
4
5
6
7
size_t
_IO_sgetn (FILE *fp, void *data, size_t n)
{
/* FIXME handle putback buffer here! */
return _IO_XSGETN (fp, data, n);
}
libc_hidden_def (_IO_sgetn)

而其中调用了_IO_XSGETN其中又#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)宏定义了为_IO_file_xsgetn

_IO_file_xsgetn

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
size_t
_IO_file_xsgetn (FILE *fp, void *data, size_t n)/*fp:指向要读取数据的文件流的指针。data:指向用户缓冲区的指针。n:要读取的数据的数量。*/
{
size_t want, have;
ssize_t count;
char *s = data; //s是用户缓冲区

want = n; //want是要存储数据的数量

if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp); //first:如果未创建缓冲区则调用_IO_doallocbuf创建
}

while (want > 0)
{
have = fp->_IO_read_end - fp->_IO_read_ptr; //输入缓冲区的数据
if (want <= have)
{
memcpy (s, fp->_IO_read_ptr, want); //second:如果输入缓冲区有data,则拷贝输入缓冲区
fp->_IO_read_ptr += want;
want = 0;
}
else //third:如果输入缓冲区为0或不满足需求
{
if (have > 0)
{
s = __mempcpy (s, fp->_IO_read_ptr, have); //先将能拷贝的拷贝到用户缓冲区
want -= have;
fp->_IO_read_ptr += have;
}

/* Check for backup and repeat */
if (_IO_in_backup (fp)) //_IO_IN_BACKUP 标志位用于指示文件流是否有一个输入缓冲区的备份
{
_IO_switch_to_main_get_area (fp); //切换文件流的输入缓冲区为主要区域
continue;
}

/* If we now want less than a buffer, underflow and repeat
the copy. Otherwise, _IO_SYSREAD directly to
the user buffer. */
if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
{
if (__underflow (fp) == EOF)
break;

continue;
}

/* These must be set before the sysread as we might longjmp out
waiting for input. */
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
_IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);

/* Try to maintain alignment: read a whole number of blocks. */
count = want;
if (fp->_IO_buf_base)
{
size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}

count = _IO_SYSREAD (fp, s, count);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;

break;
}

s += count;
want -= count;
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
}
}

return n - want;
}
libc_hidden_def (_IO_file_xsgetn)

该file虚表函数分为三个部分:

  • 首先是判断是否存在输入缓冲区,如果没有则调用_IO_doallocbuf来初始化输入缓冲区;
  • 然后再判断输入缓冲区是否有足够的数据空间,如果有则直接拷贝到用户缓冲区;
  • 如果没有则调用__underflow扩大输入缓冲区,再拷贝至用户缓冲区

初始化部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
 size_t want, have;
ssize_t count;
char *s = data; //s是用户缓冲区

want = n; //want是要存储数据的数量

if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp); //first:如果未创建缓冲区则调用_IO_doallocbuf创建
}

首先会判断是否存在reserve area即fp->_IO_buf_base == NULL,如果为空的话,就说明resever area还没有被分配出来,则会调用_IO_doallocbuf分配一个reserve area

分配reserve area

1
2
3
4
5
6
7
8
9
10
11
void
_IO_doallocbuf (FILE *fp)
{
if (fp->_IO_buf_base)
return;
if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
if (_IO_DOALLOCATE (fp) != EOF)
return;
_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
}
libc_hidden_def (_IO_doallocbuf)

其中又调用了系统调用_IO_DOALLOCATE,其中调用了IO_file_doallocate,又调用了vtable中的_IO_file_stat函数,而后又执行了系统调用fstat,这个系统调用是来获取文件状态,并且初始化st结构体的。可以看到此时的st_blksize为4096,而这个st_blksize也就是接下来malloc申请的内存大小(也就是reserve area的大小)

image-20240302213145338

而后出来之后又调用_IO_setb

1
2
3
4
5
6
7
8
9
10
11
12
13
void
_IO_setb (FILE *f, char *b, char *eb, int a)
{
if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
free (f->_IO_buf_base);
f->_IO_buf_base = b;
f->_IO_buf_end = eb;
if (a)
f->_flags &= ~_IO_USER_BUF;
else
f->_flags |= _IO_USER_BUF;
}
libc_hidden_def (_IO_setb)

这个函数主要就是对_IO_buf_base_IO_buf_end指针进行赋值。

_IO_doallocbuf函数主要是将resever area申请出来(大小为0x1000),并且对_IO_buf_base_IO_buf_end指针进行赋值

从缓冲区拷贝数据部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
 while (want > 0)
{
have = fp->_IO_read_end - fp->_IO_read_ptr; //输入缓冲区的数据
if (want <= have)
{
memcpy (s, fp->_IO_read_ptr, want); //second:如果输入缓冲区有data,则拷贝输入缓冲区
fp->_IO_read_ptr += want;
want = 0;
}
else
{
if (have > 0)
{
s = __mempcpy (s, fp->_IO_read_ptr, have); //先将能拷贝的拷贝到用户缓冲区
want -= have;
fp->_IO_read_ptr += have;
}

结合源码非常好理解

缓冲区缺乏的处理部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
  /* Check for backup and repeat */            //third:如果输入缓冲区为0或不满足需求的处理
if (_IO_in_backup (fp)) //_IO_IN_BACKUP 标志位用于指示文件流是否有一个输入缓冲区的备份
{
_IO_switch_to_main_get_area (fp); //切换文件流的输入缓冲区为主要区域
continue;
}

/* If we now want less than a buffer, underflow and repeat
the copy. Otherwise, _IO_SYSREAD directly to
the user buffer. */
if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
{
if (__underflow (fp) == EOF)
break;

continue;
}

/* These must be set before the sysread as we might longjmp out
waiting for input. */
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
_IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);

/* Try to maintain alignment: read a whole number of blocks. */
count = want;
if (fp->_IO_buf_base)
{
size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}

count = _IO_SYSREAD (fp, s, count); //直接读取数据到用户缓冲区
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;

break;
}

s += count; //更新指针和临时数据
want -= count;
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count); //更新文件流指针
}
}

return n - want;
}

首先先是如果有缓冲区的备份,那么就会切换输入缓冲区为主要区域(这个不明白),然后如果reserve area区域足够(want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base)))且存在,则会调用__underflow,之后出来便是

1
2
3
4
5
(fp)->_IO_read_base = (fp->_IO_buf_base)
(fp)->_IO_read_ptr = (fp->_IO_buf_base)
(fp)->_IO_read_end = (fp->_IO_buf_base)
(fp)->_IO_write_base = (fp)->_IO_write_ptr = fp->_IO_buf_base
(fp)->_IO_write_end = (fp->_IO_buf_base)

疯狂赋值都赋值为_IO_buf_base

1
2
3
4
5
6
7
8
 /* Try to maintain alignment: read a whole number of blocks.  */
count = want;
if (fp->_IO_buf_base)
{
size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}

接下来的这个是如果缓冲区足够大,大于等于128,就会进行对齐的操作

1
2
3
4
5
6
7
8
9
10
 count = _IO_SYSREAD (fp, s, count);       //直接读取数据到用户缓冲区
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;

break;
}

之后便是直接调用_IO_SYSREAD直接读取数据到用户缓冲区

1
2
3
4
5
6
7
8
  s += count;            //更新指针和临时数据
want -= count;
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count); //更新文件流指针
}
}

return n - want;

搞完一切,就更新文件流的指针,之后直接return就结束了

__underflow

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
int
__underflow (FILE *fp)
{
if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
return EOF;

if (fp->_mode == 0)
_IO_fwide (fp, -1);
if (_IO_in_put_mode (fp))
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
}
if (_IO_have_markers (fp))
{
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
return _IO_UNDERFLOW (fp);
}
libc_hidden_def (__underflow)

这里面首先是大量的check,之后return了一个_IO_UNDERFLOW函数,这个是main

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
int
_IO_new_file_underflow (FILE *fp)
{
ssize_t count;

/* C99 requires EOF to be "sticky". */
if (fp->_flags & _IO_EOF_SEEN)
return EOF;

if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;

if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}

/* FIXME This can/should be moved to genops ?? */
if (fp->_flags & (_IO_LINE_BUF|_IO_UNBUFFERED))
{
/* We used to flush all line-buffered stream. This really isn't
required by any standard. My recollection is that
traditional Unix systems did this for stdout. stderr better
not be line buffered. So we do just that here
explicitly. --drepper */
_IO_acquire_lock (stdout);

if ((stdout->_flags & (_IO_LINKED | _IO_NO_WRITES | _IO_LINE_BUF))
== (_IO_LINKED | _IO_LINE_BUF))
_IO_OVERFLOW (stdout, EOF);

_IO_release_lock (stdout);
}

_IO_switch_to_get_mode (fp);

/* This is very tricky. We have to adjust those
pointers before we call _IO_SYSREAD () since
we may longjump () out while waiting for
input. Those pointers may be screwed up. H.J. */
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;

count = _IO_SYSREAD (fp, fp->_IO_buf_base,
fp->_IO_buf_end - fp->_IO_buf_base);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN, count = 0;
}
fp->_IO_read_end += count;
if (count == 0)
{
/* If a stream is read to EOF, the calling application may switch active
handles. As a result, our offset cache would no longer be valid, so
unset it. */
fp->_offset = _IO_pos_BAD;
return EOF;
}
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
return *(unsigned char *) fp->_IO_read_ptr;
}
libc_hidden_ver (_IO_new_file_underflow, _IO_file_underflow)

首先也先是大量的检查,然后都赋值为_IO_buf_base

1
2
3
4
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;

之后再次调用_IO_SYSREAD函数来将数据读到输入缓冲区,之后再一些check就return了

_IO_SYSREAD

1
2
3
4
5
6
7
8
ssize_t
_IO_file_read (FILE *fp, void *buf, ssize_t size)
{
return (__builtin_expect (fp->_flags2 & _IO_FLAGS2_NOTCANCEL, 0)
? __read_nocancel (fp->_fileno, buf, size)
: __read (fp->_fileno, buf, size));
}
libc_hidden_def (_IO_file_read)

一个非常底层的操作,就不过多赘述了,能大致了解功能应该就差不多?

scanf

1
2
3
4
5
read
_IO_new_file_underflow at fileops.c
__GI__IO_default_uflow at genops.c
_IO_vfscanf_internal at vfscanf.c
__isoc99_scanf at at isoc99_scanf.c

栈回溯

可以看到scanf最终是调用stdin的vtable中的_IO_new_file_underflow去调用read的。

不过它并不是由_IO_file_xsgetn调用的,而是使用vtable中的__uflow,源码如下:

1
2
3
4
5
6
7
8
9
10
C
int
_IO_default_uflow (_IO_FILE *fp)
{
int ch = _IO_UNDERFLOW (fp);
if (ch == EOF)
return EOF;
return *(unsigned char *) fp->_IO_read_ptr++;
}
libc_hidden_def (_IO_default_uflow)

__uflow函数首先直接调用_IO_new_file_underflow,因此最终也是_IO_new_file_underflow实现的输入。之后其只返回_IO_read_ptr处的一个字符

gets

1
2
3
4
read
__GI__IO_file_underflow
__GI__IO_default_uflow
gets

函数调用栈与scanf基本一致:

fwrite

_IO_fwrite

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
size_t
_IO_fwrite (const void *buf, size_t size, size_t count, FILE *fp)//buf:要写入的数据的地址,fp:要写入的文件流指针
{
size_t request = size * count;
size_t written = 0;
CHECK_FILE (fp, 0);
if (request == 0)
return 0;
_IO_acquire_lock (fp);
if (_IO_vtable_offset (fp) != 0 || _IO_fwide (fp, -1) == -1)
written = _IO_sputn (fp, (const char *) buf, request);
_IO_release_lock (fp);
/* We have written all of the input in case the return value indicates
this or EOF is returned. The latter is a special case where we
simply did not manage to flush the buffer. But the data is in the
buffer and therefore written as far as fwrite is concerned. */
if (written == request || written == EOF)
return count;
else
return written / size;
}
libc_hidden_def (_IO_fwrite)

主要是调用_IO_sputn

_IO_new_file_xsputn

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
size_t
_IO_new_file_xsputn (FILE *f, const void *data, size_t n)
{
const char *s = (const char *) data; //s是用户缓冲区
size_t to_do = n;
int must_flush = 0;
size_t count = 0;

if (n <= 0)
return 0;
/* This is an optimized implementation.
If the amount to be written straddles a block boundary
(or the filebuf is unbuffered), use sys_write directly. */

/* First figure out how much space is available in the buffer. */
if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING)) //标志为当前为行缓冲模式且为写入状态
{
count = f->_IO_buf_end - f->_IO_write_ptr; //count是输入缓冲区的大小
if (count >= n)
{
const char *p;
for (p = s + n; p > s; )
{
if (*--p == '\n')
{
count = p - s + 1;
must_flush = 1;
break;
}
}
}
}
else if (f->_IO_write_end > f->_IO_write_ptr)
count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */

/* Then fill the buffer. */
if (count > 0)
{
if (count > to_do)
count = to_do;
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count); //直接拷贝数据(all or partial)
s += count;
to_do -= count;
}
if (to_do + must_flush > 0)
{
size_t block_size, do_write;
/* Next flush the (full) buffer. */
if (_IO_OVERFLOW (f, EOF) == EOF) //刷新已满缓冲区
/* If nothing else has to be written we must not signal the
caller that everything has been written. */
return to_do == 0 ? EOF : n - to_do;

/* Try to maintain alignment: write a whole number of blocks. */
block_size = f->_IO_buf_end - f->_IO_buf_base;
do_write = to_do - (block_size >= 128 ? to_do % block_size : 0); //较大:对齐

if (do_write)
{
count = new_do_write (f, s, do_write); //调用系统调用读取
to_do -= count;
if (count < do_write)
return n - to_do;
}

/* Now write out the remainder. Normally, this will fit in the
buffer, but it's somewhat messier for line-buffered files,
so we let _IO_default_xsputn handle the general case. */
if (to_do)
to_do -= _IO_default_xsputn (f, s+do_write, to_do); //剩余数据的写入:因为行缓冲模式较难实现
}
return n - to_do;
}
libc_hidden_ver (_IO_new_file_xsputn, _IO_file_xsputn)

我认为这个_IO_new_file_xsputn可以分为三个部分:

  • 首先是计算有多少可用空间
  • 然后是填充缓冲区
  • 之后刷新缓冲区,并写入数据
  • 最后是对剩余数据的处理

计算可用空间部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
 const char *s = (const char *) data;       //s是用户缓冲区
size_t to_do = n;
int must_flush = 0;
size_t count = 0;

if (n <= 0)
return 0;
/* This is an optimized implementation.
If the amount to be written straddles a block boundary
(or the filebuf is unbuffered), use sys_write directly. */

/* First figure out how much space is available in the buffer. */
if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING)) //标志为当前为行缓冲模式且为写入状态
{
count = f->_IO_buf_end - f->_IO_write_ptr; //count是输入缓冲区的大小
if (count >= n)
{
const char *p;
for (p = s + n; p > s; )
{
if (*--p == '\n')
{
count = p - s + 1;
must_flush = 1;
break;
}
}
}
}
else if (f->_IO_write_end > f->_IO_write_ptr)
count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */

其中会遍历缓冲区,如果有换行符就会+1,这是因为行缓冲区在遇到换行符或者缓冲区满了之后才会停止,只有+1才能保证换行符前面的数据都被写入,之后还会must_flush赋值为1

填充缓冲区部分

1
2
3
4
5
6
7
8
9
 /* Then fill the buffer. */
if (count > 0)
{
if (count > to_do)
count = to_do;
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count); //直接拷贝数据(all or partial)
s += count;
to_do -= count;
}

这就很好理解了

刷新缓冲区并写入部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
if (to_do + must_flush > 0)
{
size_t block_size, do_write;
/* Next flush the (full) buffer. */
if (_IO_OVERFLOW (f, EOF) == EOF) //刷新已满缓冲区
/* If nothing else has to be written we must not signal the
caller that everything has been written. */
return to_do == 0 ? EOF : n - to_do;

/* Try to maintain alignment: write a whole number of blocks. */
block_size = f->_IO_buf_end - f->_IO_buf_base;
do_write = to_do - (block_size >= 128 ? to_do % block_size : 0); //较大:对齐

if (do_write)
{
count = new_do_write (f, s, do_write); //调用系统调用读取
to_do -= count;
if (count < do_write)
return n - to_do;
}

如果之前有赋值过must_flush又或者此时还有数据没写入,则会刷新缓冲区,然后如果缓冲区很大,则会对齐设置,之后便是调用new_do_write函数,而这个里面是调用了系统调用_IO_SYSWRITE

剩余数据的处理部分

1
2
3
4
5
6
7
8
9
      /* Now write out the remainder.  Normally, this will fit in the
buffer, but it's somewhat messier for line-buffered files,
so we let _IO_default_xsputn handle the general case. */
if (to_do)
to_do -= _IO_default_xsputn (f, s+do_write, to_do); //剩余数据的写入:因为行缓冲模式较难实现
}
return n - to_do;
}
libc_hidden_ver (_IO_new_file_xsputn, _IO_file_xsputn)

之后便是读入剩余数据,因为行缓冲区需要遇到换行符或者缓冲区满了才会写入,因此不好读取剩余的数据,就会调用_IO_default_xsputn来读取剩余数据,之后return就结束了

_IO_OVERFLOW

1
#define _IO_OVERFLOW(FP, CH) JUMP1 (__overflow, FP, CH)

_IO_new_file_overflow

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
int
_IO_new_file_overflow (FILE *f, int ch)
{
if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
{
f->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
/* If currently reading or no buffer allocated. */
if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL) //当前没有正在进行写入操作或者没有分配缓冲区
{
/* Allocate a buffer if needed. */
if (f->_IO_write_base == NULL)
{
_IO_doallocbuf (f); //初始化缓冲区
_IO_setg (f, f->_IO_buf_base, f->_IO_buf_base, f->_IO_buf_base);
}
/* Otherwise must be currently reading.
If _IO_read_ptr (and hence also _IO_read_end) is at the buffer end,
logically slide the buffer forwards one block (by setting the
read pointers to all point at the beginning of the block). This
makes room for subsequent output.
Otherwise, set the read pointers to _IO_read_end (leaving that
alone, so it can continue to correspond to the external position). */
if (__glibc_unlikely (_IO_in_backup (f)))
{
size_t nbackup = f->_IO_read_end - f->_IO_read_ptr;
_IO_free_backup_area (f);
f->_IO_read_base -= MIN (nbackup,
f->_IO_read_base - f->_IO_buf_base);
f->_IO_read_ptr = f->_IO_read_base;
}

if (f->_IO_read_ptr == f->_IO_buf_end)
f->_IO_read_end = f->_IO_read_ptr = f->_IO_buf_base;
f->_IO_write_ptr = f->_IO_read_ptr;
f->_IO_write_base = f->_IO_write_ptr;
f->_IO_write_end = f->_IO_buf_end;
f->_IO_read_base = f->_IO_read_ptr = f->_IO_read_end;

f->_flags |= _IO_CURRENTLY_PUTTING;
if (f->_mode <= 0 && f->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
f->_IO_write_end = f->_IO_write_ptr;
}
if (ch == EOF)
return _IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base);
if (f->_IO_write_ptr == f->_IO_buf_end ) /* Buffer is really full */
if (_IO_do_flush (f) == EOF) //缓冲区满了就会刷新
return EOF;
*f->_IO_write_ptr++ = ch;
if ((f->_flags & _IO_UNBUFFERED)
|| ((f->_flags & _IO_LINE_BUF) && ch == '\n'))
if (_IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base) == EOF)
return EOF;
return (unsigned char) ch;
}
libc_hidden_ver (_IO_new_file_overflow, _IO_file_overflow)

可以看到除了第50行刷新缓冲区的操作外,第11-45行的if操作是在当前没有正在进行写入操作或者没有分配缓冲区的前提下进行的,大概的功能是了解的,就不过多赘述,所以这段的主要功能便是检查加刷新缓冲区

new_do_write

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
static size_t
new_do_write (FILE *fp, const char *data, size_t to_do)
{
size_t count;
if (fp->_flags & _IO_IS_APPENDING) //该文件是追加模式打开
/* On a system without a proper O_APPEND implementation,
you would need to sys_seek(0, SEEK_END) here, but is
not needed nor desirable for Unix- or Posix-like systems.
Instead, just indicate that offset (before and after) is
unpredictable. */
fp->_offset = _IO_pos_BAD; //表明偏移不可预测
else if (fp->_IO_read_end != fp->_IO_write_base) //如果不等,则说明还有数据未读取
{
off64_t new_pos
= _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1); //使用该调用,能让指针到正确位置,避免覆盖未读取数据
if (new_pos == _IO_pos_BAD)
return 0;
fp->_offset = new_pos;
}
count = _IO_SYSWRITE (fp, data, to_do); //真正读取数据
if (fp->_cur_column && count)
fp->_cur_column = _IO_adjust_column (fp->_cur_column - 1, data, count) + 1;
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base); //重置读指针的位置
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_buf_base;
fp->_IO_write_end = (fp->_mode <= 0
&& (fp->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
? fp->_IO_buf_base : fp->_IO_buf_end);
return count;
}

主要调用_IO_SYSWRITE

_IO_new_file_write

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
ssize_t
_IO_new_file_write (FILE *f, const void *data, ssize_t n)
{
ssize_t to_do = n;
while (to_do > 0)
{
ssize_t count = (__builtin_expect (f->_flags2
& _IO_FLAGS2_NOTCANCEL, 0)
? __write_nocancel (f->_fileno, data, to_do)
: __write (f->_fileno, data, to_do));
if (count < 0)
{
f->_flags |= _IO_ERR_SEEN;
break;
}
to_do -= count;
data = (void *) ((char *) data + count);
}
n -= to_do;
if (f->_offset >= 0)
f->_offset += n;
return n;
}

主要是调用__write_nocancel__write来读取数据,想要仔细了解就去看源码吧,只能说是很底层的调用了

_IO_default_xsputn

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
size_t
_IO_default_xsputn (FILE *f, const void *data, size_t n)
{
const char *s = (char *) data;
size_t more = n;
if (more <= 0)
return 0;
for (;;)
{
/* Space available. */
if (f->_IO_write_ptr < f->_IO_write_end)
{
size_t count = f->_IO_write_end - f->_IO_write_ptr;
if (count > more)
count = more;
if (count > 20)
{
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
}
else if (count)
{
char *p = f->_IO_write_ptr;
ssize_t i;
for (i = count; --i >= 0; )
*p++ = *s++;
f->_IO_write_ptr = p;
}
more -= count;
}
if (more == 0 || _IO_OVERFLOW (f, (unsigned char) *s++) == EOF)
break;
more--;
}
return n - more;
}
libc_hidden_def (_IO_default_xsputn)

这里有两种读取方式,也是比较好理解的这段源码,应该是为了性能最佳的考虑吧

printf

调用栈

1
2
3
4
5
6
► f 0   0x7f6117fd43b0 write
f 1 0x7f6117f55c0f _IO_file_write+143
f 2 0x7f6117f5639a _IO_file_xsputn+426
f 3 0x7f6117f2cfa4 buffered_vfprintf+308
f 4 0x7f6117f2a33d vfprintf+445
f 5 0x7f6117f328a9 printf+153

puts

调用栈与fwrite大致相同

至于fclose由于本菜鸡懒得写了,参考file函数学习 | ixout’s blog学长的博客哈哈哈