From 43ea4aabb53ac03ba32bd9636b9b7b86b06b92fb Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 7 Oct 2014 16:46:08 -0700 Subject: wc: don't miscount /sys and similar file systems Fix similar problems in head, od, split, tac, and tail. Reported by George Shuklin in: http://bugs.gnu.org/18621 * NEWS: Document this. * src/head.c (elseek): Move up. (elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg CURRENT_POS. All uses changed. (elide_tail_bytes_file, elide_tail_lines_file): New arg ST and remove arg SIZE. All uses changed. * src/head.c (elide_tail_bytes_file): * src/od.c (skip): Avoid optimization for /sys files, where st_size is bogus and st_size == st_blksize. Don't report error at EOF when not optimizing. * src/head.c, src/od.c, src/tail.c: Include "stat-size.h". * src/split.c (input_file_size): New function. (bytes_split, lines_chunk_split, bytes_chunk_extract): New arg INITIAL_READ. All uses changed. Use it to double-check st_size. * src/tac.c (tac_seekable): New arg FILE_POS. All uses changed. (copy_to_temp): Return size of temp file. All uses changed. * src/tac.c (tac_seekable): * src/tail.c (tail_bytes): * src/wc.c (wc): Don't trust st_size; double-check by reading. * src/wc.c (wc): New arg CURRENT_POS. All uses changed. * tests/local.mk (all_tests): Add tests/misc/wc-proc.sh, tests/misc/od-j.sh, tests/tail-2/tail-c.sh. * tests/misc/head-c.sh: * tests/misc/tac-2-nonseekable.sh: * tests/split/b-chunk.sh: Add tests for problems with /proc and /sys files. * tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh: New files. 
(cherry picked from commit 2662702b9e8643f62c670bbf2fa94b1be1ccf9af) Conflicts: NEWS --- src/head.c | 147 ++++++++++++++++++++++++---------------- src/od.c | 23 +++++-- src/split.c | 146 ++++++++++++++++++++++++++++++--------- src/tac.c | 70 ++++++++++++------- src/tail.c | 43 +++++------- src/wc.c | 45 ++++++------ tests/local.mk | 3 + tests/misc/head-c.sh | 12 ++++ tests/misc/od-j.sh | 39 +++++++++++ tests/misc/tac-2-nonseekable.sh | 14 +++- tests/misc/wc-proc.sh | 32 +++++++++ tests/split/b-chunk.sh | 39 +++++++---- tests/tail-2/tail-c.sh | 35 ++++++++++ 13 files changed, 460 insertions(+), 188 deletions(-) create mode 100755 tests/misc/od-j.sh create mode 100755 tests/misc/wc-proc.sh create mode 100755 tests/tail-2/tail-c.sh diff --git a/src/head.c b/src/head.c index 65bd52a..4de9465 100644 --- a/src/head.c +++ b/src/head.c @@ -36,6 +36,7 @@ #include "quote.h" #include "quotearg.h" #include "safe-read.h" +#include "stat-size.h" #include "xfreopen.h" #include "xstrtol.h" @@ -206,13 +207,42 @@ copy_fd (int src_fd, uintmax_t n_bytes) return COPY_FD_OK; } -/* Print all but the last N_ELIDE bytes from the input available via - the non-seekable file descriptor FD. Return true upon success. +/* Call lseek (FD, OFFSET, WHENCE), where file descriptor FD + corresponds to the file FILENAME. WHENCE must be SEEK_SET or + SEEK_CUR. Return the resulting offset. Give a diagnostic and + return -1 if lseek fails. */ + +static off_t +elseek (int fd, off_t offset, int whence, char const *filename) +{ + off_t new_offset = lseek (fd, offset, whence); + char buf[INT_BUFSIZE_BOUND (offset)]; + + if (new_offset < 0) + error (0, errno, + _(whence == SEEK_SET + ? N_("%s: cannot seek to offset %s") + : N_("%s: cannot seek to relative offset %s")), + quotearg_colon (filename), + offtostr (offset, buf)); + + return new_offset; +} + +/* For an input file with name FILENAME and descriptor FD, + output all but the last N_ELIDE_0 bytes. 
+ If CURRENT_POS is nonnegative, assume that the input file is + positioned at CURRENT_POS and that it should be repositioned to + just before the elided bytes before returning. + Return true upon success. Give a diagnostic and return false upon error. */ static bool -elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0) +elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0, + off_t current_pos) { size_t n_elide = n_elide_0; + uintmax_t desired_pos = current_pos; + bool ok = true; #ifndef HEAD_TAIL_PIPE_READ_BUFSIZE # define HEAD_TAIL_PIPE_READ_BUFSIZE BUFSIZ @@ -251,7 +281,6 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0) if (n_elide <= HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD) { - bool ok = true; bool first = true; bool eof = false; size_t n_to_read = READ_BUFSIZE + n_elide; @@ -293,22 +322,26 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0) /* Output any (but maybe just part of the) elided data from the previous round. */ if (! first) - xwrite_stdout (b[!i] + READ_BUFSIZE, n_elide - delta); + { + desired_pos += n_elide - delta; + xwrite_stdout (b[!i] + READ_BUFSIZE, n_elide - delta); + } first = false; if (n_elide < n_read) - xwrite_stdout (b[i], n_read - n_elide); + { + desired_pos += n_read - n_elide; + xwrite_stdout (b[i], n_read - n_elide); + } } free (b[0]); - return ok; } else { /* Read blocks of size READ_BUFSIZE, until we've read at least n_elide bytes. Then, for each new buffer we read, also write an old one. */ - bool ok = true; bool eof = false; size_t n_read; bool buffered_enough; @@ -357,7 +390,10 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0) buffered_enough = true; if (buffered_enough) - xwrite_stdout (b[i_next], n_read); + { + desired_pos += n_read; + xwrite_stdout (b[i_next], n_read); + } } /* Output any remainder: rem bytes from b[i] + n_read. 
*/ @@ -366,6 +402,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0) if (buffered_enough) { size_t n_bytes_left_in_b_i = READ_BUFSIZE - n_read; + desired_pos += rem; if (rem < n_bytes_left_in_b_i) { xwrite_stdout (b[i] + n_read, rem); @@ -392,6 +429,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0) */ size_t y = READ_BUFSIZE - rem; size_t x = n_read - y; + desired_pos += x; xwrite_stdout (b[i_next], x); } } @@ -400,36 +438,16 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0) for (i = 0; i < n_alloc; i++) free (b[i]); free (b); - - return ok; } -} - -/* Call lseek (FD, OFFSET, WHENCE), where file descriptor FD - corresponds to the file FILENAME. WHENCE must be SEEK_SET or - SEEK_CUR. Return the resulting offset. Give a diagnostic and - return -1 if lseek fails. */ - -static off_t -elseek (int fd, off_t offset, int whence, char const *filename) -{ - off_t new_offset = lseek (fd, offset, whence); - char buf[INT_BUFSIZE_BOUND (offset)]; - if (new_offset < 0) - error (0, errno, - _(whence == SEEK_SET - ? N_("%s: cannot seek to offset %s") - : N_("%s: cannot seek to relative offset %s")), - quotearg_colon (filename), - offtostr (offset, buf)); - - return new_offset; + if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0) + ok = false; + return ok; } /* For the file FILENAME with descriptor FD, output all but the last N_ELIDE bytes. If SIZE is nonnegative, this is a regular file positioned - at START_POS with SIZE bytes. Return true on success. + at CURRENT_POS with SIZE bytes. Return true on success. Give a diagnostic and return false upon error. 
*/ /* NOTE: if the input file shrinks by more than N_ELIDE bytes between @@ -437,10 +455,11 @@ elseek (int fd, off_t offset, int whence, char const *filename) static bool elide_tail_bytes_file (const char *filename, int fd, uintmax_t n_elide, - off_t current_pos, off_t size) + struct stat const *st, off_t current_pos) { - if (size < 0) - return elide_tail_bytes_pipe (filename, fd, n_elide); + off_t size = st->st_size; + if (size <= ST_BLKSIZE (*st)) + return elide_tail_bytes_pipe (filename, fd, n_elide, current_pos); else { /* Be careful here. The current position may actually be @@ -460,13 +479,16 @@ elide_tail_bytes_file (const char *filename, int fd, uintmax_t n_elide, } } -/* Print all but the last N_ELIDE lines from the input stream - open for reading via file descriptor FD. +/* For an input file with name FILENAME and descriptor FD, + output all but the last N_ELIDE_0 bytes. + If CURRENT_POS is nonnegative, the input file is positioned there + and should be repositioned to just before the elided bytes. Buffer the specified number of lines as a linked list of LBUFFERs, adding them as needed. Return true if successful. */ static bool -elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide) +elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide, + off_t current_pos) { struct linebuffer { @@ -475,6 +497,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide) size_t nlines; struct linebuffer *next; }; + uintmax_t desired_pos = current_pos; typedef struct linebuffer LBUFFER; LBUFFER *first, *last, *tmp; size_t total_lines = 0; /* Total number of newlines in all buffers. */ @@ -497,6 +520,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide) if (! 
n_elide) { + desired_pos += n_read; xwrite_stdout (tmp->buffer, n_read); continue; } @@ -536,6 +560,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide) last = last->next = tmp; if (n_elide < total_lines - first->nlines) { + desired_pos += first->nbytes; xwrite_stdout (first->buffer, first->nbytes); tmp = first; total_lines -= first->nlines; @@ -565,6 +590,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide) for (tmp = first; n_elide < total_lines - tmp->nlines; tmp = tmp->next) { + desired_pos += tmp->nbytes; xwrite_stdout (tmp->buffer, tmp->nbytes); total_lines -= tmp->nlines; } @@ -581,6 +607,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide) ++tmp->nlines; --n; } + desired_pos += p - tmp->buffer; xwrite_stdout (tmp->buffer, p - tmp->buffer); } @@ -591,6 +618,9 @@ free_lbuffers: free (first); first = tmp; } + + if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0) + ok = false; return ok; } @@ -714,10 +744,11 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd, static bool elide_tail_lines_file (const char *filename, int fd, uintmax_t n_elide, - off_t current_pos, off_t size) + struct stat const *st, off_t current_pos) { - if (size < 0) - return elide_tail_lines_pipe (filename, fd, n_elide); + off_t size = st->st_size; + if (size <= ST_BLKSIZE (*st)) + return elide_tail_lines_pipe (filename, fd, n_elide, current_pos); else { /* Find the offset, OFF, of the Nth newline from the end, @@ -802,28 +833,24 @@ head (const char *filename, int fd, uintmax_t n_units, bool count_lines, if (elide_from_end) { - off_t current_pos = -1, size = -1; - if (! 
presume_input_pipe) + off_t current_pos = -1; + struct stat st; + if (fstat (fd, &st) != 0) { - struct stat st; - if (fstat (fd, &st) != 0) - { - error (0, errno, _("cannot fstat %s"), - quotearg_colon (filename)); - return false; - } - if (S_ISREG (st.st_mode)) - { - size = st.st_size; - current_pos = elseek (fd, 0, SEEK_CUR, filename); - if (current_pos < 0) - return false; - } + error (0, errno, _("cannot fstat %s"), + quotearg_colon (filename)); + return false; + } + if (! presume_input_pipe && usable_st_size (&st)) + { + current_pos = elseek (fd, 0, SEEK_CUR, filename); + if (current_pos < 0) + return false; } if (count_lines) - return elide_tail_lines_file (filename, fd, n_units, current_pos, size); + return elide_tail_lines_file (filename, fd, n_units, &st, current_pos); else - return elide_tail_bytes_file (filename, fd, n_units, current_pos, size); + return elide_tail_bytes_file (filename, fd, n_units, &st, current_pos); } if (count_lines) return head_lines (filename, fd, n_units); diff --git a/src/od.c b/src/od.c index 7bc0e2a..e26518c 100644 --- a/src/od.c +++ b/src/od.c @@ -27,6 +27,7 @@ #include "error.h" #include "ftoastr.h" #include "quote.h" +#include "stat-size.h" #include "xfreopen.h" #include "xprintf.h" #include "xstrtol.h" @@ -1034,9 +1035,11 @@ skip (uintmax_t n_skip) If the number of bytes left to skip is larger than the size of the current file, we can decrement n_skip and go on to the next file. Skip this optimization also - when st_size is 0, because some kernels report that - nonempty files in /proc have st_size == 0. */ - if (S_ISREG (file_stats.st_mode) && 0 < file_stats.st_size) + when st_size is no greater than the block size, because + some kernels report nonsense small file sizes for + proc-like file systems. 
*/ + if (usable_st_size (&file_stats) + && ST_BLKSIZE (file_stats) < file_stats.st_size) { if ((uintmax_t) file_stats.st_size < n_skip) n_skip -= file_stats.st_size; @@ -1052,6 +1055,7 @@ skip (uintmax_t n_skip) } /* If it's not a regular file with nonnegative size, + or if it's so small that it might be in a proc-like file system, position the file pointer by reading. */ else @@ -1067,10 +1071,15 @@ skip (uintmax_t n_skip) n_skip -= n_bytes_read; if (n_bytes_read != n_bytes_to_read) { - in_errno = errno; - ok = false; - n_skip = 0; - break; + if (ferror (in_stream)) + { + in_errno = errno; + ok = false; + n_skip = 0; + break; + } + if (feof (in_stream)) + break; } } } diff --git a/src/split.c b/src/split.c index dacacaa..ba74797 100644 --- a/src/split.c +++ b/src/split.c @@ -246,6 +246,37 @@ r/K/N likewise but only output Kth of N to stdout\n\ exit (status); } +/* Return the number of bytes that can be read from FD, a file with + apparent size SIZE. Actually read the data into BUF (of size + BUFSIZE) if the file appears to be smaller than BUFSIZE, as this + works better on proc-like file systems. If the returned value is + less than BUFSIZE, store all the file's data into BUF; otherwise, + restore the input file's position so that the file can be reread if + needed. */ + +static off_t +input_file_size (int fd, off_t size, char *buf, size_t bufsize) +{ + if (size < bufsize) + { + size = 0; + while (true) + { + size_t save = size < bufsize ? size : 0; + size_t n_read = safe_read (fd, buf + save, bufsize - save); + if (n_read == 0) + break; + if (n_read == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, "%s", infile); + size += n_read; + } + if (bufsize <= size && lseek (fd, - size, SEEK_CUR) < 0) + error (EXIT_FAILURE, errno, "%s", infile); + } + + return size; +} + /* Compute the next sequential output file name and store it into the string 'outfile'. 
*/ @@ -511,10 +542,13 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes) } /* Split into pieces of exactly N_BYTES bytes. - Use buffer BUF, whose size is BUFSIZE. */ + Use buffer BUF, whose size is BUFSIZE. + If INITIAL_READ != SIZE_MAX, the entire input file has already been + partly read into BUF and BUF contains INITIAL_READ input bytes. */ static void -bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files) +bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read, + uintmax_t max_files) { size_t n_read; bool new_file_flag = true; @@ -525,9 +559,17 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files) do { - n_read = safe_read (STDIN_FILENO, buf, bufsize); - if (n_read == SAFE_READ_ERROR) - error (EXIT_FAILURE, errno, "%s", infile); + if (initial_read != SIZE_MAX) + { + n_read = initial_read; + initial_read = SIZE_MAX; + } + else + { + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, "%s", infile); + } bp_out = buf; to_read = n_read; while (true) @@ -736,7 +778,7 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize) static void lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, - off_t file_size) + size_t initial_read, off_t file_size) { assert (n && k <= n && n <= file_size); @@ -751,7 +793,12 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, { /* Start reading 1 byte before kth chunk of file. 
*/ off_t start = (k - 1) * chunk_size - 1; - if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) + if (initial_read != SIZE_MAX) + { + memmove (buf, buf + start, initial_read - start); + initial_read -= start; + } + else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) error (EXIT_FAILURE, errno, "%s", infile); n_written = start; chunk_no = k - 1; @@ -761,10 +808,19 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, while (n_written < file_size) { char *bp = buf, *eob; - size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); - if (n_read == SAFE_READ_ERROR) - error (EXIT_FAILURE, errno, "%s", infile); - else if (n_read == 0) + size_t n_read; + if (initial_read != SIZE_MAX) + { + n_read = initial_read; + initial_read = SIZE_MAX; + } + else + { + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, "%s", infile); + } + if (n_read == 0) break; /* eof. */ n_read = MIN (n_read, file_size - n_written); chunk_truncated = false; @@ -841,7 +897,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, static void bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, - off_t file_size) + size_t initial_read, off_t file_size) { off_t start; off_t end; @@ -851,15 +907,29 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, start = (k - 1) * (file_size / n); end = (k == n) ? 
file_size : k * (file_size / n); - if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) + if (initial_read != SIZE_MAX) + { + memmove (buf, buf + start, initial_read - start); + initial_read -= start; + } + else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0) error (EXIT_FAILURE, errno, "%s", infile); while (start < end) { - size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); - if (n_read == SAFE_READ_ERROR) - error (EXIT_FAILURE, errno, "%s", infile); - else if (n_read == 0) + size_t n_read; + if (initial_read != SIZE_MAX) + { + n_read = initial_read; + initial_read = SIZE_MAX; + } + else + { + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, "%s", infile); + } + if (n_read == 0) break; /* eof. */ n_read = MIN (n_read, end - start); if (full_write (STDOUT_FILENO, buf, n_read) != n_read @@ -1403,22 +1473,34 @@ main (int argc, char **argv) if (in_blk_size == 0) in_blk_size = io_blksize (in_stat_buf); + void *b = xmalloc (in_blk_size + 1 + page_size - 1); + char *buf = ptr_align (b, page_size); + size_t initial_read = SIZE_MAX; + if (split_type == type_chunk_bytes || split_type == type_chunk_lines) { off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR); - if (usable_st_size (&in_stat_buf)) - file_size = in_stat_buf.st_size; - else if (0 <= input_offset) + if (0 <= input_offset) { - file_size = lseek (STDIN_FILENO, 0, SEEK_END); - input_offset = (file_size < 0 - ? file_size - : lseek (STDIN_FILENO, input_offset, SEEK_SET)); + if (usable_st_size (&in_stat_buf)) + { + file_size = input_file_size (STDIN_FILENO, in_stat_buf.st_size, + buf, in_blk_size); + if (file_size < in_blk_size) + initial_read = file_size; + } + else + { + file_size = lseek (STDIN_FILENO, 0, SEEK_END); + input_offset = (file_size < 0 + ? 
file_size + : lseek (STDIN_FILENO, input_offset, SEEK_SET)); + file_size -= input_offset; + } } if (input_offset < 0) error (EXIT_FAILURE, 0, _("%s: cannot determine file size"), quote (infile)); - file_size -= input_offset; /* Overflow, and sanity checking. */ if (OFF_T_MAX < n_units) { @@ -1431,9 +1513,6 @@ main (int argc, char **argv) file_size = MAX (file_size, n_units); } - void *b = xmalloc (in_blk_size + 1 + page_size - 1); - char *buf = ptr_align (b, page_size); - /* When filtering, closure of one pipe must not terminate the process, as there may still be other streams expecting input from us. */ if (filter_command) @@ -1454,7 +1533,7 @@ main (int argc, char **argv) break; case type_bytes: - bytes_split (n_units, buf, in_blk_size, 0); + bytes_split (n_units, buf, in_blk_size, SIZE_MAX, 0); break; case type_byteslines: @@ -1463,13 +1542,16 @@ main (int argc, char **argv) case type_chunk_bytes: if (k_units == 0) - bytes_split (file_size / n_units, buf, in_blk_size, n_units); + bytes_split (file_size / n_units, buf, in_blk_size, initial_read, + n_units); else - bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size); + bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read, + file_size); break; case type_chunk_lines: - lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size); + lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read, + file_size); break; case type_rr: diff --git a/src/tac.c b/src/tac.c index c76afc7..3f7b7f0 100644 --- a/src/tac.c +++ b/src/tac.c @@ -187,10 +187,11 @@ output (const char *start, const char *past_end) } /* Print in reverse the file open on descriptor FD for reading FILE. + The file is already positioned at FILE_POS, which should be near its end. Return true if successful. 
*/ static bool -tac_seekable (int input_fd, const char *file) +tac_seekable (int input_fd, const char *file, off_t file_pos) { /* Pointer to the location in 'G_buffer' where the search for the next separator will begin. */ @@ -203,9 +204,6 @@ tac_seekable (int input_fd, const char *file) /* Length of the record growing in 'G_buffer'. */ size_t saved_record_size; - /* Offset in the file of the next read. */ - off_t file_pos; - /* True if 'output' has not been called yet for any file. Only used when the separator is attached to the preceding record. */ bool first_time = true; @@ -213,27 +211,43 @@ tac_seekable (int input_fd, const char *file) char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */ size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */ - /* Find the size of the input file. */ - file_pos = lseek (input_fd, 0, SEEK_END); - if (file_pos < 1) - return true; /* It's an empty file. */ - /* Arrange for the first read to lop off enough to leave the rest of the file a multiple of 'read_size'. Since 'read_size' can change, this may not always hold during the program run, but since it usually will, leave it here for i/o efficiency (page/sector boundaries and all that). Note: the efficiency gain has not been verified. */ - saved_record_size = file_pos % read_size; - if (saved_record_size == 0) - saved_record_size = read_size; - file_pos -= saved_record_size; - /* 'file_pos' now points to the start of the last (probably partial) block - in the input file. */ + size_t remainder = file_pos % read_size; + if (remainder != 0) + { + file_pos -= remainder; + if (lseek (input_fd, file_pos, SEEK_SET) < 0) + error (0, errno, _("%s: seek failed"), quotearg_colon (file)); + } - if (lseek (input_fd, file_pos, SEEK_SET) < 0) - error (0, errno, _("%s: seek failed"), quotearg_colon (file)); + /* Scan backward, looking for end of file. This caters to proc-like + file systems where the file size is just an estimate. 
*/ + while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0 + && file_pos != 0) + { + off_t rsize = read_size; + if (lseek (input_fd, -rsize, SEEK_CUR) < 0) + error (0, errno, _("%s: seek failed"), quotearg_colon (file)); + file_pos -= read_size; + } - if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size) + /* Now scan forward, looking for end of file. */ + while (saved_record_size == read_size) + { + size_t nread = safe_read (input_fd, G_buffer, read_size); + if (nread == 0) + break; + saved_record_size = nread; + if (saved_record_size == SAFE_READ_ERROR) + break; + file_pos += nread; + } + + if (saved_record_size == SAFE_READ_ERROR) { error (0, errno, _("%s: read error"), quotearg_colon (file)); return false; @@ -485,15 +499,16 @@ temp_stream (FILE **fp, char **file_name) /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream - and file name. Return true if successful. */ + and file name. Return the number of bytes copied, or -1 on error. */ -static bool +static off_t copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file) { FILE *fp; char *file_name; + off_t bytes_copied = 0; if (!temp_stream (&fp, &file_name)) - return false; + return -1; while (1) { @@ -511,6 +526,8 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file) error (0, errno, _("%s: write error"), quotearg_colon (file_name)); goto Fail; } + + bytes_copied += bytes_read; } if (fflush (fp) != 0) @@ -521,11 +538,11 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file) *g_tmp = fp; *g_tempfile = file_name; - return true; + return bytes_copied; Fail: fclose (fp); - return false; + return -1; } /* Copy INPUT_FD to a temporary, then tac that file. 
@@ -536,10 +553,11 @@ tac_nonseekable (int input_fd, const char *file) { FILE *tmp_stream; char *tmp_file; - if (!copy_to_temp (&tmp_stream, &tmp_file, input_fd, file)) + off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file); + if (bytes_copied < 0) return false; - bool ok = tac_seekable (fileno (tmp_stream), tmp_file); + bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied); return ok; } @@ -578,7 +596,7 @@ tac_file (const char *filename) ok = (file_size < 0 || isatty (fd) ? tac_nonseekable (fd, filename) - : tac_seekable (fd, filename)); + : tac_seekable (fd, filename, file_size)); if (!is_stdin && close (fd) != 0) { diff --git a/src/tail.c b/src/tail.c index 5ff738d..647b15f 100644 --- a/src/tail.c +++ b/src/tail.c @@ -40,6 +40,7 @@ #include "posixver.h" #include "quote.h" #include "safe-read.h" +#include "stat-size.h" #include "stat-time.h" #include "xfreopen.h" #include "xnanosleep.h" @@ -1665,40 +1666,30 @@ tail_bytes (const char *pretty_filename, int fd, uintmax_t n_bytes, if (t) return t < 0; } - *read_pos += dump_remainder (pretty_filename, fd, COPY_TO_EOF); + n_bytes = COPY_TO_EOF; } else { - if ( ! presume_input_pipe - && S_ISREG (stats.st_mode) && n_bytes <= OFF_T_MAX) + off_t end_pos = ((! presume_input_pipe && usable_st_size (&stats) + && n_bytes <= OFF_T_MAX) + ? stats.st_size : -1); + if (end_pos <= ST_BLKSIZE (stats)) + return pipe_bytes (pretty_filename, fd, n_bytes, read_pos); + off_t current_pos = xlseek (fd, 0, SEEK_CUR, pretty_filename); + if (current_pos < end_pos) { - off_t current_pos = xlseek (fd, 0, SEEK_CUR, pretty_filename); - off_t end_pos = xlseek (fd, 0, SEEK_END, pretty_filename); - off_t diff = end_pos - current_pos; - /* Be careful here. The current position may actually be - beyond the end of the file. */ - off_t bytes_remaining = diff < 0 ? 
0 : diff; - off_t nb = n_bytes; - - if (bytes_remaining <= nb) - { - /* From the current position to end of file, there are no - more bytes than have been requested. So reposition the - file pointer to the incoming current position and print - everything after that. */ - *read_pos = xlseek (fd, current_pos, SEEK_SET, pretty_filename); - } - else + off_t bytes_remaining = end_pos - current_pos; + + if (n_bytes < bytes_remaining) { - /* There are more bytes remaining than were requested. - Back up. */ - *read_pos = xlseek (fd, -nb, SEEK_END, pretty_filename); + current_pos = end_pos - n_bytes; + xlseek (fd, current_pos, SEEK_SET, pretty_filename); } - *read_pos += dump_remainder (pretty_filename, fd, n_bytes); } - else - return pipe_bytes (pretty_filename, fd, n_bytes, read_pos); + *read_pos = current_pos; } + + *read_pos += dump_remainder (pretty_filename, fd, n_bytes); return true; } diff --git a/src/wc.c b/src/wc.c index 4909d9f..02355c8 100644 --- a/src/wc.c +++ b/src/wc.c @@ -184,9 +184,10 @@ write_counts (uintmax_t lines, /* Count words. FILE_X is the name of the file (or NULL for standard input) that is open on descriptor FD. *FSTATUS is its status. + CURRENT_POS is the current file offset if known, negative if unknown. Return true if successful. */ static bool -wc (int fd, char const *file_x, struct fstatus *fstatus) +wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) { bool ok = true; char buf[BUFFER_SIZE + 1]; @@ -229,32 +230,34 @@ wc (int fd, char const *file_x, struct fstatus *fstatus) if (count_bytes && !count_chars && !print_lines && !count_complicated) { - off_t current_pos, end_pos; - if (0 < fstatus->failed) fstatus->failed = fstat (fd, &fstatus->st); - if (! fstatus->failed && S_ISREG (fstatus->st.st_mode) - && (current_pos = lseek (fd, 0, SEEK_CUR)) != -1 - && (end_pos = lseek (fd, 0, SEEK_END)) != -1) + /* For sized files, seek to one buffer before EOF rather than to EOF. 
+ This works better for files in proc-like file systems where + the size is only approximate. */ + if (! fstatus->failed && usable_st_size (&fstatus->st) + && 0 <= fstatus->st.st_size) { - /* Be careful here. The current position may actually be - beyond the end of the file. As in the example above. */ - bytes = end_pos < current_pos ? 0 : end_pos - current_pos; + size_t end_pos = fstatus->st.st_size; + off_t hi_pos = end_pos - end_pos % BUFFER_SIZE; + if (current_pos < 0) + current_pos = lseek (fd, 0, SEEK_CUR); + if (0 <= current_pos && current_pos < hi_pos + && 0 <= lseek (fd, hi_pos, SEEK_CUR)) + bytes = hi_pos - current_pos; } - else + + fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); + while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) { - fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); - while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) + if (bytes_read == SAFE_READ_ERROR) { - if (bytes_read == SAFE_READ_ERROR) - { - error (0, errno, "%s", file); - ok = false; - break; - } - bytes += bytes_read; + error (0, errno, "%s", file); + ok = false; + break; } + bytes += bytes_read; } } else if (!count_chars && !count_complicated) @@ -500,7 +503,7 @@ wc_file (char const *file, struct fstatus *fstatus) have_read_stdin = true; if (O_BINARY && ! isatty (STDIN_FILENO)) xfreopen (NULL, "rb", stdin); - return wc (STDIN_FILENO, file, fstatus); + return wc (STDIN_FILENO, file, fstatus, -1); } else { @@ -512,7 +515,7 @@ wc_file (char const *file, struct fstatus *fstatus) } else { - bool ok = wc (fd, file, fstatus); + bool ok = wc (fd, file, fstatus, 0); if (close (fd) != 0) { error (0, errno, "%s", file); diff --git a/tests/misc/head-c.sh b/tests/misc/head-c.sh index d6433d0..807e965 100755 --- a/tests/misc/head-c.sh +++ b/tests/misc/head-c.sh @@ -42,4 +42,16 @@ esac # based on the value passed to -c (ulimit -v 20000; head --bytes=-$SSIZE_MAX < /dev/null) || fail=1 +# Make sure it works on funny files in /proc and /sys. 
+ +for file in /proc/cpuinfo /sys/kernel/profiling; do + if test -r $file; then + cp -f $file copy && + head -c -1 copy > exp1 || framework_failure_ + + head -c -1 $file > out1 || fail=1 + compare exp1 out1 || fail=1 + fi +done + Exit $fail diff --git a/tests/misc/od-j.sh b/tests/misc/od-j.sh new file mode 100755 index 0000000..a40a99f --- /dev/null +++ b/tests/misc/od-j.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# Verify that 'od -j N' skips N bytes of input. + +# Copyright 2014 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ od + +for file in ${srcdir=.}/tests/init.sh /proc/version /sys/kernel/profiling; do + test -r $file || continue + + cp -f $file copy && + bytes=$(wc -c < copy) || framework_failure_ + + od -An $file > exp || fail=1 + od -An -j $bytes $file $file > out || fail=1 + compare out exp || fail=1 + + od -An -j 4096 copy copy > exp1 2> experr1; expstatus=$? 
+ test $status -eq $expstatus || fail=1 + compare out1 exp1 || fail=1 + compare err1 experr1 || fail=1 +done + +Exit $fail diff --git a/tests/misc/tac-2-nonseekable.sh b/tests/misc/tac-2-nonseekable.sh index c27694c..a4a35ab 100755 --- a/tests/misc/tac-2-nonseekable.sh +++ b/tests/misc/tac-2-nonseekable.sh @@ -1,5 +1,5 @@ #!/bin/sh -# ensure that tac works with two or more non-seekable inputs +# ensure that tac works with non-seekable or quasi-seekable inputs # Copyright (C) 2011-2014 Free Software Foundation, Inc. @@ -24,4 +24,16 @@ echo x > exp || fail=1 compare exp out || fail=1 compare /dev/null err || fail=1 +# Make sure it works on funny files in /proc and /sys. + +for file in /proc/version /sys/kernel/profiling; do + if test -r $file; then + cp -f $file copy && + tac copy > exp1 || framework_failure_ + + tac $file > out1 || fail=1 + compare exp1 out1 || fail=1 + fi +done + Exit $fail diff --git a/tests/misc/wc-proc.sh b/tests/misc/wc-proc.sh new file mode 100755 index 0000000..828160d --- /dev/null +++ b/tests/misc/wc-proc.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# Test wc on /proc and /sys files. + +# Copyright 2014 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ wc + +for file in /proc/version /sys/kernel/profiling; do + if test -r $file; then + cp -f $file copy && + wc -c < copy > exp1 || framework_failure_ + + wc -c < $file > out1 || fail=1 + compare exp1 out1 || fail=1 + fi +done + +Exit $fail diff --git a/tests/split/b-chunk.sh b/tests/split/b-chunk.sh index 86f95a0..fefa090 100755 --- a/tests/split/b-chunk.sh +++ b/tests/split/b-chunk.sh @@ -31,20 +31,29 @@ stat x?? 2>/dev/null && fail=1 printf '1\n2\n3\n4\n5\n' > in || framework_failure_ -split -n 3 in > out || fail=1 -split -n 1/3 in > b1 || fail=1 -split -n 2/3 in > b2 || fail=1 -split -n 3/3 in > b3 || fail=1 -printf '1\n2' > exp-1 -printf '\n3\n' > exp-2 -printf '4\n5\n' > exp-3 - -compare exp-1 xaa || fail=1 -compare exp-2 xab || fail=1 -compare exp-3 xac || fail=1 -compare exp-1 b1 || fail=1 -compare exp-2 b2 || fail=1 -compare exp-3 b3 || fail=1 -test -f xad && fail=1 +for file in in /proc/version /sys/kernel/profiling; do + split -n 3 $file > out || fail=1 + split -n 1/3 $file > b1 || fail=1 + split -n 2/3 $file > b2 || fail=1 + split -n 3/3 $file > b3 || fail=1 + + case $file in + in) + printf '1\n2' > exp-1 + printf '\n3\n' > exp-2 + printf '4\n5\n' > exp-3 + + compare exp-1 xaa || fail=1 + compare exp-2 xab || fail=1 + compare exp-3 xac || fail=1 + ;; + esac + + compare xaa b1 || fail=1 + compare xab b2 || fail=1 + compare xac b3 || fail=1 + cat xaa xab xac | compare - $file || fail=1 + test -f xad && fail=1 +done Exit $fail diff --git a/tests/tail-2/tail-c.sh b/tests/tail-2/tail-c.sh new file mode 100755 index 0000000..cdbaa46 --- /dev/null +++ b/tests/tail-2/tail-c.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# exercise tail -c + +# Copyright 2014 Free Software Foundation, Inc. 
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ tail
+require_ulimit_v_
+
+# Make sure it works on funny files in /proc and /sys.
+
+for file in /proc/version /sys/kernel/profiling; do
+  if test -r $file; then
+    cp -f $file copy &&
+    tail -c -1 copy > exp1 || framework_failure_
+
+    tail -c -1 $file > out1 || fail=1
+    compare exp1 out1 || fail=1
+  fi
+done
+
+Exit $fail
-- 
1.7.12.1