cut: fix "-s" flag to omit blank lines

Using cut with the delimiter flag ("-d") together with the "-s" flag,
which should output only lines containing the delimiter, prints blank
lines. This deviates from the behavior of cut provided by GNU
Coreutils. Blank lines should be omitted if "-s" is used with "-d".

This change introduces a somewhat naive, yet efficient solution, where
line length is checked before looping through bytes. If line length is
zero and the "-s" flag is used, the code will jump to parsing the next
line to avoid printing a newline character.

function                                             old     new   delta
cut_main                                            1196    1185     -11

Signed-off-by: Colin McAllister <colinmca242@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Colin McAllister 2024-07-17 16:33:35 -05:00 committed by Denys Vlasenko
parent 87e60dcf0f
commit 50e2b59370
2 changed files with 24 additions and 5 deletions

View file

@ -152,11 +152,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
unsigned uu = 0, start = 0, end = 0, out = 0;
int dcount = 0;
/* Blank line? Check -s (later check for -s does not catch empty lines) */
if (linelen == 0) {
if (option_mask32 & CUT_OPT_SUPPRESS_FLGS)
goto next_line;
}
/* Loop through bytes, finding next delimiter */
for (;;) {
/* End of current range? */
if (end == linelen || dcount > cut_lists[cl_pos].endpos) {
if (++cl_pos >= nlists) break;
if (++cl_pos >= nlists)
break;
if (option_mask32 & CUT_OPT_NOSORT_FLGS)
start = dcount = uu = 0;
end = 0;
@ -175,15 +182,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
if (shoe) {
regmatch_t rr = {-1, -1};
if (!regexec(&reg, line+uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) {
if (!regexec(&reg, line + uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) {
end = uu + rr.rm_so;
uu += rr.rm_eo;
} else {
uu = linelen;
continue;
}
} else if (line[end = uu++] != *delim)
continue;
} else {
end = uu++;
if (line[end] != *delim)
continue;
}
/* Got delimiter. Loop if not yet within range. */
if (dcount++ < cut_lists[cl_pos].startpos) {
@ -192,7 +202,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
}
}
if (end != start || !shoe)
printf("%s%.*s", out++ ? odelim : "", end-start, line + start);
printf("%s%.*s", out++ ? odelim : "", end - start, line + start);
start = uu;
if (!dcount)
break;

View file

@ -65,6 +65,15 @@ testing "cut with -d -f( ) -s" "cut -d' ' -f3 -s input && echo yes" "yes\n" "$in
testing "cut with -d -f(a) -s" "cut -da -f3 -s input" "n\nsium:Jim\n\ncion:Ed\n" "$input" ""
testing "cut with -d -f(a) -s -n" "cut -da -f3 -s -n input" "n\nsium:Jim\n\ncion:Ed\n" "$input" ""
input="\
foo bar baz
bing bong boop
"
testing "cut with -d -s omits blank lines" "cut -d' ' -f2 -s input" "bar\nbong\n" "$input" ""
# substitute for awk
optional FEATURE_CUT_REGEX
testing "cut -DF" "cut -DF 2,7,5" \