/*
 *			U N I Q . C
 *
 * Read a file, writing unique (or non-unique) lines.
 *
 * Adjacent lines are compared (optionally after skipping leading fields
 * and/or letters, and optionally over a limited number of letters).  Each
 * run of equal lines is reported once, subject to the -u / -d / -c / -z
 * mode switches.
 *
 */

/*)BUILD	$(TKBOPTIONS) = {
			TASK	= ...UNI
		}
*/

#ifdef	DOCUMENTATION

title	uniq	Print Unique Lines in a File
index		Print Unique Lines in a File

synopsis

	uniq [-options] [-fields] [+letters] [ input [output] ]

description

	Uniq reads a sorted input file, writing each unique line.
	The following options are defined:
	.lm +8
	.s.i -8;-u	Only print unique lines.
	.s.i -8;-d	Only print duplicate lines.
	.s.i -8;-c	Print the number of times each line occurred
	along with the line.
	.s.i -8;-z	Print the count (as in -c) with leading zeros.
	.s.i -8;-N	Skip over the first N words before checking
	for uniqueness.
	.s.i -8;+N	Skip over the first N letters (in the indicated
	field).  Note that fields are skipped before letters.
	.s.i -8;N	Compare only N letters.
	.s.lm -8
	A word is defined as "optional spaces or tabs" followed by
	text up to the first space, tab, or end of line.
	.s
	If the output file is not specified, uniq will write to the
	standard output.  If the input file is not specified, uniq
	will read from the standard input.
	.s
	For an on-line help message, execute:
	.s
		uniq ?

diagnostics

	.lm +8
	.s.i -8;Can't open input file "name"
	.s.i -8;Can't open output file "name"
	.lm -8

author

	Martin Minow

bugs

	It would be nice if you could output all instances of
	duplicated lines -- especially if you skip fields.

#endif

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * On-line help text, printed one entry per line by help().
 * The table is terminated by a null pointer.
 */
static const char *const documentation[] = {

"Uniq reads an input file, writing each unique line.",
"Usage: uniq [-Mode] [-N_fields] [+N_letters] [infile [outfile]]",
"",
"Where:",
"",
"    -u	Only print unique lines.",
"    -d	Only print duplicate lines.",
"    -c	Print the number of times each line occurred along with the line.",
"    -z	Print the count (as in -c) with leading zero's.",
"    -N	Skip over the first N words before checking for uniqueness",
"    +N	Skip over the first N letters (in the indicated field)",
"     N	Compare only N letters after skipping",
"",
"A word is defined as \"optional spaces or tabs\" followed by text up to",
"the first space, tab, or end of line.",
"",
"If no file names are given, input and output are stdin and stdout",
"",
#ifdef	vms
"To use on vms (native mode), define \"uniq :== $disk:[account]uniq\",",
"",
#endif
NULL };

#define	EOS	0
#define	FALSE	0
#define	TRUE	1
#define	BUFSIZE	1024		/* Buffer size (max. line)	*/

static int	skip_fields = 0;	/* Number of fields to skip	*/
static int	skip_letters = 0;	/* Number of letters to skip	*/
static int	check_letters = 0;	/* Number of letters to test	*/
static int	linecount;		/* How many repetitions		*/
static int	countmode = 0;		/* Counted output: 'c', 'z', 0	*/
static int	mode = 0;		/* Mode byte, if any		*/
static char	line1[BUFSIZE];		/* Input buffer 1		*/
static char	line2[BUFSIZE];		/* Input buffer 2		*/
static FILE	*infd;			/* Input file			*/
static FILE	*outfd;			/* Output file			*/

static int	parse_count(const char *text);
static void	finish(void);
static char	*check(char *new_line, char *old_line, char *old_field);
static int	equals(const char *old, const char *new);
static void	output(const char *line);
static char	*get_line(char *line);
static void	help(void);

/*
 * Program entry: parse the option arguments, open the optional input and
 * output files, then alternate between the two line buffers forever.
 * The program terminates from inside check() (via finish()) when the
 * input is exhausted.
 */
int
main(int argc, char *argv[])
{
	char	*field;			/* Field to compare in the	*/
					/* most recently read line	*/

	infd = stdin;			/* Assume no in/out files	*/
	outfd = stdout;
	if (argc <= 1 || argv[1][0] == '?') {
	    help();
	    exit(EXIT_SUCCESS);
	}
	/*
	 * Consume leading option arguments.  The argc test must guard the
	 * whole classification; otherwise argv is read past its end once
	 * the arguments run out.
	 */
	while (argc > 1) {
	    const char		*arg = argv[1];
	    unsigned char	first = (unsigned char) arg[0];

	    if (first != '-' && first != '+' && !isdigit(first))
		break;			/* First file name reached	*/
	    switch (first) {
	    case '+':
		skip_letters = parse_count(arg + 1);
		break;

	    case '-': {
		unsigned char	opt = (unsigned char) arg[1];

		if (isdigit(opt))
		    skip_fields = parse_count(arg + 1);
		else {
		    int	letter = tolower(opt);

		    if (letter == 'c' || letter == 'z')
			countmode = letter;
		    else
			mode = letter;
		}
		break;
	    }

	    default:			/* Bare number			*/
		check_letters = parse_count(arg);
		break;
	    }
	    argc--;
	    argv++;
	}
	if (argc > 1) {
	    if ((infd = fopen(argv[1], "r")) == NULL) {
		fprintf(stderr, "?Can't open input file \"%s\"\n", argv[1]);
		exit(EXIT_FAILURE);
	    }
	    argc--;
	    argv++;
	}
	if (argc > 1) {
	    if ((outfd = fopen(argv[1], "w")) == NULL) {
		fprintf(stderr, "?Can't open output file \"%s\"\n", argv[1]);
		exit(EXIT_FAILURE);
	    }
	}
	/*
	 * Here we go
	 */
	if ((field = get_line(line2)) == NULL)	/* Prime the pump	*/
	    finish();				/* Empty input		*/
	for (;;) {
	    /* The buffers swap roles on every call: old <-> new.	*/
	    field = check(line1, line2, field);
	    field = check(line2, line1, field);
	}
}

/*
 * Convert the leading decimal digits of text to a count.  Like atoi(),
 * trailing junk is ignored and a string with no digits yields zero, but
 * out-of-range values are clamped to [0, INT_MAX] instead of overflowing.
 */
static int
parse_count(const char *text)
{
	long	value = strtol(text, NULL, 10);

	if (value < 0)
	    return (0);
	if (value > INT_MAX)
	    return (INT_MAX);
	return ((int) value);
}

/*
 * Close both streams and exit.  The exit status is EXIT_FAILURE if either
 * stream recorded an error or the output could not be flushed on close,
 * EXIT_SUCCESS otherwise.  Does not return.
 */
static void
finish(void)
{
	int	status = EXIT_SUCCESS;

	if (ferror(infd) || ferror(outfd))
	    status = EXIT_FAILURE;
	fclose(infd);
	if (fclose(outfd) != 0)		/* fclose flushes the output	*/
	    status = EXIT_FAILURE;
	exit(status);
}

/*
 * Read lines into new_line as long as they compare equal to the old line
 * (whose comparison field starts at old_field), counting the run length
 * in linecount.  When a different line arrives, output the old line and
 * return a pointer to the comparison field in new_line.  On end of file,
 * output the old line and exit the program.
 */
static char *
check(char *new_line, char *old_line, char *old_field)
{
	char	*field;			/* Field in the new line	*/

	linecount = 0;
	for (;;) {
	    linecount++;
	    if ((field = get_line(new_line)) == NULL) {
		output(old_line);
		finish();
	    }
	    if (!equals(old_field, field))
		break;
	}
	output(old_line);
	return (field);
}

/*
 * Compare two fields.  Return 1 if they match, 0 if they don't.
 * When check_letters is set, only that many letters are compared.
 */
static int
equals(const char *old, const char *new)
{
	if (check_letters > 0)
	    return (strncmp(old, new, (size_t) check_letters) == 0);
	return (strcmp(old, new) == 0);
}

/*
 * Output this line if the current mode selects it: -u prints only lines
 * that occurred once, -d only lines that occurred more than once.  The
 * repetition count is prefixed when -c or -z was given.  The line is
 * written as read, so it normally carries its own newline.
 */
static void
output(const char *line)
{
	switch (mode) {
	case 'u':
	    if (linecount > 1)
		return;
	    break;

	case 'd':
	    if (linecount > 1)
		break;
	    return;

	default:
	    break;
	}
	if (countmode == 'c')
	    fprintf(outfd, "%7d\t", linecount);
	else if (countmode == 'z')
	    fprintf(outfd, "%07d\t", linecount);
	fprintf(outfd, "%s", line);
}

/*
 * Read a line (at most BUFSIZE-1 bytes) into line.  Return NULL on end of
 * file.  Otherwise return a pointer to the first byte of the field to
 * check: skip_fields words are skipped first, then skip_letters letters.
 * Skipping never moves past the terminating EOS.
 *
 * Named get_line because POSIX <stdio.h> already declares getline().
 */
static char *
get_line(char *line)
{
	char	*lp;
	int	count;

	if (fgets(line, BUFSIZE, infd) == NULL)
	    return (NULL);
	lp = line;
	for (count = 0; count < skip_fields; count++) {
	    while (*lp == ' ' || *lp == '\t')	/* Leading blanks	*/
		lp++;
	    while (*lp != ' ' && *lp != '\t') {	/* The word itself	*/
		if (*lp == EOS)
		    return (lp);		/* Ran out of line	*/
		lp++;
	    }
	}
	for (count = 0; count < skip_letters && *lp != EOS; count++)
	    lp++;
	return (lp);
}

/*
 * Give good help: print the documentation table to the standard output.
 */
static void
help(void)
{
	const char *const	*dp;

	for (dp = documentation; *dp != NULL; dp++)
	    printf("%s\n", *dp);
}