/*
 *                              U N I Q . C
 *
 * Read a file, writing unique (or non-unique) lines.
 *
 */

/*)BUILD        $(TKBOPTIONS) = {
                        TASK    = ...UNI
                }
*/

#ifdef  DOCUMENTATION

title   uniq    Print Unique Lines in a File
index           Print Unique Lines in a File

synopsis

        uniq [-options] [-fields] [+letters] [limit] [input [output]]

description

        Uniq reads a sorted input file, writing each unique line.
        The following options are defined:
        .lm +8
        .s.i -8;-a      Print all copies of duplicated lines; implies -d.
        .s.i -8;-u      Only print unique lines.
        .s.i -8;-d      Only print duplicate lines.
        .s.i -8;-c      Print the number of times each line occurred along
        with the line.
        .s.i -8;-z      Print the count (as in -c) with leading zeros.
        .s.i -8;-N      Skip over the first N words before checking
        for uniqueness.
        .s.i -8;+N      Skip over the first N letters (in the indicated
        field).  Note that fields are skipped before letters.
        .s.i -8;N       Compare only N letters.
        .s.lm -8
        A word is defined as "optional spaces or tabs" followed by
        text up to the first space, tab, or end of line.
        .s
        If the output file is not specified, uniq will write to
        the standard output.  If the input file is not specified,
        uniq will read from the standard input.
        .s
        For an on-line help message (written to the standard output),
        execute:
        .s
                uniq ?

diagnostics

        .lm +8
        .s.i -8;Can't open input file "name"
        .s.i -8;Can't open output file "name"
        .lm -8

author

        Martin Minow

bugs

        Because of the way the syntax of the command line is defined,
        you can't use an input file whose first character is a digit -
        the code will attempt to interpret it as a limit.

#endif

/*
 * Edit history
 * 0.0  ??-???-?? MM    Creation
 * 1.0  24-May-85 JSL   Added VMS redirection code, -a switch, simple
 *                      usage message.  Bugfix: If there are no args
 *                      nor switches, don't signal an error - just do
 *                      a uniq from stdin to stdout!
 */

/*
 * NOTE(review): the header names on the #include lines were missing from
 * the text this file was recovered from.  They are restored here from the
 * symbols the code uses (FILE/fgets, isdigit/tolower, exit/atoi, strcmp,
 * SS$_... and STS$M_...); confirm the VMS pair against the original.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef vms
#include <ssdef.h>
#include <stsdef.h>
#define IO_SUCCESS      (SS$_NORMAL | STS$M_INHIB_MSG)
#define IO_ERROR        SS$_ABORT
/*
 * Defined outside this file; the signature is inferred from the call in
 * main() -- TODO confirm against the library that supplies it.
 */
extern int getredirection(int argc, char **argv);
#endif

/*
 * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file
 */
#ifndef IO_SUCCESS
#define IO_SUCCESS      0
#endif
#ifndef IO_ERROR
#define IO_ERROR        1
#endif

#define EOS             0
#define FALSE           0
#define TRUE            1
#define BUFSIZE         1024            /* Buffer size (max. line)      */

/*
 * Text printed by help(), one entry per output line, terminated by a
 * null pointer.
 */
char *documentation[] = {
    "Uniq reads an input file, writing each unique line.",
    "Usage: uniq [-Mode] [-N_fields] [+N_letters] [N_limit] [infile [outfile]]",
    "",
    "Where:",
    "",
    " -a Print all copies of duplicated lines; implies -d.",
    " -u Only print unique lines.",
    " -d Only print duplicate lines.",
    " -c Print the number of times each line occurred along with the line.",
    " -z Print the count (as in -c) with leading zero's.",
    " -N Skip over the first N words before checking for uniqueness",
    " +N Skip over the first N letters (in the indicated field)",
    " N Compare only N letters after skipping",
    "",
    "A word is defined as \"optional spaces or tabs\" followed by text up to",
    "the first space, tab, or end of line.",
    "",
    "If no file names are given, input and output are stdin and stdout.",
    "",
    0
};

int     skip_fields = 0;                /* Number of fields to skip     */
int     skip_letters = 0;               /* Number of letters to skip    */
int     check_letters = 0;              /* Number of letters to test    */
int     linecount;                      /* How many repetitions         */
int     countmode = 0;                  /* Counted output: 'c', 'z', 0  */
int     mode = 0;                       /* Mode byte: 'u', 'd' or 0     */
int     allmode = 0;                    /* Display all lines as read    */
char    line1[BUFSIZE];                 /* Input buffer 1               */
char    line2[BUFSIZE];                 /* Input buffer 2               */
FILE    *infd;                          /* Input file                   */
FILE    *outfd;                         /* Output file                  */

static char *check(char *new, char *old, char *oldpos);
static int  equals(char *old, char *new);
static void output(char *line);
static char *read_line(char *line);
static void finish(void);
static void usage(void);
static void help(void);

/*
 * Parse the switches and optional file names, then copy the input to the
 * output under control of the selected mode.  The program always leaves
 * through exit(): IO_ERROR after "uniq ?", a bad switch, or a file that
 * cannot be opened; otherwise through finish() at end of input.
 */
int
main(int argc, char *argv[])
{
    char    *argp;                      /* Argument pointer             */
    int     c;                          /* Temp character               */
    char    *lp;                        /* Line buffer pointer          */

#ifdef vms
    argc = getredirection(argc, argv);
#endif
    infd = stdin;                       /* Assume no in/out files       */
    outfd = stdout;
    if (argc > 1 && argv[1][0] == '?') {
        help();
        exit(IO_ERROR);
    }
    /*
     * Consume leading arguments that start with '-', '+' or a digit.
     * The argument count is tested on its own before the argument is
     * examined, so the loop can never run with a stale character or
     * step past the end of argv.
     */
    while (argc > 1) {
        argp = argv[1];
        c = (unsigned char) *argp;
        if (c != '-' && c != '+' && !isdigit(c))
            break;
        switch (c) {
        case '+':
            skip_letters = atoi(argp + 1);
            break;

        case '-':
            c = (unsigned char) argp[1];
            if (isdigit(c)) {
                skip_fields = atoi(argp + 1);
                break;
            }
            c = tolower(c);
            switch (c) {
            case 'c':
            case 'z':
                countmode = c;
                break;

            case 'u':
                allmode = 0;
                /* FALLTHROUGH */
            case 'd':
                mode = c;
                break;

            case 'a':
                allmode++;
                mode = 'd';
                break;

            default:
                usage();
                exit(IO_ERROR);
            }
            break;

        default:                        /* Leading digit: the limit     */
            check_letters = atoi(argp);
            break;
        }
        argc--;
        argv++;
    }
    if (argc > 1) {
        if ((infd = fopen(argv[1], "r")) == NULL) {
            perror(argv[1]);
            /* Diagnostics go to stderr so they never mix with output  */
            fprintf(stderr, "?Can't open input file \"%s\"\n", argv[1]);
            exit(IO_ERROR);
        }
        argc--;
        argv++;
    }
    if (argc > 1) {
        if ((outfd = fopen(argv[1], "w")) == NULL) {
            perror(argv[1]);
            fprintf(stderr, "?Can't open output file \"%s\"\n", argv[1]);
            exit(IO_ERROR);
        }
    }
    /*
     * Here we go
     */
    if ((lp = read_line(line2)) == NULL)        /* Prime the pump       */
        finish();
    /*
     * The two buffers swap roles on every call; check() ends the
     * program itself when the input runs out.
     */
    for (;;) {
        lp = check(line1, line2, lp);
        lp = check(line2, line1, lp);
    }
}

/*
 * Read lines into new as long as they compare equal to old, counting
 * the repetitions in linecount.
 *
 *  new     buffer that receives each line read
 *  old     buffer holding the line being counted
 *  oldpos  start of the compare field inside old
 *
 * Returns a pointer to the compare field of the first line that
 * differs (now in new).  Does not return at end of input: the pending
 * line is written and the program exits through finish().
 */
static char *
check(char *new, char *old, char *oldpos)
{
    char    *lp;                        /* Compare field of new line    */

    linecount = 0;
    for (;;) {
        if (allmode) {
            /*
             * -a: once a second copy has been seen, write the first
             * copy, then every later copy as it arrives.
             */
            if (linecount == 1)
                output(old);
            if (linecount++ >= 1)
                output(new);
        }
        else {
            linecount++;
        }
        if ((lp = read_line(new)) == NULL) {
            if (!allmode)
                output(old);
            finish();
        }
        if (!equals(oldpos, lp))
            break;
    }
    if (!allmode)
        output(old);
    return lp;
}

/*
 * Compare two fields.  Return 1 if they match, zero if they don't.
 * When check_letters is non-zero only that many characters are compared.
 */
static int
equals(char *old, char *new)
{
    if (check_letters)
        return strncmp(old, new, check_letters) == 0;
    return strcmp(old, new) == 0;
}

/*
 * Write line to outfd if the current mode and linecount select it,
 * preceded by the repetition count when -c or -z was given.
 */
static void
output(char *line)
{
    switch (mode) {
    case 'u':                           /* Unique lines only            */
        if (linecount > 1)
            return;
        break;

    case 'd':                           /* Duplicated lines only        */
        if (linecount > 1 || allmode)
            break;
        return;

    default:                            /* No mode: every distinct line */
        break;
    }
    if (countmode == 'c')
        fprintf(outfd, "%7d\t", linecount);
    else if (countmode == 'z')
        fprintf(outfd, "%07d\t", linecount);
    fprintf(outfd, "%s", line);
}

/*
 * Read a line (at most BUFSIZE-1 characters) from infd into line.
 * Return NULL when fgets() delivers nothing; otherwise return a pointer
 * to the first byte of the field to check, found by skipping
 * skip_fields words and then skip_letters characters, stopping early at
 * the end of the string.
 *
 * This function was called getline(); it is renamed because many
 * <stdio.h> implementations declare a POSIX getline() with a different
 * signature.
 */
static char *
read_line(char *line)
{
    int     count;
    char    *lp;

    if (fgets(line, BUFSIZE, infd) == NULL)
        return NULL;
    lp = line;
    for (count = 0; count < skip_fields; count++) {
        while (*lp == ' ' || *lp == '\t')
            lp++;
        while (*lp != ' ' && *lp != '\t') {
            if (*lp == EOS)
                return lp;
            lp++;
        }
    }
    for (count = 0; count < skip_letters && *lp != EOS; count++)
        lp++;
    return lp;
}

/*
 * Close both files and leave the program.  The exit status is
 * IO_SUCCESS unless reading the input or writing the output failed.
 */
static void
finish(void)
{
    int     failed;

    failed = ferror(infd);
    if (failed)
        fprintf(stderr, "?Error reading input file\n");
    fclose(infd);
    /* fclose() flushes, so a full disk may only show up here */
    if (ferror(outfd) | (fclose(outfd) != 0)) {
        fprintf(stderr, "?Error writing output file\n");
        failed = 1;
    }
    exit(failed ? IO_ERROR : IO_SUCCESS);
}

/*
 * Give simple-minded help
 */
static void
usage(void)
{
    fprintf(stderr,
        "Usage: uniq [-audcz] [-N] [+N] [N] [input [output]]\n");
}

/*
 * Give good help
 */
static void
help(void)
{
    char    **dp;

    for (dp = documentation; *dp; dp++)
        printf("%s\n", *dp);
}