/* * D I F F */ /*)BUILD $(TKBOPTIONS) = { TASK = ...DIF } */ #ifdef DOCUMENTATION title diff Differential File Comparison index Differential File Comparison synopsis diff [option] file1 file2 description Diff compares two files, showing what must be changed to make them identical. Either file1 or file2 (but not both) may refer to directories. If that is the case, a file in the directory whose name is the same as the other file argument will be used. The standard input may be used for one of the files by replacing the argument by "-". Except for the standard input, both files must be on disk devices. .s Options: .lm +8 .s.i -8;-b Remove trailing whitespace (blanks and tabs) and compress all other strings of whitespace to a single blank. .s.i -8;-c Print some context -- matching lines before and after the non-match section. Mark non-matched sections with "|". .s.i -8;-i Ignore lower/upper case distinctions. .s.i -8;-e Output is in an "editor script" format which is compatible with the Unix 'ed' editor. .s.lm -8 All information needed to compare the files is maintained in main memory. This means that very large files (or fairly large files with many differences) will cause the program to abort with an "out of space" message. Main memory requirements (in words) are approximately: .s 2 * (length of file1 + length of file2) .br + 3 * (number of changes) .s (Where "length" is the number of lines of data in each file.) .s The algorithm reads each file twice, once to build hash tables and once to check for fortuitous matches (two lines that are in fact different, but which have the same hash value). CPU time requirements include sorting the hash tables and randomly searching memory tables for equivalence classes. For example, on a time-shared VAX-11/780, comparing two 1000 line files required about 30 seconds (elapsed clock time) and about 10,000 bytes of working storage. About 90 per-cent of the time was taken up by file i/o. diagnostics .lm +8 .s.i -8;Warning, bad option 'x' .s The option is ignored. .s.i -8;Usage ... .s Two input files were not specified. .s.i -8;Can't open input file "filename". Can't continue. .s.i -8;Out of space .s The program ran out of memory while comparing the two files. .s.i -8;Can't read line nnn at xxx in file[A/B] .s This indicates an I/O error when seeking to the specific line. It should not happen. .s.i -8;Spurious match, output is not optimal. .s Two lines that were different yielded the same hash value. This is harmless except that the difference output is not the minimum set of differences between the two files. For example, instead of the output: .s lines 1 to 5 were changed to ... .s the program will print .s lines 1 to 3 were changed to ... .br lines 4 to 5 were changed to ... .s The program uses a CRC16 hash code. The likelihood of this error is quite small. .lm -8 author The diff algorithm was developed by J. W. Hunt and M. D. McIlroy, using a central algorithm defined by H. S. Stone. It was published in: .s.lm +4.nf Hunt, J. W., and McIlroy, M. D., An Algorithm for Differential File Comparison, Computing Science Technical Report #41, Bell Laboratories, Murray Hill, NJ 07974 .s.lm -4.f bugs On RSX systems, diff may fail if the both files are not "variable-length, implied carriage control" format. The scopy program can be used to convert files to this format. This may happen if one of the files has been copied to a VMS V3.0 system from a system, such as RSTS/E, that supports "stream format" files. #endif /* * Diff maintains all information needed to compare the two files in main * memory. This means that very large files (or fairly large files with * many differences) will cause the program to abort with an "out of space" * error. Main memory requirements (in words) are approximately: * * 2 * (length of file1 + length of file2) + (3 * number of changes) * * The diff algorithm reads each file twice (once to build hash tables and * a second time to check for fortuitous matches), then reads the differences * by seeking randomly within the files. CPU time requirements include * sorting the two hash vectors and randomly searching memory tables for * equivalence classes. For example, running in Vax compatibility * mode, two 1000 line files with a fair number of differences took * about 25 seconds (elapsed wall clock time) for processing. Most of this * time was spent in the file read routines. This test required slightly * more than 6000 words of memory for internal tables. * * The diff algorithm was developed by J. W. Hunt and M. D. McIlroy, * using a central algorithm defined by H. S. Stone. The algorithm * was described in: * * Hunt, J. W., and McIlroy, M. D., * An Algorithm for Differential File Comparison, * Computing Science Technical Report #41, * Bell Laboratories, Murray Hill, NJ 07974 * * The following description is summarized from that document. While * it has been slightly modified to correspond to the program source, the * algorithm is essentially identical. * * 1. Read the input files, building two vectors containing the * line number (serial) and hash value (hash) of each line. * Data for fileA will be in a vector pointed to by fileA[], * while data for fileB will be pointed to by fileB[]. The * lengths (number of lines) of the files will be represented * by lenA and lenB respectively. [This is slightly different * from the published algorithm.] * * 2. Note initial and final sequences that have identical * hash values to shorten subsequent processing. Note that * the "jackpot" phase (step 9.) will examine all lines in * the file. Next, sort each file using hash as the primary * key and serial as the secondary key. * * 3. Construct an array of equivalence classes (member[1..lenB]) * where each element contains the line number in fileB and a * flag which is True if the indicated line is the first member * of an equivalence class. (An equivalence class is a set of * lines which all hash to the same value. The lines themselves * are not necessarily identical.) * * 4. Construct an array, class[1..lenA], where each element, I, is set to * the index of a line, J, in fileB if member[J] is the first * element in an equivalence class and the hash code of line[I] in * fileA is the same as the hash code of line[J] in fileB. Class[I] * is set to zero if no such line exists. * * If non-zero, class[I] now points in member[] to the beginning of * the class of lines in fileB equivalent to line[I] in fileA. * * The next two steps implement the longest common subsequence algorithm. * * 5. Structure CANDIDATE { a, b, previous }, where a and b are line * numbers and previous a reference to a candidate, will store * candidate lists as they are constructed. * * Vector clist[] stores references to candidates. It is dimensioned * (0..min(lenA, lenB) + 1) * * Initialize * clist[0] = CANDIDATE { 0, 0, -1 }; * clist[1] = CANDIDATE { A+1, B+1, -1 }; * ktop = 1; * * clist[1] is a fence beyond the last usefully filled element * and -1 is an out-of-range clist index. Ktop is the index of the * fence. Note, because of the memory allocation used, clist[] * is actually composed of two vectors, clist[] containing the * candidate reference, and klist[] containing pointers to clist. * * 6. For (A = 1 to lenA) { * I = class[A]; -- the index in member[]: beginning of * -- the class of lines in fileB equivalent * -- to this line in fileA. * if (I is non-zero) { * Merge each member into the candidate list * as discussed below. * } * * Unravel the chain of candidates, getting a vector of common subsequences: * * 7. Set all elements of match[0..lenA] to zero. * * 8. clist[ktop-1] points to the subsequence chain head. For each element * in the chain, let A and B be the line number entries. Then, set * * match[A] = B; * * The non-zero elements of match[] now pick out a longest common * subsequence chain, possibly including spurious matches due to * hash coincidences. The pairings between the two files are: * * if (match[A] is non-zero) { * line A in fileA matches line match[A] in fileB; * } * * Now, read each line of fileA and fileB to check for jackpots: * * 9. for (A = 1 to lenA) { * if (match[A] is nonzero) { * if (fileA[A] is not identical to fileB[match[A]]) * match[A] = 0; -- Hash congruence * } * } * * Ignoring "squish" complications, the merge step may be defined as follows: * * Entry: * clist[] Candidate pointer array * ktop Fence beyond last filled index * A Current index in fileA * member[] Equivalence vector * I The index in member[] of the first element * of the class of lines in fileB that are * equivalent to line[A] in fileA. * * 1. Let clist[R] be "an r-candidate" and C a reference to * the last candidate found, which will always be an r-candidate. * clist[R] will be updated with this reference once the previous * value of clist[R] is no longer needed. Initialize: * * R = 0; * C = clist[0]; * * 2. Do steps 3 through 6 repeatedly: * * 3. set B to the line number in member[I]; * search clist[R..ktop] for an element, clist[S], such that * * clist[S-1].b < B and clist[S].b >= B * * Note that clist[] is ordered on clist[].b so that binary * search will work. The search algorithm used requires the * two "fence" entries described above. * * If such an element is found, perform steps 4. and 5. * * 4. Extend the longest common subsequence chain: * * If (clist[S].b > B) { * clist[R] = C; * R = S; * C = candidate(A, B, clist[S - 1]); * } * * 5. Extend the number of subsequences, moving the fence: * * If (S == ktop) { * clist[ktop + 1] = clist[ktop] * ktop = ktop + 1; * break out of step 2's loop; * } * * 6. I = I + 1; * if (member[I] is the first element in another class) { * break out of step 2's loop; * } * else { * continue at step 2. * } * * 7. clist[R] = C; * exit merge subroutine. * * To illustrate vector contents, here is a sample run: * * File A: * line 1 * line 2 * line 3 * line 4 * line 5 gets deleted * line 6 * * File B: * line 1 * line 1.5 inserted * line 2 * line 3 changed * line 4 * line 6 * * (For clarity, the "squish" step is omitted from the following) * * On entry to equiv() (after readin and sorting), the file[] vector is * as follows (the first entry in each pair is the line number, the * second is the hash value. Entries are sorted on hash value): * * FileA[] (1..lines in fileA): * line hash * 3 042400 6 043300 5 050026 1 102201 2 102701 4 103501 * FileB[] (1..lines in fileB): * 6 043300 2 045600 1 102201 3 102701 5 103501 4 147166 * * * After Equiv has processed file[]: * * FileA[] (1..lines in fileA): * line value * 3 0 6 1 5 0 1 3 2 4 4 5 * Member[] (0..lines in fileB) * 0 -6 -2 -1 -3 -5 -4 * * * After unsort() has unwound fileB: * * Class[] (1 .. lines in fileA): * 3 4 0 5 0 1 * * Within unravel(), match is built in the following order: * * match[6] := 6 * match[4] := 5 * match[2] := 3 * match[1] := 1 * * Match[] (0 .. lines in fileA): * * 0 1 3 0 5 0 6 * * Output is as follows: * * 1a2 * > line 1.5 inserted * 3c4 * < line 3 * --- * > line 3 changed * 5d5 * < line 5 gets deleted * * */ #include #include #define EOS 0 #define TEMPFILE "diff.tmp" #define TRUE 1 #define FALSE 0 #ifdef pdp11 #define short int #endif extern long ftell(); typedef struct candidate { int b; /* Line in fileB */ int a; /* Line in fileA */ int link; /* Previous candidate */ } CANDIDATE; typedef struct line { unsigned short hash; /* Hash value etc. */ short serial; /* Line number */ } LINE; LINE *file[2]; /* Hash/line for total file */ #define fileA file[0] #define fileB file[1] LINE *sfile[2]; /* Hash/line after prefix */ #define sfileA sfile[0] #define sfileB sfile[1] int len[2]; /* Actual lines in each file */ #define lenA len[0] #define lenB len[1] int slen[2]; /* Squished lengths */ #define slenA slen[0] #define slenB slen[1] int prefix; /* Identical lines at start */ int suffix; /* Identical lenes at end */ FILE *infd[2] = { NULL, NULL }; /* Input file identifiers */ FILE *tempfd; /* Temp for input redirection */ /* * The following vectors overlay the area defined by fileA */ int *class; /* Unsorted line numbers */ int *klist; /* Index of element in clist */ CANDIDATE *clist; /* Storage pool for candidates */ int clength = 0; /* Number of active candidates */ int *match; /* Longest subsequence */ long *oldseek; /* Seek position in file A */ /* * The following vectors overlay the area defined by fileB */ int *member; /* Concatenated equiv. classes */ long *newseek; /* Seek position in file B */ char *textb; /* Input from file2 for check */ /* * Global variables */ int eflag = FALSE; /* Edit script requested */ int bflag = FALSE; /* Blank supress requested */ int cflag = FALSE; /* Context printout */ int iflag = FALSE; /* Ignore case requested */ char text[257]; /* Input line from file1 */ #ifndef vms char *free_space; /* For storage allocation */ #endif extern char *myalloc(); /* Storage allocator */ extern char *compact(); /* Storage compactor */ #ifdef DEBUG #define TIMING #endif #ifdef TIMING extern long time(); extern char *$$mend; long totaltime; long sectiontime; char *mstart; #endif main(argc, argv) int argc; char **argv; /* * Diff main program */ { register int i; register char *ap; #ifdef TIMING sectiontime = time(&totaltime); #endif while (argc > 1 && *(ap = argv[1]) == '-' && *++ap != EOS) { while (*ap != EOS) { switch (tolower(*ap++)) { case 'b': bflag++; break; case 'c': cflag++; break; case 'e': eflag++; break; case 'i': iflag++; break; default: fprintf(stderr, "Warning, bad option '%c'\n", ap[-1]); break; } } argc--; argv++; } if (argc != 3) error("Usage: diff [-options] file1 file2"); if (cflag && eflag) { fprintf(stderr, "Warning, -c and -e are incompatible, -c supressed.\n"); cflag = FALSE; } argv++; for (i = 0; i <= 1; i++) { if (argv[i][0] == '-' && argv[i][1] == EOS) { infd[i] = stdin; if ((tempfd = fopen(TEMPFILE, "w")) == NULL) cant(TEMPFILE, "work", 1); } else { infd[i] = fopen(argv[i], "r"); } } if (infd[0] == NULL && infd[1] == NULL) { cant(argv[0], "input", 0); cant(argv[1], "input", 1); } else if (infd[1] == NULL) opendir(1, &argv[1], infd[0]); else if (infd[0] == NULL) opendir(0, &argv[0], infd[1]); #ifndef vms free_space = malloc(1); #endif /* * Read input, building hash tables. */ input(0); input(1); squish(); #ifdef DEBUG printf("before sort\n"); for (i = 1; i <= slenA; i++) printf("sfileA[%d] = %6d %06o\n", i, sfileA[i].serial, sfileA[i].hash); for (i = 1; i <= slenB; i++) printf("sfileB[%d] = %6d %06o\n", i, sfileB[i].serial, sfileB[i].hash); #endif sort(sfileA, slenA); sort(sfileB, slenB); #ifdef TIMING ptime("input"); #endif #ifdef DEBUG printf("after sort\n"); for (i = 1; i <= slenA; i++) printf("sfileA[%d] = %6d %06o\n", i, sfileA[i].serial, sfileB[i].hash); for (i = 1; i <= slenB; i++) printf("sfileB[%d] = %6d %06o\n", i, sfileB[i].serial, sfileB[i].hash); #endif /* * Build equivalence classes. */ member = (int *)fileB; equiv(); member = (int *)compact((char *)member, (slenB + 2) * sizeof (int), "squeezing member vector"); /* * Reorder equivalence classes into array class[] */ class = (int *)fileA; unsort(); class = (int *)compact((char *)class, (slenA + 2) * sizeof (int), "compacting class vector"); #ifdef TIMING ptime("equiv/unsort"); #endif /* * Find longest subsequences */ klist = (int *)myalloc((slenA + 2) * sizeof (int), "klist"); clist = (CANDIDATE *)myalloc(sizeof (CANDIDATE), "clist"); i = subseq(); free((char *)member); free((char *)class); match = (int *)myalloc((lenA + 2) * sizeof (int), "match"); unravel(klist[i]); free(clist); free(klist); #ifdef TIMING ptime("subsequence/unravel"); #endif /* * Check for fortuitous matches and output differences */ oldseek = myalloc((lenA + 2) * sizeof (* oldseek), "oldseek"); newseek = myalloc((lenB + 2) * sizeof (* newseek), "newseek"); textb = myalloc(sizeof text, "textbuffer"); if (check(argv[0], argv[1])) fprintf(stderr, "Spurious match, output is not optimal\n"); #ifdef TIMING ptime("check"); #endif output(); #ifdef TIMING ptime("output"); printf("%ld seconds required\n", sectiontime - totaltime); #endif if (tempfd != NULL) { fclose(tempfd); delete(TEMPFILE); } } input(which) int which; /* 0 or 1 to redefine infd[] */ /* * Read the file, building hash table */ { register LINE *lentry; register int linect; register short hashval; FILE *fd; unsigned short hash(); linect = 0; lentry = (LINE *)myalloc(sizeof(LINE) * 3, "line"); fd = infd[which]; while (!getline(fd, text)) { lentry = (LINE *)compact((char *)lentry, (++linect + 3) * sizeof(LINE), "extending line vector"); lentry[linect].hash = hash(text); } /* * If input was from stdin ("-" command), finish off the temp file. */ if (fd == stdin) { fclose(tempfd); tempfd = infd[which] = fopen(TEMPFILE, "r"); } len[which] = linect; file[which] = lentry; } squish() /* * Look for initial and trailing sequences that have identical hash values. * Don't bother building them into the candidate vector. */ { register int i; register LINE *ap; register LINE *bp; int j; int k; /* * prefix -> first line (from start) that doesn't hash identically */ i = 0; ap = &fileA[1]; bp = &fileB[1]; while (i < lenA && i < lenB && ap->hash == bp->hash) { i++; ap++; bp++; } prefix = i; /* * suffix -> first line (from end) that doesn't hash identically */ j = lenA - i; k = lenB - i; ap = &fileA[lenA]; bp = &fileB[lenB]; i = 0; while (i < j && i < k && ap->hash == bp->hash) { i++; ap--; bp--; } suffix = i; /* * Tuck the counts away */ for (k = 0; k <= 1; k++) { sfile[k] = file[k] + prefix; j = slen[k] = len[k] - prefix - suffix; for (i = 0, ap = sfile[k]; i <= slen[k]; i++, ap++) { ap->serial = i; } } } sort(vector, vecsize) LINE *vector; /* What to sort */ int vecsize; /* How many to sort */ /* * Sort hash entries */ { register int j; register LINE *aim; register LINE *ai; int mid; int k; LINE work; for (j = 1; j <= vecsize; j *= 2); mid = (j - 1); while ((mid /= 2) != 0) { k = vecsize - mid; for (j = 1; j <= k; j++) { for (ai = &vector[j]; ai > vector; ai -= mid) { aim = &ai[mid]; if (aim < ai) break; /* ?? Why ?? */ if (aim->hash > ai->hash || aim->hash == ai->hash && aim->serial > ai->serial) break; work.hash = ai->hash; ai->hash = aim->hash; aim->hash = work.hash; work.serial = ai->serial; ai->serial = aim->serial; aim->serial = work.serial; } } } } equiv() /* * Build equivalence class vector */ { register LINE *ap; register union { LINE *bp; int *mp; } r; register int j; LINE *atop; #ifdef DEBUG printf("equiv entry\n"); for (j = 1; j <= slenA; j++) printf("sfileA[%d] = %6d %06o\n", j, sfileA[j].serial, sfileA[j].hash); for (j = 1; j <= slenB; j++) printf("sfileB[%d] = %6d %06o\n", j, sfileB[j].serial, sfileB[j].hash); #endif j = 1; ap = &sfileA[1]; r.bp = &sfileB[1]; atop = &sfileA[slenA]; while (ap <= atop && j <= slenB) { if (ap->hash < r.bp->hash) { ap->hash = 0; ap++; } else if (ap->hash == r.bp->hash) { ap->hash = j; ap++; } else { r.bp++; j++; } } while (ap <= atop) { ap->hash = 0; ap++; } sfileB[slenB + 1].hash = 0; #ifdef DEBUG printf("equiv exit\n"); for (j = 1; j <= slenA; j++) printf("sfileA[%d] = %6d %06o\n", j, sfileA[j].serial, sfileA[j].hash); for (j = 1; j <= slenB; j++) printf("sfileB[%d] = %6d %06o\n", j, sfileB[j].serial, sfileB[j].hash); #endif ap = &sfileB[0]; atop = &sfileB[slenB]; r.mp = &member[0]; while (++ap <= atop) { r.mp++; *r.mp = -(ap->serial); while (ap[1].hash == ap->hash) { ap++; r.mp++; *r.mp = ap->serial; } } r.mp[1] = -1; #ifdef DEBUG for (j = 0; j <= slenB; j++) printf("member[%d] = %d\n", j, member[j]); #endif } unsort() /* * Build class vector */ { register int *temp; register int *tp; register union { LINE *ap; int *cp; } u; LINE *evec; int *eclass; #ifdef DEBUG int i; #endif temp = (int *)myalloc((slenA + 1) * sizeof(int), "unsort scratch"); u.ap = &sfileA[1]; evec = &sfileA[slenA]; while (u.ap <= evec) { #ifdef DEBUG printf("temp[%2d] := %06o\n", u.ap->serial, u.ap->hash); #endif temp[u.ap->serial] = u.ap->hash; u.ap++; } /* * Copy into class vector and free work space */ u.cp = &class[1]; eclass = &class[slenA]; tp = &temp[1]; while (u.cp <= eclass) *u.cp++ = *tp++; free((char *) temp); #ifdef DEBUG printf("unsort exit\n"); for (i = 1; i <= slenA; i++) printf("class[%d] = %d %06o\n", i, class[i], class[i]); #endif } subseq() /* * Generate maximum common subsequence chain in clist[] */ { int a; register int ktop; register int b; register int s; int r; int i; int cand; klist[0] = newcand(0, 0, -1); klist[1] = newcand(slenA + 1, slenB + 1, -1); ktop = 1; /* -> guard entry */ for (a = 1; a <= slenA; a++) { /* * For each non-zero element in fileA ... */ if ((i = class[a]) == 0) continue; cand = klist[0]; /* No candidate now */ r = 0; /* Current r-candidate */ do { #ifdef DEBUG printf("a = %d, i = %d, b = %d\n", a, i, member[i]); #endif /* * Perform the merge algorithm */ if ((b = member[i]) < 0) b = -b; #ifdef DEBUG printf("search(%d, %d, %d) -> %d\n", r, ktop, b, search(r, ktop, b)); #endif if ((s = search(r, ktop, b)) != 0) { if (clist[klist[s]].b > b) { klist[r] = cand; r = s; cand = newcand(a, b, klist[s-1]); #ifdef DEBUG dumpklist(ktop, "klist[s-1]->b > b"); #endif } if (s >= ktop) { klist[ktop + 1] = klist[ktop]; ktop++; #ifdef DEBUG klist[r] = cand; dumpklist(ktop, "extend"); #endif break; } } } while (member[++i] > 0); klist[r] = cand; } #ifdef DEBUG printf("last entry = %d\n", ktop - 1); #endif return(ktop - 1); /* Last entry found */ } int newcand(a, b, pred) int a; /* Line in fileA */ int b; /* Line in fileB */ int pred; /* Link to predecessor, index in cand[] */ { register CANDIDATE *new; clength++; clist = (CANDIDATE *)compact((char *)clist, clength * sizeof (CANDIDATE), "extending clist"); new = &clist[clength - 1]; new->a = a; new->b = b; new->link = pred; return(clength - 1); } search(low, high, b) register int low; int high; int b; /* * Search klist[low..top] (inclusive) for b. If klist[low]->b >= b, * return zero. Else return s such that klist[s-1]->b < b and * klist[s]->b >= b. Note that the algorithm presupposes the two * preset "fence" elements, (0, 0) and (slenA, slenB). */ { register int temp; register int mid; if (clist[klist[low]].b >= b) return(0); while ((mid = (low + high) / 2) > low) { if ((temp = clist[klist[mid]].b) > b) high = mid; else if (temp < b) low = mid; else { return(mid); } } return(mid + 1); } unravel(k) register int k; { register int i; register CANDIDATE *cp; int first_trailer; int difference; first_trailer = lenA - suffix; difference = lenB - lenA; #ifdef DEBUG printf("first trailer = %d, difference = %d\n", first_trailer, difference); #endif for (i = 0; i <= lenA; i++) { match[i] = (i <= prefix) ? i : (i > first_trailer) ? i + difference : 0; } #ifdef DEBUG printf("unravel\n"); #endif while (k != -1) { cp = &clist[k]; #ifdef DEBUG if (k < 0 || k >= clength) error("Illegal link -> %d", k); printf("match[%d] := %d\n", cp->a + prefix, cp->b + prefix); #endif match[cp->a + prefix] = cp->b + prefix; k = cp->link; } } check(fileAname, fileBname) char *fileAname; char *fileBname; /* * Check for hash matches (jackpots) and collect random access indices to * the two files. */ { register int a; /* Current line in file A */ register int b; /* Current line in file B */ int jackpot; b = 1; rewind(infd[0]); rewind(infd[1]); oldseek[0] = ftell(infd[0]); newseek[0] = ftell(infd[1]); jackpot = 0; #ifdef DEBUG printf("match vector\n"); for (a = 0; a <= lenA; a++) printf("match[%d] = %d\n", a, match[a]); #endif for (a = 1; a <= lenA; a++) { if (match[a] == 0) { getline(infd[0], text); oldseek[a] = ftell(infd[0]); continue; } while (b < match[a]) { getline(infd[1], textb); newseek[b] = ftell(infd[1]); b++; } getline(infd[0], text); getline(infd[1], textb); if (!streq(text, textb)) { fprintf(stderr, "Spurious match:\n"); fprintf(stderr, "line %d in %s, \"%s\"\n", a, fileAname, text); fprintf(stderr, "line %d in %s, \"%s\"\n", b, fileBname, textb); match[a] = 0; jackpot++; } oldseek[a] = ftell(infd[0]); newseek[b] = ftell(infd[1]); b++; } for (; b <= lenB; b++) { getline(infd[1], textb); newseek[b] = ftell(infd[1]); } return(jackpot); } output() { register int astart; register int aend; int bstart; register int bend; rewind(infd[0]); rewind(infd[1]); match[0] = 0; match[lenA+1] = lenB + 1; if (!eflag) { /* * Normal printout */ for (astart = 1; astart <= lenA; astart = aend + 1) { /* * New subsequence, skip over matching stuff */ while (astart <= lenA && match[astart] == (match[astart - 1] + 1)) astart++; /* * Found a difference, setup range and print it */ bstart = match[astart - 1] + 1; aend = astart - 1; while (aend < lenA && match[aend + 1] == 0) aend++; bend = match[aend + 1] - 1; match[aend] = bend; change(astart, aend, bstart, bend); } } else { /* * Edit script output -- differences are output "backwards" * for the benefit of a line-oriented editor. */ for (aend = lenA; aend >= 1; aend = astart - 1) { while (aend >= 1 && match[aend] == (match[aend + 1] - 1) && match[aend] != 0) aend--; bend = match[aend + 1] - 1; astart = aend + 1; while (astart > 1 && match[astart - 1] == 0) astart--; bstart = match[astart - 1] + 1; match[astart] = bstart; change(astart, aend, bstart, bend); } } if (lenA == 0) change(1, 0, 1, lenB); } change(astart, aend, bstart, bend) int astart; int aend; int bstart; int bend; /* * Output a change entry: fileA[astart..aend] changed to fileB[bstart..bend] */ { /* * This catches a "dummy" last entry */ if (astart > aend && bstart > bend) return; range(astart, aend); putchar((astart > aend) ? 'a' : (bstart > bend) ? 'd' : 'c'); if (!eflag) range(bstart, bend); putchar('\n'); if (!eflag) { fetch(oldseek, astart, aend, lenA, infd[0], "< "); if (astart <= aend && bstart <= bend) printf("---\n"); } fetch(newseek, bstart, bend, lenB, infd[1], (!eflag) ? "> " : ""); if (eflag && bstart <= bend) printf(".\n"); } range(from, to) int from; int to; /* * Print a range */ { printf("%d", (to > from) ? from : to); if (to >= from) printf(",%d", to); } fetch(seekvec, start, end, trueend, fd, pfx) long *seekvec; register int start; register int end; int trueend; FILE *fd; char *pfx; /* * Print the appropriate text */ { register int i; int first; int last; first = last = FALSE; if (cflag) { if (start > end) return; if (start > 1) { start--; first++; } if (end < trueend-1) { end++; last++; } } if (fseek(fd, seekvec[start - 1], 0) != 0) { printf("?Can't read line %d at %08lx (hex) in file%c\n", start, seekvec[start - 1], (fd == infd[0]) ? 'A' : 'B'); } else { for (i = start; i <= end; i++) { if (fgetss(text, sizeof text, fd) == NULL) { printf("** Unexpected end of file\n"); break; } #ifdef DEBUG printf("%5d: %s%s\n", i, pfx, text); #else if (cflag) { if (first || (last && i == end)) { putchar(' '); first = FALSE; } else putchar('|'); } else printf("%s", pfx); printf("%s\n", text); #endif } } } getline(fd, buffer) FILE *fd; char *buffer; /* * Input routine, read one line to buffer[], return TRUE on eof, else FALSE. * The terminating newline is always removed. If "-b" was given, trailing * whitespace (blanks and tabs) are removed and strings of blanks and * tabs are replaced by a single blank. Getline() does all hacking for * redirected input files. */ { register char *top; register char *fromp; register char c; if (fgetss(buffer, sizeof text, fd) == NULL) { *buffer = EOS; return(TRUE); } if (fd == stdin) fputss(buffer, tempfd); if (bflag || iflag) { top = buffer; fromp = buffer; while ((c = *fromp++) != EOS) { if (bflag && (c == ' ' || c == '\t')) { c = ' '; while (*fromp == ' ' || *fromp == '\t') fromp++; } if (iflag) c = tolower(c); *top++ = c; } if (bflag && top[-1] == ' ') top--; *top = EOS; } return(FALSE); } static unsigned short crc16a[] = { 0000000, 0140301, 0140601, 0000500, 0141401, 0001700, 0001200, 0141101, 0143001, 0003300, 0003600, 0143501, 0002400, 0142701, 0142201, 0002100, }; static unsigned short crc16b[] = { 0000000, 0146001, 0154001, 0012000, 0170001, 0036000, 0024000, 0162001, 0120001, 0066000, 0074000, 0132001, 0050000, 0116001, 0104001, 0043000, }; unsigned short hash(buffer) char *buffer; /* * Return the CRC16 hash code for the buffer * Algorithm from Stu Wecker (Digital memo 130-959-002-00). */ { register unsigned short crc; register char *tp; register short temp; crc = 0; for (tp = buffer; *tp != EOS;) { temp = *tp++ ^ crc; /* XOR crc with new char */ crc = (crc >> 8) ^ crc16a[(temp & 0017)] ^ crc16b[(temp & 0360) >> 4]; } #ifdef DEBUG_ALL printf("%06o: %s\n", (crc == 0) ? 1 : crc, buffer); #endif return((crc == 0) ? 1 : crc); } opendir(which, arg, okfd) int which; /* Which file to open (0 or 1) */ char **arg; /* File name argument, &argv[which] */ FILE *okfd; /* File name (already open) */ { register char *tp; register int c; register char *newname; fgetname(okfd, text); /* * Skip over device name */ for (tp = text; (c = *tp) && c != ':'; tp++); if (c) tp++; else tp = text; /* * Skip over [UIC] or [PPN] if present */ if (*tp == '[' || *tp == '(') { while ((c = *tp++) && c != ']' && c != ')'); if (c == 0) { fprintf(stderr, "?Bug: bad file name \"%s\"\n", text); tp--; } } cpystr(text, tp); /* * Don't include version */ for (tp = text; (c = *tp) && c != ';'; tp++); *tp = 0; /* * Now, text has the file name, tp - text, its length, * and *arg the (possible) directory name. Create a new * file name for opening. */ if ((newname = malloc(tp - text + strlen(*arg) + 1)) == NULL) error("Out of space at start"); concat(newname, *arg, text, NULL); if ((infd[which] = fopen(newname, "r")) == NULL) cant(*arg, "constructed input", 1); else *arg = newname; } char * myalloc(amount, why) int amount; char *why; /* * Allocate or crash. */ { register char *pointer; if ((pointer = malloc((unsigned) amount)) == NULL) noroom(why); return (pointer); } char * compact(pointer, new_amount, why) char *pointer; int new_amount; char *why; /* * Reallocate pointer, compacting storage */ { register *new_pointer; extern char *realloc(); /* * This routine is heavily dependent on C storage allocator hacks */ #ifndef vms free(pointer); /* Do not change */ free(free_space); /* The order of this code. */ free_space = malloc(1); /* This code doesn't work on */ #endif if ((new_pointer = /* Vax-11 C */ realloc(pointer, (unsigned) new_amount)) == NULL) noroom(why); #ifdef DEBUG if (new_pointer != pointer) { fprintf(stderr, "moved from %06o to %06o\n", pointer, new_pointer); } /* rdump(new_pointer, why); */ #endif return (new_pointer); } noroom(why) char *why; { fprintf(stderr, "?DIFF-F-out of room when %s\n", why); exit(-1); } #ifdef DEBUG rdump(pointer, why) int *pointer; char *why; /* * Dump memory block */ { int *last; int count; last = (int **)pointer[-1]; fprintf(stderr, "dump %s of %06o -> %06o, %d words", why, pointer, last, last - pointer); last = (int *)(((int) last) & ~1); for (count = 0; pointer < last; ++count) { if ((count & 07) == 0) { fprintf(stderr, "\n%06o", pointer); } fprintf(stderr, "\t%06o", *pointer); pointer++; } fprintf(stderr, "\n"); } #endif cant(filename, what, fatalflag) char *filename; char *what; int fatalflag; /* * Can't open file message */ { fprintf(stderr, "Can't open %s file \"%s\"\n", what, filename); if (fatalflag) error("Can't continue"); } #ifdef DEBUG dump(d_linep, d_len, d_which) LINE *d_linep; { register int i; printf("Dump of file%c, %d elements\n", "AB"[d_which], d_len); printf("linep @ %06o\n", d_linep); for (i = 0; i <= d_len; i++) { printf("%3d %6d %06o\n", i, d_linep[i].serial, d_linep[i].hash); } } dumpklist(kmax, why) int kmax; char *why; /* * Dump klist */ { register int i; register CANDIDATE *cp; register int count; printf("\nklist[0..%d] %s, clength = %d\n", kmax, why, clength); for (i = 0; i <= kmax; i++) { cp = &clist[klist[i]]; printf("%2d %2d", i, klist[i]); if (cp >= &clist[0] && cp < &clist[clength]) printf(" (%2d %2d -> %2d)\n", cp->a, cp->b, cp->link); else if (klist[i] == -1) printf(" End of chain\n"); else printf(" illegal klist element\n"); } for (i = 0; i <= kmax; i++) { count = -1; for (cp = klist[i]; cp > &clist[0]; cp = &cp->link) { if (++count >= 6) { printf("\n "); count = 0; } printf(" (%2d: %2d,%2d -> %d)", cp-clist, cp->a, cp->b, cp->link); } printf("\n"); } printf("*\n"); } #endif #ifdef TIMING ptime(why) char *why; /* * Dump time buffer */ { long ttemp; ttemp = time(NULL); printf("%ld seconds for %s\n", ttemp - sectiontime, why); sectiontime = ttemp; } #endif