%%% ==================================================================== %%% @@C-Web-file{ %%% author = "Robert M. Solovay", %%% version = "1.04", %%% date = "15 March 1993", %%% time = "16:58:07 MST", %%% filename = "checksum.w", %%% address = "Department of Mathematics %%% University of California %%% Berkeley, CA %%% USA %%% Tel: (415) 642-2252", %%% checksum = "43506 1194 6727 43473", %%% email = "solovay@@math.berkeley.edu (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "checksum", %%% supported = "yes", %%% docstring = "The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by checksum %%% itself.", %%% } %%% ==================================================================== %%% Revision history (reverse time order): %%% %%% 1.04 [15-Mar-1993] %%% Add support for Apple Macintosh. %%% %%% 1.03 [25-Jun-1992] %%% Add support for VAX VMS C run-time library deficiency. %%% %%% 1.02 [11-Sep-1991] %%% Add missing initialization of answer_from_critical; this bug %%% was discovered when garbage output was produced from an empty %%% input file. %%% %%% Correct two ">From" strings to "from" (mangled by UNIX mailer). %%% %%% Shorten two long lines to <= 80 characters. %%% %%% 1.01 [06-Sep-1991] %%% Minor wording changes. %%% %%% Add EXIT_SUCCESS and EXIT_FAILURE, and use in all exit() %%% calls. Make successful checksum installation or verification %%% return EXIT_SUCCESS, and all other conditions, EXIT_FAILURE, %%% so shell commands can test execution status. %%% %%% Add missing initialization of verify_successful. %%% %%% 1.00 [04-Mar-1991] %%% Original version. %%% %%% ==================================================================== @* The checksum program. \medskip Copyright 1991 Robert Solovay -- University of California, Berkeley. Email: solovay@@math.berkeley.edu \medskip Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation. This software is provided ``as is'' without express or implied warranty. \medskip This program will compute a 16-bit cyclic redundancy checksum (CRC) for a file as well as a count of the words, lines and characters. If called from the command line with: {\tt \%checksum inputfile outputfile} {\parindent=0pt the program will compute a 16-bit checksum for the file {\sl inputfile} and write a copy of this file with the checksum installed to the file {\sl outputfile}.} If the checksum has previously been installed in the input file, and the input file has not been corrupted since then, the output file will be identical to the input file. If called with the command line: {\tt \%checksum -v file} {\parindent=0pt where {\sl file} is a file with an ``installed'' checksum, the program will proceed to verify whether the checksum is correct and report its conclusions to |stdout|.} %Here is the checksum for this web file: %checksum = "02100 1108 6383 40704" @ More precisely, the program will search for the first line of {\sl inputfile} which contains the word {\sl checksum} in lowercase and no other alphabetic characters. We refer to this line as the ``critical line'' of the file. If the critical line contains no quotation marks, then the file {\sl outputfile} will be created which is a copy of {\sl inputfile} except that the critical line will be replaced by a line where the word {\tt checksum} is replaced by {\tt checksum = "xxxxx lc wc cc"}. Here {\sl lc} is the number of lines in the file {\sl outfile}, written in decimal. Similarly, {\sl wc} is the word count of {\sl outfile} and {\sl cc} is the character count (when the end of line character is taken to be the single character \.{\\n}.) It is difficult to arrange that a file contains its own checksum. Instead, the field {\sl xxxxx} contains the checksum, written in decimal in a five-digit field (with possible leading 0's) of the file obtained from {\sl outfile} by replacing the field containing the checksum by the string |"ZZZZZ"|. If the critical line already contains after the word ``checksum'' precisely two quotation marks, and the first is the last character of the four-character string |" = \""|, then the material between the two quotes will be deleted and replaced by a checksum and three counts as described above. The program is designed to be usable as a UNIX filter; that is, it can take its input from |stdin| or send its output to |stdout|. The user can indicate the choce of |stdin| or |stdout| either by omitting the relevant file-name from the command line or by using the single character |'-'|. (If the command given is: {\tt \%checksum file} {\parindent=0pt the program will interpret |file| as the name of the input file, and will send the output to |stdout|.)} @* The skeleton. Here is the main module of the program. @c @@/ @@/ @@/ @@/ @ @ We have to include the standard I/O definitions, as well as some other standard header files. Until all C implementations reach the level of the 1989 ANSI C standard, we have to be careful about which files we try to include. The GNU C compiler defines the symbol |__STDC__|, but is commonly run with native pre-ANSI header files. @f CheckSum int @= #include #include #if defined(__STDC__) && !defined(__GNUC__) #include #include #else #ifndef SEEK_SET #define SEEK_SET 0 #endif #endif #include #ifdef THINK_C #include #endif #ifndef EXIT_SUCCESS #define EXIT_SUCCESS 0 #endif #ifndef EXIT_FAILURE #define EXIT_FAILURE 1 #endif #if defined(__STDC__)||defined(__GNUC__)||defined(__TURBOC__)||defined(MSDOS) #define ARGS(plist) plist #else #define ARGS(plist) () #endif CheckSum add_a_byte_to_checksum ARGS((unsigned int new_byte,CheckSum oldCheckSum));@/ int critical ARGS((char *s)); int main ARGS((int argc,char * *argv));@/ CheckSum merge_three_checksums ARGS((CheckSum s,CheckSum t,CheckSum u)); CheckSum MultiplyTwoPolys ARGS((CheckSum u,CheckSum v));@/ unsigned long int word_count ARGS((char *s)); @ Now we come to the general layout of the |main| function. The |status| variable communicates to the operating system if the run was successful or not, and |prog_name| is used in case there's an error message to be printed. @d OK 0 @d usage_error 1 @d cannot_open_read_file 2 @d cannot_open_write_file 4 @d error_reading_input_file 8 @d cannot_open_temporary_file 16 @= int status=OK; /* exit status of command */ char *prog_name; @ @= int main (argc,argv) int argc; char **argv; { @@/ #ifdef THINK_C argc = ccommand( & argv ); #endif prog_name=argv[0]; @@/ @@/ @@/ @@/ if (mode == COMPUTING){@}@/ @@/ if (status != 0) exit(EXIT_FAILURE); else exit(verify_successful ? EXIT_SUCCESS : EXIT_FAILURE); return(0); } @* Parsing the command line. We will have a variable |mode| which is set to indicate whether we are ``computing'' or ``verifying'' the checksum. @d VERIFY 0 @d COMPUTING 1 @= int mode = COMPUTING; @ The mode will be |VERIFY| iff the variable |argc| is $\geq 2$ and the second ``argument'' is |"-v"|. In any case we should have |argc| $\leq 3$. We update |argc| and |argv| to reflect the arguments already ``consumed''. @= if(argc > 3){@} else if ( (argc >= 2) && !(strcmp(argv[1], "-v"))) {mode = VERIFY; argc -= 2; argv += 2;} else{argc--;argv++;} @* The main loop. Our main loop reads a line at a time from the input file. It looks for the ``critical line'' which first contains the word ``checksum'' (and no other alphabetic characters). Suppose first that we are in |VERIFY| mode. As the program reads each line, it updates its various counts (|lc|, |wc|, and |cc|) as well as computing the checksum. When we encounter the critical line, we have to record its predictions as to our counts and checksum, to compare with those that are computed directly from the file. In addition, we have to modify the line (as discussed above) replacing the checksum by the string |"ZZZZZ"| before using the line in our checksum calculations. Our processing of the input file will end when we reach an end of file. At that point, we compare the predicted counts and checksums with those that are recorded on the critical line, and make a report to |stdout| as to whether or not the verification was successful. @ Next we describe the main loop when we are in |COMPUTING| mode. As we read each line, we see if it is the critical line or not. The first phase of our operations consists of the processing of those lines before the critical line. We update our counts and checksum. In addition, we write each line as we read it out to the output file. When we reach the critical line, we store it in a special string buffer |critical_line|. We also record our place in the input file, so that we will be able on our second pass to write the remainder of the input file to the output file. (We must modify the critical line before writing it to the output file so as to contain the counts and checksum. We will only be able to compute these at the end of the first pass through the input file.) The place in the input file that we record will be directly after the critical line. After processing the remainder of the file, we will be able to correctly compute the version of the critical line to write to the output file, and we will be able to write the remainder of the input file to the output file. @ We want to have a special type which is the kind of variable used for accumulating checksums. The reason is that if we wish to change to a 32-bit checksum, it is useful to have just one definition to change. @d SIZE 16 @= typedef unsigned int CheckSum; @^Checksum size dependencies@> @ Here are the basic variables needed to accumulate the various counts, etc. The reasons for the mysterious initializations of the checksum variables will become apparent below when we discuss the ``theory'' of our device for doing the checksum calculation in one pass through the input file. @= unsigned long int lc = 0; /*line count*/ unsigned long int wc = 0; /* word count*/ unsigned long int cc = 0; /*character count*/ int current_line_is_critical; /* This flag is set to 1 if the current line is the critical line*/ int found_critical_line = 0; /*This flag is set to 1 after the critical line is located. */ CheckSum chksum = 0, auxchksum = 0, auxchksum2 = 1; long int loc_critical_line; /*This stores the location needed to rewind to the start of the line after the critical line in the input file*/ int answer_from_critical = 0; char current_line[257]; char critical_line[257]; @ Here we read in the next line. If the pointer returned by |fgets| is null, it might signal an |EOF| or it might be an error condition. We use the |feof| facility to determine which. If it is an error condition, we signal the error and abort the program. (If not, we are done reading the input file, and the module \.{} will take over.) If the critical line has not yet been encountered, we must first check if the current line is the critical line. If it is, we set two flags. The first tells us that the critical line has indeed been found. The second tells us that the current line is indeed the critical line. (This second flag must be set to 0 before we test for the critical line.) @= while( fgets(current_line,257,input_file) != NULL)@/ { current_line_is_critical = 0; /*Reset this flag.*/ if (found_critical_line == 0)@@/ if(current_line_is_critical){@}@/ @@/ @@/ @@/ }@/ if (!feof(input_file))@@/ @ In order to check whether or not a line is critical, we use an auxiliary function |critical|. This function returns a positive integer if the line is critical and $0$ if it isn't. The input to |critical| is a pointer to the string that contains the current line. If the line is critical, the function will set the value of the global variable |loc_chksum| to indicate the position of the word |"checksum"| in the critical line. Then it will search the portion of the string after the word |"checksum"| for an occurrence of the string |" = \""|. If it doesn't find this string, it sets the return value of |critical| to 1 and exits. If it does, it records the position of the quote character of the string in the global variable |loc_first_quote_mark|. It then proceeds to look in the critical line for a quote character occurring after this string. If it doesn't find this, the function returns 2; if it does, it stores its location in the global variable |loc_second_quote_mark| and returns the value 3. @= int loc_chksum, loc_first_quote_mark, loc_second_quote_mark; @ @= int critical(s) char *s; { @@/ @@/ } @ The variable |i| is a state variable which takes a value in the range $0 \leq i \leq 3$. The precise meaning of the different states will be explained below. The variable $j$ holds our current place in the ``input string'' |s|. Thus we usually refer to |s[j]| in the code that follows. The array of characters |chksm| holds the string |"checksum"|. The variable $k$ holds our place in this string. The variable $jj$ holds the location of the first character of the string |"checksum"| in the ``input string''. @= int i = 0; int j = 0; char *chksm = "checksum"; int k = 0; int jj; @ We first determine if the string we are inspecting is the critical line. If it is not, we bail out and return 0. If it is we set the global variable |loc_chksum| and continue. As we read through the string |s| during this first phase, we are in one of four states: We are in state $i=0$ until we have spotted the first alphabetic character. We are in state $i=1$ if we know definitely that the string we are looking at is not critical. We are in state $i=2$ while we are checking that the part of the string we are now looking at is an occurrence of the word ``checksum''. After we have seen the occurrence of the word ``checksum'' we are in state 3. @= while ((i != 1) && (s[j] != 0)) switch(i) { case 0:if(isalpha(s[j])){ i = 2; jj = j;} else j++; break; case 2:if(s[j] == chksm[k]) {k++;j++; if (k == 8)i = 3;} else i = 1; break; case 3: if(isalpha(s[j])) i = 1; else j++; break;} /*end switch*/ if (i !=3) return(0); loc_chksum = jj; @ We next proceed to look for the string |" = \""|. We search for the final quote character of this string. If we find it, we check if the preceding characters match up too. In this part of the code, we use |i| as a flag which we set to one when we find the pattern we are looking for. @= j = jj + 8; i = 0;@/ while (s[j] != 0) { if (s[j] != '"') {j++; continue;} if( (s[j-1] == ' ') && (s[j-2] == '=') && (s[j-3] == ' ')) {loc_first_quote_mark = j; i = 1; break;} else j++;} if(i==0) return(1); @ In this part of the code, we are looking for the second ``quote mark''. @= j++; i = 0; while(s[j] != 0) { if(s[j] == '"'){i=1; loc_second_quote_mark = j; break;} j++;} if(i==0) return(2); else return(3); @ We stash the answer from the function |critical| away for future use. Note that once the answer is positive, we will never call this function again (because the flag |found_critical_line| will be set. @= if( answer_from_critical=critical(current_line)) current_line_is_critical = found_critical_line = 1; @ The flag |input_from_pipe| is set to $1$ if we are getting our input from |stdin|. In that case, it is not possible to save our location in the input file. Instead, we will store the remainder of the input file to a temporary file and read that during the second pass. @= {strcpy(critical_line,current_line); if((mode == COMPUTING) && !input_from_pipe) {@}@/ else if (mode == VERIFY) {@}@/ } @ @= int i; @ @= for(i = 1; i<6;i++) current_line[loc_first_quote_mark + i] = 'Z'; @ @= loc_critical_line = ftell(input_file); @ We write the current line to the output file if it is before the critical line. If our input is from |stdin|, then we are unable to rewind the ``input file'' for the second pass. We therefore copy out the remainder of the input file to a temporary file. @= if(mode == COMPUTING) {if (!found_critical_line) fputs(current_line,output_file); else if( (input_from_pipe) && !(current_line_is_critical)) fputs(current_line,temp_file);} @ This section updates the various counts. The word count needs a ``home-grown'' function |word_count| which we write next. If we are in |COMPUTING| mode, we postpone the calculations for the ``critical line''; they will be handled in the ``end-game'' procedures below. @= if(!(current_line_is_critical) || (mode == VERIFY)) { lc++; cc += strlen(current_line); wc += word_count(current_line); } @ @= unsigned long int word_count(s) char *s;@/ {int i ; int in_word = 0; int word_cnt = 0; char c; for(i = 0; (s[i] != 0) && (s[i] != '\n'); i++) { c = s[i]; if (c > ' ' && c < 0177) {/*visible ASCII codes */ if(!in_word) {word_cnt++; in_word++;} continue; } if (c != ' ' && c != '\t') continue; in_word = 0; } return( (unsigned long int)word_cnt); } @ Our checksum can be computed ``a byte at a time''. Thus if the sequence of bytes in the file is $s_0, s_1, \ldots, s_r$ (where the $s_i$'s are all of type |unsigned char|, then the checksum will be the value $t_{r+1}$ where the $t_i$'s are computed inductively via the equations: $t_0 = 0$; $t_{j+1} = |add_a_byte_to_checksum(@t$s_j,t_j$@>)|$. However, we are faced with the following problem. We don't know the version of the ``critical line'' to be written to the output file until we have read the whole file. It turns out that we can compute two auxiliary checksums for the portion of the file that comes after the critical line; when we reach the end of the file, we can compute the output file version of the critical line, and then the checksum for the portion of the file up to and including the critical line. Then there is a simple procedure (to be explained below) which will compute the checksum for the whole output file from these three partial checksums. @= if ( mode == VERIFY ||!found_critical_line) {@}@/ else if(!current_line_is_critical) {@} @ @= for(i=0;current_line[i] != 0;i++)@/ chksum = add_a_byte_to_checksum( (unsigned int)current_line[i],chksum); @ @= for(i=0;current_line[i] != 0;i++)@/ { auxchksum = add_a_byte_to_checksum( (unsigned int)current_line[i],auxchksum); auxchksum2 = add_a_byte_to_checksum( (unsigned int)current_line[i],auxchksum2); } @ We here simply rewind the input file to the place directly after we discovered the critical line and write all the following lines to the output file. (If we are receiving our input from |stdin|, then we ``rewind'' the temporary file and make that the input file.) The revised version of the critical line has already been written to the output file by the ``end-game'' portion of the program. @= if (!input_from_pipe) fseek(input_file,loc_critical_line,SEEK_SET); else { rewind(temp_file); fclose(input_file); input_file = temp_file; } while(fgets(current_line,257,input_file) != NULL) fputs(current_line,output_file); if (!feof(input_file)) @@/ @* Endgame procedures. @= if(mode == COMPUTING){@}@/ else {@} @ @= @@/ @@/ @@/ @@/ @@/ @@/ @@/ @ A major part of the work of the |COMPUTING| endgame procedures is in assembling the string which will be the ``critical line'' in the output file. For this we need a bunch of auxiliary string buffers. We can use the space that was previously used to hold the ``current line'' and just use a pointer into this space for one of our temporary buffers. We will also have a smaller temporary buffer |temp_buf2| which is used to assemble various ``count fields'' for inclusion in the new version of the critical line. @= char *temp_buf; char temp_buf2[17]; @ We will now split the critical line into two parts. Precisely how we do this will depend on the value of |answer_from_critical|. But after this is done, the beginning part of the line will be the buffer |critical_line|, ending in the four character string |" = \""|. The portion of the line that will follow the ``second quote'' will be in the buffer |temp_buf|. We don't actually delete the right portion of the ``critical line'' from the buffer |critical_line|. Instead, we simply store a null character at the appropriate place. @= temp_buf = current_line; switch (answer_from_critical) { @ The first case to consider is when |answer_from_critical| $=0$. This case ``can't happen'' so we print an appropriate error message and exit the program. @= case 0:{@}@/ break; @ For the second case, the line does not contain the pattern |" = \""| after the word |"checksum"|, We will put the portion of the critical line after the word |"checksum"| in the buffer |temp_buf|, and place the string |" = \""| after the end of the word |"checksum"| followed by a null character. @= char *quote_pattern = " = \""; @ @= case 1:strcpy(temp_buf,critical_line + (loc_chksum + 8)); critical_line[loc_chksum + 8] = 0; strcat(critical_line,quote_pattern); loc_first_quote_mark = loc_chksum + 11; break; @ The next case is when we can find the pattern |" = \""| in the critical line but not a following ``quote''. In that case, we split at the quote mark of the pattern |" = \""|. @= case 2:strcpy(temp_buf,critical_line + (loc_first_quote_mark +1)); critical_line[loc_first_quote_mark + 1] = 0; break; @ In the final case, we can find both the pattern |" = \""| and a following ``quote-mark''. We store the portion of the critical line after the second ``quote-mark'' in |temp_buf| and put a null character in the buffer |critical_line| after the first ``quote-mark''. This has the effect of erasing the text that is between the two ``quote-marks'' in the input file. It will be replaced by the new checksum and counts in the later portions of the program. @= case 3:strcpy(temp_buf,critical_line + (loc_second_quote_mark +1)); critical_line[loc_first_quote_mark +1] = 0; break;} /* end of switch*/ @ We will next insert the string |"ZZZZZ "| as part of the new ``critical line'' we are building. Later we will have to replace the string |"ZZZZZ"| by the correct checksum. @= char *Zstring = "ZZZZZ "; char *one_space = " "; char *quote = "\""; @ @= strcat(critical_line, Zstring); @ The installation of the final line count is easy. We have to increment the line count by 1 for the ``critical line''. @= lc++; sprintf(temp_buf2,"%lu",lc); strcat(critical_line,temp_buf2); strcat(critical_line,one_space); @ Computing the final word count is slightly more involved. We will add two dummy fields to the critical line and then tack on the text in |temp_buf|. We will take the ``word count'' of this line and add it to |wc|. Then we erase the material we have added from the line, but add in the new word count much as we added in the line count in the preceding module. We need to keep track of our position in the line, wo we define a variable in which to stash it. @= int loc_in_line; char *dummy_field_pattern = "B C\""; @ @= loc_in_line = strlen(critical_line); strcat(critical_line,dummy_field_pattern); strcat(critical_line,temp_buf); wc += word_count(critical_line); critical_line[loc_in_line] = 0; sprintf(temp_buf2,"%lu",wc); strcat(critical_line,temp_buf2); strcat(critical_line,one_space); @ The computation of the final character count involves an iterative procedure. The problem is that the total character count includes the number of characters in the character-count itself. A simple analysis shows the process converges in at most three iterations. (The 1 in the computation of |cc_guess| comes from the length of the string |"\""|.) At the end of this module, we splice back the second half of the critical line which we stashed in |temp_buf|. @= int char_field_length = 0; int old_char_field_length; unsigned long int cc_guess; @ @= cc += strlen(temp_buf) + strlen(critical_line); do {cc_guess = cc + char_field_length + 1; sprintf(temp_buf2,"%lu",cc_guess); old_char_field_length = char_field_length; char_field_length = strlen(temp_buf2);} while(old_char_field_length != char_field_length); strcat(critical_line,temp_buf2); strcat(critical_line,quote); strcat(critical_line,temp_buf); @ The function |merge_three_checksums| does the computation of the final checksum from the three partial checksums alluded to above. @= for(i=0;critical_line[i] != 0; i++)@/ chksum = add_a_byte_to_checksum((unsigned int)critical_line[i],chksum);@/ chksum = merge_three_checksums(chksum,auxchksum,auxchksum2); sprintf(temp_buf2,"%05u",chksum); strncpy(critical_line + (loc_first_quote_mark + 1),temp_buf2,5); @ @= fputs(critical_line,output_file); @ The verify ``endgame procedures'' consist in comparing the various computed counts and the checksum with those stored in the critical lines. If everything checks out, we write a ``success'' message to |stdout|; if not we write a ``failure'' message. We need some variables to store the fields extracted from the critical line. We also need a pointer to use in extracting these fields. @= unsigned long int stored_wc = 0; /* stored word count*/ unsigned long int stored_cc = 0; /*stored character count*/ unsigned long int stored_lc = 0; /*stored line count*/ CheckSum stored_chksum; int verify_successful = 1; #if vms char *format = "%d %ld %ld %ld"; /* VMS C sscanf() doesn't support u item. */ /* none of these numbers overflow into the */ /* sign bit on the VAX, so use %d instead */ #else char *format = "%u %lu %lu %lu"; #endif char *position_in_line; @ The first command positions the pointer |position_in_line| at the start of the |chksum| field. @= if(!found_critical_line) {printf("The input file did not have the correct format.\n"); exit(EXIT_FAILURE);} position_in_line = critical_line + loc_first_quote_mark + 1; if ( 4 != sscanf(position_in_line,format, &stored_chksum, &stored_lc, &stored_wc, &stored_cc)) {printf("The input file did not have the correct format.\n"); exit(EXIT_FAILURE);} verify_successful = (chksum == stored_chksum && wc == stored_wc && lc == stored_lc && cc == stored_cc); if(verify_successful) printf("The checksum verification of the input file was successful.\n"); else printf("The checksum verification of the input file did not succeed.\n"); @* CRC checksums. In computing the checksum of the file {\sl outputfile} as discussed above, we run into the following problem. We will only know the various counts (such as the word count) after reading the whole file. A naive implementation of the program would require three passes through the input file: (1) A pass through the file to compute the word, character, and line counts. (2) A pass through the file to compute the CRC-16 checksum. (3) A final pass to copy the input file to the output file. However, we will be able to get by with only two passes through the input file because of the following feature of the CRC-checksum. Let $s_1$ be a string of bytes with the checksum $c_1$. Let $s_2$ be a string of bytes, and let $s$ be the concatenation of $s_1$ and $s_2$ in that order, and let $c$ be the checksum of $s$. Then we will be able to compute from $s_2$ two summary numbers $a_2$ and $c_2$ (where $c_2$ is the usual checksum for $s_2$, and $a_2$ is also an integer such that $0 \leq a_2 < 2^{16}$) such that $c$ can be quickly computed from $c_1$, $c_2$, and $a_2$. In this section of the program, we develop the ideas needed to verify the property of CRC checksums just stated, and write the code for the various functions needed to compute CRC checksums. @ For an excellent discussion of the ideas involved in the CRC checksum, we refer the reader to the paper {\sl A Tutorial in CRC Computations} by T.~V.~Ramabadran and S.~S.~Gaitonde which appeared in IEEE Micro vol. 8, no. 4, (August 1988), pp. 62--75. \def\Zt{{\bf Z}_2} $\Zt$ is the two element field. $\Zt[X]$ is the ring of polynomials in the variable $X$ with coefficients in the field $\Zt$. A crucial role in the CRC-16 algorithm is played by the polynomial $p(X) = X^{16} + X^{15} + X^2 + 1$. We let $R$ be the quotient ring obtained by dividing $\Zt[X]$ by the ideal generated by the polynomial $p$. Then $R$ has cardinality $2^{16}$. Clearly each element of $R$ lies in the same equivalence class as a unique polynomial of $\Zt[X]$ of degree less than $16$, and in this way, we identify $R$ with the set of polynomials of degree less than $16$. We associate to each integer $x$ such that $0 \leq x < 256$ a polynomial in $\Zt[X]$ of degree at most $7$ as follows. Let $a_7\ldots a_0$ be the binary expansion of $x$ (so $0 \leq a_i < 2$ for $0 \leq i < 7$ and $x = \sum_{i=0}^7 a_i 2^i$. Then the polynomial associated to $x$ is $\sum_{i=0}^7 a_i X^i$. @ Now consider a string of bytes $s_0\ldots s_r$. (Here the $s_i$'s may be viewed as integers in the range $0 \leq s_i < 256$. We wish to associate to this sequence an element of $R$ which will be its checksum. We do this as follows. Let $p_i$ be the polynomial associated to $s_i$ as discussed in the preceding paragraph. Let $p$ be the polynomial $\sum_{i=0}^r p_i X^{8(r-i)}$. There is a canonical homomorphism of $\Zt[X]$ onto $R$. The checksum of the sequence $s_0\ldots s_r$ is the image of $p$ in $R$. The following way of looking at $p$ may make its definition seem more natural. View the string of $r+1$ bytes $s_0\ldots s_r$ as a string of $8(r+1)$ bits $b_0 \ldots b_{8r+7}$ by replacing each byte $s_i$ by its binary expansion $s_{i,7} \ldots s_{i,0}$. To this string of bits we associate the polynomial $\sum_{i=0}^{8r+7} b_i X^{(8r + 7) - i}$. This allows the following inductive definition of the checksum. Let $t_0 = 0$. Let $t_{i+1}$ be the canonical image in $R$ of $X^8 \cdot t_i + p_{i+1}$. Then $t_{i+1}$ is easily checked to be the checksum of the sequence $s_0 \ldots s_i$. @ We can identify $R$ with the set of integers $x$ such that $0 \leq x < 2^{16}$ as follows. Let $r \in R$. Then $r$ is the image of a unique polynomial of degree at most 15 in $\Zt[X]$, say $\sum_{i=0}^{15} a_i X^i$. We identify $r$ with the integer $\sum_{i=0}^{15} a_i 2^i$. Suppose then that $r \in R$ and that $s$ is an unsigned byte. We wish to indicate how we will implement a function that will quickly compute the element of $R$ that corresponds to $X^8 \cdot r + s$. The first point to make is that via our identification of $R$ with unsigned integers in the range $0 \leq x < 2^{16}$, the operation of addition in $R$ corresponds to the bit-wise exclusive {\sl or\/} (rendered in C as |^|). Second, if we write a typical element $r$ in $R$ as a polynomial of degree less than $16$ in $\Zt[X]$, and then write this polynomial as $r_1 X^8 + r_2$ where $r_1$ and $r_2$ are polynomials of degree at most $7$ in $\Zt[X]$, then $r_1$ and $r_2$ correspond respectively to the high-order byte and low-order byte of the unsigned integer corresponding to $r$. We will precompute a table |crc_table[i]| (where $0 \leq i < 256$ such that if $p$ is the polynomial of degree at most $7$ corresponding to $i$, then $|crc_table|[i]$ gives the unsigned integer corresponding to the element of $R$ which is the homomorphic image of $X^{16}\cdot p$. In these terms, if we add the byte |new_byte| to the |old_checksum| wth high-order byte $r_1$ and low order byte $r_2$, the resulting element of $R$ will be $|crc_table|(r_1) + X^8\cdot r_2 + |new_byte|$. To complete the explanation of the code that follows we need merely note that the operation of multiplying by $X^8$ corresponds to a left shift by 8 bits. @ We now begin writing the code for the |add_a_byte_to_checksum| function. A key role is played by the following table. This table is computed in the program described in the CWeb file {\tt polynom.w} which is enclosed with this distribution. @= static CheckSum crc_table[] = { 0x0000, 0x8005, 0x800f, 0x000a, 0x801b, 0x001e, 0x0014, 0x8011, 0x8033, 0x0036, 0x003c, 0x8039, 0x0028, 0x802d, 0x8027, 0x0022, 0x8063, 0x0066, 0x006c, 0x8069, 0x0078, 0x807d, 0x8077, 0x0072, 0x0050, 0x8055, 0x805f, 0x005a, 0x804b, 0x004e, 0x0044, 0x8041, 0x80c3, 0x00c6, 0x00cc, 0x80c9, 0x00d8, 0x80dd, 0x80d7, 0x00d2, 0x00f0, 0x80f5, 0x80ff, 0x00fa, 0x80eb, 0x00ee, 0x00e4, 0x80e1, 0x00a0, 0x80a5, 0x80af, 0x00aa, 0x80bb, 0x00be, 0x00b4, 0x80b1, 0x8093, 0x0096, 0x009c, 0x8099, 0x0088, 0x808d, 0x8087, 0x0082, 0x8183, 0x0186, 0x018c, 0x8189, 0x0198, 0x819d, 0x8197, 0x0192, 0x01b0, 0x81b5, 0x81bf, 0x01ba, 0x81ab, 0x01ae, 0x01a4, 0x81a1, 0x01e0, 0x81e5, 0x81ef, 0x01ea, 0x81fb, 0x01fe, 0x01f4, 0x81f1, 0x81d3, 0x01d6, 0x01dc, 0x81d9, 0x01c8, 0x81cd, 0x81c7, 0x01c2, 0x0140, 0x8145, 0x814f, 0x014a, 0x815b, 0x015e, 0x0154, 0x8151, 0x8173, 0x0176, 0x017c, 0x8179, 0x0168, 0x816d, 0x8167, 0x0162, 0x8123, 0x0126, 0x012c, 0x8129, 0x0138, 0x813d, 0x8137, 0x0132, 0x0110, 0x8115, 0x811f, 0x011a, 0x810b, 0x010e, 0x0104, 0x8101, 0x8303, 0x0306, 0x030c, 0x8309, 0x0318, 0x831d, 0x8317, 0x0312, 0x0330, 0x8335, 0x833f, 0x033a, 0x832b, 0x032e, 0x0324, 0x8321, 0x0360, 0x8365, 0x836f, 0x036a, 0x837b, 0x037e, 0x0374, 0x8371, 0x8353, 0x0356, 0x035c, 0x8359, 0x0348, 0x834d, 0x8347, 0x0342, 0x03c0, 0x83c5, 0x83cf, 0x03ca, 0x83db, 0x03de, 0x03d4, 0x83d1, 0x83f3, 0x03f6, 0x03fc, 0x83f9, 0x03e8, 0x83ed, 0x83e7, 0x03e2, 0x83a3, 0x03a6, 0x03ac, 0x83a9, 0x03b8, 0x83bd, 0x83b7, 0x03b2, 0x0390, 0x8395, 0x839f, 0x039a, 0x838b, 0x038e, 0x0384, 0x8381, 0x0280, 0x8285, 0x828f, 0x028a, 0x829b, 0x029e, 0x0294, 0x8291, 0x82b3, 0x02b6, 0x02bc, 0x82b9, 0x02a8, 0x82ad, 0x82a7, 0x02a2, 0x82e3, 0x02e6, 0x02ec, 0x82e9, 0x02f8, 0x82fd, 0x82f7, 0x02f2, 0x02d0, 0x82d5, 0x82df, 0x02da, 0x82cb, 0x02ce, 0x02c4, 0x82c1, 0x8243, 0x0246, 0x024c, 0x8249, 0x0258, 0x825d, 0x8257, 0x0252, 0x0270, 0x8275, 0x827f, 0x027a, 0x826b, 0x026e, 0x0264, 0x8261, 0x0220, 0x8225, 0x822f, 0x022a, 0x823b, 0x023e, 0x0234, 0x8231, 0x8213, 0x0216, 0x021c, 0x8219, 0x0208, 0x820d, 0x8207, 0x0202, }; @ The following is the function definition for |add_a_byte_to_checksum|. @= CheckSum add_a_byte_to_checksum(new_byte,oldCheckSum) unsigned int new_byte; CheckSum oldCheckSum; { CheckSum u; u = (CheckSum)new_byte; oldCheckSum = oldCheckSum & 0xffff; u = ((oldCheckSum <<8) & 0xffff) ^ u; u = u ^ crc_table[(oldCheckSum >> 8) & 0xff]; return(u); } @ We next wish to indicate the trick that allows us to compute the checksum in one pass through the input file rather than two. First let $b$ be an unsigned char. Then $b$ determines a function $f_b: R \to R$ as follows: $f_b(r) = X^8\cdot r + b$. More generally let $a$ and $b$ be elements of $R$. Then these determine a map $g_{a,b}:R \to R$ via the stipulation $$g_{a,b}(x) = a\cdot x + b.$$ Let $G$ be the set of all maps from $R$ to $R$ obtained in this way (for various choices of $a$ and $b$.) Then clearly $f_b$ is of this form, and the collection of maps of this form is readily seen to be closed under composition. Let $g \in G$. Then $g$ is completely determined by the values of $g(0)$ and $g(1)$. Indeed, $g(0)=b$ and $g(1)= a + b$. Remembering that in $R$, $-1=1$, we see that $a = g(0) + g(1)$. @ Our strategy for determining the checksum in one pass should now be clear. Let $s_0,\ldots s_r$ be the sequence of bytes that correspond to the lines of the output file up to and including the critical line. Let $s_{r+1}\ldots s_t$ be the remaining bytes of the input file. These will determine a function $g \in G$ such that if $c$ is the checksum determined by $s_0\ldots s_r$, then $g(c)$ will be the desired final checksum. To compute $g$ it suffices to compute $g(0)$ and $g(1)$. These are the numbers we are computing in the variables |auxchksum| and |auxchksum2|. It remains to derive the formula for $g(c)$ in terms of $c$, $g(0)$, and $g(1)$. But this is easy. We know $g(c) = a \cdot c + b$ and $a= g(0) + g(1)$, $b = g(0)$. So $g(c) = c\cdot(g(0) + g(1)) + g(0)$. @ We need some code to multiply two polynomials in the ring $R$. Since there will be precisely one such multiplication in the course of a run of the program, we opt for straightforward coding rather than blinding speed. The first part of the code puts the $\Zt$ polynomial of degree at most $31$ which is the product of the two $\Zt$ polynomials corresponding to the inputs $u$ and $v$ in the array |w_array| . @= CheckSum MultiplyTwoPolys(u,v) CheckSum u,v;@/ { CheckSum r = 0; int u_array[SIZE]; int v_array[SIZE]; int w_array[SIZE+SIZE]; int i,j; CheckSum s[4]; for(i=0;i= for (i = 0; i < 4; i++) {s[i] = 0; for(j=0;j<8;j++) s[i] = (s[i]*2) + w_array[(8*i)+(7-j)];} @ Finally we have to reduce this polynomial to one of degree at most 15 that is its canonical image in $R$. The table |crc_table| has all the needed information to do this expeditiously. @= for(i=3;i>1;i--) {s[i-1] = s[i-1] ^ ((crc_table[s[i]] >> 8) & 0xff); s[i-2] = s[i-2] ^ ((crc_table[s[i]]) & 0xff);} r = (s[1]<<8)^s[0]; return(r); } @ The motivation for the following function is contained in the preceding discussion. @= CheckSum merge_three_checksums(s,t,u) CheckSum s,t,u; { CheckSum w; w = t ^ u; w = MultiplyTwoPolys(w,s); return(w^t); } @* Input and output. This section contains code concerned with opening and closing files. @ We next have to open the input file (and if |mode == COMPUTING| the output file). We will also need a pointer to a temporary file. We also will need a flag |input_from_pipe| which we will set to $1$ if the input is coming from stdin. @= FILE *input_file, *output_file, *temp_file; int input_from_pipe = 0; @ If the argument for the input file is missing or the string |"-"| is used, then the input is taken from |stdin|. Simarly, if the argument for the output file is missing or the string |"-"| is used, then the output is sent to |stdout|. @= if (argc == 0) {@}@/ else@# @t\quad@>@+ if (!(strcmp(argv[0],"-"))){argc--;argv++;{@}}@/ @t\quad@>@+else { if((input_file = fopen(argv[0],"r"))== NULL){@}@/ @t\quad\quad\quad\quad@>@+else{@+argc--;argv++;@+}@+} @ Now we open the output file. @= if ((argc == 0) || !(strcmp(argv[0],"-"))) output_file = stdout;@/ else {if((output_file = fopen(argv[0],"w"))== NULL)@}@/ @ We have to set the flag that tells us that the input is coming from |stdin|. In addition, if the mode is |COMPUTING|, we must open a temporary file. Finally, we have to make the |input_file| pointer equal to |stdin|. @= input_from_pipe = 1;@/ if(mode == COMPUTING) {if ((temp_file = tmpfile()) == NULL) {@}}@;@/ input_file = stdin; @ @= fclose(input_file); if (mode == COMPUTING) fclose(output_file); @* Error Messages. This section contains the various error messages we send when things go wrong. @ @= {status += usage_error; fprintf(stderr,"%s: Command line has incorrect format.\n", prog_name); fprintf(stderr,"%% checksum inputfile outputfile\n"); fprintf(stderr, " is the format for installing a checksum in a \ file.\n"); fprintf(stderr, "%% checksum -v inputfile\n"); fprintf(stderr," is the format for verifying an installed \ checksum.\n"); exit(EXIT_FAILURE);} @ @= {status += cannot_open_read_file; fprintf(stderr,"%s: cannot open input file %s\n", prog_name, *argv); exit(EXIT_FAILURE); } @ @= {status += cannot_open_temporary_file; fprintf(stderr,"%s: cannot open temporary file.\n",prog_name); exit(EXIT_FAILURE);} @ @= {status += cannot_open_write_file; fprintf(stderr,"%s: cannot open output file %s\n", prog_name, *argv); exit(EXIT_FAILURE); } @ @= {status += error_reading_input_file; fprintf(stderr, "%s: Error encountered while reading input file.\n", prog_name); exit(EXIT_FAILURE);} @ @= {status += usage_error; fprintf(stderr, "The input file did not have the correct format.\n"); fprintf(stderr, "It should contain a line which has the word \ \"checksum\"\n"); fprintf(stderr,"and no other alphabetic characters.\n"); exit(EXIT_FAILURE);}