#!/usr/bin/perl
#
# ========================================================================
# @perl-file{
#    author       = "Alan Jeffrey",
#    version      = "0.19",
#    date         = "15 January 1996",
#    time         = "12:26:55 GMT",
#    filename     = "texfaq2html",
#    address      = "School of Cognitive and Computing Sciences
#                    University of Sussex
#                    Brighton BN1 9QH
#                    UK",
#    FAX          = "+44 1273 671320"
#    email        = "alanje@cogs.sussex.ac.uk",
#    codetable    = "ISO/ASCII",
#    keywords     = "LaTeX FAQ HTML",
#    supported    = "yes",
#    abstract     = "This perl script converts the UKTUG TeX FAQ
#                    LaTeX source document into HTML, on the fly."
#    package      = "stands alone",
#    dependencies = "faqbody.tex, newfaq.aux, dirctan.tex,
#                    filectan.tex",
# }
# ========================================================================
#
# A script to provide a searchable WWW interface to the UKTUG TeX
# FAQ file.
#
# The script takes parameters in the form of the QUERY_STRING environment
# variable.  Recognised keys (as parsed below):
#
#   keyword       Perl regular expression; lists the questions matching it
#   question      whitespace-separated question numbers to display
#   label         whitespace-separated question labels (without "Q-")
#   introduction  if true, the FAQ introduction is printed first
#   a / archive   CTAN archive host; must appear in archive.list
#   f / format    archive format suffix appended to CTAN directory links
#
# Copyright 1994 Alan Jeffrey
#
# 15 Sep 1994, v0.01: Created script.
#
# 16 Sep 1994, v0.02: Added the ability to download more than one
#    question at once.  Added some extra filters.
#
# 23 Sep 1994, v0.03: Updated to new markup syntax.
#
#  5 Oct 1994, v0.04: Updated to new markup syntax.
#
#  7 Oct 1994, v0.05: Added --, ---, \CTANref and \CTANlabel.
#
# 26 Oct 1994, v0.06: Added some more markup.  Read CTAN definitions
#    from separate files.  Read the Qrefs from the aux file.
#
# 28 Oct 1994, v0.07: Added \@ and the optional argument to \Qref.
#    Changed the banner.
#
#  8 Nov 1994, v0.08: Added footnoteenv.  Changed the \Qref syntax to
#    match Robin's.  Added \ldots and \large.  Added \ProgName.  Added
#    \&.  Moved \cs before \Qref so you can have \cs's inside \Qref's.
#    Renamed newfaq.tex to faqbody.tex.
#
# 25 Nov 1994, v0.09: Added \dots, $e$, \ISBN, \#, \ ,
#    \begin{comment}...\end{comment}.
#    \cs now takes its argument between |...|.
#
#  2 Dec 1994, v0.10: Added \protect and proglist.
#
#  6 Dec 1994, v0.11: Allow \fileversion and \filedate on the same
#    line.
#
#  9 Mar 1995, v0.12: Made relative paths explicit for the new httpd.
#
# 15 Sep 1995, v0.13: Fixed a bug where I'd assumed that html-ignored
#    material wasn't nested (in fact the document has a footnote
#    inside an htmlignore).
#
# 23 Nov 1995, v0.14: Added \MP{}.
#
# 28 Nov 1995, v0.15: Added \TUGboat{} and \Package|...|.
#
#  2 Jan 1996, v0.16: Added \AllTeX{}, \acro{...} and \nothtml{...}.
#
# 15 Jan 1996, v0.17: Added \twee.
#
# 10 Jun 1997, v0.17a: for operation at Cambridge
#
# 18 Aug 1997, v0.18: alternative sites and formats
#
#  8 Oct 1997, v0.19: perl5 update, changed format of all file-name variables
#                     so don't contain directory name, so that reports
#                     of problems don't constitute a security hole
#
# 14 Sep 2000: version to run with betas of the faq

require "sanitize.pl";

# NOTE(review): "use strict" is deliberately not enabled.  Several package
# globals assigned below (%SymbolChar, %ctanref, %qref, $root, $converting,
# $ignoring, $itemset, $enditemset, ...) are never read in this file and are
# presumably consumed by sanitize.pl, so they have to stay package variables.
# Only variables newly introduced here are lexical.  The TEXFAQ handle stays
# a bareword for the same reason -- confirm against sanitize.pl before
# changing either.

# The site-specific stuff:
$href_script  = "http://www.tex.ac.uk/cgi-bin/texfaq2html";
$home         = $ENV{FAQ_HOME} || "/anfs/www/VH-tex/faq-source";
$texfaq       = "faqbody.tex";
$auxfaq       = "newfaq.aux";
$ctandir      = "dirctan.tex";
$ctanfiles    = "filectan.tex";
$archive_list = "archive.list";

# defaults; these have to be allowed by the $archive_list file
$default_archive = "cam.ctan.org";
$default_format  = "tar.gz";

# table of symbols we believe in
%SymbolChar = ( 92 => "\\", 123 => "\{", 125 => "\}" );

# This script produces HTML:
print "Content-type: text/html\n\n";

# Parse the arguments, and substitute chr(hex(nn)) for %nn:
$_ = $ENV{QUERY_STRING};
$_ = "" unless defined $_;
while (/(\b\w*)\=([^\&]*)/g) {
    ($key, $val) = ($1, $2);
    $val =~ s/\+/ /g;
    # Decode using the two captured hex digits only.  (Passing the whole
    # match, "%nn", to hex() stops at the '%' and always yields 0.)
    $val =~ s/\%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
    $key =~ s/^archive$/a/;
    $key =~ s/^format$/f/;
    $keys{$key} = $val;
}

$original_keyword = $keys{'keyword'};
$original_keyword =~ s/ /\+/g;
# $keyword is already fully URL-decoded above; turning '+' into a space a
# second time would make it impossible to search for a literal plus sign.
$keyword      = $keys{'keyword'};
$introduction = $keys{'introduction'};
$question     = $keys{'question'};
$label        = $keys{'label'};

# $xtra_k carries a non-default archive/format choice into every link we emit.
$xtra_k = "";
if ( $arch = $keys{'a'} ) {
    # $arch is validated against the archive list further down, before
    # anything that contains it is printed.
    $got_arch = 1;
    $xtra_k   = '&a=' . $arch;
}
else {
    $arch = $default_archive;
}
if ( $fmt = $keys{'f'} ) {
    # $fmt ends up inside generated URLs (via %ctanref and $xtra_k), so only
    # accept characters that are harmless in both HTML and URLs.
    oh_dear( "Unknown archive format '" . texfaq_escape_html($fmt) . "'" )
        unless $fmt =~ /\A[A-Za-z0-9][A-Za-z0-9._-]*\z/;
    $got_fmt = 1;
    $xtra_k  = $xtra_k . '&f=' . $fmt;
}
else {
    $fmt = $default_format;
}

# Record which questions / labels were asked for.  split(' ') ignores
# leading whitespace, so no empty key sneaks into the hashes.
foreach ( split( ' ', defined $question ? $question : "" ) ) {
    $question{$_} = 1;
}
foreach ( split( ' ', defined $label ? $label : "" ) ) {
    $label{"Q-$_"} = 1;
}

# By default, we convert LaTeX to HTML.
$converting = 1;
$ignoring   = 0;
$sectioning = 0;

# two things used in conversion of \item[ ]
$itemset    = "";
$enditemset = "";

# Get the list of CTAN directories:
open( CTANDIR, '<', "$home/$ctandir" )
    or oh_dear("Couldn't open $ctandir");
while (<CTANDIR>) {
    if (/\\CTANdirectory\{([^\}]*)\}\{([^\}]*)\}/) {
        $ctanref{$1} = "$2.$fmt";
    }
    elsif (/\\CTANdirectory\*\{([^\}]*)\}\{([^\}]*)\}/) {
        $ctanref{$1} = "$2/";
    }
}
close(CTANDIR);

# Get the list of CTAN files
open( CTANFILES, '<', "$home/$ctanfiles" )
    or oh_dear("Couldn't open $ctanfiles");
while (<CTANFILES>) {
    if (/\\CTANfile\{([^\}]*)\}\{([^\}]*)\}/) {
        $ctanref{$1} = "$2";
    }
}
close(CTANFILES);

# Get the list of allowable archives
open( ARCHIVE_LIST, '<', "$home/$archive_list" )
    or oh_dear("Couldn't open $archive_list");
while (<ARCHIVE_LIST>) {
    chomp;    # chomp, not chop: the last line may lack a newline
    ( $archive, $root_dir ) = split( /\s+/, $_, 2 );
    $archive_root{$archive} = $root_dir;
}
close(ARCHIVE_LIST);

unless ( $root = $archive_root{$arch} ) {
    oh_dear( "Archive " . texfaq_escape_html($arch) . " isn't in my list" );
}

# Get the Qrefs:
open( AUXFAQ, '<', "$home/$auxfaq" )
    or oh_dear("Couldn't open $auxfaq");
$last_question = -1;
$labels        = "";
while (<AUXFAQ>) {
    if (/\\newlabel\{([^\}]*)\}\{\{([^\}]*)\}/) {
        $this_Qlabel   = $1;
        $last_question = $2;
        $this_label    = $this_Qlabel;
        $this_label =~ s/^Q-//;
        $label_list{$last_question} = $this_label;

        # A question requested by label is selected by number from here on.
        $question{$last_question} = 1 if $label{$this_Qlabel};

        if ( $question{$last_question} ) {
            # The question is on this page: refer to its anchor.
            $qref{$this_Qlabel} = "#[$this_Qlabel]";
            $labels .= "+" . $this_label;
        }
        else {
            $qref{$this_Qlabel} = "$href_script?label=$this_label$xtra_k";
        }
    }
}
close(AUXFAQ);

# $labels to hold a (list of) label names for these question(s)
$labels =~ s/^\+//;

# Open the FAQ file:
open( TEXFAQ, '<', "$home/$texfaq" )
    or oh_dear("Couldn't open $texfaq");

# Run through to the introduction, grabbing useful info.
while (<TEXFAQ>) {
    last if /\\section\{Introduction\}/;
    $fileversion = $1 if /\\def\\faqfileversion\{([^\}]*)\}/;
    $filedate    = $1 if /\\def\\faqfiledate\{([^\}]*)\}/;
}

# Print the title (and build up some information for the trailer):
$next_question  = $previous_question = -1;
$title_modifier = "";
if ( $question || $label ) {
    $question_count = 0;
    foreach $key ( keys %question ) {
        $question_count++;
        $title_modifier = " -- question label \"$label_list{$key}\"";
        $next_question  = $key + 1;
        if ( $key > 1 ) {
            $previous_question = $key - 1;
        }
    }
    if ( $question_count > 1 ) {
        # Previous/next links make no sense for a multi-question page.
        $title_modifier = "";
        $next_question  = $previous_question = -1;
        $url_name       = "these questions";
    }
    else {
        $url_name = "this question";
    }
}
if ( $next_question > 0 && $next_question > $last_question ) {
    $next_question = -1;
}

# print, not printf: the text contains interpolated data, and any '%' in it
# would otherwise be taken as a format directive.
print "\n<html><head>\n",
      "<title>TeX Frequently Asked Questions $title_modifier</title>\n",
      "</head><body>\n",
      "<h1 align=\"center\">Welcome to the UK List of<br>\n",
      "TeX Frequently Asked Questions<br>\n",
      "on the Web</h1>\n";

# Blast out the introduction until we get to the first section.
if ($introduction) {
    print "<h2>Introduction</h2>\n";
    while (<TEXFAQ>) {
        last if /\\section/;
        sanitize_line();
        print;
    }
}
else {
    $_ = "";
}

# Produce the form.  Everything that came from the query string is
# HTML-escaped before being echoed back.
print "\n<h2>Searching</h2>\n",
      "<form action=\"$href_script\" method=get>\n",
      "The index of Frequently Asked Questions about TeX is searchable.<br>\n",
      "Please enter your keyword here:";
if ($got_arch) {
    print "\n<input type=hidden name=a value=\"",
          texfaq_escape_html($arch), "\">";
}
if ($got_fmt) {
    print "\n<input type=hidden name=f value=\"",
          texfaq_escape_html($fmt), "\">";
}
print "\n<input type=\"text\" name=\"keyword\" value=\"",
      texfaq_escape_html($keyword), "\"><br>\n",
      "then press here:\n",
      "<input type=\"submit\" value=\"search\">\n",
      "</form>\n",
      "<hr>\n";

if ( $question || $label ) {

    # Display mode: print every selected question with its answer.
    # The do/while processes the line already in $_ before reading more.
    do {
        if (s/^\s*\\Question(\[.*\])?\{(.*)\}\s*$/$2/) {
            # Save the optional "[label]" at once, before anything else
            # gets a chance to run a pattern match.
            my $anchor = $1;
            $qnum++;
            sanitize_line();
            if ( $question{$qnum} ) {
                print "<h2>";
                print "<a name=\"$anchor\">" if $anchor;
                print;
                print "</a>" if $anchor;
                print "</h2>\n";
            }
        }
        elsif ( $question{$qnum} ) {
            $printed = 1;
            sanitize_line();
            print;
        }
    } while (<TEXFAQ>);

    print "<p>\n";
    if ( !$printed ) {
        print "<h2>Warning</h2>\n";
        print "There is no question '",
              texfaq_escape_html($question), "'!<p>\n"
            if $question;
        print "There is no question with label '",
              texfaq_escape_html($label), "'!<p>\n"
            if $label;
    }
}
else {

    # Index mode: list the questions, restricted to those matching $keyword
    # if one was given, otherwise grouped under their section headings.
    if ($keyword) {
        # The keyword is raw user input: escape it rather than feeding it
        # through the LaTeX converter.
        print "Questions matching the expression '",
              texfaq_escape_html($keyword), "'";
    }
    else {
        $keyword    = ".";
        $sectioning = 1;
    }

    # Compile the user's expression once.  If it is not a valid regular
    # expression, match it literally instead of dying half-way down the page.
    my $keyword_re = eval { qr/$keyword/ };
    $keyword_re = qr/\Q$keyword\E/ unless defined $keyword_re;

    print "<ul>";
    do {
        if ( $sectioning && $converting ) {
            if (/^\s*\\section\{(.*)\}\s*$/) {
                $_ = $1;
                sanitize_line();
                print "</ul><h3>$_</h3><ul>\n";
                $_ = "";
            }
            elsif (/^\s*\\subsection\{(.*)\}\s*$/) {
                $_ = $1;
                sanitize_line();
                print "</ul><h4>$_</h4><ul>\n";
                $_ = "";
            }
        }
        sanitize_line();
        if (/^\s*\\Question(\[.*\])?\{(.*)\}\s*$/) {
            $qnum++;
            $entry = "<li><a href=\"$href_script?label=$label_list{$qnum}$xtra_k\">$2</a>\n";
        }
        # Any matching line of a question (title or answer) lists it once;
        # $entry is emptied after printing so later matches print nothing.
        if ( /$keyword_re/ && $qnum ) {
            print $entry;
            $entry = "";
            $matches{$qnum} = 1;
        }
    } while (<TEXFAQ>);
    print "</ul>";

    # Question numbers are integers: sort them numerically, not as strings.
    @matches = sort { $a <=> $b } keys(%matches);
    if ( $#matches == -1 ) {
        print "<em>There were no matches.</em><p>\n";
    }
    elsif ( $#matches == 0 ) {
        print "There was one match.<p>\n";
    }
    else {
        $stupid_perl = @matches;
        print "There were $stupid_perl matches.\n",
              "You can get all of them by pressing\n",
              "<a href=\"$href_script?question=",
              ( join( "+", @matches ) ), $xtra_k,
              "\">here</a>.<p>\n";
    }
}
close(TEXFAQ);

# An error report: show the message on the page, then abort the script.
#   $_[0]  message text, emitted as-is into the HTML; callers must
#          HTML-escape any user-supplied part themselves.
# Never returns: dies with the same message.
sub oh_dear {
    my ($message) = @_;
    print "<i>This shouldn\'t happen!</i><p>\n",
          $message,
          "\n<p>Please report this!\n";
    die $message;
}

# Return a copy of the argument with the HTML-special characters
# & < > " ' replaced by entities; undef is treated as the empty string.
sub texfaq_escape_html {
    my ($text) = @_;
    $text = "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# Print a tail, with links to the neighbouring questions.
print "\n<hr><address>\n",
      "Maintenance of the <a href=\"http://www.tex.ac.uk/faq\">TeX FAQ</a>\n",
      "is coordinated by Robin Fairbairns.<p>\n";

if ( $next_question > 0 && $previous_question > 0 ) {
    print "Go to <a href=\"$href_script?label=$label_list{$previous_question}\">\n",
          "previous question</a>, or\n",
          "<a href=\"$href_script?label=$label_list{$next_question}\">\n",
          "next question</a>";
}
elsif ( $next_question > 0 ) {
    print "Go to <a href=\"$href_script?label=$label_list{$next_question}\">\n",
          "next question</a>";
}
elsif ( $previous_question > 0 ) {
    # Link to the *previous* question's label here, not the next one's.
    print "Go to <a href=\"$href_script?label=$label_list{$previous_question}\">\n",
          "previous question</a>";
}
print "<p>\n";

if ( $question || $label ) {
    print "URL for $url_name: $href_script?label=$labels<p>\n";
}

print "\nComments, suggestions, or error reports? - see\n",
      "\"<a href=\"$href_script?label=noans+newans\">how to improve the FAQ</a>\".\n",
      "<p>\n",
      "This is FAQ version $fileversion, last modified on $filedate.\n",
      "</address>\n",
      "</body></html>\n";

# That's it!