# A script to provide a searchable WWW interface to the UKTUG TeX
# FAQ file.
# The script takes parameters in the form of the QUERY_STRING environment 
# variable. 

# Copyright 1994 Alan Jeffrey

# 15 Sep 1994, v0.01: Created script.
# 16 Sep 1994, v0.02: Added the ability to download more than one
#    question at once.  Added some extra filters.
# 23 Sep 1994, v0.03: Updated to new markup syntax.
# 5 Oct 1994, v0.04: Updated to new markup syntax.
# 7 Oct 1994, v0.05: Added --, ---, \CTANref and \CTANlabel.
# 26 Oct 1994, v0.06: Added some more markup.  Read CTAN definitions
#    from separate files.  Read the Qrefs from the aux file.
# 28 Oct 1994, v0.07: Added \@ and the optional argument to \Qref.
#    Changed the banner.  
# 8 Nov 1994, v0.08: Added footnoteenv.   Changed the \Qref syntax to
#    match Robin's.  Added \ldots and \large.  Added \ProgName.  Added
#    \&. Moved \cs before \Qref so you can have \cs's inside \Qref's.
#    Renamed newfaq.tex to faqbody.tex.
# 25 Nov 1994, v0.09: Added \dots, $e$, \ISBN, \#, \ ,
#    \begin{comment}...\end{comment}. 
#    \cs now takes its argument between |...|.
# 2 Dec 1994, v0.10: Added \protect and proglist.
# 6 Dec 1994, v0.11: Allow \fileversion and \filedate on the same
#    line.
# 9 Mar 1995, v0.12: Made relative paths explicit for the new httpd.
# 15 Sep 1995, v0.13: Fixed a bug where I'd assumed that html-ignored
#    material wasn't nested (in fact the document has a footnote
#    inside an htmlignore).
# 23 Nov 1995, v0.14: Added \MP{}.
# 28 Nov 1995, v0.15: Added \TUGboat{} and \Package|...|.
# 2 Jan 1996, v0.16: Added \AllTeX{}, \acro{...} and \nothtml{...}.
# 15 Jan 1996, v0.17: Added \twee.
# 10 Jun 1997, v0.17a: for operation at Cambridge
# 18 Aug 1997, v0.18: alternative sites and formats
# 8 Oct 1997, v0.19: perl5 update, changed format of all file-name variables
#                    so they don't contain the directory name, so that reports
#                    of problems don't constitute a security hole
# 14 Sep 2000: version to run with betas of the faq

require "sanitize.pl";

# Site-specific settings.
#
# $home is the directory holding the FAQ sources; the FAQ_HOME environment
# variable overrides the built-in location.  The other names are bare file
# names (no directory part).

$home = $ENV{FAQ_HOME} || "/anfs/www/VH-tex/faq-source";

($texfaq, $auxfaq)        = ("faqbody.tex", "newfaq.aux");
($ctandir, $ctanfiles)    = ("dirctan.tex", "filectan.tex");
$archive_list             = "archive.list";

# Fallback archive and download format; both have to be allowed by the
# $archive_list file.

($default_archive, $default_format) = ("cam.ctan.org", "tar.gz");

# Table of the symbols we believe in, keyed by character code:
# 92 is the backslash, 123 the left brace and 125 the right brace.
# (The list was previously left unterminated, which is a syntax error.)
%SymbolChar = (
    92  => "\\",
    123 => "\{",
    125 => "\}",
);

# Everything this script writes is HTML, so send the matching header
# (followed by the blank line that ends the header block) first.

print "Content-type: text/html\n\n";

# Parse the arguments, and substitute hex(nn) for %nn.
#
# The parameters arrive in the QUERY_STRING environment variable as
# key=value pairs.  The decoded pairs are stored in %keys; the long
# parameter names "archive" and "format" are stored under their short
# forms "a" and "f".

%keys = (%keys, _parse_query_string($ENV{'QUERY_STRING'}));

# Split a query string into a flat (key => value, ...) list.
#   $query  - the raw query string; undef is treated as empty.
# Returns the decoded pairs; a key that occurs twice keeps its last value.
sub _parse_query_string {
    my ($query) = @_;
    my %parsed;

    $query = '' unless defined $query;

    while ($query =~ /(\b\w*)\=([^\&]*)/g) {
        # Copy the captures at once: the substitutions below reset $1/$2.
        my ($key, $val) = ($1, $2);

        $val =~ s/\+/ /g;
        # Decode %nn using the two hex digits only.  (hex() of the whole
        # match, including the "%", always yields 0.)  Sequences that are
        # not valid hex are left untouched.
        $val =~ s/\%([0-9A-Fa-f]{2})/sprintf("%c", hex($1))/eg;

        $key =~ s/^archive$/a/;
        $key =~ s/^format$/f/;

        $parsed{$key} = $val;
    }

    return %parsed;
}

# Pull the individual request parameters out of %keys.
# $original_keyword is the keyword with every space turned into "+";
# $keyword is the keyword with every "+" turned into a space.
($original_keyword = $keys{'keyword'}) =~ s/ /\+/g;
($keyword          = $keys{'keyword'}) =~ s/\+/ /g;

($introduction, $question, $label) =
    @keys{'introduction', 'question', 'label'};

# Choose the archive and the download format.  A (true) value supplied in
# the request is used, flagged in $got_arch / $got_fmt, and appended to
# $xtra_k in query-string form; otherwise the default is used.

$arch = $keys{'a'};
if ($arch) {
    $got_arch = 1;
    $xtra_k   = '&a=' . $arch;
}
else {
    $arch = $default_archive;
}

$fmt = $keys{'f'};
if ($fmt) {
    $got_fmt = 1;
    $xtra_k .= '&f=' . $fmt;
}
else {
    $fmt = $default_format;
}

# Record which questions were asked for: %question is keyed by the entries
# of the whitespace-separated "question" parameter, %label by the entries
# of the "label" parameter with "Q-" prepended.
#
# split ' ' (rather than /\s+/) skips leading whitespace, so a value such
# as " 12" no longer creates a bogus empty key.
foreach my $number (split ' ', defined $question ? $question : '') {
    $question{$number} = 1;
}
foreach my $name (split ' ', defined $label ? $label : '') {
    $label{"Q-$name"} = 1;
}

# Initial state: LaTeX is converted to HTML by default, and the
# $ignoring and $sectioning flags start out off.

($converting, $ignoring, $sectioning) = (1, 0, 0);

# Two strings used in the conversion of \item[ ]; both start out empty.

($itemset, $enditemset) = ("", "");

# Get the list of CTAN directories.
#
# A line containing \CTANdirectory{KEY}{PATH} maps KEY to "PATH.$fmt";
# a line containing \CTANdirectory*{KEY}{PATH} maps KEY to "PATH/".
# The results go into %ctanref.

open (CTANDIR, "$home/$ctandir")
    || &oh_dear ("Couldn't open $ctandir");

while (<CTANDIR>) {
    if ( /\\CTANdirectory\{([^\}]*)\}\{([^\}]*)\}/ ) {
        $ctanref{$1} = "$2.$fmt";
    } elsif ( /\\CTANdirectory\*\{([^\}]*)\}\{([^\}]*)\}/ ) {
        $ctanref{$1} = "$2/";
    }
}

# The file has been read to the end; release the handle.
close (CTANDIR);

# Get the list of CTAN files.
#
# A line containing \CTANfile{KEY}{PATH} maps KEY to PATH in %ctanref.

open (CTANFILES, "$home/$ctanfiles")
    || &oh_dear ("Couldn't open $ctanfiles");

while (<CTANFILES>) {
    if ( /\\CTANfile\{([^\}]*)\}\{([^\}]*)\}/ ) {
        $ctanref{$1} = "$2";
    }
}

# The file has been read to the end; release the handle.
close (CTANFILES);

# Get the list of allowable archives.
#
# Each line of the archive list is "ARCHIVE ROOT_DIR"; the pairs are kept
# in %archive_root.  The requested archive ($arch) must be present with a
# non-empty root, which is left in $root; otherwise the script gives up.

open (ARCHIVE_LIST, "$home/$archive_list")
    || &oh_dear ("Couldn't open $archive_list");

while (<ARCHIVE_LIST>) {
    # Drop the line terminator so it does not end up inside $root_dir.
    chomp;
    ($archive, $root_dir) = split(/\s+/, $_, 2);
    $archive_root{$archive} = $root_dir;
}

# The file has been read to the end; release the handle.
close (ARCHIVE_LIST);

unless ( $root = $archive_root{$arch} ) {
    &oh_dear("Archive $arch isn't in my list");
}

# Get the Qrefs:
#
# Scans the aux file for \newlabel{LABEL}{{NUMBER}... entries.  For each
# one it records NUMBER => LABEL (without a leading "Q-") in %label_list,
# marks NUMBER in %question when LABEL was requested through %label, and
# sets %qref for LABEL.
#
# NOTE(review): the right-hand side of the last %qref assignment and the
# closing braces of the if/else, the outer if and the while loop are not
# present in this view -- confirm against the full script.

open (AUXFAQ, "$home/$auxfaq") 
	|| &oh_dear ("Couldn't open $auxfaq");

# Number taken from the most recent \newlabel line; -1 until one is seen.
$last_question = -1;
# "+"-joined label names of the selected questions, built up in the loop.
$labels = "";

while (<AUXFAQ>) {
    if (/\\newlabel\{([^\}]*)\}\{\{([^\}]*)\}/) {
	$last_question = $2;
	$this_Qlabel = $1;
	# Same label without the "Q-" prefix.
	$this_label = $this_Qlabel;
	$this_label =~ s/^Q-//;
	$label_list{$last_question} = $this_label;
	# A question asked for by label is selected just like one asked
	# for by number.
	$question{$last_question}=1 if $label{$this_Qlabel};
	if ($question{$last_question}) {
	    # Selected question: its reference is an in-page anchor.
	    $qref{$this_Qlabel} = "#[$this_Qlabel]";
	    $labels .= "+" . $this_label;
	} else {
	    $qref{$this_Qlabel} =
	    # NOTE(review): the value assigned here is missing from this view.

# $labels to hold a (list of) label names for these question(s)
# (the loop prefixes every name with "+", so strip the leading one)
$labels =~ s/^\+//;

# Open the FAQ file:

open (TEXFAQ, "$home/$texfaq")
    || &oh_dear ("Couldn't open $texfaq");

# Run through to the introduction, grabbing useful info.
#
# Reading stops at the \section{Introduction} line, leaving TEXFAQ open
# and positioned just after it.  On the way, \def\faqfileversion{...} and
# \def\faqfiledate{...} supply $fileversion and $filedate; the two tests
# are independent, so both definitions may sit on the same line.

while (<TEXFAQ>) {
    last if /\\section\{Introduction\}/;
    $fileversion = $1 if /\\def\\faqfileversion\{([^\}]*)\}/;
    $filedate    = $1 if /\\def\\faqfiledate\{([^\}]*)\}/;
}

# Print the title (and build up some information for the trailer):
#
# When questions were requested, this derives the title suffix from the
# question's label and works out the numbers of the neighbouring
# questions for the "previous"/"next" links.
#
# NOTE(review): this section is incomplete in this view -- several closing
# braces are absent and $question_count is tested but never assigned in
# the visible code.  Confirm against the full script.

# -1 is the "none" value; the trailer only prints a link for values > 0.
$next_question = $previous_question = -1;

if ($question || $label) {
    foreach $key ( keys %question ) {
	$title_modifier=" -- question label \"$label_list{$key}\"";
	$next_question = $key + 1;
	# The "$key > 1" test means the first question gets no
	# "previous" link.
	if ( $key > 1 ) {
	    $previous_question = $key - 1;
    # With more than one question shown, neighbour links are dropped.
    if ( $question_count > 1 ) {
	$next_question = $previous_question = -1;
	$url_name = "these questions";
    } else {
	$url_name = "this question";

# Nothing follows the last question listed in the aux file.
if ( $next_question > 0 &&
     $next_question > $last_question ) { $next_question = -1 };

# Emit the page title and the main heading.
# NOTE(review): the string literal opened below is not terminated within
# this view.  Also, printf is handed interpolated text as its format, so
# a "%" in $title_modifier would be read as a format directive -- confirm
# against the full script.
printf ("
<title>TeX Frequently Asked Questions $title_modifier</title>
<h1 align=\"center\">Welcome to the UK List of<br>
                TeX Frequently Asked Questions<br>
                        on the Web</h1>

# Blast out the introduction until we get to the first section.
#
# With the "introduction" parameter set, an Introduction heading is
# printed and TEXFAQ is read up to the next line containing \section;
# otherwise $_ is reset to the empty string.
#
# NOTE(review): the closing braces of the while loop and of the if/else
# are missing from this view, and nothing in the visible loop body prints
# the lines it reads -- confirm against the full script.

if ($introduction) {
    print "<h2>Introduction</h2>\n";
    while (<TEXFAQ>) {
	last if /\\section/;
} else {
    $_ = "";

# Produce the form.
#
# The search form submits (method GET) to $href_script.  An archive or
# format chosen in the request is carried along in hidden "a" / "f"
# fields, and the keyword box is pre-filled with the current keyword.
#
# NOTE(review): $href_script is not assigned anywhere in this view.  The
# closing braces of the two "if" blocks are missing and the last string
# literal is unterminated here.  $arch, $fmt and $keyword come from the
# request and are interpolated into attribute values without escaping --
# confirm whether sanitising happens elsewhere.

printf "
<form action=\"$href_script\" method=get>
The index of Frequently Asked Questions about TeX is searchable.<br>
Please enter your keyword here:";
# Only parameters that were given explicitly are passed on.
if ( $got_arch ) {
    printf "\n<input type=hidden name=a value=\"$arch\">";
if ( $got_fmt ) {
    printf "\n<input type=hidden name=f value=\"$fmt\">";
printf "
<input type=\"text\" name=\"keyword\" value=\"$keyword\"><br>
then press here: <input type=\"submit\" value=\"search\">

# Main body of the page.
#
# If specific questions were requested (by number or by label) their text
# is printed; otherwise a list of question titles is produced, restricted
# to those matching $keyword when one was given.
#
# NOTE(review): this block is fragmentary in this view.  Many closing
# braces are missing, $qnum is used but never assigned, and the code that
# prints the question text, the keyword and the final link is absent.
# Confirm every note below against the full script.
if ($question || $label) {
    # The do/while starts with the line already held in $_.
    do {
	# A \Question[label]{title} line: reduce $_ to the title text.
	if (s/^\s*\\Question(\[.*\])?\{(.*)\}\s*$/$2/) {
	    if ($question{$qnum}) {
		print "<h2>";
		# NOTE(review): $1 is tested after the s/// above;
		# presumably the optional [label] -- confirm.
		print "<a name=\"$1\">" if $1;
		print "</a>" if $1;
		print "</h2>\n";
	} elsif ($question{$qnum}) {
	    # Remember that at least one requested question was found.
	    $printed = 1;
    } while (<TEXFAQ>);
    print "<p>\n";
    # Nothing matched the request: tell the user what was asked for.
    if ( ! $printed ) {
	print "<h2>Warning</h2>\n";
	print "There is no question '$question'!<p>\n"
	    if $question;
	print "There is no question with label '$label'!<p>\n"
	    if $label;
} else {
    if ($keyword) {
	print "Questions matching the expression '";
	print "'";
    } else {
	# No keyword: "." as a pattern matches any non-empty line, and
	# section headings are switched on.
	$keyword = ".";
	$sectioning = 1;
    print "<ul>";
    do {
	# Section and subsection titles interrupt the list as headings.
	if ($sectioning && $converting) {
	    if (/^\s*\\section\{(.*)\}\s*$/ ) {
		print "</ul><h3>$_</h3><ul>\n";
	    } elsif ( /^\s*\\subsection\{(.*)\}\s*$/ ) {
		print "</ul><h4>$_</h4><ul>\n";
	# Prepare the list entry for a question; it is only printed if a
	# line matches the keyword.
	if (/^\s*\\Question(\[.*\])?\{(.*)\}\s*$/) {
	    $entry = "<li><a href=\"$href_script?label=$label_list{$qnum}$xtra_k\">$2</a>\n";
	# NOTE(review): $keyword comes from the request and is used as a
	# regular expression as-is; an invalid pattern would abort the
	# script -- confirm whether it is validated elsewhere.
	if (/$keyword/ && $qnum) {
	    print $entry;
	    # Clearing $entry keeps a question from being listed twice.
	    $entry = "";
	    $matches{$qnum} = 1;
    } while (<TEXFAQ>);
    print "</ul>";
    # NOTE(review): $#matches and @matches refer to the array @matches,
    # while the loop above fills the hash %matches; no assignment to the
    # array is visible here -- confirm.
    if ($#matches == -1) {
	print "<em>There were no matches.</em><p>\n";
    } elsif ($#matches == 0) {
	print "There was one match.<p>\n";
    } else {
	# Scalar assignment yields the number of elements in @matches.
	$stupid_perl = @matches;
	print "There were $stupid_perl matches. ",
	    "You can get all of them by pressing ",
	    "<a href=\"$href_script?question=",

# An error report.
#
# oh_dear(MESSAGE, ...) tells the reader that something went wrong,
# showing MESSAGE, and then aborts the script by dying with the same
# MESSAGE.  It never returns.
#
# Previously the argument was ignored: nothing about the problem was
# printed, and die was given $_ (whatever line was last read) instead.

sub oh_dear {
    my $message = join('', @_);

    # The message can contain request data (for example an archive name),
    # so escape HTML metacharacters before echoing it to the browser.
    my $html = $message;
    $html =~ s/&/&amp;/g;
    $html =~ s/</&lt;/g;
    $html =~ s/>/&gt;/g;
    $html =~ s/"/&quot;/g;

    print "<i>This shouldn\'t happen!</i><p>\n",
          $html,
          "\n<p>Please report this!\n";
    die $message;
}

# print links to other questions

# Print a tail.
# NOTE(review): the three lines below are the body of a string (the FAQ
# maintenance notice); the print statement that should open and close
# that string is not present in this view.
   Maintenance of the
   <a href=\"http://www.tex.ac.uk/faq\">TeX FAQ</a>
   is coordinated by Robin Fairbairns.<p>
# Links to the neighbouring questions.  A question number that is not
# greater than zero means "no such neighbour", so that link is left out.

print _question_nav_html($previous_question, $next_question);
print "<p>\n";

# Build the "Go to previous/next question" HTML.
#   $previous, $next - question numbers of the neighbours (<= 0 for none).
# Reads $href_script and %label_list; returns "" when there is neither a
# previous nor a next question.
# The previous-only case used to build its link from the *next* question's
# label; it now uses the previous question's label.
sub _question_nav_html {
    my ($previous, $next) = @_;

    if ( $next > 0 && $previous > 0 ) {
        return "Go to <a href=\"$href_script?label=$label_list{$previous}\">\
           previous question</a>, or
           <a href=\"$href_script?label=$label_list{$next}\">\
           next question</a>";
    }
    elsif ( $next > 0 ) {
        return "Go to <a href=\"$href_script?label=$label_list{$next}\">\
           next question</a>";
    }
    elsif ( $previous > 0 ) {
        return "Go to <a href=\"$href_script?label=$label_list{$previous}\">\
           previous question</a>";
    }

    return "";
}

# When specific questions were requested, show the URL (built from the
# label list in $labels) that leads back to this selection.
# NOTE(review): the closing brace of this "if" is missing from this view,
# and the footer text below is the body of a string whose enclosing print
# statement is not present -- confirm against the full script.
if ( $question || $label ) {
    print "URL for $url_name: $href_script?label=$labels<p>\n";

   Comments, suggestions, or error reports? - see
   \"<a href=\"$href_script?label=noans+newans\">how to improve the FAQ</a>\".
   This is FAQ version $fileversion, last modified on $filedate.

# That's it!