#!/usr/bin/perl
#
# ========================================================================
#  @perl-file{
#     author              = "Alan Jeffrey",
#     version             = "0.19",
#     date                = "15 January 1996",
#     time                = "12:26:55 GMT",
#     filename            = "texfaq2html",
#     address             = "School of Cognitive and Computing Sciences
#                            University of Sussex
#                            Brighton BN1 9QH
#                            UK",
#     FAX                 = "+44 1273 671320"
#     email               = "alanje@cogs.sussex.ac.uk",
#     codetable           = "ISO/ASCII",
#     keywords            = "LaTeX FAQ HTML",
#     supported           = "yes",
#     abstract            = "This perl script converts the UKTUG TeX FAQ
#                            LaTeX source document into HTML, on the fly."
#     package             = "stands alone",
#     dependencies        = "faqbody.tex, newfaq.aux, dirctan.tex,
#                            filectan.tex",
#  }
# ========================================================================

# A script to provide a searchable WWW interface to the UKTUG TeX
# FAQ file.
#
# The script takes parameters in the form of the QUERY_STRING environment 
# variable. 

# Copyright 1994 Alan Jeffrey

# 15 Sep 1994, v0.01: Created script.
#
# 16 Sep 1994, v0.02: Added the ability to download more than one
#    question at once.  Added some extra filters.
#
# 23 Sep 1994, v0.03: Updated to new markup syntax.
#
# 5 Oct 1994, v0.04: Updated to new markup syntax.
#
# 7 Oct 1994, v0.05: Added --, ---, \CTANref and \CTANlabel.
#
# 26 Oct 1994, v0.06: Added some more markup.  Read CTAN definitions
#    from separate files.  Read the Qrefs from the aux file.
#
# 28 Oct 1994, v0.07: Added \@ and the optional argument to \Qref.
#    Changed the banner.  
#
# 8 Nov 1994, v0.08: Added footnoteenv.   Changed the \Qref syntax to
#    match Robin's.  Added \ldots and \large.  Added \ProgName.  Added
#    \&. Moved \cs before \Qref so you can have \cs's inside \Qref's.
#    Renamed newfaq.tex to faqbody.tex.
#
# 25 Nov 1994, v0.09: Added \dots, $e$, \ISBN, \#, \ ,
#    \begin{comment}...\end{comment}. 
#    \cs now takes its argument between |...|.
#
# 2 Dec 1994, v0.10: Added \protect and proglist.
#
# 6 Dec 1994, v0.11: Allow \fileversion and \filedate on the same
#    line.
#
# 9 Mar 1995, v0.12: Made relative paths explicit for the new httpd.
#
# 15 Sep 1995, v0.13: Fixed a bug where I'd assumed that html-ignored
#    material wasn't nested (in fact the document has a footnote
#    inside an htmlignore).
#
# 23 Nov 1995, v0.14: Added \MP{}.
#
# 28 Nov 1995, v0.15: Added \TUGboat{} and \Package|...|.
#
# 2 Jan 1996, v0.16: Added \AllTeX{}, \acro{...} and \nothtml{...}.
#
# 15 Jan 1996, v0.17: Added \twee.
#
# 10 Jun 1997, v0.17a: for operation at Cambridge
#
# 18 Aug 1997, v0.18: alternative sites and formats
#
# 8 Oct 1997, v0.19: perl5 update, changed format of all file-name variables
#                    so they don't contain the directory name, so that
#                    reports of problems don't constitute a security hole
#
# 14 Sep 2000: version to run with betas of the faq


require "sanitize.pl";

# Site-specific settings.

# URL of this script; used whenever a link back to the script is built.
$href_script = 'http://www.tex.ac.uk/cgi-bin/texfaq2html';

# Directory holding the data files: the FAQ_HOME environment variable
# when it has a true value, otherwise the built-in path.
$home = $ENV{FAQ_HOME} || '/anfs/www/VH-tex/faq-source';

# Data file names.  They deliberately carry no directory part; each is
# opened as "$home/<name>".
($texfaq,  $auxfaq)    = ('faqbody.tex', 'newfaq.aux');
($ctandir, $ctanfiles) = ('dirctan.tex', 'filectan.tex');
$archive_list          = 'archive.list';

# defaults; these have to be allowed by the $archive_list file

($default_archive, $default_format) = ('cam.ctan.org', 'tar.gz');

# table of symbols we believe in, keyed by character code
%SymbolChar = (
    92  => '\\',
    123 => '{',
    125 => '}',
);

# This script produces HTML:

print "Content-type: text/html\n\n";

# parse_query_string(QUERY)
#
# Split a CGI query string into a flat (key, value, key, value, ...)
# list, in the order the pairs occur.  In each value "+" becomes a
# space and every %nn escape (nn being two hex digits) becomes the
# character with that code.  The long key names "archive" and "format"
# are folded into the short forms "a" and "f".  An undefined QUERY
# gives an empty list.

sub parse_query_string {
    my ($query) = @_;
    my @pairs = ();

    return @pairs unless defined $query;

    while ($query =~ /(\b\w*)\=([^\&]*)/g) {
        my ($key, $val) = ($1, $2);
        $val =~ s/\+/ /g;
        # hex() must be given only the two digits.  Handing it the whole
        # match ("%nn", as this code once did) makes it stop at the "%"
        # and return 0, so every escape decoded to chr(0).
        $val =~ s/\%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
        $key =~ s/^archive$/a/;
        $key =~ s/^format$/f/;
        push (@pairs, $key, $val);
    }
    return @pairs;
}

# Parse the arguments, and substitute hex(nn) for %nn.  When a key is
# given more than once the last value wins.

%keys = parse_query_string ($ENV{QUERY_STRING});

# $original_keyword keeps the keyword in its "+"-joined form; $keyword
# is the form used for searching, with every "+" turned into a space.
$original_keyword = $keys{'keyword'};
$original_keyword =~ s/ /\+/g;
$keyword = $keys{'keyword'};
$keyword =~ s/\+/ /g;
$introduction = $keys{'introduction'};
$question = $keys{'question'};
$label = $keys{'label'};

# Archive and format selection.  $xtra_k collects the "&a=...&f=..."
# suffix that is appended to links back to this script, so an explicit
# choice is carried along from page to page.

if ( $arch = $keys{'a'} ) {
    $got_arch = 1;
    $xtra_k = '&a=' . $arch;
}
else {
    $arch = $default_archive;
}

# Nothing in this file checks $fmt against a list of allowed values,
# yet it is copied into links and form fields.  A value containing
# HTML-special characters or white space is therefore treated as if no
# format had been given.
$fmt = $keys{'f'};
$fmt = '' if defined $fmt && $fmt =~ /[<>"'&\s]/;
if ( $fmt ) {
    $got_fmt = 1;
    $xtra_k = $xtra_k . '&f=' . $fmt;
}
else {
    $fmt = $default_format;
}

# Record which question numbers and which labels were asked for.
# split(' ') ignores leading white space, so a stray blank in the
# parameter cannot create an empty-string entry.

foreach my $wanted_number (split (' ', $question)) {
    $question{$wanted_number} = 1;
}
foreach my $wanted_label (split (' ', $label)) {
    $label{"Q-$wanted_label"} = 1;
}

# Initial state flags.  By default we convert LaTeX to HTML; $sectioning
# starts off and is switched on later when the whole index is listed.

($converting, $ignoring, $sectioning) = (1, 0, 0);

# two things used in conversion of \item[ ]

$itemset = $enditemset = "";

# Fill %ctanref from the CTAN directory list.  A line containing
# \CTANdirectory{key}{path} maps key to "path.$fmt"; failing that, a
# line containing the starred form \CTANdirectory*{key}{path} maps key
# to "path/".

open (CTANDIR, "$home/$ctandir")
    || &oh_dear ("Couldn't open $ctandir");

while (<CTANDIR>) {
    my ($ref_name, $ref_path);

    # A list assignment used as a condition is true exactly when the
    # match succeeded and returned its two captures.
    if (($ref_name, $ref_path) =
            /\\CTANdirectory\{([^\}]*)\}\{([^\}]*)\}/) {
        $ctanref{$ref_name} = $ref_path . "." . $fmt;
    }
    elsif (($ref_name, $ref_path) =
            /\\CTANdirectory\*\{([^\}]*)\}\{([^\}]*)\}/) {
        $ctanref{$ref_name} = $ref_path . "/";
    }
}

# Add the CTAN files: \CTANfile{key}{path} maps key to path unchanged.

open (CTANFILES, "$home/$ctanfiles")
    || &oh_dear ("Couldn't open $ctanfiles");

while (<CTANFILES>) {
    next unless /\\CTANfile\{([^\}]*)\}\{([^\}]*)\}/;
    $ctanref{$1} = "$2";
}

# Get the list of allowable archives.  Each line of the list holds an
# archive name, white space, and then that archive's root directory.

open (ARCHIVE_LIST, "$home/$archive_list")
    || &oh_dear ("Couldn't open $archive_list");

while (<ARCHIVE_LIST>) {
    # chomp removes a line ending only if there is one; chop would eat
    # the last character of the root directory when the final line of
    # the file has no newline.
    chomp;
    next unless /\S/;		# ignore blank lines
    # split(' ') skips leading white space, so an indented line still
    # gives the archive name as its first field.
    ($archive, $root_dir) = split(' ', $_, 2);
    $archive_root{$archive} = $root_dir;
}
close (ARCHIVE_LIST);

# The requested (or default) archive has to be one we know about.

unless ( $root = $archive_root{$arch} ) {
    &oh_dear("Archive $arch isn't in my list");
}

# Get the Qrefs.  Every \newlabel{label}{{number}... line of the aux
# file ties a label to a question number.  For each one we record the
# label (minus its "Q-" prefix) in %label_list, mark the question as
# wanted if it was asked for by label, and set up %qref: a link within
# this page for a question that is being shown, a link back to the
# script otherwise.

open (AUXFAQ, "$home/$auxfaq") 
    || &oh_dear ("Couldn't open $auxfaq");

$last_question = -1;
my @shown_labels = ();

while (<AUXFAQ>) {
    next unless /\\newlabel\{([^\}]*)\}\{\{([^\}]*)\}/;

    ($this_Qlabel, $last_question) = ($1, $2);
    ($this_label = $this_Qlabel) =~ s/^Q-//;
    $label_list{$last_question} = $this_label;

    $question{$last_question} = 1 if $label{$this_Qlabel};

    if (!$question{$last_question}) {
        $qref{$this_Qlabel} = "$href_script?label=$this_label$xtra_k";
        next;
    }

    $qref{$this_Qlabel} = "#[$this_Qlabel]";
    push (@shown_labels, $this_label);
}

# $labels to hold a (list of) label names for these question(s),
# joined by "+"
$labels = join ("+", @shown_labels);

# Open the FAQ file.  The handle stays open: the rest of the script
# carries on reading from where this loop stops.

open (TEXFAQ, "$home/$texfaq")
    || &oh_dear ("Couldn't open $texfaq");

# Run through to the introduction, grabbing useful info.  On leaving
# the loop $_ holds the \section{Introduction} line (or is undefined if
# the file ran out first).

while (<TEXFAQ>) {
    if (/\\section\{Introduction\}/) {
        last;
    }
    # Both definitions may sit on the same line, so test for each.
    if (/\\def\\faqfileversion\{([^\}]*)\}/) {
        $fileversion = $1;
    }
    if (/\\def\\faqfiledate\{([^\}]*)\}/) {
        $filedate = $1;
    }
}

# Print the title (and build up some information for the trailer):
#
#   $title_modifier      extra title text naming the question's label,
#                        when exactly one question is shown
#   $previous_question,
#   $next_question       numbers of the neighbouring questions, or -1
#                        when there is none (or several questions are
#                        shown at once)
#   $url_name            "this question" or "these questions"

$next_question = $previous_question = -1;

$title_modifier="";
if ($question || $label) {
    $question_count=0;
    foreach $key ( keys %question ) {
        $question_count++;
        $title_modifier=" -- question label \"$label_list{$key}\"";
        $next_question = $key + 1;
        if ( $key > 1 ) {
            $previous_question = $key - 1;
        }
    }
    if ( $question_count > 1 ) {
        # Several questions: no single label, no neighbours.
        $title_modifier="";
        $next_question = $previous_question = -1;
        $url_name = "these questions";
    } else {
        $url_name = "this question";
    }
}

# There is nothing after the last question named in the aux file.

if ( $next_question > 0 &&
     $next_question > $last_question ) {
    $next_question = -1;
}

# print, not printf: the text is not a format, and a "%" arriving via
# an interpolated variable must come out unchanged.

print "
<html><head>
<title>TeX Frequently Asked Questions $title_modifier</title>
</head><body>
<h1 align=\"center\">Welcome to the UK List of<br>
                TeX Frequently Asked Questions<br>
                        on the Web</h1>
";

# Blast out the introduction until we get to the first section.
#
# Either way $_ is handed on to the code further down: it holds the
# \section line that ended the introduction, or the empty string when
# the introduction was not asked for (its text is then still unread).

if (!$introduction) {
    $_ = "";
}
else {
    print "<h2>Introduction</h2>\n";
    while (defined ($_ = <TEXFAQ>)) {
        last if /\\section/;
        &sanitize_line;
        print $_;
    }
}

# faq_form_escape(TEXT)
#
# Return TEXT made safe for use inside a double-quoted HTML attribute
# value: &, <, > and " are replaced by character entities.  An
# undefined TEXT gives the empty string.

sub faq_form_escape {
    my ($text) = @_;

    return "" unless defined $text;
    $text =~ s/&/&amp;/g;		# must come first
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Produce the form.
#
# The keyword, archive and format all come from the query string, so
# they are escaped before going into attribute values, and the page is
# written with print rather than printf so that a "%" in any of them is
# not taken as a format directive.  An explicitly chosen archive or
# format is passed on in hidden fields.

print "
<h2>Searching</h2>
<form action=\"$href_script\" method=get>
The index of Frequently Asked Questions about TeX is searchable.<br>
Please enter your keyword here:";
if ( $got_arch ) {
    print "\n<input type=hidden name=a value=\"",
          faq_form_escape ($arch), "\">";
}
if ( $got_fmt ) {
    print "\n<input type=hidden name=f value=\"",
          faq_form_escape ($fmt), "\">";
}
print "
<input type=\"text\" name=\"keyword\" value=\"",
      faq_form_escape ($keyword), "\"><br>
then press here: <input type=\"submit\" value=\"search\">
</form>
<hr>
";

# faq_html_escape(TEXT)
#
# Return TEXT with &, <, >, " and ' replaced by character entities, so
# that text taken from the query string can be shown in the page.  An
# undefined TEXT gives the empty string.

sub faq_html_escape {
    my ($text) = @_;

    return "" unless defined $text;
    $text =~ s/&/&amp;/g;		# must come first
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# The body of the page.  If questions were asked for (by number or by
# label) they are printed in full; otherwise we print an index of the
# questions, restricted to those matching the keyword if there is one.
#
# Both loops are do { } while so that the line already sitting in $_
# is dealt with before any more of the FAQ is read.

if ($question || $label) {
    do {
        if (s/^\s*\\Question(\[.*\])?\{(.*)\}\s*$/$2/) {
            # Keep the optional [label] now, so we don't depend on $1
            # still being intact after sanitize_line has run.
            my $anchor = $1;
            $qnum++;
            &sanitize_line;
            if ($question{$qnum}) {
                print "<h2>";
                print "<a name=\"$anchor\">" if $anchor;
                print;
                print "</a>" if $anchor;
                print "</h2>\n";
            }
        } elsif ($question{$qnum}) {
            # A body line of a question that was asked for.
            $printed = 1;
            &sanitize_line;
            print;
        }
    } while (<TEXFAQ>);
    print "<p>\n";
    if ( ! $printed ) {
        # $question and $label come from the query string: escape them.
        print "<h2>Warning</h2>\n";
        print "There is no question '",
              faq_html_escape ($question), "'!<p>\n"
            if $question;
        print "There is no question with label '",
              faq_html_escape ($label), "'!<p>\n"
            if $label;
    }
} else {
    if ($keyword) {
        # Echo the keyword as plain, escaped text.  (A bare
        # "sanitize_line;" used to stand here; sanitize_line is not
        # known to perl until the require has run, so that statement
        # was a no-op and the keyword was printed raw.)  $_ is left
        # alone, so the loop below starts on the pending FAQ line.
        print "Questions matching the expression '",
              faq_html_escape ($keyword), "'";
        # The keyword is used as a regular expression.  If it isn't a
        # valid one, search for it as literal text rather than dying
        # half way through the page.
        unless (eval { "" =~ /$keyword/; 1 }) {
            $keyword = quotemeta ($keyword);
        }
    } else {
        # No keyword: list everything, under section headings.
        $keyword = ".";
        $sectioning = 1;
    }
    print "<ul>";
    do {
        if ($sectioning && $converting) {
            if (/^\s*\\section\{(.*)\}\s*$/ ) {
                $_=$1;
                &sanitize_line;
                print "</ul><h3>$_</h3><ul>\n";
                $_="";
            } elsif ( /^\s*\\subsection\{(.*)\}\s*$/ ) {
                $_=$1;
                &sanitize_line;
                print "</ul><h4>$_</h4><ul>\n";
                $_="";
            }
        }
        &sanitize_line;
        if (/^\s*\\Question(\[.*\])?\{(.*)\}\s*$/) {
            $qnum++;
            $entry = "<li><a href=\"$href_script?label=$label_list{$qnum}$xtra_k\">$2</a>\n";
        }
        # The entry is printed at the first matching line of a question
        # and then emptied, so each question is listed at most once.
        if (/$keyword/ && $qnum) {
            print $entry;
            $entry = "";
            $matches{$qnum} = 1;
        }
    } while (<TEXFAQ>);
    print "</ul>";
    # Question numbers are numbers: sort them as such (a plain sort
    # would put 10 before 2).
    @matches = sort { $a <=> $b } keys (%matches);
    my $match_count = @matches;
    if ($match_count == 0) {
        print "<em>There were no matches.</em><p>\n";
    } elsif ($match_count == 1) {
        print "There was one match.<p>\n";
    } else {
        print "There were $match_count matches. ",
            "You can get all of them by pressing ",
            "<a href=\"$href_script?question=",
            (join("+",@matches)),$xtra_k,
            "\">here</a>.<p>\n";
    }
}

# An error report:
#
# oh_dear(MESSAGE)
#
# Print MESSAGE to the page as an error report and then die with the
# same message.  Never returns.  MESSAGE may include text taken from
# the query string, so the copy sent to the browser is HTML-escaped;
# the copy given to die is left as it is.

sub oh_dear {
    my ($message) = @_;
    $message = "" unless defined $message;

    my $shown = $message;
    $shown =~ s/&/&amp;/g;		# must come first
    $shown =~ s/</&lt;/g;
    $shown =~ s/>/&gt;/g;
    $shown =~ s/"/&quot;/g;

    print "<i>This shouldn\'t happen!</i><p>\n",
          $shown,
          "\n<p>Please report this!\n";

    # Die with the message itself, not with whatever happens to be in
    # $_ at the time of the call.
    die $message;
}

# print links to other questions


# Print a tail: the maintenance note, links to the neighbouring
# questions (when a single question is shown), the URL of the
# question(s) shown, and the FAQ version.
#
# print is used throughout, not printf: none of this text is a format,
# and a "%" arriving via an interpolated variable must come out
# unchanged.

print "
<hr><address>
   Maintenance of the
   <a href=\"http://www.tex.ac.uk/faq\">TeX FAQ</a>
   is coordinated by Robin Fairbairns.<p>
";

# Collect whichever neighbour links apply.  Each link is built from
# its own question number; the "previous question" link used to be
# built from $next_question.

my @neighbour_links = ();
if ( $previous_question > 0 ) {
    push (@neighbour_links,
          "<a href=\"$href_script?label=$label_list{$previous_question}\">\n"
          . "           previous question</a>");
}
if ( $next_question > 0 ) {
    push (@neighbour_links,
          "<a href=\"$href_script?label=$label_list{$next_question}\">\n"
          . "           next question</a>");
}
if ( @neighbour_links ) {
    print "Go to ", join (", or\n           ", @neighbour_links);
}
print "<p>\n";

if ( $question || $label ) {
    print "URL for $url_name: $href_script?label=$labels<p>\n";
}

print "
   Comments, suggestions, or error reports? - see
   \"<a href=\"$href_script?label=noans+newans\">how to improve the FAQ</a>\".
<p>
   This is FAQ version $fileversion, last modified on $filedate.
</address>
</body></html>
";

# That's it!