#!/usr/bin/perl
#
# findhyph -- find words hyphenated by TeX in a document
#
# Usage: 1) set \tracingparagraphs=1 in TeX document and run:
#        2) tex <file>
#        3) findhyph [-p] <file>
#
# Options: -p
#            also includes information about one-letter
#            prepositions and conjunctions at end of line
#            (useful for Slovak or Czech language)
#
# Output files:
#   <file>.hyph
#     List of hyphenated words. All punctuation characters are included
#     in this list. Page numbers in square brackets refer to first line
#     of a paragraph in which the word occurs.
#   <file>.prep
#     List of prepositions if option -p is used.
#
# $Date: 2001/04/08 14:40:23 $
# $RCSfile: findhyph,v $
# $Revision: 1.4 $
#
# Copyright (c) Martin Budaj 2000, 2001
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.

use strict;
use warnings;
use Getopt::Std;

my %opt;
getopts('p', \%opt);    # -p: also find some prepositions at end of line

# The file argument is checked only after option parsing; checking before
# it would let "findhyph -p" (no file) slip through, because "-p" itself
# would still be sitting in $ARGV[0].
die("Usage: findhyph [-p] <file>\n")
    unless (defined $ARGV[0] && length $ARGV[0]);

# Accept the bare job name or any of the usual derived file names.
my $filename = $ARGV[0];
$filename =~ s/\.(?:log|tex|dvi|ps|pdf)$//;

# Three-argument open: the file name comes from the command line and must
# never be interpreted as an open mode or a pipe.
open(my $in, '<', "$filename.log")
    or die("Can't read $filename.log: $!\n");
open(my $hyph_out, '>', "$filename.hyph")
    or die("Can't write $filename.hyph: $!\n");
my $prep_out;
if ($opt{p}) {
    open($prep_out, '>', "$filename.prep")
        or die("Can't write $filename.prep: $!\n");
}

# State shared between the main loop and do_hyph().
my $search          = 0;     # bit mask: 0 no search, 1 prepositions, 2 hyphens, 3 both
my $write_hyph_page = 0;     # a page number is still owed to the .hyph file
my $write_prep_page = 0;     # a page number is still owed to the .prep file
my $act_text        = "";    # paragraph text collected so far, with @N@ break markers
my $act_break       = 0;     # number of the most recently seen break
my $max_break       = 0;     # number of the last break of the finished paragraph
my @USED;                    # $USED[N] is set when break N lies on the chosen chain
my %BREAKS;                  # $BREAKS{N}{'prev'} is the break preceding break N

while (<$in>) {
    chomp;

    # First pass (no hyphenation tried): only interesting for prepositions.
    if (/^\@firstpass$/) {
        if ($opt{p}) {
            $search    = 1;
            $act_break = 0;
            $act_text  = "";
            @USED      = ();
            %BREAKS    = ();
        }
        next;
    }

    # An empty line ends the trace of the current paragraph.
    if ($search && $_ eq "") {
        $max_break = $act_break;
        do_hyph();
        $search = 0;
        next;
    }

    # Second pass: hyphenation is tried, so start looking for hyphens too.
    # OR-ing the bit in (instead of adding 2) keeps $search within 0..3
    # even if the marker shows up twice before the paragraph ends.
    if (/^\@secondpass$/) {
        $search |= 2;
        $act_break = 0;
        $act_text  = "";
        @USED      = ();
        %BREAKS    = ();
        next;
    }

    # "@@N ... @@P": break N, whose predecessor is break P.
    if ($search && /^\@\@(\d+)/) {
        $act_break = $1;
        # Only trust the capture when the match succeeded; a failed match
        # would leave the previous $1 (the break itself) in place.
        my $prev = /\@\@(\d+)$/ ? $1 : 0;
        $BREAKS{$act_break}{'prev'} = $prev;
        $act_text .= "\@$act_break\@";
        next;
    }

    # Any other trace line starting with "@" carries no paragraph text.
    next if $search && /^\@/;

    if ($search) {
        $act_text .= $_;
    }
    elsif (/\[(\d+)]/) {    # page number
        my $pageno = $1;
        if ($write_hyph_page) {
            print {$hyph_out} "[$pageno]\n\n";
            $write_hyph_page = 0;
        }
        if ($write_prep_page) {
            print {$prep_out} "[$pageno]\n\n";
            $write_prep_page = 0;
        }
    }
}

close($in);
# Buffered write errors only surface at close time, so check them.
close($hyph_out) or die("Can't write $filename.hyph: $!\n");
if ($opt{p}) {
    close($prep_out) or die("Can't write $filename.prep: $!\n");
}

# do_hyph -- report the line ends of the paragraph just collected.
#
# Takes no arguments; works on the file-scoped state ($act_text, %BREAKS,
# @USED, $max_break, $search).  Follows the 'prev' links back from
# $max_break to mark the breaks on that chain, strips the markers of all
# other breaks from $act_text, and then, for every marked break, prints
#   - the word around the break (if $search has bit 2 set) to the .hyph
#     file, with the break shown as "-", and
#   - a one-letter word in front of the break (if $search has bit 1 set)
#     to the .prep file.
# Sets $write_hyph_page / $write_prep_page so that the main loop appends
# the next page number it sees.
sub do_hyph {
    # Mark the chain of breaks, walking backwards from the final one.
    my $br = $max_break;
    while (1) {
        my $prev = $BREAKS{$br}{'prev'};
        last unless $prev;        # 0 or missing: start of paragraph reached
        last if $USED[$prev];     # malformed trace: never loop on a cycle
        $br = $prev;
        $USED[$br] = 1;
    }

    # Drop the markers of breaks that are not on the chain.  All of them
    # must be gone before matching below, because \S+ would otherwise
    # swallow a stale marker as part of a word.
    for my $i (1 .. $max_break - 1) {
        next if defined $USED[$i];
        $act_text =~ s/\@$i\@//;
    }

    for my $i (1 .. $max_break - 1) {
        next unless defined $USED[$i];

        if ($search > 1) {    # hyphenated words
            # Only a break with non-space text on both sides is inside a word.
            if ($act_text =~ /(\S+\@$i\@\S+)/) {
                my $out_text = $1;
                $out_text =~ s/-//g;            # discretionary hyphens from the trace
                $out_text =~ s/\@\d+\@/-/g;     # show each break as a hyphen
                $out_text =~ s/^-//;            # very narrow columns
                $out_text =~ s/\\\w+$//;        # font change after word
                print {$hyph_out} "$out_text\n";
                $write_hyph_page = 1;
            }
        }

        if ($search != 2) {    # prepositions
            if ($act_text =~ / (\S) \@$i\@/) {
                my $out_text = $1;
                if ($out_text =~ /[kKsSvVzZoOuUiIA]/) {    # for Slovak and Czech
                    print {$prep_out} "$out_text\n";
                    $write_prep_page = 1;
                }
            }
        }
    }
}