bird/tools/linuxdoc-tools/LinuxDocTools.pm

669 lines
22 KiB
Perl
Raw Normal View History

#! /usr/bin/perl
#
# LinuxDocTools.pm
#
# LinuxDoc-Tools driver core. This contains all the basic functionality
# we need to control all other components.
#
# Copyright <20> 1996, Cees de Groot.
# Copyright <20> 2000, Taketoshi Sano
# Copyright <20> 2006-2018, Agustin Martin
# --------------------------------------------------------------------------------
package LinuxDocTools;
require 5.006;
use strict;
=head1 NAME
LinuxDocTools - SGML conversion utilities for LinuxDoc DTD.
=head1 SYNOPSIS
use LinuxDocTools;
LinuxDocTools::init;
@files = LinuxDocTools::process_options ($0, @ARGV);
for $curfile (@files) {
LinuxDocTools::process_file ($curfile);
}
=head1 DESCRIPTION
The LinuxDocTools package encapsulates all the functionality offered by
LinuxDoc-Tools. It is used, of course, by LinuxDoc-Tools;
but the encapsulation should provide for a simple interface for other users as well.
=head1 FUNCTIONS
=over 4
=cut
use File::Copy;
use File::Temp qw(tempdir);
use File::Basename qw(fileparse);
use LinuxDocTools::Lang;
use LinuxDocTools::Utils qw(usage cleanup trap_signals remove_tmpfiles create_temp);
use LinuxDocTools::Vars;
sub BEGIN
{
#
# Make sure we're always looking here. Note that "use lib" adds
# on the front of the search path, so we first push dist, then
# site, so that site is searched first.
#
use lib "$main::DataDir/dist";
use lib "$main::DataDir/site";
}
# -----------------------------------------------------------------------------------
sub ldt_searchfile {
# -----------------------------------------------------------------------------------
# Look for a readable file in the locations. Return first math.
# -----------------------------------------------------------------------------------
my $files = shift;
foreach my $file ( @$files ){
return $file if -r $file;
}
}
# -----------------------------------------------------------------------------------
sub ldt_getdtd_v1 {
# -----------------------------------------------------------------------------------
# Get the dtd
# -----------------------------------------------------------------------------------
my $file = shift;
my $error_header = "LinuxdocTools::ldt_getdtd_v1";
my $dtd;
open ( my $FILE, "< $file")
or die "$error_header: Could not open \"$file\" for reading. Aborting ...\n";
while ( <$FILE> ) {
tr/A-Z/a-z/;
# check for [<!doctype ... system] type definition
if ( /<!doctype\s*(\w*)\s*system/ ) {
$dtd = $1;
last;
# check for <!doctype ... PUBLIC ... DTD ...
} elsif ( /<!doctype\s*\w*\s*public\s*.*\/\/dtd\s*(\w*)/mi ) {
$dtd = $1;
last;
# check for <!doctype ...
# PUBLIC ... DTD ...
# (multi-line version)
} elsif ( /<!doctype\s*(\w*)/ ) {
$dtd = "precheck";
next;
} elsif ( /\s*public\s*.*\/\/dtd\s*(\w*)/ && $dtd eq "precheck" ) {
$dtd = $1;
last;
}
}
close $FILE;
return $dtd;
}
# -----------------------------------------------------------------------------------
sub ldt_getdtd_v2 {
# -----------------------------------------------------------------------------------
# Second way of getting dtd, fron nsgmls output.
# -----------------------------------------------------------------------------------
my $preaspout = shift;
my $error_header = "LinuxdocTools::ldt_getdtd_v2";
my $dtd2;
open (my $TMP,"< $preaspout")
or die "%error_header: Could not open $preaspout for reading. Aborting ...\n";
while ( defined ($dtd2 = <$TMP>) && ! ( $dtd2 =~ /^\(/) ) { };
close $TMP;
$dtd2 =~ s/^\(//;
$dtd2 =~ tr/A-Z/a-z/;
chomp $dtd2;
return $dtd2;
}
# -----------------------------------------------------------------------------------
sub ldt_latin1tosgml {
# -----------------------------------------------------------------------------------
# Convert latin1 chars in input filehandle to sgml entities in the returned string
# -----------------------------------------------------------------------------------
my $FILE = shift;
my $sgmlout;
while (<$FILE>){
# Outline these commands later on - CdG
#change latin1 characters to SGML
#by Farzad Farid, adapted by Greg Hankins
s/<2F>/\&Agrave;/g;
s/<2F>/\&Aacute;/g;
s/<2F>/\&Acirc;/g;
s/<2F>/\&Atilde;/g;
s/<2F>/\&Auml;/g;
s/<2F>/\&Aring;/g;
s/<2F>/\&AElig;/g;
s/<2F>/\&Ccedil;/g;
s/<2F>/\&Egrave;/g;
s/<2F>/\&Eacute;/g;
s/<2F>/\&Ecirc;/g;
s/<2F>/\&Euml;/g;
s/<2F>/\&Igrave;/g;
s/<2F>/\&Iacute;/g;
s/<2F>/\&Icirc;/g;
s/<2F>/\&Iuml;/g;
s/<2F>/\&Ntilde;/g;
s/<2F>/\&Ograve;/g;
s/<2F>/\&Oacute;/g;
s/<2F>/\&Ocirc;/g;
s/<2F>/\&Otilde;/g;
s/<2F>/\&Ouml;/g;
s/<2F>/\&Oslash;/g;
s/<2F>/\&Ugrave;/g;
s/<2F>/\&Uacute;/g;
s/<2F>/\&Ucirc;/g;
s/<2F>/\&Uuml;/g;
s/<2F>/\&Yacute;/g;
s/<2F>/\&THORN;/g;
s/<2F>/\&szlig;/g;
s/<2F>/\&agrave;/g;
s/<2F>/\&aacute;/g;
s/<2F>/\&acirc;/g;
s/<2F>/\&atilde;/g;
s/<2F>/\&auml;/g;
s/<2F>/\&aring;/g;
s/<2F>/\&aelig;/g;
s/<2F>/\&ccedil;/g;
s/<2F>/\&egrave;/g;
s/<2F>/\&eacute;/g;
s/<2F>/\&ecirc;/g;
s/<2F>/\&euml;/g;
s/<2F>/\&igrave;/g;
s/<2F>/\&iacute;/g;
s/<2F>/\&icirc;/g;
s/<2F>/\&iuml;/g;
s/<2F>/\&mu;/g;
s/<2F>/\&eth;/g;
s/<2F>/\&ntilde;/g;
s/<2F>/\&ograve;/g;
s/<2F>/\&oacute;/g;
s/<2F>/\&ocirc;/g;
s/<2F>/\&otilde;/g;
s/<2F>/\&ouml;/g;
s/<2F>/\&oslash;/g;
s/<2F>/\&ugrave;/g;
s/<2F>/\&uacute;/g;
s/<2F>/\&ucirc;/g;
s/<2F>/\&uuml;/g;
s/<2F>/\&yacute;/g;
s/<2F>/\&thorn;/g;
s/<2F>/\&yuml;/g;
$sgmlout .= $_;
}
return $sgmlout;
}
# ------------------------------------------------------------------------
=item LinuxDocTools::init
Takes care of initialization of package-global variables (which are actually
defined in L<LinuxDocTools::Vars>). The package-global variables are I<$global>,
a reference to a hash containing numerous settings, I<%Formats>, a hash
containing all the formats, and I<%FmtList>, a hash containing the currently
active formats for help texts.
Apart from this, C<LinuxDocTools::init> also finds all distributed and site-local
formatting backends and C<require>s them.
=cut
# -----------------------------------------------------------------------------------
sub init {
# -----------------------------------------------------------------------------------
trap_signals;
# Register the ``global'' pseudoformat. Apart from the global settings, we
# also use $global to keep the global variable name space clean everything
# that we need to provide to other modules is stuffed into $global.
$global = {};
$global->{NAME} = "global";
$global->{HELP} = "";
$global->{OPTIONS} = [
{ option => "backend",
type => "l",
'values' => [ "html", "info", "latex", "lyx", "rtf", "txt", "check" ],
short => "B" },
{ option => "papersize",
type => "l",
'values' => [ "a4", "letter" ],
short => "p" },
{ option => "language",
type => "l",
'values' => [ @LinuxDocTools::Lang::Languages ],
short => "l" },
{ option => "charset", type => "l",
'values' => [ "latin", "ascii", "nippon", "euc-kr" ], short => "c" },
{ option => "style", type => "s", short => "S" },
{ option => "tabsize", type => "i", short => "t" },
# { option => "verbose", type => "f", short => "v" },
{ option => "debug", type => "f", short => "d" },
{ option => "define", type => "s", short => "D" },
{ option => "include", type => "s", short => "i" },
{ option => "pass", type => "s", short => "P" }
];
$global->{backend} = "linuxdoc";
$global->{papersize} = "a4";
$global->{language} = "en";
$global->{charset} = "ascii";
$global->{style} = "";
$global->{tabsize} = 8;
$global->{verbose} = 0;
$global->{define} = "";
$global->{debug} = 0;
$global->{include} = "";
$global->{pass} = "";
$global->{InFiles} = [];
$global->{fmtlist} = ""; # List of loaded fmt files
$Formats{$global->{NAME}} = $global; # All formats we know.
$FmtList{$global->{NAME}} = $global; # List of formats for help msgs.
$global->{sgmlpre} = "$main::AuxBinDir/sgmlpre";
my $error_header = "LinuxdocTools::init";
if ( -e "/etc/papersize" ){
open (my $PAPERSIZE,"< /etc/papersize") ||
die "$error_header: Count not open \"/etc/papersize\" for reading\n";
chomp (my $paper = <$PAPERSIZE>);
$global->{papersize} = "letter" if ( $paper eq "letter");
close $PAPERSIZE;
}
# automatic language detection: disabled by default
# {
# my $lang;
# foreach $lang (@LinuxDocTools::Lang::Languages)
# {
# if (($ENV{"LC_ALL"} =~ /^$lang/i) ||
# ($ENV{"LC_CTYPE"} =~ /^$lang/i) ||
# ($ENV{"LANG"} =~ /^$lang/i)) {
# $global->{language} = Any2ISO($lang);
# }
# }
# }
# --------------------------------------------------------------------------------
$global->{preNSGMLS} = sub {
# ------------------------------------------------------------------------------
# Define a fallback preNSGMLS. Used when the format is "global" (from sgmlcheck).
# ------------------------------------------------------------------------------
$global->{NsgmlsOpts} .= " -s ";
$global->{NsgmlsPrePipe} = "cat $global->{file}";
};
# We need to load all fmt files here, so the allowed options for all
# format are put into $global and a complete usage message is built,
# including options for all formats.
my %locations = ();
foreach my $path ("$main::DataDir/site",
"$main::DataDir/dist",
"$main::DataDir/fmt"){
foreach my $location (<$path/fmt_*.pl>){
my $fmt = $location;
$fmt =~ s/^.*_//;
$fmt =~ s/\.pl$//;
$locations{$fmt} = $location unless defined $locations{$fmt};
}
}
foreach my $fmt ( keys %locations ){
$global->{fmtlist} .= " Loading $locations{$fmt}\n";
require $locations{$fmt};
}
}
# ------------------------------------------------------------------------
=item LinuxDocTools::process_options ($0, @ARGV)
This function contains all initialization that is bound to the current
invocation of LinuxDocTools. It looks in C<$0> to deduce the backend that
should be used (ld2txt activates the I<txt> backend) and parses the
options array. It returns an array of filenames it encountered during
option processing.
As a side effect, the environment variable I<SGML_CATALOG_FILES> is
modified and, once I<$global->{format}> is known, I<SGMLDECL> is set.
=cut
# ------------------------------------------------------------------------
sub process_options {
# ------------------------------------------------------------------------
my $progname = shift;
my @tmpargs = @_;
my @args = ();
my $format = '';
# Try getting the format. We need to do this here so process_options
# knows which is the format and which format options are allowed
# First, see if we have an explicit backend option by looping over command line.
# Do not shift in the while condition itself, 0 in options like '-s 0' will
# otherwise stop looping
while ( @tmpargs ){
$_ = shift @tmpargs;
if ( s/--backend=// ){
$format = $_;
} elsif ( $_ eq "-B" ){
$format = shift @tmpargs;
} else {
push @args, $_;
}
}
unless ( $format ){
my ($tmpfmt, $dummy1, $dummy2) = fileparse($progname, "");
if ( $tmpfmt =~ s/^sgml2// ) { # Calling program through sgml2xx symlinks
$format = $tmpfmt;
} elsif ( $tmpfmt eq "sgmlcheck" ) { # Calling program through sgmlcheck symlink
$format = "global";
}
}
if ( $format ) {
if ( $format eq "check" ){
$format = "global";
} elsif ( $format eq "latex" ){
$format = "latex2e";
}
$FmtList{$format} = $Formats{$format} or
usage("$format: Unknown format");
$global->{format} = $format;
} else {
usage("");
}
# Parse all the options from @args, and return files.
my @files = LinuxDocTools::Utils::process_options(@args);
# Check the number of given files
$#files > -1 || usage("No filenames given");
# Normalize language string
$global->{language} = Any2ISO($global->{language});
# Setup the SGML environment.
my @sgmlcatalogs =
(# SGML iso-entities catalog location in Debian sgml-data package
"$main::isoentities_prefix/share/sgml/entities/sgml-iso-entities-8879.1986/catalog",
# SGML iso-entities catalog location in ArchLinux, Fedora and Gentoo
"$main::isoentities_prefix/share/sgml/sgml-iso-entities-8879.1986/catalog",
# SGML iso-entities catalog location when installed from linuxdoc-tools
"$main::isoentities_prefix/share/sgml/iso-entities-8879.1986/iso-entities.cat",
# dtd/catalog for SGML-Tools
"$main::DataDir/linuxdoc-tools.catalog",
# The super catalog
"/etc/sgml/catalog");
@sgmlcatalogs = ($ENV{SGML_CATALOG_FILES}, @sgmlcatalogs) if defined $ENV{SGML_CATALOG_FILES};
$ENV{SGML_CATALOG_FILES} = join(':', @sgmlcatalogs);
# Set to one of these if readable, nil otherwise
$ENV{SGMLDECL} = ldt_searchfile(["$main::DataDir/dtd/$global->{format}.dcl",
"$main::DataDir/dtd/$global->{style}.dcl",
"$main::DataDir/dtd/sgml.dcl"]);
# Show the list of loaded fmt_*.pl files if debugging
print STDERR $global->{fmtlist} if $global->{debug};
# Return the list of files to be processed
return @files;
}
# ------------------------------------------------------------------------
=item LinuxDocTools::process_file
With all the configuration done, this routine will take a single filename
and convert it to the currently active backend format. The conversion is
done in a number of steps in tight interaction with the currently active
backend (see also L<LinuxDocTools::BackEnd>):
=over
=item 1. Backend: set NSGMLS options and optionally create a pre-NSGMLS pipe.
=item 2. Here: Run the preprocessor to handle conditionals.
=item 3. Here: Run NSGMLS.
=item 4. Backend: run pre-ASP conversion.
=item 5. Here: Run SGMLSASP.
=item 6. Backend: run post-ASP conversion, generating the output.
=back
All stages are influenced by command-line settings, currently active format,
etcetera. See the code for details.
=cut
# ------------------------------------------------------------------------
sub process_file {
# ------------------------------------------------------------------------
my $file = $global->{origfile} = shift (@_);
my $saved_umask = umask;
my $error_header = "LinuxdocTools::process_file";
print "Processing file $file\n";
umask 0077;
my ($filename, $filepath, $filesuffix) = fileparse($file, "\.sgml");
$global->{filename} = $filename;
$global->{filepath} = $filepath;
$global->{file} = ldt_searchfile(["$filepath/$filename.sgml",
"$filepath/$filename.SGML"])
or die "$error_header: Cannot find $file. Aborting ...\n";
my $dtd = ldt_getdtd_v1("$global->{file}");
print STDERR "DTD: " . $dtd . "\n" if $global->{debug};
# Prepare temporary directory
my $tmpdir = $ENV{'TMPDIR'} || '/tmp';
$tmpdir = tempdir("linuxdoc-tools.XXXXXXXXXX", DIR => "$tmpdir");
# Set common base name for temp files and temp file names
my $tmpbase = $global->{tmpbase} = $tmpdir . '/sgmltmp.' . $filename;
my $precmdout = "$tmpbase.01.precmdout";
my $nsgmlsout = "$tmpbase.02.nsgmlsout"; # Was $tmpbase.1
my $preaspout = "$tmpbase.03.preaspout"; # Was $tmpbase.2
my $aspout = "$tmpbase.04.aspout"; # Was $tmpbase.3
# Set up the preprocessing command. Conditionals have to be
# handled here until they can be moved into the DTD, otherwise
# a validating SGML parser will choke on them.
# Check if output option for latex is pdf or not
if ($global->{format} eq "latex2e") {
if ($Formats{$global->{format}}{output} eq "pdf") {
$global->{define} .= " pdflatex=yes";
}
}
# Set the actual pre-processing command
my($precmd) = "| $global->{sgmlpre} output=$global->{format} $global->{define}";
# Make sure path of file to be processed is in SGML_SEARCH_PATH
$ENV{"SGML_SEARCH_PATH"} .= ":$filepath";
# You can hack $NsgmlsOpts here, etcetera.
$global->{NsgmlsOpts} .= "-D $main::prefix/share/sgml -D $main::DataDir";
$global->{NsgmlsOpts} .= "-i$global->{include}" if ($global->{include});
# If a preNSGMLS function is defined in the fmt file, pipe its output to $FILE,
# otherwise just open $global->{file} as $IFILE
# ----------------------------------------------------------------------------
my $IFILE;
if ( defined $Formats{$global->{format}}{preNSGMLS} ) {
$global->{NsgmlsPrePipe} = &{$Formats{$global->{format}}{preNSGMLS}};
open ($IFILE,"$global->{NsgmlsPrePipe} |")
|| die "$error_header: Could not open pipe from $global->{NsgmlsPrePipe}. Aborting ...\n";
} else {
open ($IFILE,"< $global->{file}")
|| die "$error_header: Could not open $global->{file} for reading. Aborting ...\n";
}
# Create a temp file with $precmd output
my $precmd_command = "$precmd > $precmdout";
open (my $PRECMDOUT, "$precmd_command")
or die "$error_header: Could not open pipe to $precmdout. Aborting ...\n";
if ($global->{charset} eq "latin") {
print $PRECMDOUT ldt_latin1tosgml($IFILE);
} else {
copy($IFILE,$PRECMDOUT);
}
close $IFILE;
close $PRECMDOUT;
# Process with nsgmls.
my $nsgmls_command = "$main::progs->{NSGMLS} $global->{NsgmlsOpts} $ENV{SGMLDECL} $precmdout > $nsgmlsout";
system($nsgmls_command) == 0
or die "Error: \"$nsgmls_command\" failed with exit status: ",$? >> 8,"\n";
# Special case: if format is global, we're just checking.
cleanup if ( $global->{format} eq "global");
# If output file does not exists or is empty, something went wrong.
if ( ! -e "$nsgmlsout" ) {
die "$error_header: Can't create file $nsgmlsout. Aborting ...\n";
} elsif ( -z "$nsgmlsout" ){
die "$error_header: $nsgmlsout empty, SGML parsing error. Aborting ...\n";
}
print "- Nsgmls stage finished.\n" if $global->{debug};
# If a preASP stage is defined, let the format handle it.
# --------------------------------------------------------
open (my $PREASP_IN, "< $nsgmlsout")
or die "$error_header: Could not open $nsgmlsout for reading. Aborting ...\n";
open (my $PREASP_OUT, "> $preaspout")
or die "$error_header: Could not open $preaspout for writing. Aborting ...\n";
if (defined $Formats{$global->{format}}{preASP}) {
# Usage: preASP ($INHANDLE, $OUTHANDLE);
&{$Formats{$global->{format}}{preASP}}($PREASP_IN, $PREASP_OUT) == 0
or die "$error_header: Error pre-processing $global->{format}.\n";
} else {
copy ($PREASP_IN, $PREASP_OUT);
}
close $PREASP_IN;
close $PREASP_OUT;
die "$error_header: Can't create $preaspout file. Aborting ...\n"
unless -e "$preaspout";
print "- PreASP stage finished.\n" if ( $global->{debug} );
# Run sgmlsasp, with an optional style if specified.
# -----------------------------------------------------------
my $dtd2 = ldt_getdtd_v2($preaspout)
or die "$error_header: Could not read dtd from $preaspout. Aborting ...\n";
unless ( $dtd eq $dtd2 ){
print STDERR "Warning: Two different values for dtd, dtd1: $dtd, dtd2: $dtd2\n";
$dtd = $dtd2;
}
$global->{'dtd'} = $dtd;
# Search order:
# - datadir/site/<dtd>/<format>
# - datadir/dist/<dtd>/<format>
my $style = ($global->{style}) ?
ldt_searchfile(["$main::DataDir/site/$dtd/$global->{format}/$global->{style}mapping",
"$main::DataDir/dist/$dtd/$global->{format}/$global->{style}mapping",
"$main::DataDir/mappings/$global->{format}/$global->{style}mapping"])
:
'';
my $mapping = ldt_searchfile(["$main::DataDir/site/$dtd/$global->{format}/mapping",
"$main::DataDir/dist/$dtd/$global->{format}/mapping",
"$main::DataDir/mappings/$global->{format}/mapping"])
or die "$error_header: Could not find mapping file for $dtd/$global->{format}. Aborting ...\n";
$mapping = "$style $mapping" if $style;
$global->{charset} = "nippon" if ($global->{language} eq "ja");
# We don't have Korean groff so charset should be latin1.
if ($global->{language} eq "ko") {
if ($global->{format} eq "groff") {
$global->{charset} = "latin1";
} else {
$global->{charset} = "euc-kr";
}
}
if ($global->{format} eq "groff"){
if ($dtd eq "linuxdoctr") {
$mapping = "$main::DataDir/mappings/$global->{format}/tr-mapping";
}
}
my $sgmlsasp_command = "$main::progs->{SGMLSASP} $mapping < $preaspout |
expand -t $global->{tabsize} > $aspout";
system ($sgmlsasp_command) == 0
or die "$error_header: Error running $sgmlsasp_command. Aborting ...\n";
die "$error_header: Can't create $aspout file. Aborting ...\n"
unless -e "$aspout";
print "- ASP stage finished.\n" if ( $global->{debug} );
# If a postASP stage is defined, let the format handle it.
# ----------------------------------------------------------------
umask $saved_umask;
open (my $INPOSTASP, "< $aspout" )
or die "$error_header: Could not open $aspout for reading. Aborting ...\n";
if (defined $Formats{$global->{format}}{postASP}) {
# Usage: postASP ($INHANDLE)
# Should leave whatever it thinks is right based on $INHANDLE.
&{$Formats{$global->{format}}{postASP}}($INPOSTASP) == 0
or die "$error_header: Error post-processing $global->{format}. Aborting ...\n";
}
close $INPOSTASP;
print "- postASP stage finished.\n" if ( $global->{debug} );
# All done, remove the temporaries.
remove_tmpfiles($tmpbase) unless ( $global->{debug} );
}
=pod
=back
=head1 SEE ALSO
Documentation for various sub-packages of LinuxDocTools.
=head1 AUTHOR
SGMLTools are written by Cees de Groot, C<E<lt>cg@cdegroot.comE<gt>>,
and various SGML-Tools contributors as listed in C<CONTRIBUTORS>.
Taketoshi Sano C<E<lt>sano@debian.org<gt>> rename to LinuxDocTools.
=cut
1;