bird/tools/linuxdoc-tools/LinuxDocTools.pm
Ondrej Zajicek (work) 58510024be Doc: Include full LinuxDocTools code
BIRD uses hacked LinuxDocTools for building documentation, keeping some
parts locally and using remaining parts from system-installed one. This
setup breaks when LinuxDocTools makes some internal changes and is hard
to keep consistent.

Just include full LinuxDocTools code (both hacked and unmodified parts)
to avoid consistency issues. Note that we still need some binaries from
LinuxDocTools, so it still needs to be installed to build documentation.
2021-04-25 02:21:05 +02:00

669 lines
22 KiB
Perl

#! /usr/bin/perl
#
# LinuxDocTools.pm
#
# LinuxDoc-Tools driver core. This contains all the basic functionality
# we need to control all other components.
#
# Copyright © 1996, Cees de Groot.
# Copyright © 2000, Taketoshi Sano
# Copyright © 2006-2018, Agustin Martin
# --------------------------------------------------------------------------------
package LinuxDocTools;
require 5.006;
use strict;
=head1 NAME
LinuxDocTools - SGML conversion utilities for LinuxDoc DTD.
=head1 SYNOPSIS
use LinuxDocTools;
LinuxDocTools::init;
@files = LinuxDocTools::process_options ($0, @ARGV);
for $curfile (@files) {
LinuxDocTools::process_file ($curfile);
}
=head1 DESCRIPTION
The LinuxDocTools package encapsulates all the functionality offered by
LinuxDoc-Tools. It is used, of course, by LinuxDoc-Tools;
but the encapsulation should provide for a simple interface for other users as well.
=head1 FUNCTIONS
=over 4
=cut
use File::Copy;
use File::Temp qw(tempdir);
use File::Basename qw(fileparse);
use LinuxDocTools::Lang;
use LinuxDocTools::Utils qw(usage cleanup trap_signals remove_tmpfiles create_temp);
use LinuxDocTools::Vars;
BEGIN {
    #
    # Make sure the LinuxDoc-Tools module directories are always searched.
    # A single "use lib" puts its paths on the front of @INC in the order
    # they are listed, so site is still searched before dist.
    #
    use lib ("$main::DataDir/site", "$main::DataDir/dist");
}
# -----------------------------------------------------------------------------------
sub ldt_searchfile {
# -----------------------------------------------------------------------------------
# Look for a readable file in the given locations. Return the first match.
#
# Arguments: a reference to an array of path names, in order of preference.
# Returns:   the first path that passes the -r (readable) test, or the empty
#            string when none of the candidates is readable.
# -----------------------------------------------------------------------------------
    my $files = shift;

    foreach my $file ( @$files ){
        next unless defined $file;       # nothing to test for an undefined entry
        return $file if -r $file;
    }

    # Explicit "not found" result: when a sub falls off the end of a foreach
    # its return value is unspecified (see perlsub), so state the false value.
    return '';
}
# -----------------------------------------------------------------------------------
sub ldt_getdtd_v1 {
# -----------------------------------------------------------------------------------
# Get the dtd name from the <!doctype ...> declaration of an SGML file.
#
# Arguments: path of the file to scan.
# Returns:   the lower-cased dtd name; undef when no doctype declaration is
#            seen; the placeholder "precheck" when a bare <!doctype ...> line
#            was seen but no later "public ... //dtd ..." line completed it.
# Dies when the file cannot be opened.
# -----------------------------------------------------------------------------------
    my $file = shift;
    my $error_header = "LinuxdocTools::ldt_getdtd_v1";
    my $dtd;

    # Three-argument open: the file name is taken literally, it is never
    # parsed for mode characters and keeps leading/trailing whitespace.
    open ( my $FILE, '<', $file )
        or die "$error_header: Could not open \"$file\" for reading. Aborting ...\n";

    # Read into a lexical so that the caller's $_ is left untouched.
    while ( defined( my $line = <$FILE> ) ) {
        $line =~ tr/A-Z/a-z/;
        # check for [<!doctype ... system] type definition
        if ( $line =~ /<!doctype\s*(\w*)\s*system/ ) {
            $dtd = $1;
            last;
        # check for <!doctype ... PUBLIC ... DTD ...
        } elsif ( $line =~ /<!doctype\s*\w*\s*public\s*.*\/\/dtd\s*(\w*)/mi ) {
            $dtd = $1;
            last;
        # check for <!doctype ...
        #          PUBLIC  ... DTD ...
        # (multi-line version): remember that the doctype line was seen
        } elsif ( $line =~ /<!doctype\s*(\w*)/ ) {
            $dtd = "precheck";
            next;
        # continuation line of the multi-line version; only meaningful once
        # the doctype line has been seen, hence the defined-ness guard
        } elsif ( defined $dtd && $dtd eq "precheck"
                  && $line =~ /\s*public\s*.*\/\/dtd\s*(\w*)/ ) {
            $dtd = $1;
            last;
        }
    }
    close $FILE;

    return $dtd;
}
# -----------------------------------------------------------------------------------
sub ldt_getdtd_v2 {
# -----------------------------------------------------------------------------------
# Second way of getting the dtd, from nsgmls output.
#
# Arguments: path of a file holding nsgmls output.
# Returns:   the text following "(" on the first line that starts with "(",
#            lower-cased and without its line ending; the empty string when
#            the file has no such line.
# Dies when the file cannot be opened.
# -----------------------------------------------------------------------------------
    my $preaspout = shift;
    my $error_header = "LinuxdocTools::ldt_getdtd_v2";
    my $dtd2 = '';

    open ( my $TMP, '<', $preaspout )
        or die "$error_header: Could not open $preaspout for reading. Aborting ...\n";

    # Stop at the first line beginning with "(".
    while ( defined( my $line = <$TMP> ) ) {
        next unless $line =~ /^\(/;
        $dtd2 = $line;
        last;
    }
    close $TMP;

    # $dtd2 is always defined here, so the clean-up below never works on undef.
    $dtd2 =~ s/^\(//;
    $dtd2 =~ tr/A-Z/a-z/;
    chomp $dtd2;

    return $dtd2;
}
# -----------------------------------------------------------------------------------
sub ldt_latin1tosgml {
# -----------------------------------------------------------------------------------
# Convert latin1 chars in input filehandle to sgml entities in the returned string
#
# Arguments: an open, readable filehandle.
# Returns:   everything read from the handle, with each character listed in
#            the table below replaced by its SGML entity; the empty string
#            when the handle yields no data.
#
# Character list by Farzad Farid, adapted by Greg Hankins.
#
# NOTE(review): the table keys are literal non-ASCII characters, so what they
# match depends on the encoding this source file is saved in -- confirm that
# it agrees with the encoding of the documents being converted.
# -----------------------------------------------------------------------------------
    my $FILE = shift;

    my %entity_of = (
        'À' => '&Agrave;', 'Á' => '&Aacute;', 'Â' => '&Acirc;',  'Ã' => '&Atilde;',
        'Ä' => '&Auml;',   'Å' => '&Aring;',  'Æ' => '&AElig;',  'Ç' => '&Ccedil;',
        'È' => '&Egrave;', 'É' => '&Eacute;', 'Ê' => '&Ecirc;',  'Ë' => '&Euml;',
        'Ì' => '&Igrave;', 'Í' => '&Iacute;', 'Î' => '&Icirc;',  'Ï' => '&Iuml;',
        'Ñ' => '&Ntilde;', 'Ò' => '&Ograve;', 'Ó' => '&Oacute;', 'Ô' => '&Ocirc;',
        'Õ' => '&Otilde;', 'Ö' => '&Ouml;',   'Ø' => '&Oslash;', 'Ù' => '&Ugrave;',
        'Ú' => '&Uacute;', 'Û' => '&Ucirc;',  'Ü' => '&Uuml;',   'Ý' => '&Yacute;',
        'Þ' => '&THORN;',  'ß' => '&szlig;',  'à' => '&agrave;', 'á' => '&aacute;',
        'â' => '&acirc;',  'ã' => '&atilde;', 'ä' => '&auml;',   'å' => '&aring;',
        'æ' => '&aelig;',  'ç' => '&ccedil;', 'è' => '&egrave;', 'é' => '&eacute;',
        'ê' => '&ecirc;',  'ë' => '&euml;',   'ì' => '&igrave;', 'í' => '&iacute;',
        'î' => '&icirc;',  'ï' => '&iuml;',   'µ' => '&mu;',     'ð' => '&eth;',
        'ñ' => '&ntilde;', 'ò' => '&ograve;', 'ó' => '&oacute;', 'ô' => '&ocirc;',
        'õ' => '&otilde;', 'ö' => '&ouml;',   'ø' => '&oslash;', 'ù' => '&ugrave;',
        'ú' => '&uacute;', 'û' => '&ucirc;',  'ü' => '&uuml;',   'ý' => '&yacute;',
        'þ' => '&thorn;',  'ÿ' => '&yuml;',
    );

    # One alternation over all table keys. The replacements are plain ASCII,
    # so a single pass gives the same text as one substitution per character.
    my $latin1 = join '|', map { quotemeta } sort keys %entity_of;

    my $sgmlout = '';
    # Read into a lexical so that the caller's $_ is left untouched.
    while ( defined( my $line = <$FILE> ) ) {
        $line =~ s/($latin1)/$entity_of{$1}/g;
        $sgmlout .= $line;
    }

    return $sgmlout;
}
# ------------------------------------------------------------------------
=item LinuxDocTools::init
Takes care of initialization of package-global variables (which are actually
defined in L<LinuxDocTools::Vars>). The package-global variables are I<$global>,
a reference to a hash containing numerous settings, I<%Formats>, a hash
containing all the formats, and I<%FmtList>, a hash containing the currently
active formats for help texts.
Apart from this, C<LinuxDocTools::init> also finds all distributed and site-local
formatting backends and C<require>s them.
=cut
# -----------------------------------------------------------------------------------
sub init {
# -----------------------------------------------------------------------------------
# Initialise the package-global state:
#   - call trap_signals (imported from LinuxDocTools::Utils),
#   - build the ``global'' pseudoformat in $global (option table plus default
#     values) and register it in %Formats and %FmtList,
#   - switch the default paper size to "letter" when the first line of
#     /etc/papersize says so,
#   - install a fallback preNSGMLS hook in $global,
#   - locate and "require" every fmt_*.pl backend file.
# Takes no arguments. Dies when /etc/papersize exists but cannot be opened.
# -----------------------------------------------------------------------------------
    trap_signals;

    # Register the ``global'' pseudoformat. Apart from the global settings, we
    # also use $global to keep the global variable name space clean; everything
    # that we need to provide to other modules is stuffed into $global.
    $global = {};
    $global->{NAME} = "global";
    $global->{HELP} = "";
    $global->{OPTIONS} = [
        { option => "backend",
          type => "l",
          'values' => [ "html", "info", "latex", "lyx", "rtf", "txt", "check" ],
          short => "B" },
        { option => "papersize",
          type => "l",
          'values' => [ "a4", "letter" ],
          short => "p" },
        { option => "language",
          type => "l",
          'values' => [ @LinuxDocTools::Lang::Languages ],
          short => "l" },
        { option => "charset", type => "l",
          'values' => [ "latin", "ascii", "nippon", "euc-kr" ], short => "c" },
        { option => "style", type => "s", short => "S" },
        { option => "tabsize", type => "i", short => "t" },
        # { option => "verbose", type => "f", short => "v" },
        { option => "debug", type => "f", short => "d" },
        { option => "define", type => "s", short => "D" },
        { option => "include", type => "s", short => "i" },
        { option => "pass", type => "s", short => "P" }
    ];
    $global->{backend} = "linuxdoc";
    $global->{papersize} = "a4";
    $global->{language} = "en";
    $global->{charset} = "ascii";
    $global->{style} = "";
    $global->{tabsize} = 8;
    $global->{verbose} = 0;
    $global->{define} = "";
    $global->{debug} = 0;
    $global->{include} = "";
    $global->{pass} = "";
    $global->{InFiles} = [];
    $global->{fmtlist} = "";                  # List of loaded fmt files
    $Formats{$global->{NAME}} = $global;      # All formats we know.
    $FmtList{$global->{NAME}} = $global;      # List of formats for help msgs.
    $global->{sgmlpre} = "$main::AuxBinDir/sgmlpre";

    my $error_header = "LinuxdocTools::init";

    # Only the first line of /etc/papersize is looked at.
    if ( -e "/etc/papersize" ){
        open (my $PAPERSIZE, '<', "/etc/papersize")
            or die "$error_header: Could not open \"/etc/papersize\" for reading\n";
        my $paper = <$PAPERSIZE>;
        close $PAPERSIZE;
        # An empty file yields undef; keep the default paper size then.
        if ( defined $paper ) {
            chomp $paper;
            $global->{papersize} = "letter" if ( $paper eq "letter");
        }
    }

    # automatic language detection: disabled by default
    # {
    #    my $lang;
    #    foreach $lang (@LinuxDocTools::Lang::Languages)
    #    {
    #      if (($ENV{"LC_ALL"} =~ /^$lang/i) ||
    #          ($ENV{"LC_CTYPE"} =~ /^$lang/i) ||
    #          ($ENV{"LANG"} =~ /^$lang/i)) {
    #        $global->{language} = Any2ISO($lang);
    #      }
    #    }
    # }

    # --------------------------------------------------------------------------------
    $global->{preNSGMLS} = sub {
        # ------------------------------------------------------------------------------
        # Define a fallback preNSGMLS. Used when the format is "global" (from sgmlcheck).
        # The value of the last assignment is also the return value of the hook.
        # ------------------------------------------------------------------------------
        $global->{NsgmlsOpts} .= " -s ";
        $global->{NsgmlsPrePipe} = "cat $global->{file}";
    };

    # We need to load all fmt files here, so the allowed options for all
    # format are put into $global and a complete usage message is built,
    # including options for all formats.
    # The first directory providing fmt_<name>.pl wins, so site overrides
    # dist, which overrides fmt.
    my %locations = ();
    foreach my $path ("$main::DataDir/site",
                      "$main::DataDir/dist",
                      "$main::DataDir/fmt"){
        foreach my $location (<$path/fmt_*.pl>){
            my $fmt = $location;
            $fmt =~ s/^.*_//;
            $fmt =~ s/\.pl$//;
            $locations{$fmt} = $location unless defined $locations{$fmt};
        }
    }

    # Sorted, so that the load order (and the debug listing kept in
    # $global->{fmtlist}) does not depend on hash ordering.
    foreach my $fmt ( sort keys %locations ){
        $global->{fmtlist} .= " Loading $locations{$fmt}\n";
        require $locations{$fmt};
    }
}
# ------------------------------------------------------------------------
=item LinuxDocTools::process_options ($0, @ARGV)
This function contains all initialization that is bound to the current
invocation of LinuxDocTools. It looks in C<$0> to deduce the backend that
should be used (ld2txt activates the I<txt> backend) and parses the
options array. It returns an array of filenames it encountered during
option processing.
As a side effect, the environment variable I<SGML_CATALOG_FILES> is
modified and, once I<$global->{format}> is known, I<SGMLDECL> is set.
=cut
# ------------------------------------------------------------------------
sub process_options {
# ------------------------------------------------------------------------
# Work out the backend format, parse the remaining options and set up the
# SGML environment.
#
# Arguments: the program name ($0) followed by the command line arguments.
# Returns:   the list of file names found on the command line.
# Side effects: sets $global->{format} and $global->{language}, registers the
#            chosen format in %FmtList, and sets the environment variables
#            SGML_CATALOG_FILES and SGMLDECL.
# usage() is called for a missing or unknown format and when no file names
# are given; it is expected not to return -- TODO confirm in
# LinuxDocTools::Utils.
# ------------------------------------------------------------------------
    my $progname = shift;
    my @tmpargs = @_;
    my @args = ();
    my $format = '';

    # Try getting the format. We need to do this here so process_options
    # knows which is the format and which format options are allowed

    # First, see if we have an explicit backend option by looping over command line.
    # Do not shift in the while condition itself, 0 in options like '-s 0' will
    # otherwise stop looping
    while ( @tmpargs ){
        # A lexical, so that the global $_ is not overwritten.
        my $arg = shift @tmpargs;
        # Anchored: only an argument that starts with --backend= is the
        # backend option; the text may legitimately occur inside other
        # arguments (a file name, for instance).
        if ( $arg =~ s/^--backend=// ){
            $format = $arg;
        } elsif ( $arg eq "-B" ){
            $format = shift @tmpargs;
        } else {
            push @args, $arg;
        }
    }

    # No explicit backend: deduce it from the name the program was called by.
    unless ( $format ){
        my ($tmpfmt) = fileparse($progname, "");
        if ( $tmpfmt =~ s/^sgml2// ) {        # Calling program through sgml2xx symlinks
            $format = $tmpfmt;
        } elsif ( $tmpfmt eq "sgmlcheck" ) {  # Calling program through sgmlcheck symlink
            $format = "global";
        }
    }

    if ( $format ) {
        # Map user-visible backend names to the internal format names.
        if ( $format eq "check" ){
            $format = "global";
        } elsif ( $format eq "latex" ){
            $format = "latex2e";
        }
        $FmtList{$format} = $Formats{$format} or
            usage("$format: Unknown format");
        $global->{format} = $format;
    } else {
        usage("");
    }

    # Parse all the options from @args, and return files.
    my @files = LinuxDocTools::Utils::process_options(@args);

    # Check the number of given files
    @files or usage("No filenames given");

    # Normalize language string
    $global->{language} = Any2ISO($global->{language});

    # Setup the SGML environment.
    my @sgmlcatalogs =
        (# SGML iso-entities catalog location in Debian sgml-data package
         "$main::isoentities_prefix/share/sgml/entities/sgml-iso-entities-8879.1986/catalog",
         # SGML iso-entities catalog location in ArchLinux, Fedora and Gentoo
         "$main::isoentities_prefix/share/sgml/sgml-iso-entities-8879.1986/catalog",
         # SGML iso-entities catalog location when installed from linuxdoc-tools
         "$main::isoentities_prefix/share/sgml/iso-entities-8879.1986/iso-entities.cat",
         # dtd/catalog for SGML-Tools
         "$main::DataDir/linuxdoc-tools.catalog",
         # The super catalog
         "/etc/sgml/catalog");

    # Catalogs already named in the environment keep priority.
    @sgmlcatalogs = ($ENV{SGML_CATALOG_FILES}, @sgmlcatalogs) if defined $ENV{SGML_CATALOG_FILES};
    $ENV{SGML_CATALOG_FILES} = join(':', @sgmlcatalogs);

    # Set to one of these if readable, nil otherwise
    $ENV{SGMLDECL} = ldt_searchfile(["$main::DataDir/dtd/$global->{format}.dcl",
                                     "$main::DataDir/dtd/$global->{style}.dcl",
                                     "$main::DataDir/dtd/sgml.dcl"]);

    # Show the list of loaded fmt_*.pl files if debugging
    print STDERR $global->{fmtlist} if $global->{debug};

    # Return the list of files to be processed
    return @files;
}
# ------------------------------------------------------------------------
=item LinuxDocTools::process_file
With all the configuration done, this routine will take a single filename
and convert it to the currently active backend format. The conversion is
done in a number of steps in tight interaction with the currently active
backend (see also L<LinuxDocTools::BackEnd>):
=over
=item 1. Backend: set NSGMLS options and optionally create a pre-NSGMLS pipe.
=item 2. Here: Run the preprocessor to handle conditionals.
=item 3. Here: Run NSGMLS.
=item 4. Backend: run pre-ASP conversion.
=item 5. Here: Run SGMLSASP.
=item 6. Backend: run post-ASP conversion, generating the output.
=back
All stages are influenced by command-line settings, currently active format,
etcetera. See the code for details.
=cut
# ------------------------------------------------------------------------
sub process_file {
# ------------------------------------------------------------------------
# Convert one SGML file with the currently active format.
#
# Arguments: the name of the file to process (with or without the .sgml
#            suffix).
# Stages:    preprocessor (sgmlpre) -> nsgmls -> preASP hook -> sgmlsasp and
#            expand -> postASP hook. The intermediate results are kept in
#            files inside a freshly created temporary directory.
# Side effects: fills several $global entries (origfile, filename, filepath,
#            file, tmpbase, dtd, NsgmlsOpts, NsgmlsPrePipe, ...), appends to
#            the environment variable SGML_SEARCH_PATH and runs with umask
#            0077 until the postASP stage.
# Dies with a message whenever a stage fails.
# ------------------------------------------------------------------------
    my $file = $global->{origfile} = shift (@_);
    my $saved_umask = umask;
    my $error_header = "LinuxdocTools::process_file";

    print "Processing file $file\n";
    umask 0077;

    # Single quotes, so the backslash reaches fileparse and the suffix
    # pattern matches a literal ".sgml" (in double quotes "\.sgml" is just
    # ".sgml", where the dot matches any character).
    my ($filename, $filepath) = fileparse($file, '\.sgml');
    $global->{filename} = $filename;
    $global->{filepath} = $filepath;
    $global->{file} = ldt_searchfile(["$filepath/$filename.sgml",
                                      "$filepath/$filename.SGML"])
        or die "$error_header: Cannot find $file. Aborting ...\n";

    my $dtd = ldt_getdtd_v1("$global->{file}");
    print STDERR "DTD: " . (defined $dtd ? $dtd : '') . "\n" if $global->{debug};

    # Prepare temporary directory
    my $tmpdir = $ENV{'TMPDIR'} || '/tmp';
    $tmpdir = tempdir("linuxdoc-tools.XXXXXXXXXX", DIR => "$tmpdir");

    # Set common base name for temp files and temp file names
    my $tmpbase = $global->{tmpbase} = $tmpdir . '/sgmltmp.' . $filename;
    my $precmdout = "$tmpbase.01.precmdout";
    my $nsgmlsout = "$tmpbase.02.nsgmlsout";    # Was $tmpbase.1
    my $preaspout = "$tmpbase.03.preaspout";    # Was $tmpbase.2
    my $aspout = "$tmpbase.04.aspout";          # Was $tmpbase.3

    # Set up the preprocessing command. Conditionals have to be
    # handled here until they can be moved into the DTD, otherwise
    # a validating SGML parser will choke on them.

    # Check if output option for latex is pdf or not
    if ($global->{format} eq "latex2e") {
        if ($Formats{$global->{format}}{output} eq "pdf") {
            $global->{define} .= " pdflatex=yes";
        }
    }

    # Set the actual pre-processing command
    my $precmd = "$global->{sgmlpre} output=$global->{format} $global->{define}";

    # Make sure path of file to be processed is in SGML_SEARCH_PATH
    $ENV{"SGML_SEARCH_PATH"} .= ":$filepath";

    # You can hack $NsgmlsOpts here, etcetera.
    # Every fragment starts with a blank: the options are concatenated into
    # one string, and without the separator "-i<include>" would be glued to
    # the directory given to the preceding -D.
    $global->{NsgmlsOpts} .= " -D $main::prefix/share/sgml -D $main::DataDir";
    $global->{NsgmlsOpts} .= " -i$global->{include}" if ($global->{include});

    # If a preNSGMLS function is defined in the fmt file, pipe its output to $IFILE,
    # otherwise just open $global->{file} as $IFILE
    # ----------------------------------------------------------------------------
    my $IFILE;
    if ( defined $Formats{$global->{format}}{preNSGMLS} ) {
        # Called with an explicit empty argument list; the return value is
        # the shell command whose output is to be read.
        $global->{NsgmlsPrePipe} = $Formats{$global->{format}}{preNSGMLS}->();
        open ($IFILE, '-|', $global->{NsgmlsPrePipe})
            or die "$error_header: Could not open pipe from $global->{NsgmlsPrePipe}. Aborting ...\n";
    } else {
        open ($IFILE, '<', $global->{file})
            or die "$error_header: Could not open $global->{file} for reading. Aborting ...\n";
    }

    # Create a temp file with $precmd output
    my $precmd_command = "$precmd > $precmdout";
    open (my $PRECMDOUT, '|-', $precmd_command)
        or die "$error_header: Could not open pipe to $precmdout. Aborting ...\n";
    if ($global->{charset} eq "latin") {
        print $PRECMDOUT ldt_latin1tosgml($IFILE);
    } else {
        copy($IFILE,$PRECMDOUT);
    }
    close $IFILE;
    close $PRECMDOUT;

    # Process with nsgmls.
    my $nsgmls_command = "$main::progs->{NSGMLS} $global->{NsgmlsOpts} $ENV{SGMLDECL} $precmdout > $nsgmlsout";
    system($nsgmls_command) == 0
        or die "Error: \"$nsgmls_command\" failed with exit status: ",$? >> 8,"\n";

    # Special case: if format is global, we're just checking.
    # NOTE(review): cleanup (from LinuxDocTools::Utils) presumably does not
    # return -- confirm.
    cleanup if ( $global->{format} eq "global");

    # If output file does not exists or is empty, something went wrong.
    if ( ! -e "$nsgmlsout" ) {
        die "$error_header: Can't create file $nsgmlsout. Aborting ...\n";
    } elsif ( -z "$nsgmlsout" ){
        die "$error_header: $nsgmlsout empty, SGML parsing error. Aborting ...\n";
    }

    print "- Nsgmls stage finished.\n" if $global->{debug};

    # If a preASP stage is defined, let the format handle it.
    # --------------------------------------------------------
    open (my $PREASP_IN, '<', $nsgmlsout)
        or die "$error_header: Could not open $nsgmlsout for reading. Aborting ...\n";
    open (my $PREASP_OUT, '>', $preaspout)
        or die "$error_header: Could not open $preaspout for writing. Aborting ...\n";

    if (defined $Formats{$global->{format}}{preASP}) {
        # Usage: preASP ($INHANDLE, $OUTHANDLE); a return value of 0 means success.
        $Formats{$global->{format}}{preASP}->($PREASP_IN, $PREASP_OUT) == 0
            or die "$error_header: Error pre-processing $global->{format}.\n";
    } else {
        copy ($PREASP_IN, $PREASP_OUT);
    }

    close $PREASP_IN;
    close $PREASP_OUT;

    die "$error_header: Can't create $preaspout file. Aborting ...\n"
        unless -e "$preaspout";

    print "- PreASP stage finished.\n" if ( $global->{debug} );

    # Run sgmlsasp, with an optional style if specified.
    # -----------------------------------------------------------
    my $dtd2 = ldt_getdtd_v2($preaspout)
        or die "$error_header: Could not read dtd from $preaspout. Aborting ...\n";

    # The dtd found by nsgmls wins over the one read from the doctype line;
    # the latter may be undefined when the source has no doctype line.
    unless ( defined $dtd && $dtd eq $dtd2 ){
        my $dtd1 = defined $dtd ? $dtd : '';
        print STDERR "Warning: Two different values for dtd, dtd1: $dtd1, dtd2: $dtd2\n";
        $dtd = $dtd2;
    }

    $global->{'dtd'} = $dtd;

    # Search order:
    # - datadir/site/<dtd>/<format>
    # - datadir/dist/<dtd>/<format>
    # - datadir/mappings/<format>
    my $style = ($global->{style}) ?
        ldt_searchfile(["$main::DataDir/site/$dtd/$global->{format}/$global->{style}mapping",
                        "$main::DataDir/dist/$dtd/$global->{format}/$global->{style}mapping",
                        "$main::DataDir/mappings/$global->{format}/$global->{style}mapping"])
        :
        '';

    my $mapping = ldt_searchfile(["$main::DataDir/site/$dtd/$global->{format}/mapping",
                                  "$main::DataDir/dist/$dtd/$global->{format}/mapping",
                                  "$main::DataDir/mappings/$global->{format}/mapping"])
        or die "$error_header: Could not find mapping file for $dtd/$global->{format}. Aborting ...\n";

    # A style mapping, when found, is passed to sgmlsasp ahead of the default one.
    $mapping = "$style $mapping" if $style;

    $global->{charset} = "nippon" if ($global->{language} eq "ja");

    # We don't have Korean groff so charset should be latin1.
    if ($global->{language} eq "ko") {
        if ($global->{format} eq "groff") {
            $global->{charset} = "latin1";
        } else {
            $global->{charset} = "euc-kr";
        }
    }

    if ($global->{format} eq "groff"){
        if ($dtd eq "linuxdoctr") {
            $mapping = "$main::DataDir/mappings/$global->{format}/tr-mapping";
        }
    }

    my $sgmlsasp_command = "$main::progs->{SGMLSASP} $mapping < $preaspout |
      expand -t $global->{tabsize} > $aspout";
    system ($sgmlsasp_command) == 0
        or die "$error_header: Error running $sgmlsasp_command. Aborting ...\n";

    die "$error_header: Can't create $aspout file. Aborting ...\n"
        unless -e "$aspout";

    print "- ASP stage finished.\n" if ( $global->{debug} );

    # If a postASP stage is defined, let the format handle it.
    # ----------------------------------------------------------------
    # The restrictive umask only applied to the temporary files; whatever
    # postASP writes is created with the umask that was in effect on entry.
    umask $saved_umask;

    open (my $INPOSTASP, '<', $aspout)
        or die "$error_header: Could not open $aspout for reading. Aborting ...\n";
    if (defined $Formats{$global->{format}}{postASP}) {
        # Usage: postASP ($INHANDLE)
        # Should leave whatever it thinks is right based on $INHANDLE.
        $Formats{$global->{format}}{postASP}->($INPOSTASP) == 0
            or die "$error_header: Error post-processing $global->{format}. Aborting ...\n";
    }
    close $INPOSTASP;

    print "- postASP stage finished.\n" if ( $global->{debug} );

    # All done, remove the temporaries.
    remove_tmpfiles($tmpbase) unless ( $global->{debug} );
}
=pod
=back
=head1 SEE ALSO
Documentation for various sub-packages of LinuxDocTools.
=head1 AUTHOR
SGMLTools are written by Cees de Groot, C<E<lt>cg@cdegroot.comE<gt>>,
and various SGML-Tools contributors as listed in C<CONTRIBUTORS>.
Taketoshi Sano C<E<lt>sano@debian.orgE<gt>> renamed it to LinuxDocTools.
=cut
1;