2000-05-31 22:27:49 +08:00
|
|
|
|
#
|
|
|
|
|
# fmt_txt.pl
|
|
|
|
|
#
|
|
|
|
|
# $Id$
|
|
|
|
|
#
|
|
|
|
|
# TXT-specific driver stuff
|
|
|
|
|
#
|
|
|
|
|
# <20> Copyright 1996, Cees de Groot
|
|
|
|
|
#
|
2003-04-07 03:35:50 +08:00
|
|
|
|
package LinuxDocTools::fmt_txt;
|
2000-05-31 22:27:49 +08:00
|
|
|
|
use strict;
|
|
|
|
|
|
|
|
|
|
use File::Copy;
|
|
|
|
|
use Text::EntityMap;
|
2003-04-07 03:35:50 +08:00
|
|
|
|
use LinuxDocTools::CharEnts;
|
|
|
|
|
use LinuxDocTools::Lang;
|
|
|
|
|
use LinuxDocTools::Vars;
|
|
|
|
|
use LinuxDocTools::Utils qw(create_temp);
|
2000-05-31 22:27:49 +08:00
|
|
|
|
|
|
|
|
|
my $txt = {};
|
|
|
|
|
$txt->{NAME} = "txt";
|
|
|
|
|
$txt->{HELP} = "";
|
|
|
|
|
$txt->{OPTIONS} = [
|
|
|
|
|
{ option => "manpage", type => "f", short => "m" },
|
2003-04-07 03:35:50 +08:00
|
|
|
|
{ option => "filter", type => "f", short => "f" },
|
|
|
|
|
{ option => "blanks", type => "i", short => "b" }
|
2000-05-31 22:27:49 +08:00
|
|
|
|
];
|
|
|
|
|
$txt->{manpage} = 0;
|
|
|
|
|
$txt->{filter} = 0;
|
2003-04-07 03:35:50 +08:00
|
|
|
|
$txt->{blanks} = 3;
|
2000-05-31 22:27:49 +08:00
|
|
|
|
|
|
|
|
|
$Formats{$txt->{NAME}} = $txt;
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Set correct NsgmlsOpts
|
|
|
|
|
#
|
|
|
|
|
$txt->{preNSGMLS} = sub
|
|
|
|
|
{
|
|
|
|
|
if ($txt->{manpage})
|
|
|
|
|
{
|
|
|
|
|
$global->{NsgmlsOpts} .= " -iman ";
|
|
|
|
|
$global->{charset} = "man";
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
$global->{NsgmlsOpts} .= " -ifmttxt ";
|
|
|
|
|
$global->{charset} = "latin1" if $global->{charset} eq "latin";
|
|
|
|
|
}
|
|
|
|
|
|
2003-04-07 03:35:50 +08:00
|
|
|
|
|
2000-05-31 22:27:49 +08:00
|
|
|
|
#
|
|
|
|
|
# Is there a cleaner solution than this? Can't do it earlier,
|
|
|
|
|
# would show up in the help messages...
|
|
|
|
|
#
|
2003-04-07 03:35:50 +08:00
|
|
|
|
# the language support ja.
|
|
|
|
|
# the charset support nippon.
|
|
|
|
|
#
|
2000-05-31 22:27:49 +08:00
|
|
|
|
$global->{format} = $global->{charset};
|
2003-04-07 03:35:50 +08:00
|
|
|
|
$global->{charset} = "nippon" if $global->{language} eq "ja";
|
2000-05-31 22:27:49 +08:00
|
|
|
|
$global->{format} = "groff" if $global->{format} eq "ascii";
|
2003-04-07 03:35:50 +08:00
|
|
|
|
$global->{format} = "groff" if $global->{format} eq "nippon";
|
|
|
|
|
$global->{format} = "groff" if $global->{format} eq "euc-kr";
|
2000-05-31 22:27:49 +08:00
|
|
|
|
$ENV{SGML_SEARCH_PATH} =~ s/txt/$global->{format}/;
|
|
|
|
|
|
|
|
|
|
$Formats{"groff"} = $txt;
|
|
|
|
|
$Formats{"latin1"} = $txt;
|
|
|
|
|
$Formats{"man"} = $txt;
|
|
|
|
|
|
2003-04-07 03:35:50 +08:00
|
|
|
|
$global->{NsgmlsPrePipe} = "cat $global->{file} " ;
|
2000-05-31 22:27:49 +08:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Ascii escape sub. this is called-back by `parse_data' below in
|
|
|
|
|
# `txt_preASP' to properly escape `\' characters coming from the SGML
|
|
|
|
|
# source.
|
|
|
|
|
my $txt_escape = sub {
|
|
|
|
|
my ($data) = @_;
|
|
|
|
|
|
|
|
|
|
$data =~ s|"|\\\&\"|g; # Insert zero-width space in front of "
|
|
|
|
|
$data =~ s|^\.|\\&.|; # ditto in front of . at start of line
|
|
|
|
|
$data =~ s|\\|\\\\|g; # Escape backslashes
|
|
|
|
|
|
|
|
|
|
return ($data);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Run the file through the genertoc utility before sgmlsasp. Not necessary
|
|
|
|
|
# when producing a manpage. A lot of code from FJM, untested by me.
|
|
|
|
|
#
|
|
|
|
|
$txt->{preASP} = sub
|
|
|
|
|
{
|
|
|
|
|
my ($infile, $outfile) = @_;
|
|
|
|
|
my (@toc, @lines);
|
2003-04-07 03:35:50 +08:00
|
|
|
|
my $char_maps = load_char_maps ('.2tr', [ Text::EntityMap::sdata_dirs() ]);
|
|
|
|
|
if ( $global->{charset} eq "latin1" )
|
|
|
|
|
{
|
|
|
|
|
$char_maps = load_char_maps ('.2l1tr', [ Text::EntityMap::sdata_dirs() ]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ($txt->{manpage})
|
2000-05-31 22:27:49 +08:00
|
|
|
|
{
|
2003-04-07 03:35:50 +08:00
|
|
|
|
while (<$infile>)
|
|
|
|
|
{
|
|
|
|
|
if (/^-/)
|
|
|
|
|
{
|
|
|
|
|
my ($str) = $';
|
|
|
|
|
chop ($str);
|
|
|
|
|
print $outfile "-" .
|
|
|
|
|
parse_data ($str, $char_maps, $txt_escape) . "\n";
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
elsif (/^A/)
|
|
|
|
|
{
|
|
|
|
|
/^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/
|
|
|
|
|
|| die "bad attribute data: $_\n";
|
|
|
|
|
my ($name,$type,$value) = ($1,$2,$4);
|
|
|
|
|
if ($type eq "CDATA")
|
|
|
|
|
{
|
|
|
|
|
# CDATA attributes get translated also
|
|
|
|
|
$value = parse_data ($value, $char_maps, $txt_escape);
|
|
|
|
|
}
|
|
|
|
|
print $outfile "A$name $type $value\n";
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
#
|
|
|
|
|
# Default action if not skipped over with next: copy in to out.
|
|
|
|
|
#
|
|
|
|
|
print $outfile $_;
|
|
|
|
|
}
|
|
|
|
|
|
2000-05-31 22:27:49 +08:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# note the conversion of `sdata_dirs' list to an anonymous array to
|
|
|
|
|
# make a single argument
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Build TOC. The file is read into @lines in the meantime, we need to
|
|
|
|
|
# traverse it twice.
|
|
|
|
|
#
|
|
|
|
|
push (@toc, "(HLINE\n");
|
|
|
|
|
push (@toc, ")HLINE\n");
|
|
|
|
|
push (@toc, "(P\n");
|
|
|
|
|
push (@toc, "-" . Xlat ("Table of Contents") . "\n");
|
|
|
|
|
push (@toc, ")P\n");
|
|
|
|
|
push (@toc, "(VERB\n");
|
|
|
|
|
my (@prevheader, @header);
|
2003-04-07 03:35:50 +08:00
|
|
|
|
my $appendix = 0;
|
|
|
|
|
my $nonprint = 0;
|
2000-05-31 22:27:49 +08:00
|
|
|
|
while (<$infile>)
|
|
|
|
|
{
|
|
|
|
|
push (@lines, $_);
|
|
|
|
|
|
|
|
|
|
if (/^\(SECT(.*)/)
|
|
|
|
|
{
|
|
|
|
|
@prevheader = @header;
|
|
|
|
|
@header = @header[0..$1];
|
2003-04-07 03:35:50 +08:00
|
|
|
|
if ($appendix == 1)
|
|
|
|
|
{
|
|
|
|
|
$header[$1] = "A";
|
|
|
|
|
$appendix = 0;
|
|
|
|
|
} else
|
|
|
|
|
{
|
|
|
|
|
$header[$1]++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (/^\(APPEND(.*)/)
|
|
|
|
|
{
|
|
|
|
|
$appendix = 1;
|
2000-05-31 22:27:49 +08:00
|
|
|
|
}
|
|
|
|
|
if (/^\(HEADING/)
|
|
|
|
|
{
|
|
|
|
|
$_ = <$infile>;
|
2003-04-07 03:35:50 +08:00
|
|
|
|
s/\\n/ /g;
|
2000-05-31 22:27:49 +08:00
|
|
|
|
push (@lines, $_);
|
|
|
|
|
chop;
|
|
|
|
|
s/^-//;
|
|
|
|
|
$_ = join(".",@header) . " " . $_;
|
|
|
|
|
s/\(\\[0-9][0-9][0-9]\)/\\\1/g;
|
|
|
|
|
|
|
|
|
|
if (!$#header)
|
|
|
|
|
{
|
|
|
|
|
# put a newline before top-level sections unless previous was also
|
|
|
|
|
# a top level section
|
|
|
|
|
$_ = "\\n" . $_ unless (!$#prevheader);
|
|
|
|
|
# put a . and a space after top level sections
|
|
|
|
|
s/ /. /;
|
2003-04-07 03:35:50 +08:00
|
|
|
|
##### $_ = "-" . $_ . "\\n";
|
|
|
|
|
$_ = "-" . $_;
|
2000-05-31 22:27:49 +08:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
# subsections get indentation matching hierarchy
|
|
|
|
|
$_ = "-" . " " x $#header . $_;
|
|
|
|
|
}
|
2003-04-07 03:35:50 +08:00
|
|
|
|
|
|
|
|
|
# remove tags from a toc
|
|
|
|
|
s/\)TT//g;
|
|
|
|
|
s/\(TT//g;
|
|
|
|
|
s/\)IT//g;
|
|
|
|
|
s/\(IT//g;
|
|
|
|
|
s/\)EM//g;
|
|
|
|
|
s/\(EM//g;
|
|
|
|
|
s/\)BF//g;
|
|
|
|
|
s/\(BF//g;
|
|
|
|
|
s/AID * CDATA.*$//g;
|
|
|
|
|
s/\)LABEL//g;
|
|
|
|
|
s/\(LABEL//g;
|
|
|
|
|
|
|
|
|
|
push(@toc, parse_data ($_, $char_maps, $txt_escape));
|
|
|
|
|
|
|
|
|
|
$_ = <$infile>;
|
|
|
|
|
while (!/^\)HEADING/) {
|
|
|
|
|
s/\\n/ /g; ####
|
|
|
|
|
push(@lines, $_);
|
|
|
|
|
chop;
|
|
|
|
|
s/^-//;
|
|
|
|
|
|
|
|
|
|
# remove tags from a toc
|
|
|
|
|
s/\)TT//g;
|
|
|
|
|
s/\(TT//g;
|
|
|
|
|
s/\)IT//g;
|
|
|
|
|
s/\(IT//g;
|
|
|
|
|
s/\)EM//g;
|
|
|
|
|
s/\(EM//g;
|
|
|
|
|
s/\)BF//g;
|
|
|
|
|
s/\(BF//g;
|
|
|
|
|
s/AID * CDATA.*$//g;
|
|
|
|
|
s/\)LABEL//g;
|
|
|
|
|
s/\(LABEL//g;
|
|
|
|
|
|
|
|
|
|
# remove NIDX, NCDX from a toc entry
|
|
|
|
|
if (/^\(NIDX$/ || /^\(NCDX$/) { $nonprint = 1; }
|
|
|
|
|
if (/^\)NIDX$/ || /^\)NCDX$/) { $nonprint = 1; }
|
|
|
|
|
|
|
|
|
|
# $_ = "-" . $_ . "\\n";
|
|
|
|
|
push(@toc, parse_data ($_, $char_maps, $txt_escape))
|
|
|
|
|
if (! $nonprint);
|
|
|
|
|
$_ = <$infile>;
|
|
|
|
|
}
|
|
|
|
|
s/\\n/ /g; ###
|
|
|
|
|
push(@lines, $_);
|
|
|
|
|
push(@toc, "\\n\n");
|
|
|
|
|
}
|
2000-05-31 22:27:49 +08:00
|
|
|
|
}
|
|
|
|
|
push (@toc, ")VERB\n");
|
|
|
|
|
push (@toc, "(HLINE\n");
|
|
|
|
|
push (@toc, ")HLINE\n");
|
|
|
|
|
|
|
|
|
|
my $inheading = 0;
|
|
|
|
|
my $tipo = '';
|
|
|
|
|
for (@lines)
|
|
|
|
|
{
|
|
|
|
|
if ($inheading)
|
|
|
|
|
{
|
|
|
|
|
next if (/^\)TT/ || /^\(TT/ || /^\)IT/ || /^\(IT/ ||
|
|
|
|
|
/^\)EM/ || /^\(EM/ || /^\)BF/ || /^\(BF/);
|
|
|
|
|
if (/^-/)
|
|
|
|
|
{
|
|
|
|
|
$tipo .= $' ;
|
|
|
|
|
chop ($tipo);
|
|
|
|
|
$tipo .= " " unless $tipo =~ / $/;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
$tipo =~ s/ $//;
|
|
|
|
|
if ($tipo)
|
|
|
|
|
{
|
|
|
|
|
print $outfile "-"
|
|
|
|
|
. parse_data ($tipo, $char_maps, $txt_escape)
|
|
|
|
|
. "\n";
|
|
|
|
|
}
|
|
|
|
|
print $outfile $_;
|
|
|
|
|
$tipo = '';
|
|
|
|
|
}
|
|
|
|
|
if (/^\)HEADING/)
|
|
|
|
|
{
|
|
|
|
|
$inheading = 0;
|
|
|
|
|
}
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
if (/^\(HEADING/)
|
|
|
|
|
{
|
|
|
|
|
#
|
|
|
|
|
# Go into heading processing mode.
|
|
|
|
|
#
|
|
|
|
|
$tipo = '';
|
|
|
|
|
$inheading = 1;
|
|
|
|
|
}
|
|
|
|
|
if (/^\(TOC/)
|
|
|
|
|
{
|
|
|
|
|
print $outfile @toc;
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
if (/^-/)
|
|
|
|
|
{
|
|
|
|
|
my ($str) = $';
|
|
|
|
|
chop ($str);
|
|
|
|
|
print $outfile "-" . parse_data ($str, $char_maps, $txt_escape) . "\n";
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
elsif (/^A/)
|
|
|
|
|
{
|
|
|
|
|
/^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/
|
|
|
|
|
|| die "bad attribute data: $_\n";
|
|
|
|
|
my ($name,$type,$value) = ($1,$2,$4);
|
|
|
|
|
if ($type eq "CDATA")
|
|
|
|
|
{
|
|
|
|
|
# CDATA attributes get translated also
|
|
|
|
|
$value = parse_data ($value, $char_maps, $txt_escape);
|
|
|
|
|
}
|
|
|
|
|
print $outfile "A$name $type $value\n";
|
|
|
|
|
next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Default action if not skipped over with next: copy in to out.
|
|
|
|
|
#
|
|
|
|
|
print $outfile $_;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Take the sgmlsasp output, and make something
|
|
|
|
|
# useful from it.
|
|
|
|
|
#
|
|
|
|
|
$txt->{postASP} = sub
|
|
|
|
|
{
|
|
|
|
|
my $infile = shift;
|
|
|
|
|
my ($outfile, $groffout);
|
|
|
|
|
|
|
|
|
|
if ($txt->{manpage})
|
|
|
|
|
{
|
|
|
|
|
$outfile = new FileHandle ">$global->{filename}.man";
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2003-04-07 03:35:50 +08:00
|
|
|
|
create_temp("$global->{tmpbase}.txt.1");
|
2000-05-31 22:27:49 +08:00
|
|
|
|
$outfile = new FileHandle
|
2003-04-07 03:35:50 +08:00
|
|
|
|
"|$main::progs->{GROFF} $global->{pass} -T $global->{charset} -t $main::progs->{GROFFMACRO} >\"$global->{tmpbase}.txt.1\"";
|
2000-05-31 22:27:49 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Feed $outfile with roff input.
|
|
|
|
|
#
|
|
|
|
|
while (<$infile>)
|
|
|
|
|
{
|
|
|
|
|
unless (/^\.DS/.../^\.DE/)
|
|
|
|
|
{
|
|
|
|
|
s/^[ \t]{1,}(.*)/$1/g;
|
|
|
|
|
}
|
|
|
|
|
s/^\.[ \t].*/\\\&$&/g;
|
|
|
|
|
s/\\fC/\\fR/g;
|
|
|
|
|
s/^.ft C/.ft R/g;
|
|
|
|
|
print $outfile $_;
|
|
|
|
|
}
|
|
|
|
|
$outfile->close;
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# If we were making a manpage, we're done. Otherwise, a little bit
|
|
|
|
|
# of work is left.
|
|
|
|
|
#
|
|
|
|
|
if ($txt->{manpage})
|
|
|
|
|
{
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
$outfile->open (">$global->{filename}.txt");
|
|
|
|
|
$groffout = new FileHandle "<$global->{tmpbase}.txt.1";
|
2003-04-07 03:35:50 +08:00
|
|
|
|
my $count = 0;
|
2000-05-31 22:27:49 +08:00
|
|
|
|
if ($txt->{filter})
|
|
|
|
|
{
|
|
|
|
|
while (<$groffout>)
|
|
|
|
|
{
|
2003-04-07 03:35:50 +08:00
|
|
|
|
s/[^\cH][^\cH]\cH\cH//g;
|
2000-05-31 22:27:49 +08:00
|
|
|
|
s/.//g;
|
2003-04-07 03:35:50 +08:00
|
|
|
|
if ($txt->{blanks})
|
|
|
|
|
{
|
|
|
|
|
$count = &{$txt->{cutblank}}($count, $outfile, $_);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
print $outfile $_;
|
|
|
|
|
}
|
2000-05-31 22:27:49 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2003-04-07 03:35:50 +08:00
|
|
|
|
if ($txt->{blanks})
|
|
|
|
|
{
|
|
|
|
|
while (<$groffout>)
|
|
|
|
|
{
|
|
|
|
|
$count = &{$txt->{cutblank}}($count, $outfile, $_);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
copy ($groffout, $outfile);
|
|
|
|
|
}
|
2000-05-31 22:27:49 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
$groffout->close;
|
|
|
|
|
$outfile->close;
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
};
|
|
|
|
|
|
2003-04-07 03:35:50 +08:00
|
|
|
|
$txt->{cutblank} = sub
|
|
|
|
|
{
|
|
|
|
|
my ($num, $out, $in) = @_;
|
|
|
|
|
if ( $in =~ /^$/ )
|
|
|
|
|
{
|
|
|
|
|
$num++;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
$num = 0;
|
|
|
|
|
}
|
|
|
|
|
if ( $num <= $txt->{blanks} )
|
|
|
|
|
{
|
|
|
|
|
print $out $in;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ($num);
|
|
|
|
|
};
|
|
|
|
|
|
2000-05-31 22:27:49 +08:00
|
|
|
|
1;
|