177 lines
4.1 KiB
Perl
177 lines
4.1 KiB
Perl
|
#
|
||
|
# CharEnts.pm
|
||
|
#
|
||
|
# $Id: CharEnts.pm,v 1.1.1.1 2001/05/24 15:57:41 sano Exp $
|
||
|
#
|
||
|
# SGML Character Entity utilities -- interface to Perl module
|
||
|
# Text::EntityMap.
|
||
|
#
|
||
|
package LinuxDocTools::CharEnts;
|
||
|
use strict;
|
||
|
|
||
|
=head1 NAME
|
||
|
|
||
|
LinuxDocTools::CharEnts - Interface to Text::EntityMap
|
||
|
|
||
|
=head1 SYNOPSIS
|
||
|
|
||
|
my $char_maps = load_char_maps ('.2ext', [ Text::EntityMap::sdata_dirs() ]);
|
||
|
|
||
|
$value = parse_data ($value, $char_maps, $escape_sub);
|
||
|
|
||
|
=head1 DESCRIPTION
|
||
|
|
||
|
This module provides a simple interface to the entity map handling provided by
|
||
|
B<Text::EntityMap>.
|
||
|
|
||
|
=head1 FUNCTIONS
|
||
|
|
||
|
=over 4
|
||
|
|
||
|
=cut
|
||
|
|
||
|
use Text::EntityMap;
|
||
|
use Exporter;
|
||
|
|
||
|
use vars qw(@ISA @EXPORT $VERSION);
|
||
|
@ISA = qw(Exporter);
|
||
|
@EXPORT = qw(load_char_maps parse_data);
|
||
|
$VERSION = sprintf("%d.%02d", q$Revision: 1.1.1.1 $ =~ /(\d+)\.(\d+)/);
|
||
|
|
||
|
# `%warn_map' tracks entities that were not able to be mapped so they
|
||
|
# are only warned once.
|
||
|
my %warn_map = ();
|
||
|
|
||
|
=item parse_data ($data, $char_map, $escape_sub)
|
||
|
|
||
|
B<parse_data> takes a string of I<$data> in the output format of
|
||
|
B<nsgmls> (see SP's C<sgmlsout.htm> document) without the leading dash.
|
||
|
B<parse_data> calls I<$char_map>'s lookup method for each sdata
|
||
|
entity reference. If the entity reference is undefined, it is
|
||
|
left alone (without the (n)sgmls C<\|>). For all remaining data,
|
||
|
B<parse_data> calls back into I<$escape_sub> to properly escape
|
||
|
characters for the backend formatter. Strings returned from the
|
||
|
lookup method are assumed to be already escaped.
|
||
|
|
||
|
This routine is derived from David Megginson's SGMLSpm.
|
||
|
|
||
|
=cut
|
||
|
|
||
|
sub parse_data {
|
||
|
my ($data, $char_map, $escape_sub) = @_;
|
||
|
my ($result) = "";
|
||
|
|
||
|
my $sdata_flag = 0;
|
||
|
my $out = '';
|
||
|
|
||
|
while ($data =~ /\\(\\|n|\||[0-7]{1,3})/) {
|
||
|
$out .= $`;
|
||
|
$data = $';
|
||
|
|
||
|
if ($1 eq '|') {
|
||
|
# beginning or end of SDATA
|
||
|
if ("$out" ne '') {
|
||
|
if ($sdata_flag) {
|
||
|
my ($mapping) = $char_map->lookup ($out);
|
||
|
if (defined $mapping) {
|
||
|
# escape `\' in mapping for ASP
|
||
|
$mapping =~ s/\\/\\\\/g;
|
||
|
$result .= $mapping;
|
||
|
} else {
|
||
|
if (!$warn_map{$out}) {
|
||
|
warn "parse_data: no entity map for \`$out'\n";
|
||
|
$warn_map{$out} = 1;
|
||
|
}
|
||
|
# output the entity reference inside of `{}'
|
||
|
$result .= &$escape_sub ("{" . $out . "}");
|
||
|
}
|
||
|
} else {
|
||
|
$result .= &$escape_sub ($out);
|
||
|
}
|
||
|
$out = '';
|
||
|
}
|
||
|
$sdata_flag = !$sdata_flag;
|
||
|
|
||
|
} elsif ($1 eq 'n') {
|
||
|
# record end
|
||
|
|
||
|
# pass '\\n' through to ASP
|
||
|
$result .= &$escape_sub ($out) . '\\n';
|
||
|
$out = '';
|
||
|
} elsif ($1 eq '\\') {
|
||
|
# backslash
|
||
|
|
||
|
$result .= &$escape_sub ($out);
|
||
|
|
||
|
$out = '[bsol ]'; # bsol == entity name for backslash
|
||
|
my ($mapping) = $char_map->lookup ($out);
|
||
|
if (defined $mapping) {
|
||
|
# escape `\' in mapping for ASP
|
||
|
$mapping =~ s/\\/\\\\/g;
|
||
|
$result .= $mapping;
|
||
|
} else {
|
||
|
if (!$warn_map{$out}) {
|
||
|
warn "parse_data: no entity map for \`$out'\n";
|
||
|
$warn_map{$out} = 1;
|
||
|
}
|
||
|
# output the entity reference inside of `{}'
|
||
|
$result .= &$escape_sub ("{" . $out . "}");
|
||
|
}
|
||
|
$out = '';
|
||
|
} else {
|
||
|
# other octal character
|
||
|
$result .= &$escape_sub ($out . chr(oct($1)));
|
||
|
$out = '';
|
||
|
}
|
||
|
}
|
||
|
$out .= $data;
|
||
|
if ("$out" ne '') {
|
||
|
$result .= &$escape_sub ($out);
|
||
|
}
|
||
|
|
||
|
return ($result);
|
||
|
}
|
||
|
|
||
|
=item load_char_maps ($format, $paths)
|
||
|
|
||
|
B<load_char_maps> takes an EntityMap format suffix and loads all of the
|
||
|
character entity replacement sets for that suffix into an EntityMapGroup.
|
||
|
It searches every directory in I<@{$path}>.
|
||
|
|
||
|
=cut
|
||
|
|
||
|
sub load_char_maps {
|
||
|
my ($format, $paths) = @_;
|
||
|
|
||
|
my (@char_maps) = ();
|
||
|
my ($path, $file_name, $char_map);
|
||
|
|
||
|
foreach $path (@{$paths}) {
|
||
|
if (-d $path) {
|
||
|
opendir (SDATADIR, $path)
|
||
|
|| die "load_char_map: opening directory \`$path' for reading: $!\n";
|
||
|
foreach $file_name (readdir (SDATADIR)) {
|
||
|
next if ($file_name !~ /$format$/);
|
||
|
eval {$char_map = Text::EntityMap->load ("$path/$file_name")}
|
||
|
|| die "load_char_map: loading \`$path/$file_name'\n$@\n";
|
||
|
push (@char_maps, $char_map);
|
||
|
}
|
||
|
closedir (SDATADIR);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
warn "load_char_maps: no entity maps found\n"
|
||
|
if ($#char_maps == -1);
|
||
|
|
||
|
return (Text::EntityMap->group (@char_maps));
|
||
|
}
|
||
|
|
||
|
=back
|
||
|
|
||
|
=head1 AUTHOR
|
||
|
|
||
|
Ken MacLeod, C<E<lt>ken@bitsko.slc.ut.usE<gt>>
|
||
|
|
||
|
=cut
|
||
|
1;
|