########################################################################
# SGMLSPL script to convert from the DocBook DTD to HTML pages.
#
# by David Megginson
#
# This is a slightly more complicated script than tolatex.pl, since it
# uses forward references and an external reference file. Note that
# this script is customised for the SGMLS.pm and sgmlspl documentation
# in this directory, and is not meant as a general-purpose
# DocBook->HTML translator (though it could form the basis of one).
# Because each parse uses information saved from the last parse,
# you might need two passes to make certain that all references are
# up to date.
#
# $Log: tohtml.pl,v $
# Revision 1.4 1995/12/03 22:07:21 david
# Changed to use SGMLS::Output instead of Output, and to take advantage
# of the SGMLS::Refs package for forward references.
#
# Revision 1.3 1995/08/12 16:25:53 david
# Oops! Fixed comment leader in RCS file.
#
# Revision 1.2 1995/08/12 16:21:06 david
# Changes for release 1.01: fixed handling of prefixed sysid's from
# NSGMLS.
#
########################################################################
use SGMLS; # Use the SGMLS package.
use SGMLS::Output; # Use stack-based output.
use SGMLS::Refs;
$version = '$Id: tohtml.pl,v 1.4 1995/12/03 22:07:21 david Exp $';
$basename = shift; # Extra argument to script is basename.
#
# This conversion script keeps the idea of a current ID and a current
# file. Since the SGML document will be broken up into a series of
# smaller HTML documents, it is necessary to keep track of the current
# file name. The current ID is the ID (explicit or implied) of the
# most recent element which wants to capture titles, etc.
#
$current_id = ''; # The ID of the current container element.
@current_id_stack = (); # The IDs of any parent container elements.
$current_file = ''; # The name of the current output file.
@current_file_stack = (); # The names of the parent output files.
$top_id = ''; # The ID of the top element.
$top_file = ''; # The name of the top-level output file.
$previous_file = ''; # The previous file on the same level.
$table_counter = 0; # The number of the current table.
########################################################################
# Handler declarations for sgmlspl.
########################################################################
#
# Use the 'start' and 'end' handlers of the document to begin and
# terminate reference handling.
#
sgml('start', sub {
    # Create a ".redo_<basename>" marker file before processing starts.
    # The list form of system() runs touch directly, so a basename that
    # contains spaces or shell metacharacters cannot be interpreted by a
    # shell.
    my $marker = ".redo_$basename";
    system('touch', $marker) == 0
        or warn "Cannot touch $marker\n";
    # Start up the reference manager, backed by "<basename>.refs".
    $Refs = SGMLS::Refs->new("$basename.refs");
});
sgml('end', sub {
    # Remove the marker again unless $Refs->warn returns true.
    # NOTE(review): presumably warn() reports that saved references
    # changed during this pass (so another pass is needed) -- confirm in
    # SGMLS::Refs.
    unlink(".redo_$basename") unless $Refs->warn;
});
#
# The top-level element: its start handler opens the first HTML file
# and records the resulting ID and file name as the "top" of the
# document, which every other page links back to.
#
# NOTE(review): the element name had been lost from both patterns (they
# read '' and would not register a usable handler).  ARTICLE is assumed
# as the DocBook top-level element -- confirm against the document.
#
sgml('<ARTICLE>', sub {
    start_html(shift);
    $top_id   = $current_id;
    $top_file = $current_file;
});
sgml('</ARTICLE>', sub { end_html(); });
#
# Ignore all of the header except for the bits which we actually want,
# by pushing output to 'nul'.
#
# NOTE(review): the element names in this section had been lost from the
# patterns.  TITLE follows from the "title:" reference key; ARTHEADER and
# the container names below are assumed from DocBook -- confirm against
# the DTD used by the documents.
#
sgml('<ARTHEADER>',  sub { push_output('nul'); });
sgml('</ARTHEADER>', sub { pop_output(); });
#
# Save the title of something for future reference: capture the content
# as a string and store it under "title:<current id>".
#
sgml('<TITLE>',  sub { push_output('string'); });
sgml('</TITLE>', sub { $Refs->put("title:$current_id", pop_output()); });
#
# These are just containers in the header: they produce no output of
# their own.
#
for my $container (qw(AUTHORGROUP AUTHOR AFFILIATION ADDRESS ARTPAGENUMS)) {
    sgml("<$container>",  "");
    sgml("</$container>", "");
}
#
# Save the author's first name, surname, organisation name,
# organisation division and email address for future reference.
#
# Each element's content is captured as a string and stored under
# "<lower-cased element name>:<current id>" (for example
# "firstname:<id>"), which is where end_html() looks the values up.
#
# NOTE(review): the element names had been lost from the patterns; they
# are restored here from the reference keys, which match the DocBook
# element names -- confirm against the DTD.
#
for my $field (qw(FIRSTNAME SURNAME ORGNAME ORGDIV EMAIL)) {
    my $key = lc $field;
    sgml("<$field>",  sub { push_output('string'); });
    sgml("</$field>", sub { $Refs->put("${key}:$current_id", pop_output()); });
}
#
# Sectioning elements -- all of these simply call the start_html
# and end_html subroutines, which do all of the real work: every
# section becomes its own HTML file.
#
# NOTE(review): the six element names had been lost from the patterns;
# the list below is assumed (DocBook SECT1..SECT5 plus IMPORTANT) --
# confirm against the documents being converted.
#
for my $section (qw(IMPORTANT SECT1 SECT2 SECT3 SECT4 SECT5)) {
    sgml("<$section>",  sub { start_html(shift); });
    sgml("</$section>", sub { end_html(); });
}
#
# Paragraphs must be marked explicitly in HTML -- use the HTML 3
# practice (actually just _real_ SGML, for a change) of marking both
# the beginning and the end.
#
# NOTE(review): the element name had been lost from the patterns; PARA
# is assumed from DocBook.
#
sgml('<PARA>',  "<P>");
sgml('</PARA>', "</P>\n\n");
#
# Cross-references.
#
#
# This is an internal cross reference -- get the URL by
# simply adding ".html" to the IDREF (note that this would not work
# for tables!!!).
#
# NOTE(review): the element names in this section (LINK, ULINK, XREF)
# had been lost from the patterns and are assumed from DocBook; the
# LINKEND and URL attribute names are the ones the handlers read.
#
sgml('<LINK>', sub {
    my $element = shift;
    # IDs are lower-cased when files are named, so do the same here.
    my $target = lc($element->attribute('LINKEND')->value);
    output("<A HREF=\"$target.html\">");
});
sgml('</LINK>', "</A>");
#
# This is an external cross-reference, with a supplied URL.
#
sgml('<ULINK>', sub {
    my $element = shift;
    output("<A HREF=\"");
    output($element->attribute('URL')->value);
    output("\">");
});
sgml('</ULINK>', "</A>");
#
# This is a pointer to something (in this case, always a table):
# print whatever was stored under "xref:<id>" for the target.
#
sgml('<XREF>', sub {
    my $element = shift;
    output($Refs->get('xref:' . lc($element->attribute('LINKEND')->value)));
});
sgml('</XREF>', "");
#
# Inline elements.
#
#
# Inline elements and the markup printed around their content:
#
#   application names, file names, symbols, return values, commands,
#   parameters, literals and class names  -- typewriter
#   acronyms                              -- bold
#   terms                                 -- italics
#   quotations                            -- quotation marks
#   emphasis                              -- emphasised
#
# NOTE(review): both the element names and the HTML tags had been lost
# from the handler calls (every one read sgml('', "")).  The names are
# assumed from DocBook and the tags from the per-element comments --
# confirm against the DTD, in particular GLOSSTERM for "terms".
#
my @inline_markup = (
    # element         start     end
    [ 'APPLICATION', '<TT>',   '</TT>' ],
    [ 'ACRONYM',     '<B>',    '</B>'  ],
    [ 'GLOSSTERM',   '<I>',    '</I>'  ],
    [ 'FILENAME',    '<TT>',   '</TT>' ],
    [ 'SYMBOL',      '<TT>',   '</TT>' ],
    [ 'RETURNVALUE', '<TT>',   '</TT>' ],
    [ 'QUOTE',       '"',      '"'     ],
    [ 'COMMAND',     '<TT>',   '</TT>' ],
    [ 'PARAMETER',   '<TT>',   '</TT>' ],
    [ 'LITERAL',     '<TT>',   '</TT>' ],
    [ 'CLASSNAME',   '<TT>',   '</TT>' ],
    [ 'EMPHASIS',    '<EM>',   '</EM>' ],
);
for my $markup (@inline_markup) {
    my ($name, $before, $after) = @$markup;
    sgml("<$name>",  $before);
    sgml("</$name>", $after);
}
#
# Block elements.
#
#
# Program listings are preformatted.
#
# NOTE(review): the element name and the HTML tags had been lost from
# these calls; PROGRAMLISTING is assumed from DocBook and <PRE> from the
# comment above.
#
sgml('<PROGRAMLISTING>',  "\n<PRE>");
sgml('</PROGRAMLISTING>', "</PRE>\n\n");
#
# Keep a counter for table numbers, note the ID, and look up the
# title (caption) for the table.
#
# The table number is stored under "xref:<id>" so that a pointer to the
# table can print it.
#
# NOTE(review): the tail of the start handler and the head of the end
# handler had been lost (the file no longer parsed here).  The heading
# markup is a reconstruction from the comment above, and TABLE is
# assumed from DocBook -- adjust the heading format if needed.
#
sgml('<TABLE>', sub {
    my $element = shift;
    # Tables get an ID of their own while they are open, so that their
    # title is saved under the table rather than the enclosing section.
    push @current_id_stack, $current_id;
    $current_id = lc($element->attribute('ID')->value || gen_id());
    $table_counter++;
    $Refs->put("xref:$current_id", $table_counter);
    output("\n<H3>Table ${table_counter}: "
           . $Refs->get("title:$current_id")
           . "</H3>\n\n");
});
sgml('</TABLE>', sub {
    output("\n");
    # Back to the enclosing container's ID.
    $current_id = pop @current_id_stack;
});
#
# Nothing needs to be done here -- we don't care how many cells there are.
#
# NOTE(review): the element names in this section had been lost from the
# patterns.  THEAD is the name the entry handlers test with within();
# TGROUP and TBODY are assumed from DocBook.
#
sgml('<TGROUP>',  "");
sgml('</TGROUP>', "");
#
# We will keep track of all of the entries in the head, for later use.
# Output is pushed to 'nul' so the heading row itself prints nothing.
#
sgml('<THEAD>',  sub { @cell_headings = (); push_output('nul'); });
sgml('</THEAD>', sub { pop_output(); });
#
# Print a single horizontal rule before the beginning of the body.
#
sgml('<TBODY>',  "<HR>");
sgml('</TBODY>', "");
#
# Make each row into a labelled list (!!) -- HTML 3 does have tables,
# but they might not be able to handle the paragraph-length entries
# which I used in my documentation (these will not print if we are
# in the table head, since output will be 'nul').
#
# NOTE(review): the element name and the list markup had been lost from
# these calls; ROW is assumed from DocBook and <DL> from "labelled list".
#
sgml('<ROW>', sub {
    output("\n<DL>\n");
    # Entries are matched to their headings by position within the row.
    $cell_counter = 0;
});
sgml('</ROW>', "\n</DL>\n<HR>\n\n");
#
# If an entry is in the table head, save it for later use; otherwise,
# print the entry as a list item with its corresponding head entry
# as a label.
#
# NOTE(review): the element name and the <DT>/<DD> markup had been lost
# from these calls; ENTRY is assumed from DocBook.
#
sgml('<ENTRY>', sub {
    my $element = shift;
    if ($element->within('THEAD')) {
        # Capture the heading text; it is stored by the end handler.
        push_output('string');
    } else {
        output("<DT><B>");
        output($cell_headings[$cell_counter]);
        output("</B></DT>\n<DD>");
    }
});
sgml('</ENTRY>', sub {
    my $element = shift;
    if ($element->within('THEAD')) {
        $cell_headings[$cell_counter] = pop_output();
    } else {
        output("</DD>\n");
    }
    # Advance to the next column in either case.
    $cell_counter++;
});
########################################################################
# SDATA Handlers -- use HTML entities wherever possible.
########################################################################
# Map SDATA strings to HTML text.  The three markup characters must be
# written as character entities, otherwise they would be read as HTML
# markup in the generated pages.
#
# NOTE(review): the lt/gt/amp patterns read '[lt ]', '[gt ]' and
# '[amp ]' in the damaged source.  They are restored here to the
# six-character padded form that '[mdash ]' and '[hellip]' already have
# -- confirm against the entity declarations in use.
sgml('|[lt    ]|', "&lt;");
sgml('|[gt    ]|', "&gt;");
sgml('|[mdash ]|', "--");
sgml('|[LaTeX]|', "LaTeX");
sgml('|[hellip]|', "...");
sgml('|[amp   ]|', "&amp;");
########################################################################
# The generic external data entity handler. Handle only entities
# with type CDATA, and simply dump their files into the current
# document with minimal conversion.
########################################################################
# Generic external data entity handler: copy the entity's file into the
# current output, escaping the characters HTML treats as markup.
# Dies if the entity is not of type CDATA or its file cannot be read.
sgml('entity', sub {
    my $entity = shift;
    # Use the first generated filename
    # or the system identifier.
    my $filename = $entity->filenames->[0] || $entity->sysid;
    # A strange, NSGMLS-thing: drop a leading "FILE:" or "<...>" prefix.
    # NOTE(review): the bracketed-prefix pattern was damaged in the
    # source; any leading "<...>" is stripped here -- confirm that this
    # matches what NSGMLS actually emits.
    if ($filename =~ m{\A(?:FILE:|<[^>]+>)(.*)\z}s) {
        $filename = $1;
    }
    # Handle only CDATA.
    if ($entity->type ne 'CDATA') {
        die "Cannot handle external entity with type " . $entity->type . "\n";
    }
    die "Cannot read file $filename\n" unless -r $filename;
    # Three-argument open with a lexical handle: the file name can never
    # be mistaken for an open mode, and the handle is private to this call.
    open(my $input, '<', $filename)
        or die "Cannot open external file $filename\n";
    # Convert special SGML characters ('&' first, so that the entities
    # produced for '<' and '>' are not escaped a second time).
    while (my $line = <$input>) {
        $line =~ s/&/&amp;/g;
        $line =~ s/</&lt;/g;
        $line =~ s/>/&gt;/g;
        output($line);
    }
    close $input;
});
########################################################################
# Default handlers -- these will pick up any unrecognised elements,
# SDATA strings, processing instructions, or subdocument entities,
# and report an error to the user.
#########################################################################
# Catch-all handlers: anything without a specific handler is fatal.
sgml('start_element', sub { my ($element) = @_; die "Unknown element: " . $element->name; });
sgml('sdata', sub { my ($sdata) = @_; die "Unknown SDATA: " . $sdata; });
sgml('pi', sub { my ($instruction) = @_; die "Unknown processing instruction: " . $instruction; });
sgml('start_subdoc', sub { my ($subdoc) = @_; die "Unknown subdoc entity: " . $subdoc->name; });
#
# End of sgmlspl handler declarations.
#
########################################################################
# Utility procedures.
########################################################################
#
# Given an element, start a new HTML document for it.
#
# Start a new HTML file for $element.
#
# Pushes the current ID and file onto their stacks, derives the new ID
# from the element's ID attribute (or gen_id() when it is empty) and the
# file name from that ID, records the previous/next/up relations in
# $Refs, writes a link to the new file into the parent file, and then
# redirects output to the new file and prints its front matter.
#
# NOTE(review): all HTML markup had been lost from the string literals in
# this routine (the navigation targets were looked up but never printed).
# The markup below is a reconstruction -- adjust it to house style.
sub start_html {
    my $element = shift;
    my $old_file = $current_file;
    # Save the old values on the stack.
    push @current_id_stack, $current_id;
    push @current_file_stack, $current_file;
    # Get the new ID and file.
    $current_id = lc($element->attribute('ID')->value || gen_id());
    $current_file = $current_id . '.html';
    # Note the previous child, if any.
    if ($previous_file) {
        $Refs->put("previous:$current_file", $previous_file);
        $Refs->put("next:$previous_file", $current_file);
    }
    # The new file has no earlier sibling among its own children yet.
    $previous_file = '';
    # Put a reference up to the parent.
    if ($old_file) {
        $Refs->put("up:$current_file", $old_file);
    }
    # Look up the title reference.
    my $title = $Refs->get("title:$current_id");
    # Strip any markup from the copy used where only plain text is
    # wanted.
    my $plaintitle = $title;
    $plaintitle =~ s/<[^>]+>//g;
    # If this is not the top-level file, send out a link (into the
    # parent file) before beginning the new file.
    if ($old_file) {
        output("<LI><A HREF=\"$current_file\">$plaintitle</A></LI>\n");
    }
    # Send output to the new file.
    push_output('file', $current_file);
    # Print the front matter.
    output("<HTML>\n<HEAD>\n<TITLE>$plaintitle</TITLE>\n</HEAD>\n<BODY>\n");
    # Navigational aids, if this is not the top-level file.
    if ($old_file) {
        output("\n<P><B>Links</B>: ");
        my $up = $Refs->get("up:$current_file");
        my $previous = $Refs->get("previous:$current_file");
        my $next = $Refs->get("next:$current_file");
        output("<A HREF=\"$next\">Next</A> ") if $next;
        output("<A HREF=\"$previous\">Previous</A> ") if $previous;
        output("<A HREF=\"$up\">Up</A> ") if $up;
        output("<A HREF=\"$top_file\">Top</A>");
        output("</P>\n\n");
    }
    output("<H1>$title</H1>\n\n");
}
#
# End the HTML document.
#
# Finish the current HTML file.
#
# Looks up the author details for the current ID (falling back to those
# of the top element), restores the parent's ID and file from the stacks,
# prints the closing navigation links and address, and pops the output
# that start_html() pushed.
#
# NOTE(review): all HTML markup had been lost from the string literals in
# this routine, including the MAILTO link.  The markup below is a
# reconstruction -- adjust it to house style.
sub end_html {
    # Look up the name and email info.
    my $firstname = $Refs->get("firstname:$current_id") ||
        $Refs->get("firstname:$top_id");
    my $surname = $Refs->get("surname:$current_id") ||
        $Refs->get("surname:$top_id");
    my $email = $Refs->get("email:$current_id") ||
        $Refs->get("email:$top_id");
    # Restore the previous ID and file, and note this as the previous
    # child.
    $previous_file = $current_file;
    $current_id = pop @current_id_stack;
    $current_file = pop @current_file_stack;
    # If this is not the top-level file, add some navigational
    # information.  ($previous_file is the file being closed here.)
    if ($current_file) {
        output("\n<P><B>Links</B>: ");
        my $up = $Refs->get("up:$previous_file");
        my $previous = $Refs->get("previous:$previous_file");
        my $next = $Refs->get("next:$previous_file");
        output("<A HREF=\"$next\">Next</A> ") if $next;
        output("<A HREF=\"$previous\">Previous</A> ") if $previous;
        output("<A HREF=\"$up\">Up</A> ") if $up;
        output("<A HREF=\"$top_file\">Top</A>");
        output("</P>\n\n");
    }
    # Add an address, if available, including a MAILTO URL.
    output("<ADDRESS>\n");
    output("$firstname $surname ") if $firstname || $surname;
    output("<A HREF=\"mailto:$email\">&lt;$email&gt;</A>") if $email;
    output("</ADDRESS>\n</BODY>\n</HTML>\n");
    # Close the file opened by start_html(); output returns to the
    # parent.
    pop_output();
}
#
# Generate a new ID for anything which does not already have one.
#
# Return the next generated ID: "node1", "node2", ...  The running count
# lives in the package variable $id_counter.
sub gen_id {
    my $serial = ++$id_counter;
    return 'node' . $serial;
}
1;