########################################################################
# SGMLSPL script to convert from the DocBook DTD to HTML pages.
#
# by David Megginson
#
# This is a slightly more complicated script than tolatex.pl, since it
# uses forward references and an external reference file.  Note that
# this script is customised for the SGMLS.pm and sgmlspl documentation
# in this directory, and is not meant as a general-purpose
# DocBook->HTML translator (though it could form the basis of one).
# Because each parse uses information saved from the last parse,
# you might need two passes to make certain that all references are
# up to date.
#
# $Log: tohtml.pl,v $
# Revision 1.4  1995/12/03  22:07:21  david
# Changed to use SGMLS::Output instead of Output, and to take advantage
# of the SGMLS::Refs package for forward references.
#
# Revision 1.3  1995/08/12  16:25:53  david
# Oops!  Fixed comment leader in RCS file.
#
# Revision 1.2  1995/08/12  16:21:06  david
# Changes for release 1.01: fixed handling of prefixed sysid's from
# NSGMLS.
#
# NOTE(review): this copy of the script had lost its line structure and
# every '<...>' sequence (element names in handler keys, HTML in output
# strings).  Element names below were restored from the comments that
# accompany each handler and from identifiers still present in the code
# (THEAD, LINKEND, URL); the HTML literals were restored from the
# behaviour those comments describe.  Confirm both against the DTD and
# a pristine copy before relying on them.
########################################################################

use SGMLS;                      # Use the SGMLS package.
use SGMLS::Output;              # Use stack-based output.
use SGMLS::Refs;

$version = '$Id: tohtml.pl,v 1.4 1995/12/03 22:07:21 david Exp $';

$basename = shift;              # Extra argument to script is basename.

#
# This conversion script keeps the idea of a current ID and a current
# file.  Since the SGML document will be broken up into a series of
# smaller HTML documents, it is necessary to keep track of the current
# file name.  The current ID is the ID (explicit or implied) of the
# most recent element which wants to capture titles, etc.
#
$current_id = '';               # The ID of the current container element.
@current_id_stack = ();         # The IDs of any parent container elements.
$current_file = '';             # The name of the current output file.
@current_file_stack = ();       # The names of the parent output files.
$top_id = '';                   # The ID of the top element.
$top_file = '';                 # The name of the top file.
$previous_file = '';            # The previous file on the same level.

$table_counter = 0;             # The number of the current table.

########################################################################
# Handler declarations for sgmlspl.
########################################################################

#
# Use the 'start' and 'end' handlers of the document to begin and
# terminate reference handling.  The .redo_<basename> marker file is
# created at the start and removed at the end only when the reference
# manager reports no warning, so its presence signals that another
# pass is needed.
#
sgml('start', sub {
    # List form: the basename comes from the command line and must not
    # be interpreted by a shell.
    system('touch', ".redo_$basename") == 0
        or warn "Cannot touch .redo_$basename\n";
    # Start up the reference manager.
    $Refs = SGMLS::Refs->new("$basename.refs");
});
sgml('end', sub {
    unlink(".redo_$basename") unless $Refs->warn;
});

#
# The <ARTICLE> is the top-level element.
#
sgml('<ARTICLE>', sub {
    start_html(shift);
    $top_id   = $current_id;
    $top_file = $current_file;
});
sgml('</ARTICLE>', sub { end_html(); });

#
# Ignore all of the header except for the bits which we actually want,
# by pushing output to 'nul'.
#
sgml('<ARTHEADER>',  sub { push_output('nul'); });
sgml('</ARTHEADER>', sub { pop_output(); });

#
# Save the title of something for future reference.
#
sgml('<TITLE>',  sub { push_output('string'); });
sgml('</TITLE>', sub { $Refs->put("title:$current_id", pop_output()); });

#
# These are just containers in the <ARTHEADER>.
#
for my $container (qw(AUTHORGROUP AUTHOR AFFILIATION ADDRESS ARTPAGENUMS)) {
    sgml("<$container>",  "");
    sgml("</$container>", "");
}

#
# Save the author's first name, the author's surname, the organisation
# name, the organisation division and the email address for future
# reference, each under "<key>:<current id>".
#
for my $field (qw(FIRSTNAME SURNAME ORGNAME ORGDIV EMAIL)) {
    my $key = lc $field;
    sgml("<$field>",  sub { push_output('string'); });
    sgml("</$field>", sub { $Refs->put("$key:$current_id", pop_output()); });
}

#
# Sectioning elements -- all of these simply call the &start_html
# and &end_html subroutines, which do all of the real work.
#
# NOTE(review): six start/end pairs were registered here; their names
# are assumed -- confirm against the documents being converted.
#
for my $section (qw(IMPORTANT SECT1 SECT2 SECT3 SECT4 SECT5)) {
    sgml("<$section>",  sub { start_html(shift); });
    sgml("</$section>", sub { end_html(); });
}

#
# Paragraphs must be marked explicitly in HTML -- use the HTML 3
# practice (actually just _real_ SGML, for a change) of marking both
# the beginning and the end.
#
sgml('<PARA>',  "<P>");
sgml('</PARA>', "</P>\n\n");

#
# Cross-references.
#

#
# This is an internal cross reference -- get the URL by
# simply adding ".html" to the IDREF (note that this would not work
# for tables!!!).
#
sgml('<LINK>', sub {
    my $element = shift;
    output('<A HREF="' . lc($element->attribute('LINKEND')->value) . '.html">');
});
sgml('</LINK>', "</A>");

#
# This is an external cross-reference, with a supplied URL.
#
sgml('<ULINK>', sub {
    my $element = shift;
    output('<A HREF="');
    output($element->attribute('URL')->value);
    output('">');
});
sgml('</ULINK>', "</A>");

#
# This is a pointer to something (in this case, always a table).
#
sgml('<XREF>', sub {
    my $element = shift;
    output($Refs->get('xref:' . lc($element->attribute('LINKEND')->value)));
});
sgml('</XREF>', "");

#
# Inline elements.
#

#
# Print application names, file names, symbols, return values,
# commands, parameters, literal elements and class names in typewriter.
#
for my $typewriter (qw(APPLICATION FILENAME SYMBOL RETURNVALUE
                       COMMAND PARAMETER LITERAL CLASSNAME)) {
    sgml("<$typewriter>",  "<TT>");
    sgml("</$typewriter>", "</TT>");
}

#
# Print acronyms in bold.
#
sgml('<ACRONYM>',  "<B>");
sgml('</ACRONYM>', "</B>");

#
# Print terms in italics.
#
sgml('<GLOSSTERM>',  "<I>");
sgml('</GLOSSTERM>', "</I>");

#
# Print quotations in quotation marks.
#
sgml('<QUOTE>',  '"');
sgml('</QUOTE>', '"');

#
# Emphasise emphasis.
#
sgml('<EMPHASIS>',  "<EM>");
sgml('</EMPHASIS>', "</EM>");

#
# Block elements.
#

#
# Program listings are preformatted.
#
sgml('<PROGRAMLISTING>',  "<P>\n<PRE>");
sgml('</PROGRAMLISTING>', "</PRE>\n</P>\n");

#
# Keep a counter for table numbers, note the ID, and look up the
# title (caption) for the table.
#
sgml('<TABLE>', sub {
    my $element = shift;
    push @current_id_stack, $current_id;
    $current_id = lc($element->attribute('ID')->value || gen_id());
    $table_counter++;
    # Record the number so that <XREF> can print it.
    $Refs->put("xref:$current_id", $table_counter);
    output("\n<H3>Table $table_counter: "
         . $Refs->get("title:$current_id")
         . "</H3>\n\n");
});
sgml('</TABLE>', sub {
    output("\n");
    $current_id = pop @current_id_stack;
});

#
# Nothing needs to be done here -- we don't care how many cells there are.
#
sgml('<TGROUP>',  "");
sgml('</TGROUP>', "");

#
# We will keep track of all of the entries in the head, for later use.
#
sgml('<THEAD>', sub {
    @cell_headings = ();
    push_output('nul');
});
sgml('</THEAD>', sub { pop_output(); });

#
# Print a single horizontal rule before the beginning of the body.
#
sgml('<TBODY>',  "<HR>");
sgml('</TBODY>', "");

#
# Make each row into a labelled list (!!) -- HTML 3 does have tables,
# but they might not be able to handle the paragraph-length entries
# which I used in my documentation (these will not print if we are
# in the <THEAD>, since output will be 'nul').
#
sgml('<ROW>', sub {
    output("\n<DL>\n");
    $cell_counter = 0;
});
sgml('</ROW>', "\n</DL>\n<HR>\n\n");

#
# If an entry is in the <THEAD>, save it for later use; otherwise,
# print the entry as a list item with its corresponding <THEAD> entry
# as a label.
#
sgml('<ENTRY>', sub {
    my $element = shift;
    if ($element->within('THEAD')) {
        push_output('string');
    }
    else {
        output("<DT>");
        output($cell_headings[$cell_counter]);
        output("</DT>\n<DD>");
    }
});
sgml('</ENTRY>', sub {
    my $element = shift;
    if ($element->within('THEAD')) {
        $cell_headings[$cell_counter] = pop_output();
    }
    else {
        output("</DD>\n");
    }
    $cell_counter++;
});

########################################################################
# SDATA Handlers -- use HTML entities wherever possible.
#
# NOTE(review): the padding inside the bracketed SDATA strings follows
# the six-character convention of the ISO entity sets; confirm against
# the entity declarations actually in use.
########################################################################

sgml('|[lt    ]|', "&lt;");
sgml('|[gt    ]|', "&gt;");
sgml('|[mdash ]|', "--");
sgml('|[LaTeX]|',  "LaTeX");
sgml('|[hellip]|', "...");
sgml('|[amp   ]|', "&amp;");

########################################################################
# The generic external data entity handler.  Handle only entities
# with type CDATA, and simply dump their files into the current
# document with minimal conversion.
########################################################################

sgml('entity', sub {
    my $entity = shift;
    # Use the first generated filename or the system identifier.
    my $filename = $entity->filenames->[0] || $entity->sysid;
    # A strange, NSGMLS-thing: drop a leading "FILE:" or a leading
    # "<...>" storage-manager prefix from the system identifier.
    $filename =~ s{\A(?:FILE:|<[^>]+>)}{};

    # Handle only CDATA.
    if ($entity->type ne 'CDATA') {
        die "Cannot handle external entity with type " . $entity->type . "\n";
    }
    unless (-r $filename) {
        die "Cannot read file $filename\n";
    }
    open(my $input, '<', $filename)
        or die "Cannot open external file $filename\n";
    # Convert special SGML characters; '&' must be handled first so
    # that the entities produced for '<' and '>' are not re-escaped.
    while (my $line = <$input>) {
        $line =~ s/&/&amp;/g;
        $line =~ s/</&lt;/g;
        $line =~ s/>/&gt;/g;
        output($line);
    }
    close $input;
});

########################################################################
# Default handlers -- these will pick up any unrecognised elements,
# SDATA strings, processing instructions, or subdocument entities,
# and report an error to the user.
########################################################################

sgml('start_element', sub { die "Unknown element: " . $_[0]->name; });
sgml('sdata',         sub { die "Unknown SDATA: " . $_[0]; });
sgml('pi',            sub { die "Unknown processing instruction: " . $_[0]; });
sgml('start_subdoc',  sub { die "Unknown subdoc entity: " . $_[0]->name; });

#
# End of sgmlspl handler declarations.
#
########################################################################
# Utility procedures.
########################################################################

#
# Given an element, start a new HTML document for it.
#
# Argument: the element object; its ID attribute (lower-cased), or a
# generated ID when it has none, names the new file "<id>.html".
#
# Effects: saves the current ID/file on their stacks, records the
# previous/next/up relationships in $Refs, prints a list-item link to
# the new file into the parent document (when there is a parent),
# redirects output to the new file, and prints the page header, the
# navigation links (when there is a parent) and the title heading.
#
sub start_html {
    my $element  = shift;
    my $old_file = $current_file;

    # Save the old values on the stack.
    push @current_id_stack,   $current_id;
    push @current_file_stack, $current_file;

    # Get the new ID and file.
    $current_id   = lc($element->attribute('ID')->value || gen_id());
    $current_file = $current_id . '.html';

    # Note the previous child, if any.
    if ($previous_file) {
        $Refs->put("previous:$current_file", $previous_file);
        $Refs->put("next:$previous_file",    $current_file);
    }
    $previous_file = '';

    # Put a reference up to the parent.
    if ($old_file) {
        $Refs->put("up:$current_file", $old_file);
    }

    # Look up the title reference; the plain version has any markup
    # stripped so that it can be used where markup is not wanted.
    my $title      = $Refs->get("title:$current_id");
    my $plaintitle = $title;
    $plaintitle =~ s/<[^>]+>//g;

    # If this is not the top-level file, send out a link (into the
    # parent document) before beginning the new file.
    if ($old_file) {
        output("<LI><A HREF=\"$current_file\">$plaintitle</A></LI>\n");
    }

    # Send output to the new file.
    push_output('file', $current_file);

    # Print the front matter.
    output("<HTML>\n<HEAD>\n<TITLE>$plaintitle</TITLE>\n</HEAD>\n<BODY>\n");

    # Navigational aids, if this is not the top-level file.
    _nav_links($current_file) if $old_file;

    output("<H1>$title</H1>\n\n");
}

#
# End the HTML document.
#
# Restores the ID and file of the parent from the stacks, remembers the
# file just finished as $previous_file, prints the navigation links
# (when there is a parent) and the author address, and pops the output
# destination pushed by start_html.
#
sub end_html {
    # Look up the name and email info, falling back on the values
    # recorded for the top-level element.
    my $firstname = $Refs->get("firstname:$current_id")
        || $Refs->get("firstname:$top_id");
    my $surname = $Refs->get("surname:$current_id")
        || $Refs->get("surname:$top_id");
    my $email = $Refs->get("email:$current_id")
        || $Refs->get("email:$top_id");

    # Restore the previous ID and file, and note this as the previous
    # child.  Output still goes to the file being finished until the
    # pop_output() at the end.
    $previous_file = $current_file;
    $current_id    = pop @current_id_stack;
    $current_file  = pop @current_file_stack;

    # If this is not the top-level file, add some navigational
    # information.
    _nav_links($previous_file) if $current_file;

    # Add an address, if available, including a MAILTO URL.
    output("\n<ADDRESS>");
    output("$firstname $surname ") if $firstname || $surname;
    output("&lt;<A HREF=\"mailto:$email\">$email</A>&gt;") if $email;
    output("</ADDRESS>\n</BODY>\n</HTML>\n");
    pop_output();
}

#
# Print the "Links:" paragraph for the given file: Next, Previous and
# Up (each only when $Refs has a target recorded for it), then Top.
#
sub _nav_links {
    my $file = shift;

    output("\n<P>Links: ");
    for my $link ([ next => 'Next' ], [ previous => 'Previous' ], [ up => 'Up' ]) {
        my ($relation, $label) = @$link;
        my $target = $Refs->get("$relation:$file");
        output("<A HREF=\"$target\">$label</A> ") if $target;
    }
    output("<A HREF=\"$top_file\">Top</A>");
    output("</P>\n\n");
}

#
# Generate a new ID for anything which does not already have one.
# Returns "node1", "node2", ... on successive calls.
#
sub gen_id {
    $id_counter++;
    return "node$id_counter";
}

1;