svn commit - mysqldoc@docsrva: r7622 - trunk/tools
Author: mcbrown
Date: 2007-08-31 13:20:57 +0200 (Fri, 31 Aug 2007)
New Revision: 7622
Log:
Divide DocBook File:
Splits up files by their section ID (sections only), writes each section to a new file (using the parent DOCTYPE definition), and replaces the original section with a suitable xi:include fragment.
To use, specify the list of IDs you want to split out from the original file:
$ divide-docbook-file.pl --idlist=section1,section2 original.xml
This will write:
section1.xml
section2.xml
And update original.xml with the necessary xi:include elements
The *original* (unmodified) file will be in original.xml.old, the modified file will be written to original.xml
WARNING: The process is not recursive - that is, you cannot split out sections and subsections in the same parse into different files and expect the xi:include to be updated properly.
Added:
trunk/tools/divide-docbook-file.pl
Property changes on: trunk/tools/divide-docbook-file.pl
Name: svn:executable
+ *
Added: trunk/tools/divide-docbook-file.pl
--- trunk/tools/divide-docbook-file.pl (rev 0)
+++ trunk/tools/divide-docbook-file.pl 2007-08-31 11:20:57 UTC (rev 7622)
Changed blocks: 1, Lines Added: 108, Lines Deleted: 0; 3453 bytes
@@ -0,0 +1,108 @@
+#!/usr/bin/perl -w
+
+# divide-docbook-file.pl
+#
+# Divide a docbook file into multiple subfiles by section id
+
+# Martin MC Brown
+# mc@mysql.com
+# 2007-08-31
+
+use XML::DOM;
+use Data::Dumper;
+use IO::File;
+use Getopt::Long;
+use strict;
+
+# Comma-separated list of section IDs to split out, supplied via --idlist.
+my $opt_idlist;
+
+# Stop on unrecognised options rather than carrying on and rewriting files.
+GetOptions("idlist=s" => \$opt_idlist) or exit(1);
+
+# Make sure we've got a list of IDs before doing any XML work
+
+if (!defined($opt_idlist) or $opt_idlist eq '')
+{
+    print STDERR "You must specify the list of section IDs to be extracted using --idlist=xxx,xxx,...\n";
+    exit(1);
+}
+
+# Fake the new include element so we can modify and re-insert.
+# The string must be a complete, well-formed document: a single empty
+# xi:include element that declares the XInclude namespace.
+
+my $incstring = '<xi:include xmlns:xi="http://www.w3.org/2001/XInclude"/>';
+my $p = XML::DOM::Parser->new;
+my $incdoc = $p->parse($incstring);
+
+# The root element of the tiny document is the template that gets cloned
+# once for every section that is split out.
+my $incelem = $incdoc->getDocumentElement();
+
+# Parse the document named by the first remaining command line argument
+my $file = shift;
+
+# Without a file name there is nothing to parse; say so explicitly instead
+# of handing an undefined value to the parser.
+if (!defined($file))
+{
+    print STDERR "You must specify the DocBook file to be divided\n";
+    exit(1);
+}
+
+my $parser = XML::DOM::Parser->new;
+my $doc = $parser->parsefile($file) or die "Cannot open $file: $!\n";
+
+
+# Determine the current document type info so we can copy it to the new file.
+# $doctype stays undefined when the document has no DOCTYPE node.
+
+my $doctype = undef;
+
+foreach my $child ($doc->getChildNodes())
+{
+    next unless ($child->getNodeType == DOCUMENT_TYPE_NODE);
+
+    # Remap it to a section: temporarily rename the doctype so that its
+    # string form names 'section', then restore the original name so the
+    # parent document is written back unchanged.
+    my $currtype = $child->getName;
+    $child->setName('section');
+    $doctype = $child->toString;
+    $child->setName($currtype);
+    last;
+}
+
+# Get the list of sections in this document
+
+my @sections = $doc->getElementsByTagName("section");
+
+# Work through the list of sections, extract them, write them to a new file (including XML and Doctype headers)
+# Then replace the original with a new xi:include fragment
+
+# Drop empty entries (e.g. from "a,,b") so they can never match a section
+# that has no id attribute.
+my @idlist = grep { length($_) } split(/,/,$opt_idlist);
+
+# IDs that have not been found yet; entries are deleted as sections match,
+# so whatever is left afterwards was never seen in the document.
+my $idfoundlist = {};
+
+foreach my $wanted (@idlist)
+{
+    $idfoundlist->{$wanted} = 1;
+}
+
+# Header lines written at the top of every new file. The declared encoding
+# matches the :utf8 layer set on the output handle below. The DOCTYPE line
+# is left out when the parent document did not have one.
+my @headers = ('<?xml version="1.0" encoding="utf-8"?>');
+push(@headers, $doctype) if (defined($doctype));
+
+foreach my $id (@idlist)
+{
+    foreach my $section (@sections)
+    {
+        # getAttribute returns an empty string when the attribute is
+        # missing, so sections without an id are skipped instead of
+        # causing a method call on an undefined attribute node.
+        my $sectionid = $section->getAttribute("id");
+        next unless (defined($sectionid) and $sectionid eq $id);
+
+        print STDERR "Section: $sectionid\n";
+        delete($idfoundlist->{$sectionid});
+
+        # Write the section, preceded by the headers, to <id>.xml
+        my $fh = IO::File->new("$sectionid.xml",'w') or die "Couldn't write to $sectionid.xml: $!\n";
+        binmode($fh,":utf8");
+        print $fh (join("\n",
+                        @headers,
+                        $section->toString));
+        # Buffered write errors only show up at close time
+        $fh->close() or die "Couldn't finish writing $sectionid.xml: $!\n";
+        print STDERR " - Written into $sectionid.xml\n";
+
+        # Swap the section for a copy of the include template that points
+        # at the file just written.
+        my $newinc = $incelem->cloneNode();
+        $newinc->setAttribute('href',"$sectionid.xml");
+        $newinc->setOwnerDocument($doc);
+        my $parent = $section->getParentNode();
+        $parent->replaceChild($newinc,$section);
+        print STDERR " - Original content replaced with xi:include instruction\n";
+    }
+}
+
+# Anything still recorded in $idfoundlist was requested but never matched
+if (scalar keys %{$idfoundlist})
+{
+    print STDERR "\nWARNING: Couldn't find IDs: ",join(', ',sort keys %{$idfoundlist}),"\n\n";
+}
+
+# Write the modified document beside the original, then swap the two:
+# the original becomes $file.old and the new content takes its name.
+$doc->printToFile("$file.new");
+$doc->dispose;
+
+# Stop at the first failed rename. Carrying on after a failed backup would
+# let the second rename overwrite the only copy of the original file.
+rename($file,"$file.old") or die "Couldn't rename $file to $file.old: $!\n";
+rename("$file.new","$file") or die "Couldn't rename $file.new to $file: $!\n";
+print STDERR "Renamed $file to $file.old\nNew file is in $file\n";
Property changes on: trunk/tools/divide-docbook-file.pl
Name: svn:executable
+ *
--
MySQL Code Commits Mailing List
For list archives:
http://lists.mysql.com/commits
To unsubscribe:
http://lists.mysql.com/commits?unsub=lists@pantek.com
Received on Fri Aug 31 07:22:48 2007
This archive was generated by hypermail 2.1.8
: Sun Oct 07 2007 - 09:01:16 EDT
|