#!/usr/bin/perl
#
# rm_ids.pl
#
# Eugene Eric Kim
# http://www.eekim.com/software/purple/
#
# $Id: rm_ids.pl,v 1.4 2001/05/04 02:46:20 eekim Exp $
#
# Copyright (c) Eugene Eric Kim 2000-2001.  All rights reserved.
# See COPYING for licensing terms.

=head1 NAME

rm_ids.pl - Remove statement IDs and hierarchical addresses from a
purple.dtd XML file.

=head1 SYNOPSIS

Usage:

    rm_ids.pl -r rules.purple file.xml

=head1 DESCRIPTION

Parses the XML file conforming to purple.dtd, and removes all SIDs
and hierarchical addresses.  Saves the original XML file to
file.xml~.

The optional rules file (C<-r>) overrides the built-in element names.
Only lines of the form C<NAME=value> are read, and two names are
recognised: C<LAST_SID> (the element holding the last statement ID)
and C<TAGS_WITH_SIDS> (a comma-separated list of elements carrying
C<sid>/C<hid> attributes).

This script is not very robust.  It doesn't validate the XML file,
and it doesn't handle errors well.  So be careful.

=cut

use strict;
use warnings;

use File::Copy;
use IO::File;
use Getopt::Long;
use XML::DOM;

my $rules_file;
GetOptions('r=s' => \$rules_file);

my $xml_file = $ARGV[0];
if (!$xml_file) {
    # NOTE(review): the original usage text was lost when this file was
    # mangled; this wording is taken from the SYNOPSIS above, and the
    # non-zero exit status is the reviewer's choice -- confirm.
    print <<'EOM';
Usage:
    rm_ids.pl -r rules.purple file.xml
EOM
    exit 1;
}

### parse the file

# NOTE(review): the backup copy and parser construction were also in the
# lost span; reconstructed from the DESCRIPTION ("Saves the original XML
# file to file.xml~") and the surviving parsefile("$xml_file~") call.
copy($xml_file, "$xml_file~")
    or die "Cannot back up $xml_file to $xml_file~: $!\n";

my $parser = XML::DOM::Parser->new;
my $doc    = $parser->parsefile("$xml_file~");

### element rules

# default values for purple.dtd
my $tag_lastsid    = 'lastsid';
my @tags_with_sids = ('h', 'p', 'item', 'example', 'figure');

# override default rules with values in rules file, if it exists
if (defined $rules_file && -e $rules_file) {
    my $fh = IO::File->new($rules_file, 'r');
    if (defined $fh) {
        # Once a rules file is readable it replaces the defaults
        # entirely: anything it does not define stays unset.
        undef $tag_lastsid;
        @tags_with_sids = ();

        while (my $line = <$fh>) {
            chomp $line;
            if ($line =~ /^([A-Z_]+)=([A-Za-z,]+)$/) {
                my ($var_name, $var_value) = ($1, $2);
                if ($var_name eq 'LAST_SID') {
                    $tag_lastsid = $var_value;
                }
                elsif ($var_name eq 'TAGS_WITH_SIDS') {
                    @tags_with_sids = split(',', $var_value);
                }
            }
        }
        $fh->close;
    }
    else {
        # Keep the best-effort fallback to the defaults, but say so.
        warn "Cannot open rules file $rules_file: $!; using defaults\n";
    }
}

### remove SIDs and HIDs

foreach my $tag (@tags_with_sids) {
    remove_ids($tag);
}
remove_lastsid() if ($tag_lastsid);

### print to file

$doc->printToFile("$xml_file");

# fini

### subroutines

# remove_ids($element_name)
#
# Strips the 'sid' and 'hid' attributes from every element named
# $element_name in the file-level document $doc.
sub remove_ids {
    my $element_name = shift;

    my @nodes = $doc->getElementsByTagName($element_name);
    foreach my $node (@nodes) {
        $node->removeAttribute('sid');
        $node->removeAttribute('hid');
    }
    return;
}

# remove_lastsid()
#
# Removes the first child node of the first $tag_lastsid element in
# $doc.  Does nothing when the document has no such element or the
# element has no children, instead of dying on an undefined node.
sub remove_lastsid {
    my @nodes = $doc->getElementsByTagName($tag_lastsid);
    return unless @nodes;

    my $node = ($nodes[0]->getChildNodes)[0];
    return unless defined $node;

    $nodes[0]->removeChild($node);
    return;
}

# my_tag_compression($tag, $elem)
#
# Returns 1 for the 'lastsid' and 'p' tags and 0 for every other tag.
#
# NOTE(review): nothing in the visible source registers this callback
# (e.g. via XML::DOM::setTagCompression), so it is currently unused
# here; it may have been wired up in the span lost to mangling -- confirm.
sub my_tag_compression {
    my ($tag, $elem) = @_;

    return 1 if ($tag =~ /^(lastsid|p)$/);
    return 0;
}

=head1 AUTHOR

Eugene Eric Kim

=cut