#!/usr/bin/perl -w
# striptag -- strip given tags out of HTML
# Copyright : http://www.fsf.org/copyleft/gpl.html
# Created On : Summer 2003
# Last Modified On: Thu Aug 5 05:59:14 2004
# Update Count : 17
# Nominal Author : Dan Jacobson -- http://jidanni.org/
# Actual brains : Jonathan Stowe -- http://www.gellyfish.com/
#Usage example: striptag font div < file.html
#That will strip all <font>, </font>, <div>, </div> tags, etc.
#Tag names are matched in lower case, because HTML::Parser reports the
#names of the tags it finds in lower case.
#[How to strip _all_ tags? Can't give regexp.]
use strict;
use warnings;
use HTML::Parser;
# Build a parser that copies its input to STDOUT unchanged, except that
# start and end tags are routed through tag(), which decides whether to
# echo or drop them.
my $parser = HTML::Parser->new(
    text_h    => [ sub { print shift; }, "text" ],
    # Everything without a dedicated handler is echoed verbatim as well.
    default_h => [ sub { print shift }, 'text' ],
    start_h   => [ \&tag, "self, tagname, text" ],
    end_h     => [ \&tag, "self, tagname, text" ]
);

# Register the tags to strip, taken from the command line.  HTML::Parser
# passes "tagname" forced to lower case (HTML is case insensitive), so the
# requested names are lower-cased too; otherwise "striptag FONT" would
# silently strip nothing.
$parser->{_striptags}->{ lc $_ } = 1 foreach @ARGV;

# Filter standard input; output is produced by the handlers above.
$parser->parse_file(*STDIN);
# Start/end tag handler: echo the tag's original source text unless its
# name has been registered in the parser's _striptags table.
#   $parser - the parser object (a hash ref carrying the _striptags table)
#   $name   - tag name as reported by the parser
#   $markup - the tag exactly as it appeared in the input
sub tag {
    my ( $parser, $name, $markup ) = @_;
    my $is_stripped = $parser->{_striptags}{$name};
    $is_stripped or print $markup;
}