#!/usr/bin/env perl

use diagnostics;
use HTML::Parser;
use File::Basename;
use CGI qw(:standard :html3);
use CGI::Carp;
use Tie::IxHash;
use lib "lib";
use CCwwwutil;
use IPC::ClearTool;

($filename, $levels) = (0 , 3);
if (defined(param('file'))) {
    $filename = param('file');
    $levels = param('depth') if defined(param('depth'));
} else {
    $filename = shift;
}
$depth = $levels;
$doc = rootify("cgi-bin/1/pc.html");
usage() unless $filename;
%seen = ();
%tree = ();
@errors = ();
%buf = (); tie %buf, "Tie::IxHash";
$contents = "";
%footer = (); tie %footer, "Tie::IxHash";
$link = 0;
$title = basename($filename);

$treeRef = \%tree;
$filename = rootify($filename);
parse_recurse($filename, 1);
print start_html($title), "\n";
generate(\%tree);
if (scalar keys %footer) { write_footer(); }
print "\n", end_html, "\n";

sub parse_recurse($$) {
    my ($filename, $top) = @_;
    local $dir = dirname($filename);
    local $treeRef = \%{$$treeRef{$filename}};
    tie %$treeRef, "Tie::IxHash";
    local $levels = $levels - 1;
    $seen{$filename}++;
    my $p = HTML::Parser->new(api_version => 3);
    if ($top) {
	$p->handler(start => \&title_start_handler, "self,tagname");
	$p->report_tags("title");
    } else {
	$p->handler(start => \&body_start_handler, "self,tagname");
	$p->report_tags("body");
    }
    $p->parse_file(viewify($filename)) || push @errors, "$!: $filename\n";
    my ($k, $v) = ();
    while (($k, $v) = each(%buf)) {
	$footer{$k} = $v;
    }
    %buf = ();
    $contents = "";
    if ($levels) {
	my $f = 0;
	foreach $f (keys %$treeRef) { parse_recurse($f, 0); }
    }
}

sub title_start_handler {
    my($self, $tag) = @_;
    return unless $tag eq "title";

    $self->handler(text  => [], '@{dtext}' );
    $self->handler(end   => \&title_end_handler, "self,tagname");
}

sub title_end_handler {
    my($self, $tag) = @_;
    my $text = join("", @{$self->handler("text")});
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    $text =~ s/\s+/ /g;
    $title = $text;

    $self->report_tags("body");
    $self->handler("text", undef);
    $self->handler("start", \&body_start_handler, "self,tagname");
    $self->handler("end", undef);
}

sub body_start_handler {
    my($self, $tag) = @_;
    return unless $tag eq "body";
    $self->handler("start", \&hr_a_start_handler,
		   "self,tagname,attr,token0,skipped_text");
    $self->report_tags(qw(a hr h1 h2 h3 h4 h5 h6));
}

sub hr_a_start_handler {
    my($self, $tag, $attr, $token, $text) = @_;
    $contents .= "$text";
    if ($tag eq "hr") {
	$$treeRef{"contents"} .= $contents;
	$contents = "";
	my ($k, $v) = ();
	while (($k, $v) = each(%buf)) {
	    $$treeRef{$k}{"text"} = $v;
	}
	return;
    } elsif ($tag =~ /h([1-6])/) {
	my $nr = $1;
	my $new = chr(ord($nr) + $depth - $levels - 1);
	my $newtok = $token;
	$newtok =~ s/[1-6]/$new/;
	$contents .= "<$newtok>";
	$self->handler(end   => \&hn_end_handler,
		       "self,tagname,token0,skipped_text,'$newtok'");
    } elsif ($tag eq "a") {
	$self->handler(end   => \&a_end_handler,
		       "self,tagname,token0,skipped_text");
	return unless exists $attr->{href};
	my $fn = $attr->{href};
	if ($fn =~ /[:\?]/) {
	    $footer{$fn}++ unless $fn =~ /mailto:/;
	    return;
	}
	$fn =~ s/\#.*$//;
	if ($fn !~ /\.html?$/) {
	    $footer{$fn}++;
	    return;
	}
	$fn = normalize($dir . '/' . $fn) unless $fn =~ /^\//;
	if ($seen{$fn}++) {
	    if ($footer{$fn}) { delete($footer{$fn}); }
	    else              { return; }
	}
	$buf{$fn} = 1;
    } else {
	$contents .= "<$token>";
    }
}

sub hn_end_handler {
    my ($self, $tag, $token, $text, $new) = @_;
    $contents .= "$text";
    if ($tag =~ /h[1-6]/) {
	$contents .= "</$new>";
	$self->handler("end", undef);
    } else {
	$contents .= "<$token>";
    }
}

sub a_end_handler {
    my ($self, $tag, $token, $text) = @_;
    $contents .= "$text";
    if ($tag eq "a") {
	$self->handler("end", undef);
    } else {
	$contents .= "<$token>";
    }
}

sub generate(%) {
    my $tRef = shift;
    print $$tRef{"contents"} if defined $$tRef{"contents"};
    my ($k, $v) = ();
    while (($k, $v) = each(%$tRef)) {
	generate(\%{$v}) unless ($k eq "contents")
    }
}

sub write_footer() {
    print "\n", h2("References"), "\n<ul>\n";
    foreach (keys %footer) {
	print "  <li> ", $_, "\n";
    }
    print "</ul>\n";
}

sub usage() {
    print header(), "\n", start_html(basename($0)), "\n", h1("Usage"), "\n",
      p("Some parameters expected:", a({href=>$doc}, "documentation")),
      "\n", end_html(), "\n";
    exit 0;
}

exit 0;
