#!/usr/bin/perl -w
use strict;

package htmlform;

use HTML::Parser;

my $debug;

#  This is an internal function.
sub start 
{
   my($self, $tagname, $attr, $line) = @_;
   my $self2 = $$self{__PACKAGE__};
   if($debug) {
      print("start=$tagname ");
      foreach (keys %$attr) { print "$_ => $$attr{$_}, "};
      print("\n");
   };
   if($tagname eq "a") {
      if($$self2{'is_in_link'}) {
          warn "Unterminated link (html line $line)\n";
          end($self, "a", $line);
      }
      if(! defined $$attr{'href'}) {  # if anchor tag has no href attibute
          if($$attr{'name'}) { 
             # do nothing
          } else { 
             warn "Anchor without href or name. (html line $line)\n";
          };
      } else {  # else anchor tag has href attribute
         $$self2{'is_in_link'} = 1;
         $$self2{'href'} = $$attr{'href'};
         $$self2{'link_dtext'} = '';
      }
   } elsif($tagname eq "form") {
      if(! $$attr{'method'}) {  
         warn "form with no method (html line $line)\n";
      };
      if(! $$attr{'action'}) {  
         warn "form with no action (html line $line)\n";
         return;
      }; 
      my $formnum = ++$$self2{'formnum'};
      $$self2{"form$formnum"}{'method'} = $$attr{'method'} || 'GET';
      $$self2{"form$formnum"}{'action'} = $$attr{'action'};
      $$self2{"form$formnum"}{'magic_cookie'} = 'form_hash';
      $$self2{"in_form"} = "form$formnum";
      $$self2{"form$formnum"}{'input'} = {};  # for saving form inputs
   } elsif($tagname eq "input") {
      my $in_form = $$self2{"in_form"};
      if(! $in_form) {
         warn "input not inside form (html line $line)\n";
         return;
      }
      my $name = $$attr{'name'};
      if(! $name) {
         warn "input with no name (html line $line)\n";
      };
      my $input = $$self2{$in_form}{'input'};
      my $value = defined $$attr{'value'} ? $$attr{'value'} : '';
      $value = 'on' if $$attr{'checked'};
      my $type = defined $$attr{'type'} ? $$attr{'type'} : '';
      $$input{$name} = [$value, $type, $$self2{'last_nonblank_dtext'} || ''];
   } elsif($tagname eq "textarea") {
      my $name = $$attr{'name'};
      if(! $name) {
         warn "textarea with no name (html line $line)\n";
      };
      $$self2{"in_textarea"} = "$name";
      $$self2{"textarea_dtext"} = $$self2{'last_nonblank_dtext'} || '';
      $$self2{'last_nonblank_dtext'} = '';
   };
}

#  This is an internal function.
sub end 
{
   my($self, $tagname, $line) = @_;
   my $self2 = $$self{__PACKAGE__};
   print("end=$tagname\n") if $debug;
   if($tagname eq "a") {
      if(! $$self2{'is_in_link'}) {
         warn "Extra close link tag (html line $line)\n";
      } else {
         push @{$$self2{'links'}}, [ $$self2{'link_dtext'}, $$self2{'href'} ];
         $$self2{'is_in_link'} = 0;
      }
   } elsif($tagname eq "form") {
      $$self2{"in_form"} = '';
   } elsif($tagname eq "textarea") {
      my $name = $$self2{"in_textarea"};
      if(! $name) {
         warn "textarea end without start (html line $line)\n";
      }
      my $in_form = $$self2{"in_form"};
      if(! $in_form) {
         warn "input not inside form (html line $line)\n";
         return;
      }
      my $input = $$self2{$in_form}{'input'};
      $$input{$name} = [$$self2{'last_nonblank_dtext'}, 'textarea', $$self2{"textarea_dtext"}];
      $$self2{"in_textarea"} = undef;
   };
}

#  This is an internal function.
sub text {
   my($self, $dtext) = @_;
   my $self2 = $$self{__PACKAGE__};
   if(! $$self2{'in_form'}) {
 
   } elsif($dtext =~ /\w/) {  # if has non-white space
      $dtext =~ s/^\s+//s;  # trim leading white space
      $dtext =~ s/\s+$//s;  # trim trailing white space
      $$self2{'last_nonblank_dtext'} = $dtext;
   }
   if($$self2{'is_in_link'}) {
      $$self2{'link_dtext'} .= $dtext;
   }
   print("dtext=$dtext\n") if $debug;
}

#  We use a HTML::Parser object as our own, tucking our stuff inside a subhas called __PACKAGE__
sub new 
{
   my($html) = @_;
   my $parser = HTML::Parser->new( api_version => 3,
                                start_h => [\&start, "self, tagname, attr, line"],
                                end_h   => [\&end,   "self, tagname, line"],
                                text_h   => [\&text,   "self, dtext"],
                                marked_sections => 1,
                              );
   my $self2 = $$parser{__PACKAGE__} = { };  # self2
   $$self2{'magic_cookie'} = __PACKAGE__;
   $$self2{'selected_form'} = 'form1';
   $parser->parse($html);
   $parser->eof();  # signal end of document
   return bless $parser;
}

# $cookies is a list ref of "name=value" cookies.
sub new_from_url
{
   my($url,$cookies) = @_;
   $cookies = [] if ! defined $cookies;
   my $curl_cmd = "curl -s ". join(" ",map { "--header 'Cookie: $_'" } @$cookies) . " '$url'";
   my $page = `$curl_cmd`;
   my $self = new($curl_cmd);
   my $self2 = $$self{__PACKAGE__};
   $$self2{'url'} = $url;
   $$self2{'cookies'} = $cookies;
   return $self;
}

sub print_links
{
   my($self) = @_;
   my $self2 = $$self{__PACKAGE__};
   $$self2{'magic_cookie'} eq __PACKAGE__ or die "bad ". __PACKAGE__ ." object";
   foreach my $ref (@{$$self2{'links'}}) {
      my($dtext, $href) = @$ref;
      print("link dtext=$dtext\nlink href=$href\n\n") if $debug;
   }
}

# This returns a reference to a list containing references to [ $dtext, $href ]
sub link_list
{
   my($self) = @_;
   return $$self{__PACKAGE__}{'links'};
}

#  This is an internal function.
sub resolve_relative_href
{
   my($href, $old_url) = @_;

   return $href if ! defined $old_url;
   return $href if $href =~ /^http:/;
   return $href if $href =~ /^ftp:/;
   return $href if $href =~ /^file:/;
   if($href =~ /^\//) {  # if absolute path
       if($old_url =~ /([^\/]*\/\/[^\/]+)/) {  # hostname
          return "$1$href";
       };
       die "url parse error on $old_url\n";
   };
   $old_url =~ s/[^\/]+$//;  # delete filename
   while($href =~ /^\.\.\// && $old_url =~ /[^\/]+\/$/) {  # parent dir references
      $href =~ s/^\.\.\///;
      $old_url =~ s/[^\/]+\/$//;
   }
   return "$old_url$href";
}

sub find_href_by_subtext
{
   my($self, $subtext, $old_url) = @_;
   foreach my $ref (@{$$self{__PACKAGE__}{'links'}}) {
      my($dtext, $href) = @$ref;
      if($dtext =~ /$subtext/i) {
         return resolve_relative_href($href, $old_url);
      };
   }
   return undef;
}

#  This is an internal function.
sub print_form
{
   my($form_hash) = @_;
   defined $form_hash or die;
   my $method = $$form_hash{'method'};
   my $action = $$form_hash{'action'};
   print qq{<form action="$action" method="$method">\n};
   my $input = $$form_hash{'input'};
   foreach my $name (sort keys %{$input}) {
      my($value, $type, $dtext) = @{$$input{$name}};
      print qq{  $dtext\n} if $dtext;
      if($type eq 'textarea') {
         print qq{    <textarea name="$name">$value</textarea>\n};
      } else {
         print qq{    <input name="$name" type="$type" value="$value">\n};
      };
   };
   print "</form>\n";
}

sub print_forms
{
   my($self, $dtext) = @_;
   my $self2 = $$self{__PACKAGE__};
   $$self2{'magic_cookie'} eq __PACKAGE__ or die "bad ". __PACKAGE__ ." object";
   foreach my $formnum (1 .. $$self2{'formnum'}) {
      my $form_hash = $$self2{"form$formnum"};
      print_form($form_hash);
   };
}

sub print_selected_form
{
   my($self, $dtext) = @_;
   my $self2 = $$self{__PACKAGE__};
   $$self2{'magic_cookie'} eq __PACKAGE__ or die "bad ". __PACKAGE__ ." object";
   my $form_hash = $$self2{$$self2{'selected_form'}};
   print_form($form_hash);
}

#  Returns 1 on success, undef on failure
sub select_form_by_submit_value
{
   my($self, $textre) = @_;
   my $self2 = $$self{__PACKAGE__};
   $$self2{'magic_cookie'} eq __PACKAGE__ or die "bad ". __PACKAGE__ ." object";
   foreach my $formnum (1 .. $$self2{'formnum'}) {
      my $form_hash = $$self2{"form$formnum"};
      my $input = $$form_hash{'input'};
      foreach my $name (sort keys %{$input}) {
         my($value, $type, $dtext) = @{$$input{$name}};
         #print "checking value=$value\n" if $type eq 'submit';
         if(lc($type) eq 'submit' && $value =~ /$textre/) {
            $$self2{'selected_form'} = "form$formnum";
            return 1;
         };
      };
   }
   warn "no form seleceted for $textre\n";
   return undef;
}

sub get_value_hashref
{
   my($self, $textre) = @_;
   my $self2 = $$self{__PACKAGE__};
   $$self2{'magic_cookie'} eq __PACKAGE__ or die "bad ". __PACKAGE__ ." object";
   my $form_hash = $$self2{$$self2{'selected_form'}};
   return {} if ! defined $form_hash;
   my $input = $$form_hash{'input'};
   my %value_hash;
   foreach my $name (sort keys %{$input}) {
      my($value, $type, $dtext) = @{$$input{$name}};
      $value_hash{$name} = $value;
   };
   return \%value_hash;
}

#  This is an internal function.
sub get_inputref_by_pretext
{
   my($self, $textre) = @_;
   my $self2 = $$self{__PACKAGE__};
   $$self2{'magic_cookie'} eq __PACKAGE__ or die "bad ". __PACKAGE__ ." object";
   my $form_hash = $$self2{$$self2{'selected_form'}};
   return undef if ! defined $form_hash;
   my $input = $$form_hash{'input'};
   foreach my $name (sort keys %{$input}) {
      my($value, $type, $dtext) = @{$$input{$name}};
      if($dtext =~ /$textre/) {
         return $$input{$name};
      };
   };
   warn "pretext $textre not found\n";
   return undef;
}

sub get_value_by_pretext
{
   my $inputref = get_inputref_by_pretext(@_); 
   return undef if ! defined $inputref;
   return $$inputref[0];
}

sub set_value_by_pretext
{
   my($self, $textre, $value) = @_;
   my $inputref = get_inputref_by_pretext(@_); 
   return undef if ! defined $inputref;
   $$inputref[0] = $value;
   return 1;  #  success
}

#  This is an internal function.
sub get_inputref_by_name
{
   my($self, $name) = @_;
   my $self2 = $$self{__PACKAGE__};
   $$self2{'magic_cookie'} eq __PACKAGE__ or die "bad ". __PACKAGE__ ." object";
   my $form_hash = $$self2{$$self2{'selected_form'}};
   return undef if ! defined $form_hash;
   my $input = $$form_hash{'input'};
   return undef if ! defined $$input{$name};
   return $$input{$name};
}

sub get_value_by_name
{
   my $inputref = get_inputref_by_name(@_); 
   return undef if ! defined $inputref;
   return $$inputref[0];
}

sub get_type_by_name
{
   my $inputref = get_inputref_by_name(@_); 
   return undef if ! defined $inputref;
   return $$inputref[1];
}

sub get_pretext_by_name
{
   my $inputref = get_inputref_by_name(@_); 
   return undef if ! defined $inputref;
   return $$inputref[2];
}

sub set_value_by_name
{
   my($self, $name, $value) = @_;
   my $inputref = get_inputref_by_name(@_); 
   return undef if ! defined $inputref;
   $$inputref[0] = $value;
   return 1;  #  success
}

# This is an internal function
sub submit_by_inputref
{
   my($inputref, $self, $name, $old_url) = @_;
   return warn "submit button not found\n" if ! defined $inputref;
   my $formcli = "--form '$name=$$inputref[0]'";
   my $self2 = $$self{__PACKAGE__};
   my $form_hash = $$self2{$$self2{'selected_form'}};
   return warn "form for submit not found\n" if ! defined $form_hash;
   my $input = $$form_hash{'input'};
   return warn "form input elements not found\n" if ! defined $input;
   foreach my $name (sort keys %{$input}) {
      my($value, $type, $dtext) = @{$$input{$name}};
      if($type ne 'submit') {
         $value =~ s/\\/\\/g;
         $value =~ s/'/\'/g;
         $formcli .= " --form '$name=$value'";
      }
   };
   my $href = resolve_relative_href($$form_hash{'action'}, $old_url);
   return ($href, $formcli);
}

sub submit_by_name
{
   my $inputref = get_inputref_by_name(@_); 
   submit_by_inputref($inputref, @_);
}

sub submit_by_pretext
{
   my $inputref = get_inputref_by_pretex(@_); 
   submit_by_inputref($inputref, @_);
}

1;

