#!/usr/bin/perl -w
use strict;

#  Copyright (c) Tropos Networks Inc. 2005
#  Copyright (c) Frederick Dean 2002, 2004

# This file is released under the GNU GPL, version 2 or later.
#  http://www.gnu.org/copyleft/gpl.html

#  v1.8 by Rick Dean <rick.dean@tropos.com>
#  This script exports a clearcase snapshot view as a CVS repository.

#  
#  Limitations:
#    * Only files in current view get exported, but exported
#      for all revisions and on all branches.
#    * Old RCS data is lost, as each RCS file is completely
#      overwritten instead of just committed to.
#    * Deleted files are not placed in the Attic, and not
#      marked as deleted correctly (i.e. checking out from a
#      snapshot view of a branch for a file deleted on the mainline).
#    * You must specify which label each branch comes from.
#      ClearCase hides it in the config spec which is not 
#      in the VOB to impede the reproducibility of builds, and 
#      encourage errors.  The ordering of this branchinfo
#      file is important to preserve branch numbers if desired.
#    * branchinfo file only allows one label per branch.
#    * Clearcase is slow.
#

#  This is a useful book for understanding CVS.
#  http://www.red-bean.com/cvs2cl/

use Getopt::Long;
my($verbose,$update) = (0);
my $branchinfo = "";
my $rawlabels;
GetOptions (
            'bi=s'   => \$branchinfo,
            'branchinfo=s'   => \$branchinfo,
            'update'   => \$update,
            'rawlabels'   => \$rawlabels,  # These four are the same
            'rawlabel'   => \$rawlabels,
            'raw-labels'   => \$rawlabels,
            'raw-label'   => \$rawlabels,
            'verbose'   => sub { $verbose++ },
            'quiet'   => sub { $verbose = 0 },
           ) or die_about_usage();

my $temp = "/tmp/cc2cvs$$/";
print "Making directory $temp\n" if $verbose;
mkdir($temp) or die "cannot create temp dir $temp $!\n";

my(%branchpoints);  # obligatory branches from branchinfo file
my(%mandatory_branches);  # (essentially) reverse of %branchpoints
my(%branchwarn_missing, %branchwarn_ugly, %branchwarn_nolabel);  # for warning about branchinfo omissions

# Revisions on the MAIN trunk (one period) go first with 1.1 last.
# Revisions not on the MAIN trunk go next in reverse order.
sub deltacmp
{
   my $r1 = $a; 
   my $r2 = $b;
   my $r1Trunk = ($r1 =~ /^[^\.]+\.[^\.]+$/);
   my $r2Trunk = ($r2 =~ /^[^\.]+\.[^\.]+$/);
   my $sign = $r1Trunk ? -1 : 1;
   return -1 if($r1Trunk && !$r2Trunk);
   return 1 if(!$r1Trunk && $r2Trunk);
   my @R1 = split(/\./,$r1);
   my @R2 = split(/\./,$r2);
   while(my $e1 = shift(@R1)) {
      my $e2 = shift(@R2);
      return $sign if !defined($e2);
      my $x = ($e1 <=> $e2);
      return ($sign * $x) if $x;
   }
   return -$sign if defined(shift(@R2));
   return 0;
}

# sort 2 elements or shorter in forward direction, longer ones backward
# The order of branches from the same branch point doesn't matter.
sub deltatextcmp
{
   my @R1 = split(/\./,$a);
   my @R2 = split(/\./,$b);
   my $length = 0;
   while(1) {
      $length++;
      my $e1 = shift(@R1);
      my $e2 = shift(@R2);
      return -1 if !defined($e1);
      return 1 if !defined($e2);
      my $x = ($e1 <=> $e2);
      return ($length>3 ?$x:-$x) if $x;
   }
}

# returns file modify time in clearcase numeric format
sub mtime {
   return 0 if ! -e $_[0];
   my $offset = $_[1] || 0;
   my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
       $atime,$mtime,$ctime,$blksize,$blocks) = stat($_[0]);
   my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime($mtime + $offset);
   return ($year + 1900) . sprintf(".%02d.%02d.%02d.%02d.%02d",$mon+1,$mday,$hour,$min,$sec);
}

my $separator = "--124s324K4T2:2L31wv--";  # hopefully never match a commit comment

sub write_escaped_open
{
   my($out_fd, $exec) = @_;
   open(my $fd,$exec) or do { die "could not open $exec"; };
   binmode($fd);
   my $text;
   while(read($fd, $text, 16*1024)) {
      $text =~ s/@/\@\@/g;  # escape at signs
      print $out_fd $text;
   };
}

#  ccfile is a filename that can be fed to clearcase
#  rcs file is a filename ending with ,v
sub do_versions_of_file
{
   my($ccfile,$rcsfile) = @_;
   print("do_versions_of_file( ccfile=$ccfile rcsfile=$rcsfile )\n") if $verbose;
   my $rcs_mtime = '';
   # fudge this by a day to fix timezones, and a race condition (commits while script runs).
   $rcs_mtime = mtime($rcsfile, -24*3600) if $update;
   # first load the history
   my %revs;
   (my $ccfile_esc = $ccfile) =~ s/'/'\\''/g;  # escape single quotes
   print "cleartool lshist '$ccfile_esc'\n" if $verbose;
   open(my $fd,"cleartool lshist -fmt \"name %n\nwhen %Nd\nwho %u\nevent %e\npred %PSn\nlabels %Nl\ncomment %c\n$separator\n\" '$ccfile_esc'|") or 
      die "cannot open cleartool for lshist";
   while(! eof($fd)) {  # for each version
      my $ccname = <$fd>;
      $ccname =~ s/name (.*)\n$/$1/ or do { warn "Syntax error for name line $.\n" ; return; };
      my $when = <$fd>;
      $when =~ /when (\d\d\d\d)(\d\d)(\d\d)\.(\d\d)(\d\d)(\d\d)$/ or do { warn "Syntax error for date line $.\n" ; return; };
      $when = "$1.$2.$3.$4.$5.$6"; # 2005.05.10.08.44.29
      if($rcs_mtime && $rcs_mtime gt $when) {  # skip because nothing new to update
          warn "Nothing to update ($rcs_mtime > $when)\n" if $verbose;
          return;
      }
      $rcs_mtime = '';
      my $who = <$fd>;
      $who =~ s/who (.*)\n$/$1/ or do { warn "Syntax error for who line $.\n" ; return; };
      my $event = <$fd>;
      $event =~ s/event (.*)\n$/$1/ or do { warn "Syntax error for event line $.\n" ; return; };
      my $pred = <$fd>;
      $pred =~ s/pred (.*)\n$/$1/ or do { warn "Syntax error for pred line $.\n" ; return; };
      my $labels = <$fd>;
      $labels =~ s/labels (.*)\n$/$1/ or do { warn "Syntax error for labels line $.\n" ; return; };
      my $cmt = <$fd>;
      $cmt =~ s/comment (.*)$/$1/ or do { warn "Syntax error for comment line $.\n" ; return; };
      while(<$fd>) {
          last if $_ eq "$separator\n";
          $cmt .= $_;
      }
      $cmt =~ s/\s+$/\n/s;  #  collapse trailing white space
      my $state = 'Exp'; 
      if(substr($event, 0, 15) eq "destroy version") {  # if deleted version
         # This piece of shit version control tool doesn't give the full name of this deleted revision
         if($cmt !~ /Destroyed version "(.+)"/i) { 
             warn "Destroyed version of $ccname had unfamiliar comment.";
             next;
         };
         $ccname = "\@\@$1";
         $state = "dead";
      }
      next if $ccname !~ /@@(.*)\/(\d+)$/; # if version name does not end "slash number" (essentially same as only event create version)
      my($branchname,$endnum) = ($1,$2 + 1);
      my $name = "$1/$2";
      print "name=$name when=$when who=$who pred=$pred branchname=$branchname endnum=$endnum\n" if $verbose > 1;
      print join("", map("  cmt=$_",split(/^/,$cmt))) if $verbose > 1;
      $revs{$name} = [$when, $who, $cmt, $pred, $labels, $branchname, $endnum, $state];
   }
   return if $. < 1;   # give up if lshist had empty output, clearcase would have printed the error
   if(0 == scalar keys %revs) {
       warn "no versions for $ccfile\n";
       return;
   }
   # assign branch numbers and version numbers 
   my %num2ccname; 
   my %branch_name2num = ( "/main" => 1 );
   my %branches_from_here = ( );  # a simple count
   my %short_branchnames;  # to avoid same name labels
   # sort chronologically, oldest one first, but
   # sometimes we have ties and need the trunk-closest branch first 
   foreach my $name (sort { ${$revs{$a}}[0] cmp ${$revs{$b}}[0] || 
                            length ${$revs{$a}}[3] <=> length ${$revs{$b}}[3] } keys %revs) {
      my($when, undef, undef, $pred, $labels, $branchname, $endnum) = @{$revs{$name}};
      if(! $branch_name2num{$branchname}) {  # if we need a branch for ourself
         if(! $revs{$pred}) {
             warn "could not find predicessor $pred for $name\n";
             next;
         }
         my(undef, undef, undef, undef, undef, undef, $pred_endnum, undef, $pred_branchnum) = @{$revs{$pred}};
         defined $pred_branchnum or do { warn "$ccfile could not find pred $pred of $name\n"; return };
         my $prednum = $pred_branchnum .'.'. $pred_endnum;
         $branches_from_here{$prednum}++;
         my $newnum = $prednum .'.'. ($branches_from_here{$prednum}*2);
         (my $bnshort = $branchname) =~ s/.*\///;  # delete path
         if($short_branchnames{$bnshort}) { #if the label specified in branchinfo already created this branch from an incompatible place.
            foreach (keys %branch_name2num) {   # look for the old branch
               delete $branch_name2num{$_} if substr($_,- (1 + length $bnshort)) eq "/$bnshort"; 
            }
            warn "warning branchinfo file had probably wrong label for $bnshort of $ccfile\n" if $verbose;
            $branchwarn_ugly{$bnshort}++;
         } elsif($mandatory_branches{$bnshort}) {
            warn "warning branchinfo label for $bnshort not found or late of $ccfile\n" if $verbose;
            $branchwarn_nolabel{$bnshort}++;
         } else {
            warn "warning branchinfo file did not have branch $bnshort of $ccfile\n" if $verbose;
            $branchwarn_missing{$bnshort}++;
         }
         $branch_name2num{$branchname} = $newnum;
         $short_branchnames{$bnshort} = $newnum;  # to avoid same name labels
      }
      my $branchnum = $branch_name2num{$branchname};
      print "name=$name when=$when pred=$pred branchname=$branchname --> $branchnum.$endnum\n" if $verbose > 2;
      my $num = "$branchnum.$endnum";
      $num2ccname{$num} = $name;
      push(@{$revs{$name}}, $branchnum);  # save branchnum in revs hash
      # check for required branches from ourself
      my @required_branches;
      foreach my $label (split(/\s+/,$labels)) {  # for each label of this revision
         if(my $branchpoint = $branchpoints{$label}) { # does branchinfo demand a branch from this label?
            if(! $short_branchnames{substr($branchpoint,5)}) {  # and we don't already have a branch defined by a revision
               push(@required_branches,$branchpoint); # save for sorting
            }
         }
      };
      foreach (sort @required_branches) {  # of this revision
         my $sproutname = substr($_,5);  # strip line num from branchinfo file 
         $branches_from_here{$num}++;
         my $newnum = $num .'.'. ($branches_from_here{$num}*2);
         $branch_name2num{"$branchname/$sproutname"} = $newnum;
         $short_branchnames{$sproutname} = $num;  # to avoid same name labels
      };
   };
   if($verbose > 2) {
      foreach (sort keys %branch_name2num) { print "\$branch_name2num{$_} = $branch_name2num{$_}\n";};
      foreach (sort keys %num2ccname) { print "\$num2ccname{$_} = $num2ccname{$_}\n";};
   }
   # start writing the RCS file
   if(!open(RCSFILE,">$rcsfile")) {
      warn("Could not create file $rcsfile\n");
      return;
   }
   binmode(RCSFILE);
   my @ordered = sort deltacmp keys %num2ccname;
   print(RCSFILE "head $ordered[0];\naccess;\nsymbols");
   foreach my $num (@ordered) {
      my $name  = $num2ccname{$num};
      my(undef, undef, undef, undef, $labels, undef, undef, undef) = @{$revs{$name}};
      foreach (sort split(/\s+/,$labels)) {
         next if $short_branchnames{$_};  # if label matches branch name, branch name wins.
         $_ =~ tr/\$\,\.\:\;\@\//_/ if ! $rawlabels;   # illegals  $,.:;@/
         $_ =~ s/^(\d)/_$1/ if ! $rawlabels;   # prefix with underscore if label start with digit
         print(RCSFILE "\n\t$_:$num") if length $_;
      };
   };
   foreach my $bname (sort keys %branch_name2num) {
      next if $bname eq "/main";
      my $bnum = $branch_name2num{$bname};
      $bnum =~ s/(\d+)$/0.$1/;  #   convert X.Y.Z into X.Y.0.Z
      $bname =~ s/.*\///;  # remove path
      $bname =~ tr/\$\,\.\:\;\@\//_/ if ! $rawlabels;   # illegals  $,.:;@/
      $bname =~ s/^(\d)/_$1/ if ! $rawlabels;   # prefix with underscore if label start with digit
      print(RCSFILE "\n\t$bname:$bnum");
   };
   print(RCSFILE ";\nlocks; strict;\ncomment \@created by cc2cvs.pl on ". scalar(localtime) ."\@;\n" );
   print(RCSFILE "expand \@b\@;\n");
   print(RCSFILE "\n\n");

   foreach my $idx (0 .. $#ordered) {
      my $num = $ordered[$idx];
      my $next = $num;
      $next =~ s/^(\d+)\.(\d+)$/ "$1.". ($2-1)/e or  # decrement last digit (on main trunk)
          $next =~ s/\.(\d+)$/ '.'. ($1+1)/e;  # increment last digit on branch
      $next = '' if ! $num2ccname{$next};
      my $name  = $num2ccname{$num};
      my($when, $who, undef, $pred, $labels, $branchname, $endnum, $state, $branchnum) = @{$revs{$name}};
      my $branches = '';
      for(my $foo = 2; $foo <= ($branches_from_here{$num} || 0) * 2; $foo += 2) {
         # should we indicate empty branches?
         $branches .= "\n\t$num.$foo.1" if $num2ccname{"$num.$foo.1"};
      }
      if($state ne 'dead') {
         print "cleartool get -to $temp/$num '$ccfile_esc\@\@$name'\n" if $verbose;
         system("cleartool get -to $temp/$num '$ccfile_esc\@\@$name'");
      }
      if(! -e "$temp/$num") {  # if cleartool did not create a file
         open(my $fd, ">$temp/$num");  # create empty file
      }
      my $md5 = `md5sum $temp/$num`;
      $md5 =~ s/\s.*//s;  # delete first whitespace and thereafter
      print "$num -- $name -- branchnum=$branchnum -- next=$next -- $when -- $who -- pred=$pred\n" if $verbose > 2;
      $who =~ s/\W/_/g; # replace non-alpha-numerics with underscores
      print(RCSFILE "$num\ndate\t$when;\tauthor $who;\tstate $state;\nbranches\t"); 
      print(RCSFILE "$branches;\nnext\t$next;\tmd5 $md5;\n");
      #print(RCSFILE "\tclearase $name;\n\n");
      print(RCSFILE "\n");
   };
   print(RCSFILE "desc\n\@\@\n\n");
   @ordered = sort deltatextcmp @ordered;
   foreach my $num (@ordered) {
      my $name  = $num2ccname{$num};
      my(undef, undef, $cmt, $pred, undef, $branchname, $endnum, undef, $branchnum) = @{$revs{$name}};
      if($endnum == 1 && 1 >= length $cmt) {
          $branchname =~ s/.*\///; # delete path
          $cmt = "branch point for $branchname";
      }
      $cmt =~ s/@/\@\@/g;
      print(RCSFILE "$num\nlog\n\@$cmt\@\ntext\n\@");
      if($branchnum !~ /\./) {  # if main trunk
         my $diffnum = "$branchnum." . ($endnum+1);
         if(-e "$temp/$diffnum") {
            write_escaped_open(\*RCSFILE, "diff -a -n $temp/$diffnum $temp/$num|");
         } else {
            write_escaped_open(\*RCSFILE, "<$temp/$num");
         };
      } else {  # else not main trunk
         my $diffnum = "$branchnum." . ($endnum-1);
         if(! -e "$temp/$diffnum") {
            $diffnum =~ s/\.\d+\.\d+$//;  # remove last two numbers
         };
         write_escaped_open(\*RCSFILE, "diff -a -n $temp/$diffnum $temp/$num|");
      }
      print(RCSFILE "\@\n\n\n");
   };
   close(RCSFILE);
   foreach (@ordered) { unlink("$temp/$_") or warn "could not delete $temp/$_ of $ccfile\n" };
   # Preserve execute file permission bits. Make world read+write.
   my $mode = 0666 | ((stat($ccfile))[2] & 0111);
   chmod $mode, $rcsfile;
}

# basically we just recurse down the directory tree
sub do_dirent
{
   my($dirent,$dest) = @_;
   # This file is trouble
   if($dirent =~ /\/,v$/) {  #   if filename is just ",v"
      warn "Sheepishly skiping file called \",v\"\n" if $verbose;
      return;
   } 
   # Yes, I have seen people commit CVS control files to Clearcase.
   if($dirent =~ /\/CVS\/?$/ || $dirent =~ /^CVS\/?$/) {
      warn "Sheepishly skiping export of $dirent\n" if $verbose;
      return;
   } 
   if(-d $dirent) {
      my $rc = opendir(my $dh,$dirent);
      if(! $rc) {
         warn "Skipping $dirent, cannot open directory\n";
         return;
      };
      print("directory $dirent\n") if $verbose;
      if(! -e $dest && ! mkdir("$dest")) {
         warn "Could not create dir $dest\n";
         return;
      }
      foreach (readdir($dh)) {
         next if $_ eq ".";
         next if $_ eq "..";
         do_dirent("$dirent/$_", "$dest/$_");
      };
      close($dh);
   } elsif(-f $dirent) {
      $dest =~ s/,v$// and warn "Stripping \",v\" of $dirent\n";  # avoid double ",v" suffixes
      (my $attic = $dest) =~ s/^(.*\/)([^\/]+)/$1Attic\/$2,v/;  # determine attic location
      if(-e $attic) {  # if attic file exists
         print "Deleting $attic\n" if $verbose;
         unlink($attic) or warn "Unable to delete $attic\n";  
      }
      do_versions_of_file($dirent,"$dest,v");
   } else {
      warn "Skipping $dirent, non-file non-dir\n";
   }
}

sub die_about_usage
{
   die "usage: $0 [options] ccdir [destination]\n".
       "\n".
       "    You need to set \$CVSROOT unless dest is an absolute path.\n".
       "\n".
       "    --bi         short name for --branchinfo\n". 
       "    --branchinfo filename to define branch points (default ccdir/.branchinfo)\n". 
       "    --quiet      do not show clearcase commands\n".
       "    --raw-labels allow any character for labels and branchnames (for cvsweb)\n".
       "    --update     skip files that are older than their ,v file\n".
       "    --verbose    show clearcase commands run\n".
       "\n".
       "    The branchinfo file defines branch points for files that have no commits\n".
       "    on the branch. The file format is space-delimited, one branch per line.\n".
       "       \"branchname label\"\n".
       "    See script comments for more details.\n".
       "\n";
}
die_about_usage() if 0 == scalar @ARGV;

my $dirent = shift @ARGV;
my $dest = shift @ARGV;
if(! $dest) {
   ($dest = $dirent) =~ s/.*\/([^\/]+)\/*$/$1/;  # delete path
}

if($dest !~ /^\//) { # if dest path is absolute (starts with slash), ignore cvsroot
   my $cvsroot = $ENV{CVSROOT};
   defined $cvsroot or die "you need to set a CVSROOT env variable\n"; 
   -d $cvsroot or die "\$CVSROOT needs to point to an existing directory\non a local filesystem."; 
   $dest = "$cvsroot/$dest";
   $dest =~ s!/+!/!g;  # collapse repeated slashes
}
# load branchinfo
if($branchinfo && ! -e $branchinfo) {
   die "specified branchinfo file does not exist";
}
if(! $branchinfo) {
   my $dir = $dirent;
   $dir =~ s/\/+[^\/]+$// if -f $dirent;  # delete filename if regular file
   $dir = "." if -f $dir;  # is just a file in current directory (had no slash)
   $branchinfo = "$dir/.branchinfo";
}
print "looking for branchinfo at $branchinfo\n" if $verbose;
if(-e $branchinfo) {
    open(my $fd,"<$branchinfo") or die "cannot open $branchinfo $!";
    print "using branchinfo file of $branchinfo\n" if $verbose;
    while(<$fd>) {
       next if /^\s*#/;  # comment lines begin with hash
       next if /^\s*$/;  # skip blank lines
       s/^\s+//;  # strip leading white space
       s/\s+$//;  # strip trailing white space
       my($branch,$label,undef) = split(/\s+/,$_);
       $branch =~ s/.*\///; # delete path
       $branchpoints{$label} = sprintf("%05d$branch",$.);  # append line number because order matters
       $mandatory_branches{$branch} = $label;
    }
}

if(-d $dirent && ! -d $dest) {
   print "Creating dir $dest\n" if $verbose;
   mkdir($dest) or die "Could not make directory $dest $!\n";
}
do_dirent($dirent,$dest);

rmdir($temp) or system("rm -rf $temp") or warn "Cannot remove directory $temp\n";

foreach my $branch (sort keys %branchwarn_missing) {
   # We found a revision on a branch not listed in the branchinfo file.
   # This means that CVS checkouts on that branch are probably incomplete,
   # and the branch numbers for that branch could change making those
   # checkouts problematic.
   warn "warning branchinfo file did not have branch $branch ($branchwarn_missing{$branch} times)\n";
}
foreach my $branch (sort keys %branchwarn_ugly) {
   # The label was on a different branch than the actual branch sprouted from.
   # You probably specified the wrong label.
   warn "warning branchinfo file had probably wrong label for $branch ($branchwarn_ugly{$branch} times)\n";
}
foreach my $branch (sort keys %branchwarn_nolabel) {
   # We found a revision on a branch that is listed in the branchinfo file, but
   # the label was either missing in the file, or defined for after the revision
   # which should not happen.  You might have specified the wrong label.
   warn "warning branchinfo label for $branch not found or late ($branchwarn_nolabel{$branch} times)\n";
}

