#!/usr/bin/env perl

# Searches for inputdata files

use Getopt::Long;

#-----------------------------------------------------------------------------------------------

# Invoked without any command-line arguments: show the usage text and stop.
unless (@ARGV) {
    usage();
    exit;
}

#-----------------------------------------------------------------------------------------------

# Print the usage text and terminate the program.
#
# The text is emitted with die(), so it goes to STDERR, the exit status is
# non-zero and this sub never returns to its caller.  The heredoc is
# non-interpolating ('EOF') so that "$CASEBUILD" appears literally in the
# help text instead of being expanded as an (undefined) Perl variable.
sub usage {
    die <<'EOF';
SYNOPSIS
    check_input_data [options]
OPTIONS
     -inputdata <inputdata> Set the inputdata directory.  Optional
                            inputdata defaults to the CSMDATA environment variable.
     -check                 Check whether data is available in "inputdata",
     -export                Export missing data into "inputdata",
     -prestage <prestage>   Prestage data from inputdata to prestage
     -datalistdir <dir>     Directory which will be searched for .input_data_list files
                            Default is $CASEBUILD.  Optional.
     -help [or -h]          Print usage to STDOUT.  Optional.
SUMMARY

   This utility checks, exports and prestages necessary input data for CCSM.

   The utility first searches for .input_data_list files in the
   datalistdir directory, then prints their locations and passes them
   to the caseroot Tools/get-input-data utility via the -flist option.

   Default directory to search for .input_data_list files is
   $CASEBUILD

EOF
}

# Process command-line options.

# Option table.  Boolean switches start out off; the directory-valued
# options start out undefined, except -prestage, which starts at 0.
my %opts = (
    inputdata   => undef,
    check       => 0,
    export      => 0,
    prestage    => 0,
    datalistdir => undef,
    help        => 0,
);

# Parse @ARGV into %opts; a parse failure ends in the usage message.
my $parsed_ok = GetOptions(
    "inputdata=s"   => \$opts{inputdata},
    "check"         => \$opts{check},
    "export"        => \$opts{export},
    "prestage=s"    => \$opts{prestage},
    "datalistdir=s" => \$opts{datalistdir},
    "h|help"        => \$opts{help},
);
usage() unless $parsed_ok;

# An explicit request for help also ends in the usage message.
if ($opts{help}) {
    usage();
}

# Whatever GetOptions left behind in @ARGV was not understood.
if (scalar(@ARGV) > 0) {
    print "ERROR: unrecognized arguments: @ARGV\n";
    usage();
}

# Base name of this script, used to prefix the error messages below.
(my $ProgName = $0) =~ s{.*/}{};

# Reduce the parsed options to plain 0/1 action flags.  -prestage takes a
# directory argument, so it counts as "on" whenever a true value was given.
my $checkopt    = $opts{'check'}    ? 1 : 0;
my $exportopt   = $opts{'export'}   ? 1 : 0;
my $prestageopt = $opts{'prestage'} ? 1 : 0;

##print "opts $checkopt $exportopt $prestageopt \n";

# At least one of -check, -export or -prestage must be requested.
if (!$checkopt && !$exportopt && !$prestageopt) {
    usage();
    exit;
}

# Directory searched for .input_data_list files: the -datalistdir argument,
# else the CASEBUILD environment variable, else ./Buildconf.
if (!defined($opts{'datalistdir'})) {
    $opts{'datalistdir'} = defined($ENV{'CASEBUILD'}) ? $ENV{'CASEBUILD'} : "./Buildconf";
}

##print "datalistdir $opts{'datalistdir'}\n";

# Check that input directory exists.
(-d $opts{'datalistdir'})
    or die "** $ProgName - Cannot find input directory: \"$opts{'datalistdir'}\" **\n";

# Check that the CCSM inputdata root directory has been specified, either
# by the -inputdata argument or by the CSMDATA environment variable.
my $inputdata_rootdir = undef;
if (defined($opts{'inputdata'})) {
    $inputdata_rootdir = $opts{'inputdata'};
}
elsif (defined $ENV{'CSMDATA'}) {
    $inputdata_rootdir = $ENV{'CSMDATA'};
}
else {
    die "$ProgName - ERROR: CCSM inputdata root directory must be specified by either the -inputdata argument\n" .
        " or by the CSMDATA environment variable. :";
}

# The inputdata root directory must be local or nfs mounted.
(-d $inputdata_rootdir)
    or die "** $ProgName - CCSM inputdata root is not a directory: \"$inputdata_rootdir\" **\n";

# Check prestage dir.  It is only set (and only has to exist) when
# prestaging was requested; otherwise it stays undefined.
my $prestage_rootdir;
if ($prestageopt) {
    $prestage_rootdir = $opts{'prestage'};
    (-d $prestage_rootdir)
        or die "** $ProgName - CCSM prestage dir is not a directory: \"$prestage_rootdir\" **\n";
}

# Find .input_data_list files below the data list directory.  Each element
# is one line of `find` output and still carries its trailing newline.
my @filelocations = `find "$opts{'datalistdir'}" -name '*.input_data_list'`;

print "Input Data List Files Found:\n";
print @filelocations;

# Prestaging onto the inputdata root itself would copy files onto
# themselves, so switch it off in that case.
if ($prestageopt && $prestage_rootdir eq $inputdata_rootdir) {
    print "Prestaging turned off; inputdata_rootdir eq $prestage_rootdir, $inputdata_rootdir,  $prestage_rootdir\n";
    $prestageopt = 0;
}

# Subversion repository location.
my $svn_loc = 'https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/inputdata/';

# If export option is set, test for svn install, and server
if ($exportopt) {
    # No output from `svn --version` means the client could not be run.
    my $svn_version = `svn --version 2> /dev/null`;
    if (!defined($svn_version) || $svn_version eq '') {
        die "Error: the subversion client svn was not found.\n";
    }

    # Probe the server.  A failure is reported but is not fatal, so files
    # that are already present locally can still be checked and prestaged.
    system("svn list $svn_loc > /dev/null");
    if ($? != 0) {
        warn "Warning: could not list the subversion repository $svn_loc\n";
    }
}

# Make both roots end in "/" so relative file names can simply be appended.
$inputdata_rootdir .= "/";
$prestage_rootdir .= "/";

# Expanded form of DIN_LOC_ROOT as it may appear in the list files; a
# placeholder that is not expected to match anything is used when unset.
my $dinlocroot = defined($ENV{'DIN_LOC_ROOT'})
    ? $ENV{'DIN_LOC_ROOT'} . "/"
    : "long_undefined_string_2_ignore";

# Matches a list-file line of the form "<name> = <root><relative path>",
# where <root> is the expanded DIN_LOC_ROOT, the literal text
# "$DIN_LOC_ROOT/" or "DIN_LOC_ROOT/", or the inputdata root.  The two
# directory values are wrapped in \Q...\E so that regex metacharacters in
# the paths are matched literally.  Capture 1 is the relative path.
my $input_entry_re =
    qr/.* = (?:\Q$dinlocroot\E|\$DIN_LOC_ROOT\/|DIN_LOC_ROOT\/|\Q$inputdata_rootdir\E)+(\S*)/;

# Walk every list file found and check / export / prestage each entry.
foreach my $location (@filelocations) {
    my $flist = $location;
    chomp($flist);    # strip only the newline left by `find`, never a real character

    # Open input file.
    open(my $list_fh, '<', $flist)
        or die "** $ProgName - Cannot open input file: \"$flist\" **\n";

    # Search for filenames in file. Note that all strings following " = " are assumed to be filenames.
    while (my $line = <$list_fh>) {
        my $fileneed = $line;
        my $filend;          # path relative to the inputdata root
        my $isinput = 0;     # true when the entry lives under the inputdata root

        # If the line contains a "#", keep only the non-blank text directly
        # in front of it.
        if ($fileneed =~ m/(\S*)\#.*/) {
            $fileneed = $1;
        }
        # Keep only the value that follows the last " = ".
        if ($fileneed =~ m/.* = (\S*)/) {
            $fileneed = $1;
        }
        # Entries under the inputdata root are rebased onto the local root.
        # This test deliberately looks at the unmodified line.
        if ($line =~ $input_entry_re) {
            $filend   = $1;
            $fileneed = $inputdata_rootdir . $filend;
            $isinput  = 1;
        }

        if ($isinput) {
            # -check: report input files that are not present locally.
            if ($checkopt && $fileneed =~ /\w/ && !-e $fileneed) {
                print "File is missing: $fileneed \n";
            }

            # -export: fetch missing files from the subversion repository.
            if ($exportopt && !-e $fileneed) {
                my $fileloc = $svn_loc . $filend;    # Location in subversion repository.
                if (isinrepository($fileloc)) {
                    dataexport($fileneed, $fileloc);
                }
                else {
                    print "File was not found in svn repo: $fileloc \n";
                }
            }

            # -prestage: copy the file (or the files inside a directory)
            # to the same relative location under the prestage root.
            if ($prestageopt) {
                my $filecopy = $prestage_rootdir . $filend;
                my $dirname  = $filecopy;
                if (-d $fileneed) {
                    system("umask 2; mkdir -p -m 775 $dirname");
                    system("cp $fileneed/* $filecopy/ 2>/dev/null");
                    print "Dir  copied to: $filecopy \n";
                }
                elsif ((-e $fileneed) && (!-e $filecopy)) {
                    $dirname =~ s!/[^/]+$!!;    # containing directory of the copy
                    system("umask 2; mkdir -p -m 775 $dirname");
                    system("cp $fileneed $filecopy");
                    print "File copied to: $filecopy \n";
                }
            }
        }
        elsif ($checkopt && $fileneed =~ /\w/ && !-e $fileneed) {
            # Not under the inputdata root: existence can only be reported.
            print "File status unknown: $fileneed \n";
        }
    }

    close($list_fh);
}

#-----------------------------------------------------------------------------------------------

# Check for existence of a file in the subversion repository, returns 1 if so.
# Ask the subversion client to list the given repository location.
# Returns 1 when `svn ls` produced a true (non-empty) listing; otherwise
# the false listing value itself is returned.
sub isinrepository($) {
    my $listing = `svn ls $_[0]`;
    return $listing ? 1 : $listing;
}

# Checks a link in the subversion repository and returns the target location of that link.
# Resolve a link stored in the subversion repository.
#   $_[0]  repository location of the link file
# The file is read with `svn cat`; its content is expected to contain
# "link " followed by a path relative to the link's own directory.
# Returns that directory joined with the relative path, with
# "<dir>/../" sequences collapsed, or 0 when the content has no "link ".
sub linktarget($) {
    my $location = $_[0];
    my $contents = `svn cat $location`;

    # Drop the "link " marker; without one this is not a link.
    unless ($contents =~ s/link //) {
        return 0;
    }

    # Cut the last path component off the location, keeping a trailing "/".
    my $resolved = $location;
    $resolved =~ s!(.*)/+.*?$!$1/!;

    # Append the relative target, then fold away up-directory (..) steps
    # one at a time until none is left.
    $resolved = $resolved . $contents;
    while ($resolved =~ s!/+[^/]*?/+\.\./!/!) {
        # keep substituting
    }
    return $resolved;
}

# Exports a file from the subversion repository, creating any necessary files along the way.
# Exports a file from the subversion repository, creating any necessary
# directories along the way.
#   $_[0]  local destination path
#   $_[1]  location of the file or directory in the repository
# Prints "export <url> <dest> ..... " before starting and "success" once
# the destination exists.  An exported regular file is made read-only and
# its cksum output is appended to ".checksums" in the same directory.
sub dataexport($$) {
    my ($dest, $url) = @_;

    # Split the destination into containing directory and base name.  The
    # captures are copied into lexicals right away; a destination without
    # any "/" is taken to live in the current directory.
    my ($dir, $base) = $dest =~ m!(.*)/(.*)!;
    ($dir, $base) = ('.', $dest) unless defined $dir;

    print "export $url $dest ..... ";
    my $listing = `svn ls $url`;

    # Check if input is a file or directory (links treated as files): the
    # listing of a file starts with its own name.  \Q...\E keeps regex
    # metacharacters in the name (".", "+", ...) literal.
    if ($base ne '' and $listing =~ /^\Q$base\E\b/) {
        unless (-e $dir) {    # Make containing directory unless it already exists.
            system("umask 2; mkdir -p -m 775 $dir");
        }
        system("svn export $url $dest 1>/dev/null");                  # Export file.
    }
    else {
        system("svn export --depth=files $url $dest 1>/dev/null");   # Export dir.
    }

    if (-f $dest) {                                  # Store checksum.
        my $checksumfile = $dir . '/.checksums';     # Checksums saved here.
        chmod 0555, $dest;                           # Don't allow writing to file.
        system("cksum $dest >>$checksumfile");
        chmod 0775, $checksumfile;
    }
    print "success\n" if (-e $dest);
}
