#!/usr/bin/perl
# Donata Kirchner, 30-Sep-2010

use strict;
use Getopt::Long;

my ($prosa_file, $procheck_file, $proq_file, $whatcheck_file, $verify_file, $molprobity_file,
    $pdb_validation_file, $rpf_value, $outfile);
my $pdb_error_file;

GetOptions("prosa=s"      => \$prosa_file,
           "procheck=s"   => \$procheck_file,
           "proq=s"       => \$proq_file,
           "what=s"       => \$whatcheck_file,
           "verify=s"     => \$verify_file,
           "out=s"        => \$outfile,
           "molprobity=s" => \$molprobity_file,
           "pdb=s"        => \$pdb_validation_file,
           "rpf=s"        => \$rpf_value,
          );

if (! $outfile) {
   print STDERR<<EOF;

   Usage: validation_output.pl -out outfile [-prosa prosa_file] [-procheck procheck_file] \
                               [-what whatcheck_file] [-verify verify_file] [-molprobity molprobity_file]\
                               [-pdb pdb_validation_file,pdb_error_file] [-rpf rpf_value] [-proq proq_file]

EOF
   exit(1);
}

sub write_header {

   my $date  = shift;

   print <<EOF;
***************************************************
*** Validation Overview File generated by CYANA ***
***************************************************

$date

EOF
} # end sub write_header


sub write_error {
   my $file = shift;

   print "  There was an error during the calculation.\n";
   print "  For details please see $file.\n";
}


sub get_prosa_data {

   my $input_file = shift;
   my $outfile    = shift;
   my $next;

   open OUT, ">>$outfile" or die "Could not open $outfile for writing: $!";
   select OUT;
   print "*** ProSa2003 ($input_file) ***\n";
   open IN, "<$input_file" or die "Could not open $input_file for reading: $!";
   while (my $line = <IN>) {
      if ($line =~ /^molecule/ && $next eq undef) {
         $next = 1;
         print "  $line";
      }
      elsif ($next == 1) {
         print "  $line";
         last;
      }
   }
   print "\n";
   close IN;
   close OUT;
   select STDOUT;
} # end sub get_prosa_data


sub get_proq_data {

   my $input_file = shift;
   my $outfile    = shift;
   my @values;

   open OUT, ">>$outfile" or die "Could not open $outfile for writing: $!";
   select OUT;
   print "*** ProQ ($input_file) ***\n";
   open IN, "<$input_file" or die "Could not open $input_file for reading: $!";
   while (<IN>) {
      if (/LGscore:\s+(\d+\.\d+)/) {
         print "  $_";
         push @values, $1;
      }
      elsif (/MaxSub:\s+(\d+\.\d+)/) {
         print "  $_";
         push @values, $1;
      }
   }

   if (@values == ()) {
      &write_error($input_file);
   }
   print "\n";
   close IN;
   close OUT;
   select STDOUT;
} # end sub get_proq_data


sub make_verify_plot {

   my $input_file = shift;
   my $data_file  = "tmp.dat";
   my $script     = "tmp.plt";

   if (open OUT, ">$data_file") {
      open IN, "<$input_file" or warn "Cannot open $input_file for reading: $!";
      while (<IN>) {
         if (/^\s+[A-Z]\s+(\d+\s+\d+\.\d+)\s*$/) {
            print OUT "$1\n";;
         }
      }
      close IN;
      close OUT;
   }
   else {
      warn "Cannot open $data_file for writing: $!";
      return;
   }

   # we now want to plot the data using GNUplot
   if (open SCPT, ">$script") {
      print SCPT<<EOF;
      set style data linespoints
      set xlabel 'Residue Index'
      set ylabel 'Average Score (window size: 21)'
      set terminal postscript
      set output 'verify.ps'
      plot '$data_file' notitle
EOF
      close SCPT;

      # plot creation or failure
      if (! system "gnuplot $script") {  # returns '0' if successful - will be a failure to Perl => use '!'
         unlink($script, $data_file);
      }
      else {
         warn "GNUPLOT was not found on your system!\n";
         warn "The data for creating the Verify3D plot can be found in $data_file.\n";
         unlink $script;
      }
   }
} # end sub make_verify_plot


sub get_verify_data {

   my $input_file = shift;
   my $outfile    = shift;

   open OUT, ">>$outfile" or die "Could not open $outfile for writing: $!";
   select OUT;

   print "*** Verify3D ($input_file) ***\n";
   unless (-e $input_file) {
      &write_error("verify.log");
      print "\n";
      return;
   }

   open IN, "<$input_file" or die "Could not open $input_file for reading: $!";
   while (<IN>) {
      if (/Quality:\s+(\d+\.\d+)/) {
         print "  $_";
         last;
      }
   }
   close IN;
   print "\n";
   close OUT;
   select STDOUT;

   &make_verify_plot($input_file);
} # end sub get_verify_data


sub get_molprobity_data {

   my $input_file = shift;
   my $outfile    = shift;
   my @files      = split(/,/, $input_file); # we provided two files by two scripts

   open OUT, ">>$outfile" or die "Cannot write to $outfile: $!";
   select OUT;
   print "*** MolProbity ***\n";
   print "   For output see $files[0].\n";
   print "   For Ramachandran plots see $files[1].\n\n";
   select STDOUT;
   close OUT;

} # end sub get_molprobity_data


# for use with .ps files that were output by ProcheckNMR
sub extract_Gfactors {

   my $input_file = shift;
   my %gfactors;
   my $prev_aa;

   if (open IN, "<$input_file") {
      while (<IN>) {
         if (/^\(([A-Z] [A-Z]{3} \d+)\s+\)$/) {
            $prev_aa = $1;
         }
         elsif (/^\(Gf +(-?\d+\.\d+)\)$/) {
   #         unless (defined $gfactors{$prev_aa}) {
               $gfactors{$prev_aa} = $1;
   #         }
         }
      }
      close IN;
   }
   else {
      warn "Could not read from $input_file: $!";
      return;
   }

   return \%gfactors;
} # end sub extract_Gfactors


# for sorting purposes
sub by_index {
   my $pattern = shift;
   if ($pattern =~ m/[A-Z] [A-Z]{3}\s+(\d+)/) {
      return $1;
   }
   else {
      die "$pattern is not of the format [A-Z] [A-Z]{3}\\s+(\\d+)\n";
   }
}


sub get_procheck_data {
   my $input_file = shift;
   my $outfile    = shift;
   my @ps_files   = qw(tmp_04.ps tmp_06.ps);
   my @lines;
   my $hash_ref;

   open OUT, ">>$outfile" or die "Could not open $outfile for writing: $!";
   select OUT;
   print "*** ProcheckNMR ($input_file) ***\n";
   open IN, "<$input_file"  or die "Could not open $input_file for reading: $!";
   while (my $line = <IN>) {
      if ($line =~ /Ramachandran/ || $line =~ /Chi1-chi2/) {
         $line =~ s/[*|]//g;
         $line =~ s/^ //;
         print "  $line";
         push @lines, $line;
      }
   }
   close IN;

   if (@lines == ()) {
      &write_error($input_file);
   }
   else {
      my @Gfactor_refs; # 0: chi-Gfactors; 1: Ramachandran G-factors
      foreach my $x (0,1) {
         if (-e $ps_files[$x]) {
            $Gfactor_refs[$x] = &extract_Gfactors($ps_files[$x]);
         }
      }

      my @g_files    = ('chi_Gfactors.out', 'rama_Gfactors.out');

      foreach my $index (0,1) {
         open GFACT, ">$g_files[$index]" or die "Cannot open $g_files[$index] for writing: $!";
         foreach my $aa (sort { &by_index($a) <=> &by_index($b) } keys %{$Gfactor_refs[$index]}) {
            print GFACT "$aa: ${$Gfactor_refs[$index]}{$aa}\n";
         }
         close GFACT;
      }
   }

   print "\n";
   close OUT;
   select STDOUT;
} # end sub get_procheck_data


sub get_whatcheck_data {
   my $input_file = shift;
   my $outfile    = shift;
   my $ok;
   my $read_all;
   my @problems;
   my @scores;

   open IN, "<$input_file"  or die "Could not open $input_file for reading: $!";
   while (<IN>) {
      if (/^# \d+ # \w+: /) {
         if (/# ((?:Error|Warning): .+)/) {
           $ok = 1;
           push @problems, "  line $.: $1\n";
         }
         elsif (/# Note/) {
            $ok = 1;
         }
      }
      elsif (/^calculations\.$/) {
         $read_all = 1;
      }
      elsif ($read_all == 1 && ! /REFERENCES/) {
         push @scores, $_;
      }
      elsif (/REFERENCES/) {
         $read_all = 0;
      }
   }
   close IN;

   open OUT, ">>$outfile" or die "Could not open $outfile for writing: $!";
   select OUT;
   print "*** WhatCheck ($input_file) ***\n";
   unless (@problems == ()) {
      print "  -- Warnings and Errors --\n";
      print "  $_" for @problems;
   }

   unless (@scores == ()) {
      print "\n  -- Calculated Values --";
      print "  $_" for @scores;
   }

   if ($ok eq undef) {
      &write_error($input_file);
   }
   print "\n";
   close OUT;
   select STDOUT;
} # end sub get_whatcheck_data


sub get_pdb_validation_data {

   my $infile  = shift;
   my $outfile = shift;
   my $errorfile;

   if ($infile =~ m/^(.+\.\w+),(.+\.\w+)$/) {
      $infile    = $1;
      $errorfile = $2;
   }
   else {
      die "'<PDB validation file>,<PDB error file>' does not seem to have the correct format!\n";
   }

   open OUT, ">>$outfile" or die "Cannot write to $outfile: $!";
   print OUT "*** PDB Validation Suite ($infile) ***\n";

   # The error file has > 1 lines if there were any errors
   open ERR, "<$errorfile" or die "Cannot read from $errorfile: $!";
   while (<ERR>) {
      if ($. > 1) {
         &write_error($errorfile);
         close OUT;
         close ERR;
         #return; # in the case of errors having occurred there may still be proper output though!
      }
   }
   close ERR;

   local $/ = ""; # paragraph mode: not '\n' but '\n\n+' as record delimiter
   my @rms_deviation;

   open IN, "<$infile" or die "Cannot read from $infile: $!";
   while (<IN>) {
      if (/CLOSE CONTACTS/ .. /(:?^\s+none\s*$)|(?:^\s+Chain Atom    Res  Seq)/) {
         if (/^\s+Chain Atom    Res  Seq/) {
            print OUT "   - Suspiciously close contacts between atoms detected.\n\n";
         }
         elsif (/^\s+none\s*$/) {
            print OUT "   - No suspiciously close contacts between atoms detected.\n\n";
         }
      }
      elsif (/BOND DISTANCES AND ANGLES/ .. /MISSING ATOMS/) {
         if (/RMS deviation/) {
            my $record = $_;
            $record =~ s/\n(\w)/\n   $1/g;
            push @rms_deviation, $record;
         }
         elsif (/The following table contains a list of the (covalent bond.\w*)\s*.+RMSD/) {
            print OUT "   - There are $1 deviating by > 6*RMSD\n";
            print OUT "     from their dictionary values.\n\n";
         }
      }
      elsif (/The following residues have missing atoms:/ .. /TITLE/) {
         if (/RES MOD#C SEQ          ATOMS/) {
            print OUT "   - There are missing atoms.\n\n";
         }
      }
   }
   print OUT "   $_" for @rms_deviation;
   close IN;

} # end sub get_pdb_validation_data


sub print_rpf_value {
   my $rpf_value = shift;
   my $outfile   = shift;

   open OUT, ">>$outfile" or die "Cannot write to $outfile: $!";
   print OUT "*** RPF Web Server ***\n";

   if ($rpf_value =~ m/^\d+\.\d+$/) {
      print OUT "   RPF value: $rpf_value\n\n";
   }
   else {
      print OUT "   There was an error while retrieving the RPF value.\n\n";
   }

   close OUT;
}

my @input = ($prosa_file, $procheck_file, $proq_file, $whatcheck_file, $verify_file, $molprobity_file,
             $pdb_validation_file, $rpf_value);
my @subroutines = (\&get_prosa_data, \&get_procheck_data, \&get_proq_data, \&get_whatcheck_data,
                   \&get_verify_data, \&get_molprobity_data, \&get_pdb_validation_data, \&print_rpf_value);

my @date = localtime(time);
my @abbr = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
my $date = sprintf "%02u-%3s-%4u, %02u:%02u", $date[3], $abbr[$date[4]], $date[5]+1900, $date[2], $date[1];
open OUT, ">$outfile" or die "Cannot open file $outfile for writing: $!";
select OUT;
&write_header($date);
select STDOUT;
close OUT;

foreach my $x (0..$#input) {
   if ($input[$x] ne undef) {
      &{$subroutines[$x]}($input[$x],$outfile);
   }
}
