#!/bin/sh

# Script to select peak entries from CYANA assignment files
# according to various criteria.

# Copyright (c) 2002-20 by Peter Guentert. All rights reserved.


# Default filter settings.  Each value is handed to the awk program below;
# a numeric flag of 0 (or an empty string) leaves that filter switched off.
ass=0            # -A: keep only assigned peaks
add=0            # -a: keep only additionally assigned peaks
comp=0           # -c: keep only compatible peaks
details=1        # cleared by -s: 1 = print full lines, 0 = first 72 columns only
enough=0         # -e: keep only peaks flagged "low quality"
viol=0.0         # -v/-V: violation threshold; filter is inactive while 0.0
incomp=0         # -i: keep only incompatible peaks
unass=0          # -u: keep only unassigned peaks
noass=0          # -U: keep only peaks without assignment possibilities
manual=0         # 1 after -m (manually assigned), 2 after -M (not manually assigned)
miss=0           # -R: keep only peaks whose reference assignment was not found
numonly=0        # -n: print only the summary, not the peak entries
rank=0           # -r: minimal rank of the manual assignment
frac=300.0       # -f: upper limit for field 10 of the reference assignment line
                 #     (the per-peak default is 200.0, so 300.0 lets every peak pass)
supmin=1000.0    # -q: upper quality limit; 1000.0 means "filter off"
supmax=0.0       # -Q: lower quality limit
atom=            # -b: atom name that must occur somewhere in the peak entry
keptatom=        # -k: atom name that must occur in a kept ("OK", "+") assignment
refatom=         # -l: atom name that must occur in the reference assignment
level=0          # -L: minimal |i-j| of the assigned peak
refass=          # -P: assignment file listing the peaks to select
spectrum=""      # -S: spectrum name the peak must come from
# -p: comma-separated peak numbers.  Initialised explicitly so that a
# variable called "num" inherited from the environment cannot switch the
# peak-number filter on by accident.
num=

# Option letters accepted on the command line.  A letter followed by ':'
# takes an argument; all others are plain flags.
options=aAb:cef:hik:l:L:mMnp:P:q:Q:r:RsS:uUvV:

# parse_options arg ...
#   Scans the leading option words of its arguments.  Flags may be
#   clustered (-an); an option argument is always the next word.
#   Sets:
#     <letter>  for every option seen: 1 for a flag, the argument otherwise
#     optind    1-based index of the first non-option argument
#     usage     1 if an unknown option or a missing argument was found,
#               empty otherwise
parse_options() {
  optind=1
  usage=""

  # Clear the per-option variables first, so that identically named
  # variables inherited from the environment (e.g. an exported "h" or "s")
  # are not mistaken for options given on the command line.
  for option in `echo $options | sed -e 's/://g' -e 's/./& /g'`; do
    eval $option=
  done

  # ${N} instead of $N: the shell reads "$10" as "${1}0", which broke
  # parsing as soon as ten or more words had to be scanned.
  eval par=\${$optind}
  while true; do
    case $par in
    --) optind=`expr $optind + 1`; break;;
    -?*) option=`expr "X$par" : 'X-\(.\)'`
         # Classify the option by literal matching.  Only letters are
         # accepted, so characters such as "." or ":" can neither match
         # by accident nor reach the eval below.
         case $option in
         [a-zA-Z])
           case $options in
           *"$option":*) kind=arg;;
           *"$option"*)  kind=flag;;
           *)            kind=unknown;;
           esac;;
         *) kind=unknown;;
         esac
         case $kind in
         unknown)
           echo "$0: -$option: unknown option"; usage=1;;
         arg)
           optind=`expr $optind + 1`
           if [ $optind -gt $# ]; then
             echo "$0: -$option: option requires an argument"; usage=1
           else
             eval $option=\${$optind}
           fi;;
         flag)
           eval $option=1;;
         esac
         # Drop the letter just handled; move on to the next word once
         # the cluster is used up.
         par=-`expr "X$par" : 'X-.\(.*\)'`
         if [ "$par" = "-" ]; then
           optind=`expr $optind + 1`
           eval par=\${$optind}
         fi
         ;;
    *) break;;
    esac
  done

  # After a missing option argument optind points past the end of the
  # argument list; clamp it so that the shift below cannot fail.
  if [ $optind -gt `expr $# + 1` ]; then
    optind=`expr $# + 1`
  fi
}

parse_options "$@"
shift `expr $optind - 1`

# -h asks for the same help text that is shown after an option error.
[ -z "$h" ] || usage=1

# Print the option summary on standard output and stop with status 2.
if [ -n "$usage" ]; then
  cat <<'EOF'
Usage: cyanafilter [options] file ...

       -a            list additionally assigned cross peaks
       -A            list assigned cross peaks
       -b atom       list peaks with assignment possibility to given atom
       -c            list compatible peaks
       -e            list peaks with not enough quality
       -h            help
       -i            list incompatible peaks
       -k atom       list peaks assigned to given atom
       -l atom       list peaks assigned manually to given atom
       -L level      list peaks with |i-j| >= level
       -m            list manually assigned peaks
       -M            list manually not assigned peaks
       -n            list only number of peaks
       -p peaknr     list peak with given number
       -P assfile    list peak present in assfile
       -q quality    list peaks with NOE support < quality
       -Q quality    list peaks with NOE support > quality
       -r rank       list peaks with manual rank >= rank
       -R            list peaks with manual assignment not found
       -s            short; do not list network-anchoring details
       -S spectrum   list peaks from the given spectrum
       -u            list unassigned peaks
       -U            list peaks without assignment possibilities
       -v            list violated peaks
       -V viol       list peaks with violation > viol
       file          input CYANA assignment files
EOF
  exit 2
fi
# Copy the single-letter option variables into the descriptive filter
# settings.  A setting is only touched when its option has a non-empty
# value, so the defaults stay in force otherwise.

# Plain flags (the option variable holds 1 when the flag was given).
[ -z "$a" ] || add=$a
[ -z "$A" ] || ass=$A
[ -z "$c" ] || comp=$c
[ -z "$e" ] || enough=$e
[ -z "$i" ] || incomp=$i
[ -z "$n" ] || numonly=$n
[ -z "$R" ] || miss=$R
[ -z "$u" ] || unass=$u
[ -z "$U" ] || noass=$U
[ -z "$s" ] || details=0

# -m and -M share one setting; -M is applied last and therefore wins.
[ -z "$m" ] || manual=1
[ -z "$M" ] || manual=2

# Options that carry a value.
[ -z "$b" ] || atom=$b
[ -z "$f" ] || frac=$f
[ -z "$k" ] || keptatom=$k
[ -z "$l" ] || refatom=$l
[ -z "$L" ] || level=$L
[ -z "$p" ] || num=$p
[ -z "$P" ] || refass=$P
[ -z "$q" ] || supmin=$q
[ -z "$Q" ] || supmax=$Q
[ -z "$r" ] || rank=$r
[ -z "$S" ] || spectrum=$S

# -v selects a tiny positive threshold; an explicit -V value overrides it.
[ -z "$v" ] || viol=0.0001
[ -z "$V" ] || viol=$V


# Build the peak selection list for -p and -P.
# The list is written to the temporary file refass.<pid> in the current
# directory, one "REFASS <id>" line per selected peak, and $refass is
# pointed at that file so it can be read ahead of each assignment file and
# removed at the end.  Without -p or -P, $refass stays empty.
if [ "$num" ]; then
  tmpref=refass.$$
  # Do not leave the temporary file behind when the script is interrupted.
  trap 'rm -f "$tmpref"; exit 1' 1 2 15
  # -p: the comma-separated peak numbers become one REFASS line each.
  echo "$num" | awk -F, '{ for(i=1;i<=NF;i++) print "REFASS",$i }' > "$tmpref"
  refass=$tmpref
elif [ "$refass" ]; then
  # An unreadable file would yield an empty selection list, which disables
  # the selection and lists every peak; stop with an error instead.
  if [ ! -r "$refass" ]; then
    echo "$0: $refass: cannot read assignment file" >&2
    exit 1
  fi
  tmpref=refass.$$
  trap 'rm -f "$tmpref"; exit 1' 1 2 15
  # -P: take the text between column 10 and " ppm" of every peak header
  # line of the given assignment file as the peak identifier.
  awk '/Peak [0-9][0-9]* / { i=index($0," ppm"); print "REFASS",substr($0,10,i-10) }
      ' "$refass" > "$tmpref"
  refass=$tmpref
fi
#echo refass=$refass

# Filter every assignment file named on the command line.
# For each file its name is printed, followed by the peak entries that
# pass all active filters and a one-line summary.  "$@" and the quoted
# operands keep file names and option values containing blanks or glob
# characters intact.  The filter settings are passed to awk as assignment
# operands; the selection list ($refass, if any) is read before the file.
for i in "$@"; do
  echo "$i:"
  awk 'BEGIN { n=-1; m=0; nrank=0; averank=0.0;
               stab[1]=0.25; stab[2]=0.50; stab[3]=0.75; stab[4]=0.80; stab[5]=0.85;
               stab[6]=0.90; stab[7]=0.95; stab[8]=0.99; stab[9]=10.00;
#              nstab=9;
#              With nstab=0 the quality histogram below is switched off.
               nstab=0;
               nrefass=0; }
#      Selection list line: remember the text after "REFASS " as a key.
       /^REFASS / { nrefass++; refass[substr($0,8,length($0)-7)]=1; next }
#      Peak header: reset the per-peak state and start buffering lines (n=0).
#      peaknum is the numeric peak number, peakid the text from column 10
#      up to " ppm", spec the word following "from".
       /^    Peak [0-9][0-9]* / { irank=0; havref=0; keptref=0; diag=0; haveno=0;
                    havvio=-1.0; f=200.0; lev=100000; missref=0;
                    n=0; peaknum=$2; peaknum=peaknum+0;
                    if ($3=="from") spec=$4; else spec="";
                    i=index($0," ppm"); peakid=substr($0,10,i-10)
                    havatm=(atom==""); havkat=(keptatom==""); havrat=(refatom==""); }
#      Quality value: last field without its trailing character.
       /quality = / { s=substr($NF,1,length($NF)-1); s=s+0.0; }
#      Assignment possibility line: count it; for lines containing " +"
#      keep the smallest absolute difference of the numbers in columns
#      17-20 and 34-37 (presumably the two residue numbers - TODO confirm).
       /[0-9] [+-] [A-Z]/ { irank++
                            if (index($0," +")>0) {
                              l=substr($0,17,4)-substr($0,34,4)
                              if (l<0) l=-l
                              if (l<lev) lev=l }
                          }
#      Line whose first field starts with "*" or "!": the reference
#      assignment.  havref is its rank, keptref tells whether it carries
#      " +", f is field 10 (compared against frac).
       substr($1,1,1)=="*" || substr($1,1,1)=="!" { havref=irank;
                            if (index($0," +")>0) keptref=1;
                            f=$10; f=f+0.0;
                            if (index($0,refatom)>0) havrat=1; }
       /ppm; diagonal/ { diag=1; }
       /low quality/ { haveno=1; }
#      "<used> out of <possible>": assigned if the first number is not 0.
       /[0-9] out of / { if ($1==0) havass=0; else havass=1; assposs=$4 }
#      Violation value (second to last field), recorded only while the
#      peak counts as not assigned.
       / Violated in / { i=NF-1; if (!havass) havvio=$i+0.0}
       /Reference assignment not found:/ { missref=1; }
       /  OK / { if (index($0," +")>0 && index($0,keptatom)>0) havkat=1; }
#      Buffer the line (cut to 72 columns in short mode) while inside a peak.
       { if (n>=0) { if (details) line[++n]=$0;
                     else line[++n]=substr($0,1,72);
                     if (index($0,atom)>0) havatm=1; }}
#      An empty line ends the peak entry: apply the filters in turn and
#      print the buffered lines if the peak passes all of them.
       NF==0 && n>0 { if (nrefass>0) { if (refass[peaknum]!=1 && refass[peakid]!=1) next }
                      if (!havatm || !havkat || !havrat) next;
                      if (diag) next;
                      if (irank==99999) next;
                      if (ass && !havass) next;
                      if (miss && !missref) next;
                      if (manual==1 && !havref) next;
                      if (manual==2 && havref) next;
                      if (add && (havref || !havass)) next;
                      if (comp && (!havref || !keptref || !havass)) next;
                      if (enough && !haveno) next;
                      if (havvio<=viol && viol>0.0) next;
                      if (incomp && (!havref || keptref || !havass) && !(missref && havass)) next;
                      if (unass && havass) next;
                      if (noass && assposs>0) next;
                      if (havref<rank) next;
                      if (level>0 && (!havass || lev<level)) next;
                      if (f>frac) next;
                      if (supmin<1000.0 && (s==0.0 || s>supmin)) next;
                      if (s<supmax) next;
                      if (spectrum!="" && index(spec,spectrum)==0) next;
                      if (!numonly) for (i=1;i<=n;i++) print line[i];
                      if (havref>=1) { averank+=havref; nrank++; }
                      for (i=1;i<=nstab;i++) if (s<=stab[i]) { ns[i]++; break; }
                      supav+=s; supsq+=s*s
                      n=-1; m++; }
#      Summary: number of listed peaks and mean rank of the reference
#      assignment; with -n also the quality histogram (if enabled).
       END { if (nrank==0) nrank=1;
             print m,"peaks listed, average rank of manual assignment",(1.0*averank)/nrank
             if (numonly) {
               nsum=0;
               for (i=1;i<=nstab;i++) {
                 nsum+=ns[i]; printf("%8.2f %6d %6d\n",stab[i],ns[i],nsum); }
               if (nsum>0) {
                 supav=supav/nsum
                 print "average quality",supav,"+/-",sqrt(supsq/nsum-supav*supav); }
               }
           }
      ' add="$add" comp="$comp" viol="$viol" enough="$enough" incomp="$incomp" \
        unass="$unass" noass="$noass" rank="$rank" frac="$frac" \
        supmin="$supmin" supmax="$supmax" num="$num" details="$details" \
        manual="$manual" atom="$atom" keptatom="$keptatom" refatom="$refatom" \
        level="$level" numonly="$numonly" miss="$miss" ass="$ass" \
        spectrum="$spectrum" \
        ${refass:+"$refass"} "$i"
done
# Remove the temporary selection list, if one was created.
if [ "$refass" ]; then rm -f "$refass"; fi
