# snaprun3.sh
# source me ; dspp snap predict version 2 using prot-hsps; hmm built
# all scaffolds/chrs, preserve hmm-build subset
#
# NOTE: this is csh/tcsh syntax (set / foreach / endif) despite the .sh name;
# source it from a csh-family shell.
#
# Variables the caller must set before sourcing (all are read below):
#   dp    species prefix used in input/output file names   (e.g. set dp=dvir)
#   dpid  id used in the hmm name and the merged outputs   (set dpid=...)
#   scd   directory holding perchr/<name>.fa sequence files
#   sc    directory holding caf1a/dgil/${dp}prot9-hsp.gff.gz
#   ZOE   directory holding the snap executable
# Also expects ../gff2zff2.pl relative to the current directory, and gzcat,
# egrep and perl on the PATH.
# Working dir when written: /bio/bio-grid/dspp/caf1snap/dvir
#
# Outputs: per-sequence files under snapout/, then merged
#   snapho-$dpid.gff, snapho-$dpid.tr, snapho-$dpid.aa in the current dir.

set hmm=snapho-$dpid.hmm

# All per-sequence outputs go under snapout/; create it on a fresh run.
if ( ! -d snapout ) mkdir snapout

## screen small scaffs 1st pass ? find .. -size 1 .. -size 9 (all < 5kb)
# set chrs=`chdir $scd/perchr; /bin/ls -1 [cs]*.fa | sed -e's/.fa//'`

# Split the perchr/*.fa files by file size (bytes) at 40000:
#   chrsmall : files <  40000, later concatenated and run as one input, no -xdef
#   chrsbig  : files >= 40000, run one at a time
# Names are listed without the .fa suffix, largest file first. Symlinks and
# names starting with 'p' are skipped. Sizes come from perl file tests rather
# than parsed `ls -l` output, so the result does not depend on the file owner
# or on the ls column layout.
set chrsmall=`chdir $scd/perchr; perl -le 'for (sort { -s $b <=> -s $a } grep { -f $_ and (-s $_) < 40000 and not -l $_ and not /^p/ } <*.fa>) { print $1 if /^(.+)\.fa$/ }'`

set chrsbig=`chdir $scd/perchr; perl -le 'for (sort { -s $b <=> -s $a } grep { -f $_ and (-s $_) >= 40000 and not -l $_ and not /^p/ } <*.fa>) { print $1 if /^(.+)\.fa$/ }'`

# Big sequences: one snap run each. Existing output files are kept, so an
# interrupted run can be resumed by sourcing this file again.
foreach chr ( $chrsbig )

  # Extract this sequence's modDM protein-hsp lines (plus '#' comment lines)
  # from the genome-wide gff and convert them with gff2zff2.pl.
  if ( ! -f "snapout/$dp-hsp-$chr.zff" ) then
    gzcat $sc/caf1a/dgil/${dp}prot9-hsp.gff.gz | egrep '#|modDM' | perl -ne \
      "print if(/^#/ || /^$chr\b/);" | perl ../gff2zff2.pl > snapout/$dp-hsp-$chr.zff
  endif

  if ( ! -f "snapout/$dp-snapho-$chr.gff" ) then
    if ( -z "snapout/$dp-hsp-$chr.zff" ) then
      # Empty zff: no hsp evidence for this sequence, so run snap without -xdef.
      echo $chr : no hsp data
      $ZOE/snap -quiet -name 'snapho' -gff3 \
        -aa snapout/$dp-snapho-$chr.aa -tx snapout/$dp-snapho-$chr.tr \
        $hmm $scd/perchr/$chr.fa > snapout/$dp-snapho-$chr.gff
    else
      $ZOE/snap -quiet -name 'snapho' -gff3 -xdef snapout/$dp-hsp-$chr.zff \
        -aa snapout/$dp-snapho-$chr.aa -tx snapout/$dp-snapho-$chr.tr \
        $hmm $scd/perchr/$chr.fa > snapout/$dp-snapho-$chr.gff
    endif
  endif

end

# do small junk as one input file, no -xdef
# chrsmall.fa is rebuilt from scratch on every run.
cp /dev/null snapout/chrsmall.fa
foreach chr ( $chrsmall )
  cat $scd/perchr/$chr.fa >> snapout/chrsmall.fa
end

# Only run snap when at least one small sequence was collected. Unlike the
# big-sequence loop, this step always reruns and overwrites its outputs.
if ( ! -z snapout/chrsmall.fa ) then
  set chr=chrsmall
  $ZOE/snap -quiet -name 'snapho' -gff3 \
    -aa snapout/$dp-snapho-$chr.aa -tx snapout/$dp-snapho-$chr.tr \
    $hmm snapout/chrsmall.fa > snapout/$dp-snapho-$chr.gff
endif

# Merge the per-sequence results (including chrsmall) into single files.
cat snapout/$dp-snapho-*.gff > snapho-$dpid.gff
cat snapout/$dp-snapho-*.tr > snapho-$dpid.tr
cat snapout/$dp-snapho-*.aa > snapho-$dpid.aa