# PASA run for the dmoj data set (csh).
# Sets up tool locations, then runs the PASA steps in order; every step
# writes a heading line and its output to log.$rname.

# Backticks: the output of the argos-env command is what gets sourced.
# NOTE(review): $b is not set anywhere in this part of the file; it is
# presumably provided by the calling environment -- confirm.
source `$b/ROOT/bin/argos-env`

# ---- tool and data locations ----
set em  = /bio/bio-grid/mb/EVidenceModeler
set pa  = /bio/bio-grid/mb/PASA/
set td  = /bio/bio-grid/dpulex/prots/
set aug = /bio/bio-grid/mb/augustus

setenv EVM_HOME  $em
setenv PASA_HOME $pa
set path = ( $path $PASA_HOME/bin )
setenv AUGUSTUS_CONFIG_PATH $aug/config/

#-----------
# ---- run parameters ----
# rname is used as the log suffix, the gff3 file prefix and the first
# field of the -M database string below.
set rname   = pasa_dmoj
set dgenome = dmoj_caf060210.fa
# set dgenome=dpulex_jgi060905.hardmask.fa
set dest1   = dmoj-est.fa
#? need genes only if want to check/validate predictions
set dgenes  = dmoj-genes.gff

# Make sure the run log exists before the first append.
touch log.$rname

# ---- step: alignment + assembly ----
echo "# Transcript alignments followed by alignment assembly" >> log.$rname
../scripts/Launch_PASA_pipeline.pl -c alignAssembly.config -C -R --USE_GMAP \
    -g $dgenome -t $dest1 >>& log.$rname
# not used: -T -u all_transcripts.fasta -f FL_accs.txt

# ---- step (optional): load existing gene annotations ----
## optional
echo "# Importing the latest annotations into the PASA database" >> log.$rname
## optional
../scripts/Load_Current_Gene_Annotations.dbi -c alignAssembly.config \
    -g $dgenome -P $dgenes >>& log.$rname

# ---- step (optional): annotation comparison ----
## optional
echo "# Performing an annotation comparison" >> log.$rname
../scripts/Launch_PASA_pipeline.pl -c annotCompare.config -A \
    -g $dgenome -t $dest1 >>& log.$rname

# In-place edit of the assemblies gff3: first "ID=chain" on each line
# becomes "Parent=chain".
perl -pi -e 's/ID=chain/Parent=chain/;' $rname.pasa_assemblies.gff3

# ---- step: annotation update ----
# NOTE(review): database user and password are embedded in plain text in
# the -M / -p arguments of this and the following steps.
# ${rname} is braced so the ':' that follows is not read as a csh modifier.
echo "# Updating our gene structure annotations" >> log.$rname
../scripts/cDNA_annotation_updater.dbi \
    -M "${rname}:localhost-port=3306-mysql_socket=/tmp/fbmysql.sock:gilbertd:darwin" \
    -P null >>& log.$rname

#.... output training genes
# Only stdout is appended here (plain >>), and to a separate log file.
$pa/scripts/pasa_asmbls_to_training_set.dbi -g $dgenome \
    -M "${rname}:localhost-port=3306-mysql_socket=/tmp/fbmysql.sock" \
    -p "gilbertd:darwin" >> log.train.$rname

#.....
# ---- step: alternative splicing ----
echo "## Identification and Classification of All Alternative Splicing Variations" >> log.$rname
../scripts/Launch_PASA_pipeline.pl -c alignAssembly.config \
    -g $dgenome -t $dest1 --ALT_SPLICE >>& log.$rname

#... if wanted; same as -A gff ...
# ---- step: dump the current annotations as gff ----
# Appends a heading and the tool's stdout+stderr to log.$rname
# ($rname is set earlier in this script).
echo "# dump current annots to gff" >> log.$rname
## this regurgitates the -A compare .gff output
../scripts/dump_valid_annot_updates.dbi \
    -M "${rname}:localhost-port=3306-mysql_socket=/tmp/fbmysql.sock" \
    -p "gilbertd:darwin" \
    >>& log.$rname

# ======================================================================
# NOTES ONLY below this point -- nothing here is meant to run as part of
# this script.  Every line is commented out so that executing the script
# does not fall through into the numbered notes and launch the example
# commands (they target a different database, pasa_dapha, and overwrite
# the output.annotation_compare.* files).  To use a command, copy it and
# strip the leading "# ".
# ======================================================================

#......... annotCompare for FL Full Length to predict new genes ......
# ^^ tweak annotCompare with is_fli to do novel-gene annot ??
#    cDNA_annotation_comparer.dbi
#    dump_valid_annot_updates.dbi
#..........
# test with scaffold99 (and some est asm not on genes: asm0gene/)
#
# 1. mysql.clusters drives annotCompare loop, replaced w/ temp = scaffold_99 subset
#
# 2. test as is
#    ../scripts/cDNA_annotation_comparer.dbi -G dpulex_jgi060905a.fa \
#      -M pasa_dapha:localhost-port=3306-mysql_socket=/tmp/fbmysql.sock \
#      -p gilbertd:darwin \
#      > output.annotation_compare.s99.out
#
# 3. set some cluster_link .is_fli = 1 and rerun on scaf99
#    without genes: asmbl_18445 asmbl_18490
#    update cluster_link set is_fli = 1 where cdna_acc = 'asmbl_18445';
#
# 3b. set all to .is_fli = 1 where is_assembly = 1
#
# 3c.
#    ../scripts/cDNA_annotation_comparer.dbi -G dpulex_jgi060905a.fa \
#      -M pasa_dapha:localhost-port=3306-mysql_socket=/tmp/fbmysql.sock \
#      -p gilbertd:darwin \
#      > output.annotation_compare.allfli.out
#
# 4. output novel gene models :
#    ../scripts/dump_valid_annot_updates.dbi \
#      -M pasa_dapha:localhost-port=3306-mysql_socket=/tmp/fbmysql.sock \
#      -p gilbertd:darwin \
#      > output.annotation_compare.s99fli.gff3
#
# 5.
# update is_fli = 0 for failed cdna tests
#
# | cluster_id | cdna_acc | id | is_assembly | is_fli | length | header | is_EST |
#
#   select cl.cdna_acc, cl.id, cl.is_assembly, cl.is_fli, s.status_id
#   from cluster_link cl, status_link s
#   where cl.is_assembly = 1 and (s.cdna_acc = cl.cdna_acc and s.status_id = 1);
#
#   update cluster_link cl, status_link s set cl.is_fli = 0
#   where cl.is_assembly = 1 and (s.cdna_acc = cl.cdna_acc and s.status_id = 1);
#
#   -- also this: short est not fli: status:23
#   update cluster_link cl, status_link s set cl.is_fli = 0
#   where cl.is_assembly = 1 and (s.cdna_acc = cl.cdna_acc and s.status_id = 23);