#!/usr/bin/perl
# gff2zff2.pl
#
# Convert GFF feature lines (read from STDIN, or from files named on the
# command line) into ZFF records written to STDOUT.
#
#   -xdef  one ">ref:gene" section per gene holding Start / Coding / Stop
#          records (probably the mode you want)
#   -hmm   one ">ref" section per reference holding Einit / Exon / Eterm /
#          Esngl exon records
#
# The exons of a gene must stay on consecutive input lines: do not sort the
# input GFF by location, or the gene-exon ordering relied on here is lost.

use strict;
use warnings;
use Getopt::Long;

my ($forxdef, $forhmm) = (0, 0);    ## forxdef is only one we need?

my $ok = GetOptions("xdef!" => \$forxdef, "hmm!" => \$forhmm);
die "usage: gff2zff2.pl -hmm | -xdef < gff > zff\n -xdef is probably right.\n"
    unless ($ok && ($forxdef || $forhmm));

if ($forhmm) {
    zffForHmm();
}
else {
    zffForXdef();    # default
}

# Split one chomped GFF line into its first nine tab-separated columns.
# Missing columns are returned as '' so short lines never produce undef.
sub _gff_columns {
    my ($line) = @_;
    my @col = split /\t/, $line;
    push @col, '' while @col < 9;
    return @col[0 .. 8];
}

# Return the gene id named by the first Parent= or ID= attribute, with a
# trailing _G<n>, _S<n> or _o<n> suffix removed.  Returns undef when the
# attribute column names neither, so the caller can keep its previous id.
sub _gene_id {
    my ($attr) = @_;
    return unless $attr =~ m/(?:Parent|ID)=([^;\s]+)/;
    my $id = $1;
    $id =~ s/_[GSo]\d+$//;
    return $id;
}

# Numeric value of a GFF score column after removing embedded whitespace.
# Non-numeric scores such as "." count as 0, as Perl's own string-to-number
# conversion would, but without emitting a "isn't numeric" warning.
sub _score_value {
    my ($raw) = @_;
    $raw = '' unless defined $raw;
    $raw =~ s/\s+//g;
    no warnings 'numeric';
    return $raw + 0;
}

# Write "xdef" style ZFF.  Each gene gets a ">ref:gene" header, a boundary
# record at the begin of its first listed feature, one "Coding" record per
# feature, and a boundary record at the end of its last listed feature.
# On the '-' strand the first boundary is "Stop" and the last is "Start";
# otherwise it is "Start" then "Stop".
sub zffForXdef {
    my $zcmd = "ADJ";    # or "SET" ?
    my $dt   = ".";      # or "0" ?

    my ($pid, $lpid, $lref) = ('', '', '');   # current gene, previous gene/ref
    my ($le, $lstrand)      = (0,  '');       # end/strand of the open gene

    # Emit the closing boundary of the gene that is still open, if any.
    my $close_gene = sub {
        return unless $le;
        my $tp = ($lstrand eq '-') ? "Start" : "Stop";
        print join("\t", $tp, $le, $le, $lstrand, '+50',
                   $dt, $dt, $dt, $zcmd, $lref), "\n";
        $le = 0;
    };

    while (my $line = <>) {
        next unless $line =~ /^\w/;    # skips comments, directives, blanks
        chomp $line;
        my ($ref, undef, undef, $start, $end, $raw_score, $strand, undef, $attr)
            = _gff_columns($line);

        # A line without Parent=/ID= stays with the gene seen before it.
        my $id = _gene_id($attr);
        $pid = $id if defined $id;

        if ($ref ne $lref) {
            $close_gene->();           # must run while $lref is the old ref
            $lref = $ref;
            $lpid = '';                # forces a new gene header below
        }

        ## for Coding, use values in 0.1 .. 1,2 range ??; higher for Start/Stop
        my $score = _score_value($raw_score) / 100;
        $score = 0.1 if $score < 0.1;

        if ($pid ne $lpid) {
            $close_gene->();
            print ">$ref:$pid\n";
            my $tp = ($strand eq '-') ? "Stop" : "Start";
            print join("\t", $tp, $start, $start, $strand, '+50',
                       $dt, $dt, $dt, $zcmd, $ref), "\n";
            $lpid = $pid;
        }

        print join("\t", "Coding", $start, $end, $strand, '+' . $score,
                   $dt, $dt, $dt, $zcmd, $ref), "\n";
        ($le, $lstrand) = ($end, $strand);
    }

    # The last gene has no following line to trigger its closing boundary.
    $close_gene->();
    return;
}

=item hmm zff

  >scaffold_4845
  Einit 6571 6681 + 10.789 0 0 0 scaffold_4845-SNAP_dm.1
  Eterm 6840 7199 + 11.685 0 0 2 scaffold_4845-SNAP_dm.1

=cut

# Write "hmm" style ZFF.  Each reference gets a ">ref" header followed by
# one record per kept feature, labelled Einit (first exon of a gene), Exon
# (interior), Eterm (last) or Esngl (only exon).  Because the label of an
# exon depends on whether another exon of the same gene follows, every
# exon is held back until the next input line (or end of input) is seen.
# Scores are truncated to integers and features scoring <= 0 are dropped.
sub zffForHmm {
    my ($pid, $lpid, $lref) = ('', '', '');   # current gene, previous gene/ref
    my $etype = "Einit";                      # label of the held-back exon
    my ($lb, $le, $lstrand, $lscore) = (0, 0, '', 0);    # held-back exon

    # Print the held-back exon, if any.  $gene_ends says that no further
    # exon of its gene follows, which upgrades Exon->Eterm, Einit->Esngl.
    my $flush_exon = sub {
        my ($gene_ends) = @_;
        if ($gene_ends) {
            if    ($etype eq "Exon")  { $etype = "Eterm"; }
            elsif ($etype eq "Einit") { $etype = "Esngl"; }
        }
        print join("\t", $etype, $lb, $le, $lstrand, '+' . $lscore,
                   0, 0, 0, $lpid, $lref), "\n"
            if $le;
        $le = 0;
    };

    while (my $line = <>) {
        next unless $line =~ /^\w/;    # skips comments, directives, blanks
        chomp $line;
        my ($ref, undef, undef, $start, $end, $raw_score, $strand, undef, $attr)
            = _gff_columns($line);

        # A line without Parent=/ID= stays with the gene seen before it.
        my $id = _gene_id($attr);
        $pid = $id if defined $id;

        my $score = int(_score_value($raw_score));    #? int($score/50);

        # The held-back exon ends its gene when this line starts another
        # gene or another reference sequence.
        $flush_exon->($lpid ne $pid || $ref ne $lref);

        if ($ref ne $lref) {
            print ">$ref\n";
            $lref = $ref;
            $lpid = '';
        }

        next if $score <= 0;

        $etype = ($pid ne $lpid) ? "Einit" : "Exon";
        $lpid  = $pid;
        ($lb, $le, $lstrand, $lscore) = ($start, $end, $strand, $score);
    }

    # End of input always ends the last gene, so its final exon must be
    # labelled Eterm/Esngl regardless of what the last line compared to.
    $flush_exon->(1);
    return;
}