gtar -Ozxf $soc/dmicsatdpsec.blout.tgz | ../blast92gff.pl -in=stdin -gff -out=dros-dpse-micsat3.gff -nodebug -evalmin=1e-6 perl -pi -e's,blastn:\w+,blastn:drosmicsat,;' dros-dpse-micsat3.gff set dspp=(dana dere dgri dmel dmoj dpse dsim dvir dyak) foreach ds ($dspp) gtar -Ozxf $soc/dmicsat${ds}c.blout.tgz | \ ../blast92gff.pl -in=stdin -gff -out=stdout -nodebug -evalmin=1e-10 | \ perl -pe's,blastn:\w+,blastn:drosmicsat,;' > dros-${ds}-micsat2.gff end $gbl/bin/lucegene_bulk_load_gff.pl -database dana_ag0508 \ -append work/dros-dana-micsat.gff #!/bin/csh set gbl=/bio/argos/common/perl/gbrowselive #dana done set dspp=( dere dgri dmoj dvir ) foreach ds ($dspp) $gbl/bin/lucegene_bulk_load_gff.pl -database ${ds}_ag0508 \ -append dros-${ds}-micsat.gff end # dsim_dgpg040929 dyak_wu040407 dpse-r1.04 dmel_r4.1 $gbl/bin/lucegene_bulk_load_gff.pl -database dmel_r4.1 \ -append dros-dmel-micsat.gff $gbl/bin/lucegene_bulk_load_gff.pl -database dpse-r1.04 \ -append dros-dpse-micsat.gff $gbl/bin/lucegene_bulk_load_gff.pl -database dyak_wu040407 \ -append dros-dyak-micsat.gff $gbl/bin/lucegene_bulk_load_gff.pl -database dsim_dgpg040929 \ -append dros-dsim-micsat.gff gbrowse.conf::::::::::::: default features = tblastDM tblastSC tblastAG tblastCE blastndmel blastdmic:overview blastdmic [blastdmic:overview] feature = match:blastn:drosmicsat label = 0 glyph = generic bgcolor = lavender height = 5 key = Microsatellites bump = 0 bump density = 5 [blastdmic] feature = nucleotide_match:blastn:drosmicsat glyph = segments stranded = 1 bgcolor = lavender fgcolor = black key = Dros. microsatellites citation = Dros. 
microsatellites category = Analysis label = sub { my $f = shift; my @na= ($f->attributes('Name'), $f->attributes('Alias'), $f->attributes('symbol')); return $na[0] || $f->display_name; } link = sub { my($ac) = shift->attributes('ID'); $ac =~ s/[\.\|].*$//; return 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search&db=Nucleotide&doptcmdl=GenBank&tool=GMOD&term='.$ac if ($ac); } .................. # marker genes = dmel named genes, located on all dspp, # "best" in each 500kb section of each chr ## missing dpse-dmelgn.ids/dpse-prot9 data ! set dspp=(dana dere dgri dmoj dsim dvir dyak) foreach ds ($dspp) echo ---- $ds dmelgn.ids ------ gzgrep modDM $ds/gff/$ds-prot9.gff.gz | grep -v match_part |\ perl -ne'($id)=m/ID=(\w+)/;print "$id\n";' | sort | uniq > work/$ds-dmelgn.ids end 13074 13074 98322 dana-dmelgn.ids 13328 13328 100322 dere-dmelgn.ids 12585 12585 94492 dgri-dmelgn.ids 12675 12675 95188 dmoj-dmelgn.ids 13246 13246 99679 dsim-dmelgn.ids 11902 11902 89405 dvir-dmelgn.ids 13043 13043 98188 dyak-dmelgn.ids cat *dmelgn.ids | sort | uniq -c | grep ' 7 ' | perl -pe's/\s+7\s+//;' > dmelgn-all.ids cat *all.ids | perl -pe's/(\w+)/ID=$1;/;' > allids.patt split allids.patt allidpatt. melon.% foreach idp (allidpatt.*) foreach? fgrep -f $idp dmel-named-genes.gff >> dmel-best-genes.gff foreach? end melon.% wc *gff 3872 34848 967106 dmel-best-genes.gff 4808 43272 1206928 dmel-named-genes.gff cat dmel-best-genes.gff | perl -ne\ 'BEGIN{open(F,"bestrefs.tsv");while(<F>){chomp;($gn,$r)=split"\t";$rn{$gn}=$r;}}\ ($c,$s,$t,$b,$e,@x)=split"\t";$sg=500000*int($b/500000);\ ($nm)=m/Name=(\w+)/; $w=length($_); $w=int($w/2) if($nm=~/[^a-z]/);\ ($gn)=m/(FBgn\d+)/;$nr=$rn{$gn}; $w += 10*$nr;\ print "$sg\t$w\t",$_;' |\ sort -k3,3 -k1,1n -k2,2rn | \ perl -ne'($s,$w,$c,@x)=split"\t";print join("\t",$c,@x) unless(0 < $gn{$c.$s}++);' \ > dmel-best-gene3.gff cat dmel-best-gene3.gff | perl -ne'($id)=m/ID=(\w+)/;($nm)=m/Name=([^;]+)/;print"$id\t$nm\n";' > dmel-best-gene3.ids ..... 
set ds=dana set dspp=(dana dere dgri dmoj dsim dvir dyak) foreach ds ($dspp) gzgrep modDM $spd/$ds/gff/$ds-prot9.gff.gz | grep -v match_part |\ perl -ne'BEGIN{open(F,"dmel-best-gene3.ids");%best=map{chomp;($d,$n)=split;$d,$n;} <F>;}\ ($id)=m/ID=(\w+)/;if($best{$id}){ s/tblastn:modDM/marker:modDM/; \ s/ID=[^;]+;/Name=$best{$id};/; s/;loc=[^;\n]+//; print;}' \ > $spd/$ds/gff/$ds-markers.gff end ------------------------------- sort -k3,3 -k1,1n -k2,2rn |more #$w=int($w/0.9) if(/GB_protein/); 2L . gene 3771706 3784129 . + . ID=CG10021;Name=bowl;Dbxref=FlyBase :FBan0010021,FlyBase:FBgn0004893;cyto_range=24C3-24C4;dbxref_2nd=FlyBase:FBgn0014041,FlyBase:FBgn00 22034,GB:CG10021;gbunit=AE003578;synonym=bowl;synonym_2nd=17-29-5,Bowl,Su(tor)2-1,bowel,l(2)c,l(2)k 08617,org2 ... use ugpxml summary size for weight of marker genes? 500000 377 2L . gene 541235 542572 . - . ID=CG3018;Name=lwr; << Dbxref=FlyBase:FBan0003018,FlyBase:FBgn0010602;cyto_range=21D1-21D1;dbxref_2nd=FlyBase:CG3018,FlyBa se:FBgn0010523,FlyBase:FBgn0015755,FlyBase:FBgn0022761;gbunit=AE003589;synonym=lwr;synonym_2nd=DUbc 9,DmUbc9,Dmubc9,FBgn0010602,Lwr,UBC9,Ubc9,dUBC9,dUbc9,dip4,hbl,i105,i56,l(2)01519,l(2)02858,l(2)054 86,l(2)05487,semi,ubc9,unnamed 500000 302 2L . gene 624644 625453 . + . ID=CG2826;Name=lect in-21Ca;Dbxref=FlyBase:FBan0002826,FlyBase:FBgn0040107,GB_protein:AAF51470.1;cyto_range=21D1-21D1;d bxref_2nd=FlyBase:FBgn0031272,Gadfly:CG2826;gbunit=AE003588;synonym=lectin-21Ca;synonym_2nd=AC+0045 73A,AC004573a,Lectin21Ca 500000 265 2L . gene 868675 869911 . - . ID=CG4258;Name=dbe; Dbxref=FlyBase:FBan0004258,FlyBase:FBgn0020305;cyto_range=21D4-21D4;dbxref_2nd=FlyBase:CG4258,FlyBa se:FBgn0022118,FlyBase:FBgn0022171,FlyBase:FBgn0024894;gbunit=AE003588;synonym=dbe;synonym_2nd=l(2) k05428,l(2)k06708 500000 237 2L . gene 620433 621540 . - . 
ID=CG13686;Name=lec tin-21Cb;Dbxref=FlyBase:FBan0013686,FlyBase:FBgn0040106;cyto_range=21D1-21D1;dbxref_2nd=FlyBase:FBg n0031271;gbunit=AE003588;synonym=lectin-21Cb;synonym_2nd=AC+004573B,AC004573b,Lectin21Cb 500000 235 2L . gene 825964 833245 . + . ID=CG3727;Name=dock << ;Dbxref=FlyBase:FBan0003727,FlyBase:FBgn0010583;cyto_range=21D3-21D3;dbxref_2nd=FlyBase:CG3727,FlyB ase:FBgn0041141;gbunit=AE003588;synonym=dock;synonym_2nd=Dm0447,Dock,dck,doc,l(2)04723 500000 211 2L . gene 523467 540541 . + . ID=CG2762;Name=ush; Dbxref=FlyBase:FBan0002762,FlyBase:FBgn0003963;cyto_range=21D1-21D1;dbxref_2nd=FlyBase:CG2762;gbuni t=AE003589;synonym=ush;synonym_2nd=FOG,Ush,l(2)19,u-sh,ushaped 500000 196 2L . gene 861849 868352 . - . ID=CG4276;Name=aru; Dbxref=FlyBase:FBan0004276,FlyBase:FBgn0029095;cyto_range=21D4-21D4;dbxref_2nd=FlyBase:FBgn0031290; gbunit=AE003588;synonym=aru;synonym_2nd=arouser 500000 182 2L . gene 640021 714968 . - . ID=CG17941;Name=ds; Dbxref=FlyBase:FBan0017941,FlyBase:FBgn0000497;cyto_range=21D1-21D2;gbunit=AE003588;synonym=ds;syno nym_2nd=CT39575,Ds,cad2,l(2)05142 500000 177 2L . gene 833584 851061 . - . ID=CG3365;Name=dron << go;Dbxref=FlyBase:FBan0003365,FlyBase:FBgn0020304;cyto_range=21D3-21D4;dbxref_2nd=FlyBase:CG3365;gb unit=AE003588;synonym=drongo 500000 177 2L . gene 852767 854539 . + . ID=CG3943;Name=krak en;Dbxref=FlyBase:FBan0003943,FlyBase:FBgn0020545;cyto_range=21D4-21D4;dbxref_2nd=FlyBase:CG3943;gb unit=AE003588;synonym=kraken 500000 155 2L . gene 583540 594687 . - . ID=CG2851;Name=Gsc; cp /dev/null bestgeneinfo.tsv split -l 100 dmel-best-genes.fbid dmbestids. 
foreach idf (dmbestids.*) set idl=`cat $idf | perl -ne'/(\w+)/;print "$1+";'` curl -L -bck -cck \ 'http://chipmunk.bio.indiana.edu:7082/lucegene/search?lib=fbgn&method=fetch&items=1-9999&headlinefields=1&format=text/tsv&q=docid:('$idl')'\ >> bestgeneinfo.tsv end #GSYM NAM CLOC ALESR REF DBA DT iscore docid grep -v '^GSYM' bestgeneinfo.tsv | perl -ne'($g,$n,$c,$a,$r,$d,$t,$s,$id)=split"\t";print "$id\t$r\n" if($id=~/FBgn/);' http://chipmunk.bio.indiana.edu:7082/lucegene/search?q=fbgn-PHM:eye%20AND%20ENZ:kinase curl -L -bck -cck -i \ 'http://chipmunk.bio.indiana.edu:7082/lucegene/search?lib=fbgn&q=docid:(FBgn0010602+FBgn0020304+FBgn0010583)&method=fetch&items=1-9999&headlinefields=1&format=text/tsv'\ HTTP/1.1 200 OK Date: Sat, 20 Aug 2005 20:31:57 GMT Server: Apache Coyote/1.0 Expires: Sun, 21 Aug 2005 20:31:57 GMT Content-Type: text/plain Content-Length: 234 X-Cache: MISS from chipmunk.bio.indiana.edu Connection: close GSYM NAM CLOC ALESR REF DBA DT iscore docid dock dreadlocks 21E2 26 77 23 24 Dec 04 0.99999994 FBgn0010583 drongo drongo 21E2 2 16 18 24 Dec 04 0.99999994 FBgn0020304 lwr lesswright 21E2 16 53 44 24 Dec 04 0.99999994 FBgn0010602 // curl -L -bck -cck -i 'http://microbe.bio.indiana.edu:8080/lucegene/search?\ // lib=uniprot&q=all:signal&method=batchfetch&items=1-10&headlinefields=1&format=text/tsv' $b/flybase/bin/lucegene-search.sh -lib fbgn -c 'format table;fields title' \ -c 'find docid:(FBgn0010602 FBgn0020304 FBgn0010583)' |\ perl -ne'($id)=/ID \d (\w+)/;($nr)=/REF (\d+)/;print "$id\t$nr\n";'