#!/usr/local/bin/perl ## ? this is bad on sol10:#!/usr/bin/env perl =head1 blast.cgi sppblast.cgi - wrapper for NCBI web blast program output more info here... =cut use strict; my $debug= 0; # display opts my $dropgo= 1; # is now old data in >headers.. ; need to collect all defline before match. my $domapout= 1; # user opt? MAPS_ my $dohidealigns= 0; my $saveTillEnd= 0; # test for hitlist table my $doMultiMap= 1; #?? my $NOTICE_TOP=""; my $OLD_NOTICE_TOP="

06 December: DroSpeGe Sequence Link Change Notice

"; use vars qw( $propsOk %parts $part $dbprops $dbname $saved $savetx $collectAlign $subjectMap $chr $bstart $bend @scafloc $atdefline $inalign $wantMap $inscore $mapu %savemaploc %maporder ); # use lib('/bio/argos/perl/lib/'); # debug BEGIN { $propsOk= eval { require euGenes::Properties; }; $propsOk= $propsOk && ($@ == 0); } my $wwwblasthome='.'; # for now; fix to use ARGOS_ROOT/systems/blast/Bin/ my $BLASTCMD= "/usr/bin/nice -n +17 $wwwblasthome/blast.REAL"; my $jsheader=''; # see init my $org= 'dmel'; my @orgs= (); my %orgpath= (); #?? for blastset/subdir for release versions my %speciesNames=( # %spa, not used now? dana => "Drosophila_ananassae", dere => "Drosophila_erecta", dgri => "Drosophila_grimshawi", dmel => "Drosophila_melanogaster", dmel5 => "Drosophila_melanogaster rel 5", dmel_r322 => "Drosophila_melanogaster rel 3.2", dmelhet => "Drosophila_melanogaster heterochromatin", dmoj => "Drosophila_mojavensis", #dmov => "Drosophila_mojavensis", dper => "Drosophila_persimilis", dpse => "Drosophila_pseudoobscura", dsec => "Drosophila_sechellia", dsim => "Drosophila_simulans", dvir => "Drosophila_virilis", dwil => "Drosophila_willistoni", dyak => "Drosophila_yakuba", aphid => "Acyrthosiphon_pisum", acyr => "Acyrthosiphon_pisum", acyr1 => "Acyrthosiphon_pisum", ); # dang fix cuz fasta header lacks species= but for chromosome ! my %sppchrmap = ( dyak => '^chr(X|2L|2R|3L|3R|4|U)', #+ _random dsim => '^chr(X|2L|2R|3L|3R|4|U)', #+ _random dana => '^(contig|scaffold)', dmoj => '^(contig|scaffold)', dvir => '^(contig|scaffold)', dere => '^(contig|scaffold)', dper => '^scaffold', dsec => '^scaffold', dgri => '^scaffold', dwil => '^scaffold', aphid => '^(scaffold|SCAFFOLD)', acyr => '^(scaffold|SCAFFOLD)', acyr1 => '^(scaffold|SCAFFOLD)', dmel => '^(X|2L|2R|3L|3R|4)$', # safe dmel5 => '^(X|Y|2|3|4|U|dmel_mito)', dmelhet => '^(\w+h|Y)$', # clash w/ dpse other U dpse => '^(2|3|4_group.*|XR_group.*|XL_group.*)$', #|U ); my %orgmaps = ( dsim => 1, dyak => 1, dgri => 1, dana => 1, dmoj => 1, dvir => 1, dere => 1, aphid => 1, acyr => 1, acyr1 => 1, dper => 1, dsec => 1, dwil => 1, dmel => 1, dmel5 => 1, dmelhet => 1, dmel_r322 => 1, dpse => 1, ); #my $mysppdefpatt = join('|',sort keys %orgmaps); my $mysppdefpatt = join('|',sort{ length($b)<=>length($a) or $a cmp $b } keys %orgmaps); # deflinemap is checked 1st for gbrowse data set; needs to be certain match # >2R type=chromosome; loc=2R:1..20766785; ID=2R; species=dmel; release=r4.0 # >2R type=chromosome; loc=2R:1..20302755; ID=2R; release=r3.2.2; species=dmel # >3h type=chromosome; loc=3h:1..2955737; ID=3h; species=dmel; # >3h type=chromosome; loc=3h:1..2955737; ID=3h; species=dmel; release=r3_2h ?? # >3 type=chromosome; loc=3:1..19738957; ID=3; release=r1.03; species=dpse my %deflinemap = ( dmel => sub { local $_=shift; return(/species=dmel\b/ && /release=r4/);}, dmel5 => sub { local $_=shift; return(/species=Dmel\b/i && /release=r5/);}, dmel_r322 => sub { local $_=shift; return(/species=dmel\b/ && /release=r3/);}, dmelhet => sub { local $_=shift; return(/species=dmel\b/ && /loc=(\w+h|Y|U)/);}, dpse => sub { local $_=shift; return(/species=dpse\b/ && /release=r1/);}, ##acyr1 => sub { local $_=shift; return(/species=acyr\b/ && /release=1/);}, ); ## check for cgi in WWW_BLAST_TYPE = dmelhet, other db name ## but have to leave stdin for blast.real to read below .. ## call this cgi with param, blast.cgi?db=dmelhet.db ? my $qs=$ENV{QUERY_STRING}; if (!$qs && @ARGV) { $qs = join(" ",@ARGV); } #?? my($testBlastOut) = ($qs =~ m/blastout=([^\s;,]+)/); if ($qs =~ m/(org|species)=([\w\.\-]+)/) { $org= $2; } my $dbname= ($qs =~ m/db=([\w\.\-]+)/) ? $1 : "dbsets"; $ENV{'BLASTDB'}= $dbname; # point blast.REAL to data $ENV{'DEBUG_COMMAND_LINE'}= "TRUE" if $debug; # for debug output of CGI call data to /tmp/__web.in ## need to read input cgi data; collect species selections and build .nal, .pal files ## for blast.real to use (then rewrite cgi data ??) ## ? no don't build .nal/.pal but run sequential blasts on each spp db ??? ## -- but "all" == 12 spp blast runs; is .nal more efficient? my $backhost= $ENV{HTTP_BACKHANDPROXIED}; my $baseurl= $ENV{ARGOS_BASEURL} || ""; my $host=`hostname`; my $hosturl; $hosturl= $ENV{"HTTP_X_FORWARDED_HOST"}; unless ($hosturl =~ /\w/) { $hosturl= "http://".$ENV{'SERVER_NAME'}.":".$ENV{'SERVER_PORT'}; } my $nphcgi_url= $hosturl."/tmp/"; # FIXME: "/blast-cgi/TmpGifs/"; #also change all backhand /blast-cgi/ to /blast/ == non-backhand path to same ####my $baseurl= "/blast/"; # use master computer or $hosturl ? my $missinglink= $baseurl.'/cgi-bin/missinglink?'; my $webtmpdir = $baseurl."/tmp"; my $realtmpdir = $ENV{DOCUMENT_ROOT}."/tmp"; my $getsppfastaUrl= $baseurl.'/cgi-bin/getsppfasta?'; # drospege only? my $project='DroSpeGe'; ## need some ENV options for this! my $isGbrowse_fb= 0; my $gbrowsebaseurl= $baseurl."/cgi-bin/gbrowse" ; my $gbrowseimgurl= $baseurl."/cgi-bin/gbrowse_img" ; ## FIXME test. # my $seqfetchurl= $baseurl.'/cgi-bin/gbrowse/$org/\?plugin=BatchDumper;BatchDumper.fileformat=embl;BatchDumper.format=text;plugin_do=Go;$loc'; my $seqfetchurl= $gbrowsebaseurl; ## add user choices of format? BatchDumper.format=text ?? my $seqparam_em = ";plugin=BatchDumper;BatchDumper.fileformat=embl;plugin_do=Go"; my $seqparam_gb = ";plugin=BatchDumper;BatchDumper.fileformat=genbank;plugin_do=Go"; my $seqparam_fa = ";plugin=BatchDumper;BatchDumper.fileformat=fasta;plugin_do=Go"; my $seqparam_gff= ";plugin=BatchDumper;BatchDumper.fileformat=gff3;plugin_do=Go"; my $seqparam_flip= ";BatchDumper.flip=1"; ## my $gbrowsebaseurl= $baseurl."/cgi-bin/gbrowse_fb" ; #my $isGbrowse_fb= 1; my $gbrowseurl= $gbrowsebaseurl.'/dmel/?'; my $maphit_label= "BLAST HIT on Genome Map"; my $dbpropname = $ENV{DOCUMENT_ROOT}."/../conf/dbUrls.properties"; # messy my %dbaliases = ( # URL_emb => 'URL_EMBL', ); ## instead of these urls, rely on euGenes::Properties() and conf/dbUrls.properties my $gburl='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search\&db=Nucleotide\&doptcmdl=GenBank\&tool=euGenes'; my $esturl='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search\&db=nucest\&doptcmdl=GenBank\&tool=euGenes'; my $pasaurl='http://server2.eugenes.org/cgi-bin/PASA/cgi-bin/cdnaasmbl_report.cgi'; my $arpurl='http://insects.eugenes.org/genepage/arthropod/'; # /genepage/arthropod/ARP1_G3585 # http://www.expasy.org/cgi-bin/sprot-search-de?Q9V494 #my $swurl='http://srs.ebi.ac.uk/srsbin/cgi-bin/wgetz?-e+[SWALL-acc:%s]'; my $swurl='http://srs.ebi.ac.uk/cgi-bin/wgetz?-e+[SWALL-id:%s]'; # my $egidpat='(AG|AT|CE|FB|HU|MG|SG|ZF)gn\d+'; my $idpat='FB\w\w\d+'; my $idurl=$baseurl.'/cgi-bin/fbidq.html?'; my $inorg= $org; my %idmap = (); # see initdata; this is hash of defline+ patterns and urls to make initdata(); #----------------------------------------- # Run the sucker ............. # but do some checking; some idiots push in a whole proteome of 16,000 genes $|=1; # unbuffer print? #print "Expires: ".expiretime()."\n"; my $needcontype= 1; # print "Content-type: text/html\n\n"; #! not if type = xml, others? my $nentries=0; # count fasta > lines; should count bases instead my $MAXENTRIES=30; my ($atorg, $athidealign); #my $formin= "$realtmpdir/sppblast$$.in"; my $formin= "/tmp/sppblast$$.in"; unless($testBlastOut){ open(IN,">$formin"); while () { if (/^Content-Disposition: form-data;/) { $atorg= (m/name="org"/); $athidealign= (m/name="hide_alignments"/); } elsif ($atorg && /^(\w\S+)/) { # need way to pass blastset/subdir + orgname for releases push(@orgs,$1); #? if (/^(\w\S+)\s+(\S+)/) { $orgpath{$1}= $2; } # need orgrel, reldir, orgname # or use "org_release" syntax and split on _ below } elsif ($athidealign && /^(\w\S+)/) { #? add special form opt: Content-Disposition: form-data; name="hide_alignments" $dohidealigns= $1; $saveTillEnd= $dohidealigns; } elsif(/^>/) { $nentries++; } print IN $_ unless($atorg); ## drop this non-ncbi form input ?? } close(IN); } if($nentries>$MAXENTRIES) { print "Content-type: text/html\n\n"; print "

Whoa there ... I count $nentries sequences, a bit more than I can handle.

Please consider running this blast on your computer instead of here.\n See DroSpeGe data/ section for most of the blast data sets\n "; exit(0); } ## change ^^ to rewrite DATALIB or DATABASE fields to ## add $org/dbname -- blast will work on them all then ## blast.rc must have all $org/dbname entries though ## and have to pull $org from >deflines for gbrowseurl ## aug05 change: drop leading "dspp/" from dbname, wwwblast can no longer handle it my $inlib=0; my @webin=(); my $skipit=0; open(IN,"$formin"); my @in= ; close(IN); foreach (@in) { if (/^Content-Disposition: form-data;/) { $inlib= (m/name="DATALIB"/); } elsif ($inlib && m,^org/(\S+),) { my $lib=$1; my $orglibs=''; foreach my $o (@orgs) { my $onm= $o; $onm=~s/_.*$//; ##old##$orglibs .= "$o/$onm-$lib "; $orglibs .= "$onm-$lib "; ##new } $_= $orglibs."\n"; #s,$lib,$orglibs,; } elsif ($inlib && m,^org_misc/(\S+),) { # handle multiple datalibs: na_genbank na_refseq .. while(s,org_misc/(\S+),ORGMISC,){ my $lib=$1; my $orglibs=''; foreach my $o (@orgs) { my $onm= $o; $onm=~s/_.*$//; #old##$orglibs .= "${o}_misc/$onm-$lib "; $orglibs .= "$onm-$lib "; #new } s,ORGMISC,$orglibs,; } # $_= $orglibs."\n"; #s,$lib,$orglibs,; } ##push(@webin,$_) unless($skipit); } ## print @webin instead ? need to drop non-ncbi 'org' form entries ? open(IN,">$formin"); print IN @in; close(IN); my $saveinh= undef; my $blastsave= "$realtmpdir/sppblast$$.out"; if ($debug>1) { open(SAVEBL,">$blastsave"); $saveinh= *SAVEBL; } # print "sppblast.cgi orgs=@orgs tmpf=$formin

\n" if $debug && !$needcontype; # print "

Results for "; # foreach my $org1 (@orgs) { print "$speciesNames{$org1} .. \n"; } # print "

\n"; $inorg= $orgs[0]; # foreach my $org1 (@orgs) if (1) { if ($testBlastOut) { open(R, $testBlastOut); $dohidealigns=1; $saveTillEnd= $dohidealigns; } else { open(R,"cat $formin | $BLASTCMD |"); } # this reads cgi input #was#else { open(R,"cat $formin | $wwwblasthome/blast.REAL|"); } # this reads cgi input # $gbrowseurl= ($orgmaps{$org1}) ? "/cgi-bin/gbrowse_fb/$org1?" : ''; ## merge in ALTERNATE_blast_output if ($domapout) { MAPS_blast_output(*R); } else { BASIC_blast_output(*R); } close(R); } if($needcontype) { print "Content-type: text/html\n\n"; $needcontype= 0; } # error? print "sppblast.cgi orgs=@orgs tmpf=$formin

\n" if $debug && !$needcontype; unlink($formin) unless $debug; close($saveinh) if $saveinh; exit; #----------------------------------------- ## fixme ! config file? ## >2R type=chromosome; loc=2R:1..20302755; ID=2R; ** release=r3.2.2; species=dmel sub checkorg { my($inorg,$chr)= @_; ## check on defline info - can this be certain to give gbrowse 'org' if ($atdefline) { foreach my $o (sort keys %deflinemap) { my $defpatt= $deflinemap{$o}; if (ref($defpatt) =~ /CODE/) { return $o if eval{ &{$defpatt}($atdefline); }; } elsif ($atdefline =~ m/$defpatt/) { return $o; } } } if ($inorg eq 'dmel' && $chr =~ /h|Y|U/) { return 'dmelhet'; } elsif (!$inorg) { foreach my $o (sort keys %sppchrmap) { my $ochrpatt= $sppchrmap{$o}; if ($chr =~ m/$ochrpatt/) { return $o; } } } return lc($inorg); } sub gbrowseurl { my ($inorg,$chr)= @_; # need also $atdefline for release= my $org= checkorg($inorg,$chr); # need all fasta header here - check for release= return undef unless($org); my $gbrowseurl= ($orgmaps{$org}) ? $gbrowsebaseurl."/$org/?" : ''; ## my $gbrowseurl= ($orgmaps{$org}) ? $baseurl."/cgi-bin/gbrowse_fb/$org/?" : ''; return $gbrowseurl; } sub gbrowseFromOrgChrStartStop { my ($inorg,$chr,$bstart,$bend,$strand) = @_; my $url=''; my $gbrowseurl= gbrowseurl($inorg,$chr); if ($bend && $gbrowseurl) { # my $url= $maplink; $url= eval("$url"); ## bad - what is eval syntax? if ($isGbrowse_fb){ $url="${gbrowseurl}name=$chr:$bstart..$bend;doexpand=1;ft=blast,Blast,,$bstart..$bend"; ## gbrowse_fb } else { ##my($vstart,$vend)= ($bstart-1000,$bend+1000); ## dont want this for seqparam my($vstart,$vend)= ($bstart,$bend); ## dont want this for seqparam $url="${gbrowseurl}name=$chr:$vstart..$vend;add=$chr+match+Blast+$bstart..$bend"; ## gbrowse #bad:need vend;add=# $url =~ s/;add=/;flip=1;add=/ if($strand<0); $url =~ s/name=/flip=1;name=/ if($strand<0); } } return $url; } #http://flybase.bio.indiana.edu/cgi-bin/gbrowse_fb2?name=X:3947906..3945387 #transcript_boundaries:(2L:6,330,987..6,338,497[+]) sub gbrowseFromBoundaries { my $line = shift; # @_; my $mapu= ''; return if ($line =~ m/type=chromosome/); # if ($line =~ s/(\w+:[\d,]+\.\.[\d,]+)/XcutX/) if ($line =~ m/(\w+\:[\d,]+\.\.[\d,]+)/) { my $loc= $1; my $chromo=$1 if ($loc =~ m/^(\w+):/); my $gbrowseurl= gbrowseurl($inorg,$chromo); return unless $gbrowseurl; $loc =~ s/,//g; my $url = "${gbrowseurl}name=${loc}"; # $line =~ s,XcutX,$chromePos,; $mapu= " Feature on Genome Map"; # chomp($line); $line .= $mapu; } return $mapu; } sub gbrowseFromGnomapLocation { my $line = shift; # @_; my $mapu= ''; return if ($line =~ m/type=chromosome/); # if ($line =~ s/(\w+:[\d,]+\.\.[\d,]+)/XcutX/) # /(-?\d+)(?:-|\.\.)(-?\d+)/ if ($line =~ m/([\w_-]+)\:(complement|join)\(([\d-]+)\..*\.([\d-]+)\)/) { my $chr = $1; my $compl = $2; my $start = $3; my $end = $4; my $gbrowseurl= gbrowseurl($inorg,$chr); return unless($gbrowseurl); my $url = "${gbrowseurl}name=$chr:${start}..${end}"; $mapu= " Feature on Genome Map"; } elsif ($line =~ m/([\w_-]+)\:([\d-]+)\.\.([\d-]+)/) { my $chr = $1; my $start = $2; my $end = $3; my $gbrowseurl= gbrowseurl($inorg,$chr); return unless($gbrowseurl); my $url = "${gbrowseurl}name=$chr:${start}..${end}"; $mapu= " Feature on Genome Map"; } return $mapu; } sub gbrowseFromId { my $line = shift; # @_; my $mapu= ''; return if ($line =~ m/type=chromosome/); my $gbrowseurl= gbrowseurl($inorg,$chr); return unless($gbrowseurl); if ($line =~ m/(FBgn\d+)/) { my $id = $1; my $url = "${gbrowseurl}id=$id"; $mapu= " Feature on Genome Map"; } return $mapu; } sub dehtml { my ($h)= @_; local $_= $h; s,<[^>]+>,,g; s,\&,&,g; s,\<,<,g; s,\>,>,g; return $_; } =head1 getDbUrl($dbname,$accession) read URL_ properties from new euGenes::Properties(file => $file) $u= $dbprops->get("URL_".$db); unless($u =~ s/\%s/$acc/) { $u.= $acc; } =cut sub getDbUrl { my($db,$acc)= @_; my $u=''; # ## -- this works now - requires added perl lib, dbprops data if ($propsOk) { unless($dbprops) { $dbprops= new euGenes::Properties( name => $dbpropname, debug => 0); #, debug => $debug ); ##? add any locally define URL aliases like 'emb' ?? ##? pass as param to Properties( aliases => \%dbaliases ) foreach my $dba (keys %dbaliases) { $dbprops->set( $dba, $dbprops->getvalue( $dbaliases{$dba}) ) unless( $dbprops->get( $dba ) ); } # $dbprops->list() if $debug; # debug } $u= $dbprops->get("URL_".$db); $u= $dbprops->get("URL_".uc($db)) unless($u); } return "$missinglink$db:$acc" unless($u); unless($u =~ s/\%s/$acc/) { $u.= $acc; } return $u; } =head1 evalUrl($line, $pat, $replace) add hyperlinks for fasta >defline ids in blastout blastdb specific; messy.... =cut sub evalUrl { my($v,$p,$u,$inpart)= @_; if ($u =~ /\%s/) { $u =~ s/\%s/\$1/; } if ($v =~ m,^>, && $p =~ m/^\^>/) { $p =~ s/\^[^\(]+\(/\(/; ## cant do this match } my $ok= 0; $inpart ||=""; my $c=','; $c='!' if ($p =~ m/,/ || $u =~ m/,/); local $_= $v; if ($u =~ /getDbUrl/) { # my $mu= 'm'.$c.$p.$c; $ok= eval "$mu;"; # not good... $ok= ($_ =~ m!$p!); if ($ok) { my($db,$da,$d3,$d4,$d5)= ($1,$2,$3,$4,$5); #.. really need to parse $p & $u for $1,$2,... ## see below hack fix for dspp_ => Dspp- ; use both here ?? ## s/\bd(\w\w\w)_/D$1-/; .. do here; safer ## need better key the defline has species prefix; 'gnl|Spp-dbinfo|itemId ...' my $du=''; if ($db =~ /^gnl$/ && ($da =~ m/^($mysppdefpatt)/ || $da =~ m/^(D\w\w\w)[_-]/i) ) { my $org= lc($1); ## change this to have map of those with 'getsppfastaUrl' if ($orgmaps{$org}) { $ok= 0; $du = $missinglink; } else { $du= "${getsppfastaUrl}$db/$da/$d3"; } # a hack, but works - problems? } else { $du= getDbUrl($db,$da); # if $ok } # print "$ok = eval: getDbUrl($db,$da)= $du\n"; unless ($du =~ m/$missinglink/) { $u =~ s/\&getDbUrl\([^)]+\)/$du/; $u =~ s/\$1/$db/; #if $u =~ m/\$1/; $u =~ s/\$2/$da/; #if $u =~ m/\$2/; $u =~ s/\$3/$d3/; # if $u =~ m/\$3/; $u =~ s/\$4/$d4/; # if $u =~ m/\$4/; $u =~ s/\\//g; $ok= ($_ =~ s!$p!$u!); } } } else { if($inpart eq "hitlist") { return ($_,0) if ($_ =~ m!$p\.\.\.!); } my $pu= 's'.$c.$p.$c.$u.$c; #,g ?? ,e? $ok= eval "$pu;"; } return ($_,$ok); } =head1 basicUrls do simple hyperlinks =cut sub basicUrls { my ($h,$tx)= @_; local $_= $h; unless($parts{db}) { s,,<TITLE>$project ,i;} # if $inheader if($parts{end}) { s,(</BODY), <hr><small>Run on computer $host</small>\n $1,i; } s,nph-viewgif.cgi\?,$nphcgi_url,g; s,<IMG SRC="images/\?,$nphcgi_url,g; if ($parts{defline} && $dropgo) { s,\bGO\([^)]+\),,g; } s,($idpat),<A href="$idurl$1">$1</A>,g; my $didu= 0; foreach my $k (sort keys %idmap) { my $v= $idmap{$k}; my ($p,$u,$pt,$db)= @$v; next unless ($db eq 'all' || $dbname =~ m/$db/); my $ok= 0; my $atpart; foreach my $ps (sort keys %parts) { if ($parts{$ps} && $pt =~ m/$ps/) { $ok=1; $atpart=$ps; last; } } next unless ($ok); my $te; ($te,$ok)= evalUrl($_,$p,$u,$atpart); $_= $te if $ok; $didu++ if $ok; ##last if $ok; # dont do this? } if ($wantMap && m/\bloc=/) { $mapu = gbrowseFromGnomapLocation($_); if ($mapu) { $wantMap=0; $_ =~ s/(.)$/$1 $mapu/; } } elsif ($wantMap && m/boundaries:/) { $mapu = gbrowseFromBoundaries($_); if ($mapu) { $wantMap=0; $_ =~ s/(.)$/$1 $mapu/; } } if ($wantMap && m/FBgn/) { $mapu = gbrowseFromId($_); # $wantMap=0 if ($mapu); # save in case boundaries shows up later in alignment } if ($inscore && $wantMap && $mapu) { $wantMap=0; print "$mapu\n"; } return $_; } =head1 BASIC_blast_output simple parsing of blast output =cut sub BASIC_blast_output { my $inh= shift; while (<$inh>) { if ($needcontype) { if ($_ =~ /xml/i) { print "Content-type: text/xml\n\n"; } else { print "Content-type: text/html\n\n"; } $needcontype= 0; } s,<TITLE>,<TITLE>$project ,i; # if $inheader s,(</BODY),<hr><small>Run on computer $host </small>\n$1,i; if (m/^>/) { $inalign=1; $inscore=0; $wantMap=1; $mapu=''; } # each new score alignment if (m/^\s*Score =/) { $inscore=1; } # have passed >defline unless( m,<area,) { s,($idpat),<A href=\"$idurl$1\">$1</A>,g; s,nph-viewgif.cgi\?,$nphcgi_url,g; s,<IMG SRC="images/\?,$nphcgi_url,g; if ($wantMap && m/\bloc=/) { $mapu = gbrowseFromGnomapLocation($_); if ($mapu) { $wantMap=0; $_ =~ s/(.)$/$1 $mapu/; } } elsif($wantMap && m/boundaries:/) { $mapu = gbrowseFromBoundaries($_); if ($mapu) { $wantMap=0; $_ =~ s/(.)$/$1 $mapu/; } } if($wantMap && m/FBgn/) { $mapu = gbrowseFromId($_); # $wantMap=0 if ($mapu); # save in case boundaries shows up later in alignment } if ($inscore && $wantMap && $mapu) { $wantMap=0; print "$mapu\n"; } } print ; } } =head1 MAPS_blast_output collect all output alignment chunks, read chromosome base start/end of match and add genome map link with blast hit feature (requires modified gbrowse_fb) =cut use vars qw($theQuery $theDatabase); sub MAPS_blast_output { my $inh= shift; my $inpre= 0; my $cutpre= 0; my $subjectid= 0; my $lineno= 0; my $subject_defline=''; $saved=''; $theDatabase= $theQuery=''; my $lastorg= 0; while (<$inh>) { ## hack fix 'dspp_xxx' => 'Dspp-xxx' ; see evalUrl !????? ## s/\bd(\w\w\w)_/D$1-/; my $inl= $_; $lineno++; my $tx= dehtml($_); print $saveinh $_ if ($saveinh); if ($needcontype) { if ($_ =~ /xml/i) { print "Content-type: text/xml\n\n"; } ##elsif ($_ =~ /html/i) { print "Content-type: text/html\n\n"; } else { print "Content-type: text/html\n\n"; } $needcontype= 0; } if ($dohidealigns && $inl =~ m,<BODY,i) { $inl =~ s,<BODY,$jsheader\n<BODY onload="startPage()" ,i; } if($NOTICE_TOP && $inl =~ m,<BODY,i) { $inl .= $NOTICE_TOP; } if ($tx =~ m,^Database:\s+(\S+),) { $dbname= $1; $theDatabase= $dbname; ## is this complete ?? $parts{db}++; $part='db'; ## can be many; if have several >query inputs printsaved($saved) if ($saved); # $saved=''; $savetx=''; $collectAlign= 1; #?? try always if ($dbname =~ m,scaffold,) { $subjectMap= 'scaffold'; # need also scaffold start/end to adjust Sbjct start/end $collectAlign= 1; } elsif ($dbname =~ m,whole_genome_genomic|chromosome,) { $subjectMap= 'genome'; $collectAlign= 1; } elsif ($dbname =~ m,translation,) { $subjectMap= 'protein'; $collectAlign= 1; $doMultiMap=0; } else { $subjectMap= 'other'; $collectAlign= 1; } } elsif ($tx =~ m,^Query=\s+(\S+),) { $theQuery= $1; $parts{atquery}=1; $part='atquery'; } elsif ($tx =~ m,Sequences producing significant alignments:,) { $parts{hitlist}=1; $part='hitlist'; # ^^ line before " Score E" belongs here } elsif ($tx =~ m,^>(\S+),) { #><a name = 480></a>gadfly|SEG:AE003497|gb|AE003497|arm:X [14518871,14794521] $subject_defline= $1; $parts{hitlist}=0; $parts{align}=0; $parts{defline}=1; $part='defline'; $parts{aligns}++; $subjectid++; if ($dohidealigns) { (my $subpatt= $subject_defline) =~ s/[|]/./g; $inl =~ s/$subpatt//; $inl =~ s/>//; # <a name = 25013></a> .. cut this out and wrap around defline my $aname= $1 if ($inl =~ s,<a name = (\d+)></a>,,i); my $sid= sprintf("S%04d",$subjectid); ## add div wrapper for each subject section (my $sbjlabel= $subject_defline) =~ s/^gnl\|//; $sbjlabel =~ s/[|]/: /g; my $hiddenspan= ""; ## are in pre mode here ?? $hiddenspan .= "<a name=\"$aname\" class=\"tctl\">" if ($aname); $hiddenspan .= "<span id=\"${sid}_show\" onclick=\"visibility('${sid}','on')\" class=\"ctl_visible\">" ."<img src=\"/icons/right.png\" alt=\"show\"/> "; $hiddenspan .= "<b>$sbjlabel</b> </span>"; ## ."<img src=\"/gbrowse/images/buttons/plus.png\" alt=\"show\"/> "; $hiddenspan .= "<span id=\"${sid}_hide\" onclick=\"visibility('${sid}','off')\" class=\"ctl_hidden\"> " ."<img src=\"/icons/down.png\" alt=\"hide\"/> "; $hiddenspan .= "<b>$sbjlabel</b> </span>"; ## ."<img src=\"/gbrowse/images/buttons/minus.png\" alt=\"hide\"/> "; $hiddenspan .= "</a> " if ($aname); $hiddenspan .= "<!--maplink_$subject_defline--> <br>\n" ."<div id=\"${sid}\" class=\"el_hidden\"> <table><tr><td><pre> "; $inl= $hiddenspan . $inl; if ($parts{insubdiv}>0) { $parts{insubdiv}--; unless(0) { ##$saved =~ s,\n</PRE>,\n</pre></td></tr></table></div></PRE>,) $inl = "</pre></td></tr></table></div>\n" . $inl; } } while ($inpre>0) { $inpre--; $inl= "</pre>" . $inl; } $parts{insubdiv}++; $cutpre= 1; } } elsif ($tx =~ m,^(Query|Sbjct):,) { $parts{align}=1; $part='align'; } elsif ($tx =~ m,^\s+Database:, ) { # can be many .. || $tx =~ m,^\s+Posted date, ##while ($parts{insubdiv}>0) { $parts{insubdiv}--; $inl = "</div>\n" . $inl;} while ($parts{insubdiv}>0) { $parts{insubdiv}--; unless(0) { ##$saved =~ s,\n</PRE>,\n</pre></td></tr></table></div></PRE>,) $inl = "</pre></td></tr></table></div>\n" . $inl; } } if ($part ne 'end') { while ($inpre>0) { $inpre--; $inl = "</PRE>" . $inl; } } $cutpre= 0; $parts{end}=1; $part='end'; $parts{aligns}= 0; ##NO - same database##$subjectMap= $collectAlign= 0; printsaved($saved) if ($saved); } # note: was m,,i << only UPcase PRE need chomping/counting ? if ($inl =~ m,^<PRE>,) { if ($cutpre) { $inl =~ s,<PRE>,,i; } else { $inpre++; } } elsif ($inl =~ m,^</PRE>,) { if ($cutpre) { $inl =~ s,</PRE>,,i; } else { $inpre--; } } # some html errs; messy for 2+ queries my $dosave=''; if ($part eq 'atquery') { $dosave='.' if ($saveTillEnd); } if ($part eq 'hitlist') { $dosave='.' if ($saveTillEnd); #? collect for table-izing ? # $collectHitlist= 1; if ($tx =~ m/^(\S+)/) { my $sbj= $1; $inl =~ s,\n,<!--maplink_$sbj-->\n,; } } if ($parts{aligns}) { $savetx .= $tx; chomp($savetx); if ($tx =~ m/^>(\w+)/) { # defline - in alignment my $defid= $1; my $dbid= undef; printsaved($saved) if ($saved); $inalign=1; $inscore=0; $bstart= $bend= 0; $wantMap=1; $mapu=''; $savetx = $tx; chomp($savetx); $inorg= $lastorg if (@orgs > 1); # dont know which organism match we are in ## fixme for '>gnl|dyak_wu040407|chrX' deflines if ($defid =~ m/^gnl|local|gi/ && $tx =~ m/^>\w+\|([\w_.-]+)\|([\w_.-]+)/) { ($dbid,$defid)= ($1,$2); if ($dbid =~ m/^($mysppdefpatt)/i) { $inorg= lc($1); } ## HACK, fixme elsif ($dbid =~ m/^(d\w\w\w)[_.-]/i) { $inorg= lc($1); } ## HACK, fixme } ##if ($tx =~ m/\btype=protein/i) { $isamino= 1; } if (0) { #? this is bad now for mapview w/ fly gene id subject if ($tx =~ m/\bloc=([\w_-]+)\:/i) { $defid= $1; } elsif ($tx =~ m/\barm:(\w+)/i) { $defid= $1; } } # dang; only chromosome fasta now has species=xxx; need it here for map views! # use species-chr map as for dmelhet ? $tx =~ s/\bto_species/to_org/; # Hack fix if ($tx =~ m/\bspecies=([\w_-]+)/i) { $inorg= lc($1); } #do for gbrowse only? # $inorg= checkorg($inorg,$defid); $atdefline= $tx; # global; more info for gbrowse url if ($collectAlign) { @scafloc= (); $chr= $defid; $dosave='.'; ## nov04 - do this for all with loc=chr:nnn ? # >AE003455 type=scaffold; loc=2R:16750460..17041709; ID=AE003455; Feature on Genome Map # name=AE003455; db_xref=Gadfly:AE003455.3,Gadfly:AE003455; # len=291250 # if ($line =~ m/([\w_-]+)\:(complement|join)\W([\d-]+)\.\..*([\d-]+)\)/) # elsif ($line =~ m/([\w_-]+)\:([\d-]+)\.\.([\d-]+)/) #$subjectMap =~ /scaffold/ && if ($subjectMap !~ /genome/ && $tx =~ m/\bloc=([\w_-]+)\:(complement|join)\(([\d]+)\..*\.([\d]+)\)/s) { @scafloc= ($1,$3,$4); } elsif ($subjectMap !~ /genome/ && $tx =~ m/([\w_-]+)\:([\d]+)\.\.([\d]+)/s) { # dropped [-] @scafloc= ($1,$2,$3); } ## get arm loc: >gadfly| SEG:AE003732 |gb|AE003732 |arm:3R [16579446..16803974] elsif ($subjectMap =~ /scaffold/ && m!\barm:(\w+)\s+\[(\d+)\D+(\d+)\]!) { @scafloc= ($1,$2,$3); } } # $saved=$_; next; } elsif ($tx =~ m/^\s*Score =/) { # have passed >defline printsaved($saved) if ($saved); # save values: bits/expect, strand for mapview ; 'Score = 753 bits (380), Expect = 0.0' $inscore=1; $bstart= $bend= 0; $parts{defline}=0; if ($collectAlign) { $dosave= '.'; } #next; } elsif ($subjectMap && $tx =~ m,Sbjct:\s+(\d+)[^\d]+(\d+)\s*$,) { $bstart= $1 if ($bstart == 0); $bend= $2; # my $hsploc= [$inorg,$chr,$bstart,$bend]; # ? save each for addMapHitLink ? $savemaploc{$subject_defline} = [] unless($savemaploc{$subject_defline}); push( @{$savemaploc{$subject_defline}}, [$inorg,$chr,$bstart,$bend] ); $maporder{$subject_defline}= $lineno unless($maporder{$subject_defline}); #$savemaploc{$subject_defline} = [$inorg,$chr,$bstart,$bend]; ## $subject_defline or subjectid ? $savetx =~ s/\bto_species/to_org/; # Hack fix if ($savetx =~ m/\bspecies=([\w_-]+)/i) { $inorg= lc($1); } if (!@scafloc && $subjectMap !~ /genome/ && $savetx =~ m/\bloc=([\w_-]+)\:(complement|join)\(([\d]+)\..*\.([\d]+)\)/s) { # dropped [-] @scafloc= ($1,$3,$4); } elsif (!@scafloc && $subjectMap !~ /genome/ && $savetx =~ m/([\w_-]+)\:([\d]+)\.\.([\d]+)/s) { # dropped [-] in loc @scafloc= ($1,$2,$3); } } } unless( m,<area, ) { $inl= basicUrls($inl,$tx); } $lastorg= $inorg; if ($collectAlign && ($saved || $dosave)) { $saved .= $inl; next; } print $inl; } allSubjectHitLinks() if ($saveTillEnd); multiSppBlastMap() if ($doMultiMap && $saveTillEnd); $saveTillEnd= 0; printsaved($saved) if ($saved); } use vars qw( %savemultiurl ); sub addSeqLinks { my($sequrl,$htmlinsert)= @_; $htmlinsert='' unless $htmlinsert; # $sequrl =~ s,$gbrowsebaseurl,$seqfetchurl,; ## same base url my $gffurl = $sequrl; # gff shows added feats ## want to add BatchDumper.flip=1 for strand=- ; dec06 ## my $seqflip= ($sequrl =~ m/flip=1/) ? $seqparam_flip : ""; $sequrl =~ s,;add=[^;]+,,; # these dont show in batchdump ?? $sequrl =~ s,flip=1;,, if($seqflip); ## want to remove +-1000 expansion from sequrl? vstart..vend is expanded # $url="${gbrowseurl}name=$chr:$vstart..$vend;add=$chr+match+Blast+$bstart..$bend"; ## gbrowse $htmlinsert .= "  <a href=\"${sequrl}${seqparam_fa}${seqflip}\">FastA</a>"; $htmlinsert .= " <a href=\"${sequrl}${seqparam_gb}${seqflip}\">GenBank</a>"; $htmlinsert .= " <a href=\"${sequrl}${seqparam_em}${seqflip}\">EMBL</a>"; $htmlinsert .= " <a href=\"${gffurl}${seqparam_gff}\">GFF</a>"; return $htmlinsert; } sub mapexUrl { my($url)= @_; my($vb,$ve)= ($url =~ m/(\d+)\.\.(\d+);add=/); if ($vb && $ve) { ($vb,$ve)= ($vb-1000, $ve+1000); $vb=1 if($vb<1); $url =~ s/(\d+)\.\.(\d+);add=/$vb\.\.$ve;add=/; } return $url; } sub allSubjectHitLinks { foreach my $sb (sort{$maporder{$a}<=>$maporder{$b}} keys %maporder) ## foreach my $sb (sort keys %savemaploc) # gets out-of-order map urls to multimap { my ($l_chr); my $url=''; my $strand=1; my($gnl,$sppid,$scaf)= split(/[|]/,$sb); my $sbhits= $savemaploc{$sb}; next unless(ref($sbhits)); foreach my $hit (@$sbhits) { my($inorg,$chr,$bstart,$bend)= @$hit; ##my $strand='+'; if ($bend < $bstart) { ($bstart,$bend) = ($bend,$bstart); $strand=-1; } #? add strand to url?? if(!$url) { #? $bend= 0 if ($subjectMap =~ /protein/); # 08jul patch for gbrowsenew ?? $url= gbrowseFromOrgChrStartStop($inorg,$chr,$bstart,$bend,$strand); $l_chr= $chr; } elsif ($chr eq $l_chr) { #my($vstart,$vend)= ($bstart-1000,$bend+1000); #$url= $url . ";add=$chr+match+Blast+$bstart..$bend"; ## gbrowse; add score?/direction ##$url="${gbrowseurl}name=$chr:$vstart..$vend;add=$chr+match+Blast+$bstart..$bend"; ## gbrowse #$url .= ",$bstart..$bend"; $url .= ",$bstart..$bend" unless(length($url)>500); my($vb,$ve)= ($url =~ m/(\d+)\.\.(\d+);add=/); if ($vb && $ve) { my $update= 0; my $dist1= $ve - $vb; #if ($bstart < $vb) { $vb= $bstart-1000; $update=1; } #if ($bend > $ve) { $ve= $bend+1000; $update=1; } if ($bstart < $vb) { $vb= $bstart; $update=1; } if ($bend > $ve) { $ve= $bend; $update=1; } my $dist2= $ve - $vb; $update=0 if ($dist2 > $dist1 + 300000); # is low qual, distant hsp ? if ($update) { $url =~ s/(\d+)\.\.(\d+);add=/$vb..$ve;add=/; } # ^^ this append is bad for hits far away .. check distance before adding? } } } if ($url) { $url =~ s/:(\d+)\.\.(\d+);add/;add/ if ($subjectMap =~ /protein/); # 08jul patch for gbrowsenew ?? $sppid= lc($sppid); $savemultiurl{$sppid}= $url if($sppid && !$savemultiurl{$sppid}); ## replace ^ if besthit-order is before this one (my $sbpatt=$sb) =~ s/[|]/./g; my $mapurl= mapexUrl($url); my $insert= "    <a href=\"$mapurl\">MapView</a>"; $insert .= addSeqLinks($url,""); unless($saved =~ s,<!--maplink_$sbpatt-->,$insert,g) { my $sbat= index($saved,$sb); $sbat = index($saved,"\n",$sbat) if $sbat; # use maplink ? if ($sbat) { $saved = substr($saved,0,$sbat-1) . $insert . substr($saved,$sbat-1); } } } } } =item multispp blast view try multi-species gbrowse view of blast hits ?? for top subj of each spp, call gbrowse_img (?) -- add params: e=1;t=tblastDM+blastndmel; (tracks to show; embed in html) http://melon.bio.indiana.edu:7151/species/cgi-bin/gbrowse_img/dmoj/?e=1;name=scaffold_6540:15787972..15795263;add=scaffold_6540+match+Blast+15791803..15791862,15791803..15791894,15791521..15791580,15791521..15791592,15794204..15794263,15794204..15794301,15788972..15789027,15794092..15794151,15794092..15794169,15789154..15789207,15789300..15789356,15793973..15794016,15794329..15794388,15794329..15794418,15791349..15791386,15791976..15792016,15793318..15793361 http://melon.bio.indiana.edu:7151/species/cgi-bin/gbrowse/dvir/?name=scaffold_13047:5411235..5418234;add=scaffold_13047+match+Blast+5414853..5414912,5414853..5414944,5414558..5414617,5414558..5414628,5417175..5417234,5417175..5417279,5412235..5412290,5417065..5417124,5417065..5417142,5412399..5412452,5412559..5412615,5416942..5416985,5414396..5414433,5415030..5415061 http://melon.bio.indiana.edu:7151/species/cgi-bin/gbrowse/dgri/?name=scaffold_25013:5134448..5141418;add=scaffold_25013+match+Blast+5137773..5137832,5137742..5137832,5138091..5138150,5138080..5138150,5140363..5140418,5135448..5135507,5135402..5135507,5135584..5135643,5135545..5135643,5140187..5140240,5140012..5140060,5135686..5135729,5136541..5136599,5138293..5138330,5135290..5135336,5137628..5137656 http://melon.bio.indiana.edu:7151/species/cgi-bin/gbrowse/dmoj/?name=scaffold_6540:15787972..15795263;add=scaffold_6540+match+Blast+15791803..15791862,15791803..15791894,15791521..15791580,15791521..15791592,15794204..15794263,15794204..15794301,15788972..15789027,15794092..15794151,15794092..15794169,15789154..15789207,15789300..15789356,15793973..15794016,15794329..15794388,15794329..15794418,15791349..15791386,15791976..15792016,15793318..15793361 =cut sub multiSppBlastMap { my @splist= (sort keys %savemultiurl); return unless(@splist); my $mmapfile= "sppblastmap$$.html"; open(MM,">$realtmpdir/$mmapfile") or return; my $mmout= *MM; my $insert="<big><b><a href=\"$webtmpdir/$mmapfile\">Multi-species BLAST Map View</a></b></big>\n"; my $at= 0; $at= index($saved,"Sequences producing significant alignments"); $at= rindex($saved,"\n",$at-1) if $at>0; $at= rindex($saved,"\n",$at-1) if $at>0; if ($at) { $saved= substr($saved,0,$at) . $insert . substr($saved,$at); } else { $saved= $insert.$saved; } ## $saved =~ s,(Sequences producing significant alignments), $insert $1,; my $splist= join(", ",@splist); # (sort keys %savemultiurl) print $mmout <<"EOF"; <html> <title> $project: Multi-species BLAST Map

Multi-species BLAST Hits Map

Query= $theQuery
Species databases= $splist

EOF foreach my $sppid (@splist) { # (sort keys %savemultiurl) my $url= $savemultiurl{$sppid}; (my $org = lc($sppid)) =~ s/[-_].*$//; #?? skip if no $speciesName ? my $species= $speciesNames{$org} || $org; $species =~ s/_/ /g; my $mapurl= mapexUrl($url); (my $imurl= $mapurl) =~ s,$gbrowsebaseurl,$gbrowseimgurl,; my $ftlist= "tblastDM"; ## FIXME if ($org =~ /^(dmel)/) { $ftlist="genespan"; } elsif ($org =~ /^(dpse)/) { $ftlist="gene"; } $imurl .= ";t=$ftlist;e=1;width=700"; ## FIXME: t=tblastDM == $mainFeaturesList #my $mapurl= mapexUrl($url); my $insert= "  ...   Map Detail"; $insert .= addSeqLinks($url); print $mmout <<"EOF"; EOF } print $mmout "
$species $insert
\n"; close($mmout); } sub addMapHitLink { ##my ($saved)= @_; ##my($saved,$chr,$bstart,$bend)= @_; #^ using global $saved to empty it; below # print "\n
DEBUG map link $chr:$bstart..$bend
\n"; if ($chr && $bstart =~ m/\d/ && $bend =~ m/\d/) { my $strand=1; ($bstart,$bend,$strand) = ($bend,$bstart,-1) if ($bend < $bstart); if (@scafloc == 3) { my ($arm, $sstart, $send)= @scafloc; $chr= $arm; if ($send < $sstart) { ($sstart, $send) = ($sstart, $send) ; ($bstart, $bend) = ($bstart, $bend) ; #?? } if ($subjectMap =~ /protein/) { $bstart *= 3; $bend *= 3; } #? is this right? ## really need to map from prot loc=(5..10,20..30,...) to genome locs ($bstart,$bend)= ($bstart+$sstart, $bend+$sstart); } elsif ($subjectMap !~ /genome/) { $bend= 0; } $bend= 0 if ($subjectMap =~ /protein/); # 08jul patch for gbrowsenew ?? my $url= gbrowseFromOrgChrStartStop($inorg,$chr,$bstart,$bend); if ($url) { ## $saved =~ s,\<...maplink..\>,    $maphit_label,; # for 1st HSP only? my $lscore= rindex($saved,' Score ='); $lscore = index($saved,"\n",$lscore) if $lscore; if ($lscore) { $saved = substr($saved,0,$lscore) . " $maphit_label" . substr($saved,$lscore); } else { $saved =~ s,(\s*Score =.+),$1 $maphit_label,; } } $bend= 0; # dont repeat ! } } sub printsaved { ##my ($saved)= @_; ##my($saved,$chr,$bstart,$bend)= @_; #^ using global $saved to empty it; below # print "\n
DEBUG map link $chr:$bstart..$bend
\n"; # if ( $dropgo ) { $saved =~ s,\s*\(GO:[^)]+\)\s*, ,sg; } addMapHitLink(); #?? # if (0) { ##$chr && $bstart && $bend) # ($bstart,$bend) = ($bend,$bstart) if ($bend < $bstart); # if (@scafloc == 3) { # my ($arm, $sstart, $send)= @scafloc; # $chr= $arm; # if ($send < $sstart) { # ($sstart, $send) = ($sstart, $send) ; # ($bstart, $bend) = ($bstart, $bend) ; #?? # } # if ($subjectMap =~ /protein/) { $bstart *= 3; $bend =* 3; } #? is this right? # ## really need to map from prot loc=(5..10,20..30,...) to genome locs # ($bstart,$bend)= ($bstart+$sstart, $bend+$sstart); # } # elsif ($subjectMap !~ /genome/) { # $bend= 0; # } # # my $url= gbrowseFromOrgChrStartStop($inorg,$chr,$bstart,$bend); # if ($url) { # # ## $saved =~ s,\<...maplink..\>,    $maphit_label,; # for 1st HSP only? # # my $lscore= rindex($saved,' Score ='); # $lscore = index($saved,"\n",$lscore) if $lscore; # if ($lscore) { # $saved = substr($saved,0,$lscore) . " $maphit_label" # . substr($saved,$lscore); # } # else { # $saved =~ s,(\s*Score =.+),$1 $maphit_label,; # } # } # # } unless($saveTillEnd) { print $saved; $saved=''; } # $savetx=''; } sub expiretime { my(@MON)=qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/; my(@WDAY) = qw/Sun Mon Tue Wed Thu Fri Sat/; ## my $time= time()+(60*60*2); # 2hr ? my $time= time(); # it really expires right away ; not cached. need to rerun or save my $sc=' '; my($sec,$min,$hour,$mday,$mon,$year,$wday) = gmtime($time); $year += 1900; my $extime= sprintf("%s, %02d$sc%s$sc%04d %02d:%02d:%02d GMT", $WDAY[$wday],$mday,$MON[$mon],$year,$hour,$min,$sec); return $extime; } sub initdata { # copied from Gbrowse javascript show/hide methods $jsheader= <<'JEOF'; JEOF %idmap = ( # fbid => ['(FB\w\w\d+)', # "$1", # 'all', 'all'], ## na_cDNA = BcDNA:RE10864 AY118367 [similar by BLASTN (0.0) to ATPsyn-beta "... 577 e-165 ## >BcDNA:LD14807 AY061181 [start_codon:2] # cDNA => "http://www.fruitfly.org/cgi-bin/EST/community_query/ctgReport.pl?db=estlabtrack&id_type=0&id_value=%s", ## change to NCBI ## http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search&db=Nucleotide&doptcmdl=GenBank&tool=FlyBase&term=RE05272 na_cDNA => [ '^>?(\w+cDNA):(\w+)', '$1:$2', ##'$1:$2', 'hitlist|defline', 'na_cDNA|na_all'], ## na_geno_clones = AC010838 : Drosophila melanogaster, chromosome 4 ## >AC010838 : Drosophila melanogaster, chromosome 4, region 101F-102F, BAC # clone BACR13D24, complete sequence na_geno_clones => [ '^>?(\w+)\s+:', '$1 :', 'hitlist|defline', 'na_geno_clones|na_all'], ##na_est =RE57967.5prime BI371140 ## RE57967.5prime BI371140 ; >RE12172.5prime BI170649 # EST => "http://www.fruitfly.org/cgi-bin/EST/community_query/ctgReport.pl?db_name=estlabtrack&id_type=0&id_value=%s", # http://weasel.lbl.gov/cgi-bin/EST/community_query/cloneReport.pl?id_type=0&id_value=LD04112&db_name=estlabtrack na_est => [ '^>?(\w+\.(3prime|5prime|complete))', '$1', 'hitlist|defline', 'na_est|na_all'], # whole_genome_genomic_scaffolds_dmel_RELEASE3 = # gadfly|SEG:AE003846|gb|AE003846|arm:4 [925147,1237870] # >gadfly|SEG:AE003846|gb|AE003846|arm:4 [925147,1237870] # estimated-cyto:102F3-102F8 gadfly-seqname:AE003846 seg => [ '\|SEG:(\w+)', '\|SEG:$1', 'hitlist|defline', 'scaffolds|na_all'], # arm:3R [16579446,16803974] larm => [ '\|arm:(\w+)\s+\[(\d+).(\d+)\]', '\|arm:$1 [$2..$3]', 'hitlist|defline', 'na_gb|scaffolds|na_all'], #?? add gbrowse source=armview for whole_genome_genomic_dmel arm names ?? # >3R 3R ... has blast hit to map gb => [ '\|gb\|(\w+)', '\|gb\|$1', 'hitlist|defline', 'na_est|na_gb|scaffolds|na_all'], gbest => [ '\|gb\|(\w+)', '\|gb\|$1', 'hitlist|defline', 'est'], arpgene => [ # as dbxref=euGenes:ARP1_G6497 '(euGenes)\:(ARP\w+)', '$1:$2', 'hitlist|defline', 'translation'], pasaasm => [ '\|(pasa\w+)\|(\w+)', '\|$1:$2', 'hitlist|defline', 'est'], # pasa_acyr|asmbl_10531 # return 'http://server2.eugenes.org/cgi-bin/PASA/cgi-bin/cdnaasmbl_report.cgi' # .'?db=pasa_acyr&compare_id=1&cdna_acc='.$1; # na_pe = EP(3)3515 AQ072985 inserted at base 57 # >l(3)j8C8 AQ034099 inserted at base 321 Both 5' and 3' ends of P ## '$1', #?? change to CODE = sub{} ? ## if ($f->name()) { return '/cgi-bin/fbinsq.html?symbol='.$f->name(); } na_pe => [ ##'^>?(\w\w+)', ## too broad for na_all use - list '^>?((BG|EP|KG|l.\d|fs.\d)[\w\(\)]+)', '$1', #?? change to CODE = sub{} ? 'hitlist|defline', 'na_pe|na_all'], # na_STS = embl|Z31972|DM198B1S D. melanogaster STS # >BACR38E10-TET3 (STS) made from BACR38E10 ### this syntax with s,p,q,e; isnt working... ### '$1\| $2\| $3', defline => [ ##'^>?(\w+)\|(\w+)\|(\w+)', '^>?(\w+)\|([^|]+)\|(\S+)', ## need to skip () '$1\|$2\|$3', 'hitNOTlist|defline', 'all'], ##was 'na_STS|na_re|na_te|na_all|genbank|uniprot|refseq'], # genomeasm => [ # '^>?([^\|]+)\|([^\|]+)\|([^\s]+)', # '$1\|$2\|$3', # 'hitlist|defline', # 'all'], ## should be only *chromosome* ? bac => [ '^>?(BAC\w+)', '$1', 'hitlist|defline', 'na_STS|na_all'], # whole_genome_transcript == CG7421-RC transcript from_gene[CG7421 Nopp140 FBgn0037137 ] na_genome => [ '^>?(C\w\d+\-\w\w)', '$1', 'hitlist|defline', 'whole_genome_transcript|transcript|na_all'], # whole_genome_translation = CG11154-PA translation from_gene[CG11154 ATPsyn-beta # >CG11154-PA translation from_gene[CG11154 ATPsyn-beta FBgn0010217 ] # seq_release:3 mol_weight=54109 # cds_boundaries:(4:1,051,342..1,053,862[+]) aa_length:505 Genome Map aa_genome => [ '^>?(C\w\d+\-P\w)', '$1', 'hitlist|defline', 'whole_genome_translation|translation'], # aa_dmel_swall == Q9V494 ATPSYN-beta protein (RE10864p). aa_swall => [ '^>?(\w\w+)', '$1', 'hitlist', ## 'hitlist|defline', 'swall'], ); # idmap } =item blast form message dghome2% more /tmp/__web.in -----------------------------1137522503144128232716531729 Content-Disposition: form-data; name="org" dana -----------------------------1137522503144128232716531729 Content-Disposition: form-data; name="org" dmel -----------------------------1137522503144128232716531729 Content-Disposition: form-data; name="org" dsim -----------------------------1137522503144128232716531729 Content-Disposition: form-data; name="PROGRAM" blastn -----------------------------1137522503144128232716531729 Content-Disposition: form-data; name="DATALIB" chromosome -----------------------------1137522503144128232716531729 Content-Disposition: form-data; name="SEQUENCE" >fly2R_16113607_CG10527RA D. melanogaster Chromosome 2R, selected_feature=mRNA CG10527-RA ID=CG10527-RA , selection_range=complement(16113607..16114604,16114946..16115347,16116269. .16116329) 1461 bp AGTTTGAATCAGAATCTCAGTAAGCGAACATCGTTGATTGAAACAGCAAT CATGCCAATCGAAGTCAACACCCCCGATAAGTTGGAGTACCAATTCTTCC CCGCCAGCGGTGGAGTTTTTACCTTCAAGGTGCGTTCCCCCAAGGATGCT CATTTGGCACTCACACCCGCGCCCGAGGAGAACGGTCCCATTTTCGAGAT CTTTCTGGGAGGATGGGAGAACACCAAGTCGGTGATCCGCAAGGACCGCC AGAAGCCCGAAGTCGCTGAGGTGCCCACTCCGGGCATCCTAGATGCCGGA GAGTTCCGTGGCTTCTGGGTGCGCTGGTACGACAATGTCATCACCGTTGG CCGCGAAGGAGACGCCGCCGCCTTCCTTTCCTACGATGCTGGTAGCCTGT TCCCGGTCAACTTCGTCGGCATCTGCACGGGATGGGGTGCCAGCGGCACC TGGCTGATTGATGAGCCCGCTCCATCGGCTCCCGTCATGGGCTTCGCCGC GCCCACAGGAAGCGGACCAGGATGCTGGGTGCCCGCCGCCAACGGTGAGG -----------------------------1137522503144128232716531729 Content-Disposition: form-data; name="SEQFILE"; filename="" Content-Type: application/octet-stream ... =head2 new blast db filenames, Mar04 /bio/archive/genomes/Drosophila_melanogaster/current/blast dmel_aa_swall.phr << was aa_dmel_swall dmel_all_chromosome_r310.nhr << was whole_genome_genomic_dmel_RELEASE3 dmel_all_scaffolds_r310.nhr << was whole_genome_genomic_scaffolds_dmel_RELEASE3 dmel_all_transcript_r320.nhr << was whole_genome_transcript_dmel_RELEASE3 dmel_all_translation_r320.phr << was whole_genome_translation_dmel_RELEASE3 dmel_all_transposon_r320.nhr << was na_te dmel_hetero_scaffolds_r310.nhr << was heterochromatin_genomic_scaffolds_dmel_RELEASE3 na_EST.nhr na_STS.nhr na_cDNA.nhr na_dbEST.nhr na_gb.nhr na_geno_clones.nhr na_pe.nhr na_re.nhr deflines for new r3.2 fasta look like >CG2671-PA type=protein; loc=2L:complement(11218..11344,11410..11518,11779..12221,12286..12928 ,13520..13625,13683..14874,14933..15711,17053..17136); ID=l(2)gl-PA; name=l(2)gl-PA; db_xref=' CG2671,FlyBase:FBgn0002121'; /gene=l(2)gl; len=1161 >> parse on >fastaID ; db_xref= ; loc= for hyperlinks =head1 backhand use notes The output gif url nph-viewgif.cgi needs replacing with full url to .gif for use with mod_backhand proxying - otherwise we loose gif to more proxy; cant use full url to nph-viewgif.cgi because of 2nd proxy call Remove all backhand directory /blast-cgi/ relative urls .. AND/OR start using a session-id cookie in spec of mod_backhand, and Backhand bySession flag -- this causes multiple session calls to go to same backend Top output - need to deal with local relative url to IMG ; otherwise each is a backhand call Run on computer cricket


BLAST Search Results

BLASTN 2.2.6 [Apr-09-2003]



=head1 backhand error 

output when backhand fails to find server ---
== BLASTErrCombination of  common/servers/blast/Src/wwwblast.h
 -- is it due to mistake in passing on form inputs, or finding blast.rc database info?

http://cricket.bio.indiana.edu:7082/blast-cgi/backhand-test.cgi
.. after successful calls -- got
Not Found 
The requested URL /blast-cgi/backhand-test.cgi was not found on this server  
Apache/1.3.26 Server at cricket.bio.indiana.edu Port 7112 
  ^^^ BAD PORT -- this is Centaurbase virtual host !!
 

BLAST Search Results

 

Error 9 in submitting BLAST query


Short error description:

The combination of database and program, that you provided in your
message is invalid or not acceptable by BLAST search system.
Please look at current possible combinations in BLAST help.
=cut =head1 original ncbi blast.cgi # echo "Content-type: text/html" # echo "" # # #setenv DEBUG_COMMAND_LINE TRUE # setenv BLASTDB db # # ./blast.REAL =cut