#!/usr/bin/perl
# dmelaffyxbigmap.pl
# NOTE: this currently only works when run on the computer that hosts gbrowse,
# because the rendered image files are copied from its local disk; this can be fixed.
use strict;
use warnings;

# Seconds to sleep after each map window has been fetched.
use constant SLEEPYTIME => 3; # more, less?

# Non-zero: report progress (current file, curl command line) on STDERR.
my $debug = 1;

# Width of one map window, in sequence coordinates.
my $cstep = 1000000;

# Short chromosome list for trial runs; only used if the commented-out
# @chrs assignment below is re-enabled.
my @chrstest = qw(4 2L);

# All chromosome arms to tile, in processing order.
my @chrsall = qw(
    2L 2R 3L 3R X 4 2LHet 2RHet 3LHet 3RHet XHet YHet U
);
# dmel_mitochondrion_genome

# Size (last coordinate) of each chromosome arm; arms without an entry
# here are skipped by the main loop.
my %csize = (
    "2L"    => 23011544,
    "2R"    => 21146708,
    "3L"    => 24543557,
    "3R"    => 27905053,
    "X"     => 22422827,
    "4"     => 1351857,
    "2LHet" => 368872,
    "2RHet" => 3288761,
    "3LHet" => 2555491,
    "3RHet" => 2517507,
    "XHet"  => 204112,
    "YHet"  => 347038,
    "U"     => 10049037,
    # "Uextra" => 29004656,
    #"dmel_mitochondrion_genome" => 19517,
);

#my @chrs= ($debug) ? @chrstest : @chrsall;
my @chrs = @chrsall;

# The region is appended to $urlbase as chr:start-stop, e.g.
# q=3L:17000000-17500000
my $urlbase = 'http://insects.eugenes.org/cgi-bin/gbrowsenew/gbrowse_img/drosmel5dg/?q=';

# Local directory that is prefixed to the image path found in the returned
# HTML when the image file is copied.
my $filebase = '/bio/argos/work/ggb169/htdocs';

# Extra query parameters appended after the region.
my $params = "w=1024;t=gene+me8af2xmask;embed=1;grid=0";

# Date stamp embedded in every output file name.
my $date = "200809";

# Running totals: windows processed, and problems encountered.
my $ndone = 0;
my $err   = 0;
# Walk every chromosome arm in $cstep-sized windows.  For each window:
#   1. fetch the gbrowse_img HTML with curl,
#   2. copy the image file it refers to from under $filebase into the
#      current directory,
#   3. save the HTML with that image path rewritten to the local copy.
# Windows whose .html and .png both already exist (non-empty) are skipped,
# so an interrupted run can simply be started again.
foreach my $chr (@chrs) {
    my $csize = $csize{$chr} or next;    # unknown size: nothing to tile

    for (my $start = 1; $start < $csize; $start += $cstep) {
        my $stop = $start + $cstep - 1;
        $stop = $csize if ($stop > $csize); # some problem here at end; 2 small panels

        # $start is zero-padded to 8 digits (1 -> 00000001) in the file name.
        my $mapfile = join("_", "dmel", "trafx", $date, $chr,
                           sprintf("%08d", $start), $stop) . ".html";

        # Only the trailing extension is swapped; the dot is literal and anchored.
        (my $imageout = $mapfile) =~ s/\.html\z/.png/;

        next if (-s $imageout and -s $mapfile);
        warn "# Working on $mapfile\n" if $debug;

        my $curlp = "curl -s '${urlbase}$chr:$start-$stop;$params'";
        warn "# $curlp\n" if $debug;
        my $maphtml = `$curlp`;    # will take a while
        warn "#ERR: curl failed (wait status $?) for $mapfile\n" if $? != 0;
        $maphtml = '' unless defined $maphtml;

        # Take the capture from the match itself rather than from $1 afterwards,
        # and stop at the closing quote so that a second image reference later
        # in the page cannot be swallowed into the path.
        my ($imgfile) = $maphtml =~ m{"(/gbrowse/tmp/[^"]*/image\.png)"};

        if (defined $imgfile) {
            # List-form system(): no shell is involved, and the result is checked.
            if (system('/bin/cp', '-p', "$filebase/$imgfile", $imageout) != 0) {
                warn "#ERR: cannot copy $filebase/$imgfile to $imageout\n";
                $err++;
            }
            # Point the saved page at the local copy of the image.
            $maphtml =~ s{"\Q$imgfile\E"}{"$imageout"};
        }
        else {
            warn "#ERR: missing img $imageout\n";
            $err++;    # next;
        }

        # The page is saved even when the image is missing; the skip test above
        # needs both files, so such a window is fetched again on the next run.
        if (open(my $out, '>', $mapfile)) {
            print {$out} $maphtml;
            unless (close($out)) {
                warn "#ERR: cannot finish writing $mapfile: $!\n";
                $err++;
            }
        }
        else {
            warn "#ERR: cannot write $mapfile: $!\n";
            $err++;
        }
        $ndone++;

        # last if($err or ($debug and $ndone > 0));
        sleep(SLEEPYTIME);
        last if ($stop == $csize);
    }
}
warn "# Done: $ndone windows processed, $err errors\n" if $debug;
__END__
# system("$curlp > $mapfile");
# my $imageline= `grep 'src=/gbrowse/tmp/' $mapfile`;
# $imageline =~ m,"(/gbrowse/tmp/.*/image.png)",; my $imgfile=$1;
#
# sub renamemaps
# ls -1 * | head | perl -ne \
# 'chomp; s/\.((html|png))$//; $suf=$1; $oldp=$_; @p=split "_"; $p[4]= sprintf("%08d",$p[4]);\
# $newp= join("_",@p); if ($oldp ne $newp) { \
# print "mv $oldp.$suf\t$newp.$suf\n"; print "perl -pi -e\"s/$oldp/$newp/;\" $newp.$suf\n" if($suf =~ /html/); } '\
# > fixnames.sh
#
sub cutcrap # from html
#.......................
cat dmel_trafx_200809_4_01000001_1351857.html | perl -ne\
'$p=0 if(m,\<(area|style|link|script|span onmousedown),); \
$p=1 if(m,\]*>,