# dsppsynteny1.R vers <- "r5" ## set graphics device: pdf() ; png() with X11; quartz() outdevice <- "PNG" #.Device # "PNG" is good default imgwidth <- 50 # dodebug <- TRUE dohtml <- outdevice == "PNG" canreverse <- FALSE # FALSE # TRUE dmel_wsppset <- c("dsim","dsec","dyak","dere","dana","dper","dpse","dwil","dmoj","dvir","dgri") dmoj_wsppset <- c("dere","dana","dpse","dwil","dvir","dgri") linkspp <- c("dmel","dmoj") dspp <- "dmoj" # "dmoj" #"dmel" wsppset <- dmoj_wsppset # c("dvir") outdir <- "dsynr5/" #pixoutdir <- "dsynr5/dsyntpix/" pixoutdir <- "dsynr5/dmojsyntpix/" allmullerfile <- paste(outdir,"all","-",dspp,"-muller-",vers,".html",sep="") allmuller <- c() maphref <- function(wspp,wch) { paste("",sep="") } hreflink <- function(file,label) { paste("", label,"",sep="") } for (wspp in wsppset ) # wsppset # dmojwsppset # c("dyak","dere","dana","dper")) { print(paste(" ============== ",wspp,"============== ")) imgdata <- c() # imgs <- c(); imginfo <- c(); imgdchr<-c(); imgdlab<-c(); imgwsize<-c(); hsectionfile <- paste(outdir,wspp,"-",dspp,"-segments-",vers,".html",sep="") hmullerfile <- paste(outdir,wspp,"-",dspp,"-muller-",vers,".html",sep="") #topscaftab <- "tabs/topscaffolds.tab" # per dmelc ; add others topscaftab <- paste("tabs/",dspp,"-topscaffolds.tab",sep="") dfile <- paste("tabs/",dspp,"-",wspp,"-r.tab",sep="") # dchrgff <- "gff/dmel-chromosomes.gff" ## fixe for other dspp if (match(dspp, c("dmel","dpse","dyak","dsim"),nomatch=0)) { dchrgff <- paste("gff/",dspp,"-chromosomes.gff",sep="") } else { dchrgff <- paste("gff/",dspp,"-scaffolds.gff",sep="") } if (match(wspp, c("dmel","dpse","dyak","dsim"),nomatch=0)) { wchrgff <- paste("gff/",wspp,"-chromosomes.gff",sep="") } else { wchrgff <- paste("gff/",wspp,"-scaffolds.gff",sep="") } print(paste("read ",topscaftab)) topscafs <- read.delim(topscaftab) # species ref len matchchr matchchr2 matchchr3 # dana scaffold_13340 23706033 3R . . # dana scaffold_13337 23301361 3L . . print(paste("read ",dchrgff)) dchrtab <- read.delim(dchrgff,header=FALSE,col.names=paste("v",1:9,sep="")) ## for dspp == dmoj, use only top 5 chrs ! need list # dchrs <- levels( factor( dchrtab$v1) ) # dchrs <- dchrs[grep("[^4]",dchrs)] # skip chr 4 .. dscafs <- topscafs[ topscafs$species == dspp, ] dchrs <- levels( factor( dscafs$ref, levels=dscafs$ref)) matchcols <- grep("match", names(topscafs[1,])) ## need alias/labels for dchrs: Muller-A,B,C,D.. dmoj>dmel equivs ## put in topscafs ? print(paste("read ",wchrgff)) wchrtab <- read.delim(wchrgff,header=FALSE,col.names=paste("v",1:9,sep="")) print(paste("read data",dfile)) egdata <- read.delim(dfile) #^ bad data, d(e-b) << w(e-b) or viceversa drows <- nrow(egdata) dwidth <- abs(egdata$dBend - egdata$dBstart) wwidth <- abs(egdata$wBend - egdata$wBstart) dwdiff <- abs(wwidth - dwidth) ##egdata <- egdata[ dwdiff < 150000, ] egdata <- egdata[ dwdiff < 250000, ] print(paste("filtered huge sizes n=",drows - nrow(egdata))) # dchr <- levels( factor(egdata$dChr)) # wchr <- levels( factor(egdata$wChr)) dsppscafs <- topscafs[ topscafs$species == dspp, ] wsppscafs <- topscafs[ topscafs$species == wspp, ] wchr <- levels( factor( wsppscafs$ref, levels=wsppscafs$ref)) wgenomesize <- sum( wchrtab$v5 ) wscafcount <- length( wchrtab$v5 ) # dchr <- levels( factor( wsppscafs$matchchr[ wsppscafs$ref == wch ] )) # dchr <- wsppscafs[wsppscafs$ref == wch, 4:6] if (exists("dodebug")) { # wchr <- wchr[1:2] } for (wch in wchr) { wchrsize <- wsppscafs$len [ wsppscafs$ref == wch ] # wchmuller <- wsppscafs$muller [ wsppscafs$ref == wch ] # dchr <- wsppscafs[wsppscafs$ref == wch, 4:6] for (idch in matchcols) { # 4:6 nosynt <- FALSE dch <- wsppscafs[wsppscafs$ref == wch, idch] if( is.na(dch) && idch > matchcols[1]) next if( is.na(dch)) nosynt <- TRUE dchrsize <- NA if (!nosynt) { dch <- levels( factor(dch) ) dchrsize <- dchrtab$v5[ factor( dchrtab$v1) == dch ] } wshort <- paste(wch) if(pmatch("scaffold",wshort,nomatch=0)) { wshort <- paste(unlist(strsplit(wshort,"caffold_")),collapse="") } dshort <- paste(dch) if(pmatch("scaffold",dshort,nomatch=0)) { dshort <- paste(unlist(strsplit(dshort,"caffold_")),collapse="") } wlab <- paste(wspp,wshort,sep=":") dlab <- paste(dspp,dshort,sep=":") mainlab <- paste(wlab, "x", dlab, "synteny") print( mainlab) if (nosynt) { sv <- factor( egdata$wChr) == wch } else { sv <- factor( egdata$dChr) == dch & factor( egdata$wChr) == wch } if (nosynt || !TRUE %in% sv) { print( paste("No associated genes in",mainlab) ) nosynt <- TRUE } drank <- egdata$dBstart[ sv ] wrank <- egdata$wBstart[ sv ] dend <- egdata$dBend[ sv ] wend <- egdata$wBend[ sv ] wrev <- egdata$dOri[ sv ] == '-' wisrev <- FALSE dmax <- max(dchrsize,drank,dend) wmax <- max(wchrsize,wrank,wend) maxb <- ifelse(nosynt, wmax, max(dmax,wmax)) if (maxb < 1000000) { print( paste("Too small chr",mainlab, " = ",maxb) ) next } # make length ~ chrsize; 20 MB ~ 4 in; 1 MB ~ 0.2 in; wsizemb <- round( wchrsize / 1000000, digits=1) imglab <- paste( wlab,"\n",wsizemb,"Mb",sep=" ") # if(!nosynt) imglab2 <- paste(imglab,"\n","+",dlab,sep=" ") #? use instead fixed maxb ~ 35 MB (max chr size) # maxp <- round( maxb / 100000) # 300 maxin <- 0.15 * maxb / 1000000 if(maxin < 1.5) { maxin <- 1.5 ##?? min height for png maxb <- round(maxin * 1000000 / 0.15) } widin <- 1.4 in2dot <- 100 #80 #100 # minimum for png?? would like smaller but get draw errors dscale <- 1 # maxb / max(dchrsize,drank,dend) wscale <- 1 # maxb / max(wchrsize,wrank,wend) wybase <- 0.6 wyloc <- 1.5 dyloc <- 2.8 xlim <- c(0.5,3.5) ylim <- c(0,maxb) if (outdevice == "pdf") { pfile <- paste(pixoutdir,dspp,"_",dshort,"-",wspp,"_",wshort,"-",vers,".pdf",sep="") pdf(file = pfile, width = widin, height = maxin, pointsize= 9) print(paste("plot to",pfile)) } else if (outdevice == "PNG") { pfile <- paste(pixoutdir,dspp,"_",dshort,"-",wspp,"_",wshort,"-",vers,".png",sep="") png(file = pfile, width = round(in2dot*widin), height = round(in2dot*maxin), pointsize=12) print(paste("plot to",pfile)) } else { get(getOption("device"))(width=widin,height=maxin) } ### should put all these fields into one row/drawn image # if (!dev.interactive()) imgs <- c(imgs, pfile) # imginfo <- c(imginfo, imglab) # imgdchr <- c(imgdchr, dshort) # imgdlab <- c(imgdlab, dlab) # imgwsize <- c(imgwsize, wchrsize) imgrow <- data.frame( imglab=imglab, imgfile=pfile, # list or data.frame? dch=dch, dlab=dlab, dchrsize=dchrsize, wch=wch, wlab=wlab, wchrsize=wchrsize, wsizemb=wsizemb ) imgdata <- rbind(imgdata,imgrow) # imgdata$dch[3] ; imgdata$dchrsize[i]; irow <- imgdata[2,] try ( plot( xlim, ylim, axes=FALSE, type="n", main=NA, ylab=NA, xlab=NA) ); drelrank <- drank / dchrsize drelend <- dend / dchrsize dscale <- dscale * dchrsize doffset <- 0 wrelrank <- wrank / wchrsize wrelend <- wend / wchrsize wscale <- wscale * wchrsize woffset <- 0 if(!nosynt) { locdiff <- mean( drank - wrank) if(wchrsize < dchrsize && locdiff > 10000) { woffset <- locdiff } } if (canreverse && sum(wrelrank - drelrank) < -1) { wisrev <- TRUE } plotsynt <- function() { ifelse(dohtml, par( mar=c( 1, 1, 1, 1)) ,par( mar=c( 3, 2, 1, 2)+0.1 )) # par( mgp=c( 2, 1, 0) ) # c(3, 1, 0) # axis title/labels/line # par( omd=c( 0.01,0.01,0.01,0.01) ) # outer margin # par( lwd=0.3 ) plot.window(xlim, ylim) ## for png/gff, use instead html label if(!dohtml) title( sub=imglab, font.sub=2,cex.sub=1.2,line=2) ## ! slide rect up to match synt align ##rect( wybase, wmax+woffset, wyloc, 1+woffset, density=5, angle=0) rect( wybase, 1+woffset, wyloc, wmax+woffset, density=10, angle=0) if(!nosynt) { if(!dohtml) mtext( dlab, side=4, font=2, srt=90) arrows( dyloc, dmax, dyloc, 1, code = 1 + wisrev) } } syntcluster <- function(drelrank,wrelrank) { dw <- cbind(drelrank,wrelrank) dwdist <- dist(dw, method="euclidean") # was "canberra" hc <- hclust(dwdist) # method=complete, simple ? memb <- cutree(hc, h=0.5) # memb <- cutree(hc, h=1.0) # need hc height ? cent <- NULL for (k in 1:max(memb)) { cent <- rbind(cent, apply(dw[memb==k,1,drop=FALSE],2,length)) } clust <- rev(order(cent)) list(memb,clust) } try( plotsynt() ) ## ------------ bigclust <- c(1) memb <- c(1) if (!nosynt) { slist <- try( syntcluster(drelrank,wrelrank) ) memb <- unlist(slist[1]) bigclust <- unlist(slist[2]) } ##--------- ## kmax <- trunc(length(bigclust)/2) kmax <- length(bigclust) pclrs <- topo.colors(kmax) # rainbow(kmax);# terrain.colors(kmax) # showclust <- function(k) { pclr <- pclrs[k] ## for all wchr / dch, need woffset,wscale to string on pseudo-chr dpoints <- drelrank[ memb==bigclust[k] ] * dscale dends <- drelend[ memb==bigclust[k] ] * dscale wrevs <- wrev[ memb==bigclust[k] ] if(wisrev) { # fixme wpoints <- (1 - wrelrank[ memb==bigclust[k] ]) wends <- (1 - wrelend[ memb==bigclust[k] ] ) } else { wpoints <- wrelrank[ memb==bigclust[k] ] wends <- wrelend[ memb==bigclust[k] ] } wpoints <- woffset + ( wpoints * wscale) wends <- woffset + ( wends * wscale) dlen <- length(dpoints) wy <- seq( length=dlen, from=wyloc, to=wyloc) dy <- seq( length=dlen, from=dyloc, to=dyloc) na <- NA[1:dlen] # swap(wpoints,wends,drev) for (i in 1:dlen) { if(wrevs[i] == TRUE) { w= dpoints[i]; dpoints[i]= dends[i]; dends[i]= w; } } xpoly <- rbind(wpoints,dpoints,dends,wends,na) ypoly <- rbind(wy,dy,dy,wy,na) polygon(c(ypoly), c(xpoly), col=pclr, border=NA) # col=pclr, density=c(10, 20) } k <- kmax # reverse - so heaviest printed last !? if (!nosynt) { for (k in kmax:1) { try( showclust( k) ) } } if (!dev.interactive()) dev.off() } } tablehead <- function(title,preface) { tshort <- sub(" *[({;\.].*","",title) paste("\n", "
| \n")
cat( maphref(wspp,wch) )
cat(" \n",imglab) cat(" | \n")
##if (nrows > 3 && i %% 3 == 0) cat("