# dsppsynteny1.R vers <- "r5" ## set graphics device: pdf() ; png() with X11; quartz() outdevice <- "PNG" #.Device # "PNG" is good default imgwidth <- 50 # dodebug <- TRUE dohtml <- outdevice == "PNG" canreverse <- FALSE # FALSE # TRUE dmel_wsppset <- c("dsim","dsec","dyak","dere","dana","dper","dpse","dwil","dmoj","dvir","dgri") dmoj_wsppset <- c("dere","dana","dpse","dwil","dvir","dgri") linkspp <- c("dmel","dmoj") dspp <- "dmoj" # "dmoj" #"dmel" wsppset <- dmoj_wsppset # c("dvir") outdir <- "dsynr5/" #pixoutdir <- "dsynr5/dsyntpix/" pixoutdir <- "dsynr5/dmojsyntpix/" allmullerfile <- paste(outdir,"all","-",dspp,"-muller-",vers,".html",sep="") allmuller <- c() maphref <- function(wspp,wch) { paste("",sep="") } hreflink <- function(file,label) { paste("", label,"",sep="") } for (wspp in wsppset ) # wsppset # dmojwsppset # c("dyak","dere","dana","dper")) { print(paste(" ============== ",wspp,"============== ")) imgdata <- c() # imgs <- c(); imginfo <- c(); imgdchr<-c(); imgdlab<-c(); imgwsize<-c(); hsectionfile <- paste(outdir,wspp,"-",dspp,"-segments-",vers,".html",sep="") hmullerfile <- paste(outdir,wspp,"-",dspp,"-muller-",vers,".html",sep="") #topscaftab <- "tabs/topscaffolds.tab" # per dmelc ; add others topscaftab <- paste("tabs/",dspp,"-topscaffolds.tab",sep="") dfile <- paste("tabs/",dspp,"-",wspp,"-r.tab",sep="") # dchrgff <- "gff/dmel-chromosomes.gff" ## fixe for other dspp if (match(dspp, c("dmel","dpse","dyak","dsim"),nomatch=0)) { dchrgff <- paste("gff/",dspp,"-chromosomes.gff",sep="") } else { dchrgff <- paste("gff/",dspp,"-scaffolds.gff",sep="") } if (match(wspp, c("dmel","dpse","dyak","dsim"),nomatch=0)) { wchrgff <- paste("gff/",wspp,"-chromosomes.gff",sep="") } else { wchrgff <- paste("gff/",wspp,"-scaffolds.gff",sep="") } print(paste("read ",topscaftab)) topscafs <- read.delim(topscaftab) # species ref len matchchr matchchr2 matchchr3 # dana scaffold_13340 23706033 3R . . # dana scaffold_13337 23301361 3L . . print(paste("read ",dchrgff)) dchrtab <- read.delim(dchrgff,header=FALSE,col.names=paste("v",1:9,sep="")) ## for dspp == dmoj, use only top 5 chrs ! need list # dchrs <- levels( factor( dchrtab$v1) ) # dchrs <- dchrs[grep("[^4]",dchrs)] # skip chr 4 .. dscafs <- topscafs[ topscafs$species == dspp, ] dchrs <- levels( factor( dscafs$ref, levels=dscafs$ref)) matchcols <- grep("match", names(topscafs[1,])) ## need alias/labels for dchrs: Muller-A,B,C,D.. dmoj>dmel equivs ## put in topscafs ? print(paste("read ",wchrgff)) wchrtab <- read.delim(wchrgff,header=FALSE,col.names=paste("v",1:9,sep="")) print(paste("read data",dfile)) egdata <- read.delim(dfile) #^ bad data, d(e-b) << w(e-b) or viceversa drows <- nrow(egdata) dwidth <- abs(egdata$dBend - egdata$dBstart) wwidth <- abs(egdata$wBend - egdata$wBstart) dwdiff <- abs(wwidth - dwidth) ##egdata <- egdata[ dwdiff < 150000, ] egdata <- egdata[ dwdiff < 250000, ] print(paste("filtered huge sizes n=",drows - nrow(egdata))) # dchr <- levels( factor(egdata$dChr)) # wchr <- levels( factor(egdata$wChr)) dsppscafs <- topscafs[ topscafs$species == dspp, ] wsppscafs <- topscafs[ topscafs$species == wspp, ] wchr <- levels( factor( wsppscafs$ref, levels=wsppscafs$ref)) wgenomesize <- sum( wchrtab$v5 ) wscafcount <- length( wchrtab$v5 ) # dchr <- levels( factor( wsppscafs$matchchr[ wsppscafs$ref == wch ] )) # dchr <- wsppscafs[wsppscafs$ref == wch, 4:6] if (exists("dodebug")) { # wchr <- wchr[1:2] } for (wch in wchr) { wchrsize <- wsppscafs$len [ wsppscafs$ref == wch ] # wchmuller <- wsppscafs$muller [ wsppscafs$ref == wch ] # dchr <- wsppscafs[wsppscafs$ref == wch, 4:6] for (idch in matchcols) { # 4:6 nosynt <- FALSE dch <- wsppscafs[wsppscafs$ref == wch, idch] if( is.na(dch) && idch > matchcols[1]) next if( is.na(dch)) nosynt <- TRUE dchrsize <- NA if (!nosynt) { dch <- levels( factor(dch) ) dchrsize <- dchrtab$v5[ factor( dchrtab$v1) == dch ] } wshort <- paste(wch) if(pmatch("scaffold",wshort,nomatch=0)) { wshort <- paste(unlist(strsplit(wshort,"caffold_")),collapse="") } dshort <- paste(dch) if(pmatch("scaffold",dshort,nomatch=0)) { dshort <- paste(unlist(strsplit(dshort,"caffold_")),collapse="") } wlab <- paste(wspp,wshort,sep=":") dlab <- paste(dspp,dshort,sep=":") mainlab <- paste(wlab, "x", dlab, "synteny") print( mainlab) if (nosynt) { sv <- factor( egdata$wChr) == wch } else { sv <- factor( egdata$dChr) == dch & factor( egdata$wChr) == wch } if (nosynt || !TRUE %in% sv) { print( paste("No associated genes in",mainlab) ) nosynt <- TRUE } drank <- egdata$dBstart[ sv ] wrank <- egdata$wBstart[ sv ] dend <- egdata$dBend[ sv ] wend <- egdata$wBend[ sv ] wrev <- egdata$dOri[ sv ] == '-' wisrev <- FALSE dmax <- max(dchrsize,drank,dend) wmax <- max(wchrsize,wrank,wend) maxb <- ifelse(nosynt, wmax, max(dmax,wmax)) if (maxb < 1000000) { print( paste("Too small chr",mainlab, " = ",maxb) ) next } # make length ~ chrsize; 20 MB ~ 4 in; 1 MB ~ 0.2 in; wsizemb <- round( wchrsize / 1000000, digits=1) imglab <- paste( wlab,"\n",wsizemb,"Mb",sep=" ") # if(!nosynt) imglab2 <- paste(imglab,"\n","+",dlab,sep=" ") #? use instead fixed maxb ~ 35 MB (max chr size) # maxp <- round( maxb / 100000) # 300 maxin <- 0.15 * maxb / 1000000 if(maxin < 1.5) { maxin <- 1.5 ##?? min height for png maxb <- round(maxin * 1000000 / 0.15) } widin <- 1.4 in2dot <- 100 #80 #100 # minimum for png?? would like smaller but get draw errors dscale <- 1 # maxb / max(dchrsize,drank,dend) wscale <- 1 # maxb / max(wchrsize,wrank,wend) wybase <- 0.6 wyloc <- 1.5 dyloc <- 2.8 xlim <- c(0.5,3.5) ylim <- c(0,maxb) if (outdevice == "pdf") { pfile <- paste(pixoutdir,dspp,"_",dshort,"-",wspp,"_",wshort,"-",vers,".pdf",sep="") pdf(file = pfile, width = widin, height = maxin, pointsize= 9) print(paste("plot to",pfile)) } else if (outdevice == "PNG") { pfile <- paste(pixoutdir,dspp,"_",dshort,"-",wspp,"_",wshort,"-",vers,".png",sep="") png(file = pfile, width = round(in2dot*widin), height = round(in2dot*maxin), pointsize=12) print(paste("plot to",pfile)) } else { get(getOption("device"))(width=widin,height=maxin) } ### should put all these fields into one row/drawn image # if (!dev.interactive()) imgs <- c(imgs, pfile) # imginfo <- c(imginfo, imglab) # imgdchr <- c(imgdchr, dshort) # imgdlab <- c(imgdlab, dlab) # imgwsize <- c(imgwsize, wchrsize) imgrow <- data.frame( imglab=imglab, imgfile=pfile, # list or data.frame? dch=dch, dlab=dlab, dchrsize=dchrsize, wch=wch, wlab=wlab, wchrsize=wchrsize, wsizemb=wsizemb ) imgdata <- rbind(imgdata,imgrow) # imgdata$dch[3] ; imgdata$dchrsize[i]; irow <- imgdata[2,] try ( plot( xlim, ylim, axes=FALSE, type="n", main=NA, ylab=NA, xlab=NA) ); drelrank <- drank / dchrsize drelend <- dend / dchrsize dscale <- dscale * dchrsize doffset <- 0 wrelrank <- wrank / wchrsize wrelend <- wend / wchrsize wscale <- wscale * wchrsize woffset <- 0 if(!nosynt) { locdiff <- mean( drank - wrank) if(wchrsize < dchrsize && locdiff > 10000) { woffset <- locdiff } } if (canreverse && sum(wrelrank - drelrank) < -1) { wisrev <- TRUE } plotsynt <- function() { ifelse(dohtml, par( mar=c( 1, 1, 1, 1)) ,par( mar=c( 3, 2, 1, 2)+0.1 )) # par( mgp=c( 2, 1, 0) ) # c(3, 1, 0) # axis title/labels/line # par( omd=c( 0.01,0.01,0.01,0.01) ) # outer margin # par( lwd=0.3 ) plot.window(xlim, ylim) ## for png/gff, use instead html label if(!dohtml) title( sub=imglab, font.sub=2,cex.sub=1.2,line=2) ## ! slide rect up to match synt align ##rect( wybase, wmax+woffset, wyloc, 1+woffset, density=5, angle=0) rect( wybase, 1+woffset, wyloc, wmax+woffset, density=10, angle=0) if(!nosynt) { if(!dohtml) mtext( dlab, side=4, font=2, srt=90) arrows( dyloc, dmax, dyloc, 1, code = 1 + wisrev) } } syntcluster <- function(drelrank,wrelrank) { dw <- cbind(drelrank,wrelrank) dwdist <- dist(dw, method="euclidean") # was "canberra" hc <- hclust(dwdist) # method=complete, simple ? memb <- cutree(hc, h=0.5) # memb <- cutree(hc, h=1.0) # need hc height ? cent <- NULL for (k in 1:max(memb)) { cent <- rbind(cent, apply(dw[memb==k,1,drop=FALSE],2,length)) } clust <- rev(order(cent)) list(memb,clust) } try( plotsynt() ) ## ------------ bigclust <- c(1) memb <- c(1) if (!nosynt) { slist <- try( syntcluster(drelrank,wrelrank) ) memb <- unlist(slist[1]) bigclust <- unlist(slist[2]) } ##--------- ## kmax <- trunc(length(bigclust)/2) kmax <- length(bigclust) pclrs <- topo.colors(kmax) # rainbow(kmax);# terrain.colors(kmax) # showclust <- function(k) { pclr <- pclrs[k] ## for all wchr / dch, need woffset,wscale to string on pseudo-chr dpoints <- drelrank[ memb==bigclust[k] ] * dscale dends <- drelend[ memb==bigclust[k] ] * dscale wrevs <- wrev[ memb==bigclust[k] ] if(wisrev) { # fixme wpoints <- (1 - wrelrank[ memb==bigclust[k] ]) wends <- (1 - wrelend[ memb==bigclust[k] ] ) } else { wpoints <- wrelrank[ memb==bigclust[k] ] wends <- wrelend[ memb==bigclust[k] ] } wpoints <- woffset + ( wpoints * wscale) wends <- woffset + ( wends * wscale) dlen <- length(dpoints) wy <- seq( length=dlen, from=wyloc, to=wyloc) dy <- seq( length=dlen, from=dyloc, to=dyloc) na <- NA[1:dlen] # swap(wpoints,wends,drev) for (i in 1:dlen) { if(wrevs[i] == TRUE) { w= dpoints[i]; dpoints[i]= dends[i]; dends[i]= w; } } xpoly <- rbind(wpoints,dpoints,dends,wends,na) ypoly <- rbind(wy,dy,dy,wy,na) polygon(c(ypoly), c(xpoly), col=pclr, border=NA) # col=pclr, density=c(10, 20) } k <- kmax # reverse - so heaviest printed last !? if (!nosynt) { for (k in kmax:1) { try( showclust( k) ) } } if (!dev.interactive()) dev.off() } } tablehead <- function(title,preface) { tshort <- sub(" *[({;\.].*","",title) paste("\n", "", tshort, "\n", "\n", "

",title,"

", preface, "\n") } tablefoot <- function(footnote) { paste("
\n",footnote,"\n\n") } ## order by dmel csomes? need also show for dchr == NA printhtmlbyDchr <- function() { cat("\n") for (dch in dchrs) { imgrow <- imgdata[ imgdata$dch == dch & !is.na(imgdata$dch), ] # has NA .. imgsynt <- as.character(imgrow$dlab[1]) # or imgrow[1,]$dlab # if (is.na(imgsynt)) next ## dchmuller <- levels(factor(dsppscafs$muller [ dsppscafs$ref == dch ])) dchmuller <- dsppscafs$muller [ dsppscafs$ref == dch ] dchmuller <- ifelse (is.na(dchmuller), "U", paste(dchmuller)) cat("",wspp, ":", dchmuller,"/",imgsynt,"") } cat("\n\n") wsizes <- 0 nscafs <- 0 wchdrawn <- c() for (dch in dchrs) { imgrow <- imgdata[ imgdata$dch == dch & !is.na(imgdata$dch), ] ## NA's nrows <- nrow(imgrow) # print(paste("DEBUG",dch,nrows,imgrow)) if (nrows < 1) next cat("") cat("", "\n") for (i in 1:nrows) { ## 1:length(imgfs) # print(paste("DEBUG",i,nrows,imgrow[i,])) if(is.na(imgrow$imgfile[i])) next imgf <- as.character(imgrow$imgfile[i]) # imgfs[i] imgf <- sub(outdir,"",imgf); nscafs <- nscafs + 1 wch <- as.character(imgrow$wch[i]) if( is.na(match(wch,wchdrawn))) wsizes <- wsizes + imgrow$wchrsize[i] # imgwsize[i] wchdrawn <- c(wchdrawn,wch) imglab <- as.character(imgrow$imglab[i]) # imglabs[i] imglab <- gsub("\n","
\n",imglab); flab <- unlist(strsplit(imgf,"[./]"))[2] cat("\n") ##if (nrows > 3 && i %% 3 == 0) cat("\n") } cat("
\n") cat( maphref(wspp,wch) ) cat("\"",flab,"\"",sep="")0) cat(" width=",imgwidth) cat(">") cat("") cat("
\n",imglab) cat("
\n") cat("\n") # if (i %% 10 == 0) cat("\n") } wunscaf <- wscafcount - nscafs wgenomemb <- round( wgenomesize / 1000000, digits=1) wunsizemb <- round( (wgenomesize - wsizes) / 1000000, digits=1) cat("\n", hreflink(sub(outdir,"",hsectionfile), paste(wspp,"top segments.")), "Unlisted remainder:", wunsizemb,"Mb", "of", wgenomemb,"Mb total genome", ", in", wunscaf, "segments", "\n") # cat("\n\n\n") # close(hout) } printhtmlbyimg <- function() { nrows <- nrow(imgdata) if(nrows<1) return wsizes <- 0 nscafs <- nrows # length(imgs) cat( "\n") for (i in 1:nrows) { # 1:length(imgs) ## need fname, fsize in imgs imgf <- as.character(imgdata$imgfile[i]) # imgs[i] imgf <- sub(outdir,"",imgf); imglab <- as.character(imgdata$imglab[i]) # imginfo[i] imgsynt <- as.character(imgdata$dlab[i]) # imgdlab[i] wch <- as.character(imgdata$wch[i]) wsizes <- wsizes + imgdata$wchrsize[i]# imgwsize[i] imglab <- gsub("\n","
\n",imglab); flab <- unlist(strsplit(imgf,"[./]"))[2] cat("") cat( maphref(wspp,wch) ) ##? add to gbb url: ;overview.x=mouse.y;seg_length=chrlen;span=syntenic_width ?? ## need javascript to get mouse x,y onclick cat("\"",flab,"\"",sep="")0) cat(" width=",imgwidth) cat(">") cat("") cat("
\n",imglab,"
\n",imgsynt,sep="") cat("\n") if (i %% 10 == 0) cat("\n") } wsizes <- sum(imgdata$wchrsize[!duplicated(imgdata$wch)] ) wunscaf <- wscafcount - nscafs wgenomemb <- round( wgenomesize / 1000000, digits=1) wunsizemb <- round( (wgenomesize - wsizes) / 1000000, digits=1) cat("\n", hreflink(sub(outdir,"",hmullerfile), paste(wspp,"Muller elements.")), "Unlisted remainder:", wunsizemb,"Mb", "of", wgenomemb,"Mb total genome", ", in", wunscaf, "segments", "\n") # cat("\n\n\n") # close(hout) } if(dohtml) { print(paste("html to",hmullerfile)) htmltable <- capture.output( printhtmlbyDchr()) # capt. drops newlines # htmltable <- gsub("","\n",htmltable); cat( tablehead( paste(wspp, "genome Muller elements (x ", dspp," synteny)"),"" ), htmltable, tablefoot(""), file=hmullerfile) ## then catenate all for new file allmuller <- c(allmuller,htmltable) # but for table wrappers print(paste("html to",hsectionfile)) htmltable2 <- capture.output( printhtmlbyimg()) # htmltable2 <- gsub("","\n",htmltable2); cat( tablehead( paste(wspp, " genome assembly segments (x ", dspp," synteny)"),"" ), htmltable2, tablefoot(""), file=hsectionfile) } } # end wspp in () print(paste("html to",allmullerfile)) alinks <- " " for (lspp in linkspp) { if(lspp == dspp) next alink <- paste("all","-",lspp,"-muller-",vers,".html",sep="") alinks <- paste(alinks, " Elements with", hreflink(sub(outdir,"",alink), paste(lspp,"synteny")) ) } cat( tablehead( paste("All Drosophila genomes Muller elements (x ", dspp," synteny)", "
See also ",alinks,""), paste("Note: these maps show large scale synteny from DNA matches.", "Weaker syntenic relations are not shown. No display doesn't mean lack of synteny.
") ), allmuller, tablefoot(""), file=allmullerfile )