## goassocplot9.R : 29dec06 ## dassocplot.R - from R assocplot ## mods by d.gilbert, for genomes x GO categories associatons of gene matches ## see also goblfunc.R library("gmodels") # CrossTable # url.hspmap <- "/species/cgi-bin/gbrowse/" url.hspmap <- "http://melon.bio.indiana.edu/cgi-bin/gbrowse/" url.GOlookup <- "http://eugenes.org/fbservlet/goreport/id=" # gocats <- NULL read.gocat <- function( file="../gocatall.tab" # goid,parent,class,term ) { goc <- read.delim(file,colClasses="character") rownames(goc) <- goc[,1] goc } godetails <- function( id, parent = TRUE, grandparent = TRUE ) { ##BAD-rereads each call; gocats not global?? what is a dang R global / local var??? ##BAD##if(!exists("gocats") || is.null(gocats)) gocats <- read.gocat() ##if(is.null(gocats)) return () gor <- gocats[id,] if(is.null(gor)) return () #else if(parent) godet <- list(goid=gor$goid,class=gor$class,term=gor$term,parent=gor$parent) #else # godet <- list(goid=gor$goid,class=gor$class,term=gor$term) if(grandparent) { pid <- gor$goid llpar <- lpar <- NULL while(!is.na(pid)) { llpar <- lpar; lpar <- pid; prow <- gocats[pid,] if (is.na(prow) || prow$parent == "") break # pars <- c(pars,pid) # pid <- unlist(strsplit( prow$parent, ","))[1] pid <- strsplit( prow$parent, ",")[[1]][1] } if(!is.null(llpar)) godet <- c(godet, grandparent=gocats[ llpar, c("goid","term")]) } return (godet) } dassocrowplot <- function( drow, erow, chirow, x.w=NULL, y.h=NULL, ymax=NULL, colrs = c("green","red"), inwidth = 6, inheight = 0.75, doheader = FALSE, file= NULL, outdevice="PNG" ) { if(is.null(x.w)) x.w <- apply(erow, 1, max) if(is.null(y.h)) y.h <- apply(drow, 2, max) - apply(drow, 2, min) ## need ymaxmax param here ... x.delta <- mean(x.w) * 0.1 # space y.delta <- 0 ## mean(y.h) * 0.8 # space x.base <- x.w[1]/2 ## - x.delta # instead of 0 #xlim <- c(x.base, sum(x.w) + (NROW(drow)-2) * x.delta) xlim <- c(x.base, sum(x.w) + (NROW(drow)) * x.delta - x.w[NROW(x.w)]/2) # xlim <- c(x.base, sum(x.w) + NROW(drow) * x.delta) if(is.null(ymax)) ymax <- sum(y.h) ## fix y: only 1 row ylim <- c(0, ymax + y.delta) # inwidth <- 8.5 # inheight <- 12.0 in2dot <- 100 #80 #100 # minimum for png?? would like smaller but get draw errors # if(nrow(x)>12) inwidth <- 9.5 # 8.5 if (outdevice == "PNG" ) { #file <- paste(file,".png",sep="") png(file = file, width = round(in2dot*inwidth), height = round(in2dot*inheight), pointsize=12) } else if (outdevice == "pdf") { #file <- paste(file,".pdf",sep="") pdf(file = file, width = inwidth, height = inheight, pointsize= 9) } # par( mar=c(1,1,1,1)) if(doheader) { par( mar=c(0,0,2,0)) ; ylim[2] <- 0 } # par( mar=c(5,0,0,0)) else { par( mar=c(0,0,0,0)) } par( xpd = FALSE) # clip to plot plot.new() plot.window( xlim , ylim, log = "") ## x adj # x.m <- cumsum(x.w + x.delta) # x.r <- cumsum(x.w + x.delta) # x.m <- (c(0, x.r[-NROW(drow)]) + x.r)/2 x.r <- cumsum(x.w + x.delta) x.m <- (c(0, x.r[-NROW(drow)]) + x.r)/2 y.u <- cumsum(y.h + y.delta) y.m <- y.u - apply(pmax(drow, 0), 2, max) - y.delta/2 z <- expand.grid(x.m, y.m) colv <- colrs[1 + (drow < 0)] if (!is.null(chirow)) { cpan <- c("#FF0000", "#C94F4F", "#AF6F6F", "#BEBEBE", "#6FAF6F", "#4FC94F", "#00FF00") rv <- 4 + ( (chirow > 8) + (chirow > 4) + 1) * ( ((drow < 0) * -2) + 1 ) colv <- cpan[rv] } if(doheader) { rlabs= sub("dpulex","dpulx",sub("celegans","cele",rownames(drow))) # shorten # axis(1, pos=ylim[1], at = x.m, labels = rlabs, tick = FALSE) axis(3, pos=ylim[2], at = x.m, labels = rlabs, tick = FALSE) } else { abline( h = y.m, lty = 5, lwd = 0.4) # ugly midline rect( z[, 1] - erow/2, z[, 2], z[, 1] + erow/2, z[, 2] + drow, col = colv) } if (!dev.interactive()) dev.off() } dassoctextrow <- function( drow, erow, chirow, x.w=NULL, y.h=NULL, ymax=NULL) { # y.u <- cumsum(y.h) # y.m <- y.u - apply(pmax(drow, 0), 2, max) # # nplus <- y.m[i] > 0 * # nminus <- y.m[i] < 0 * } dassochtml <- function (x, htmlheader = NULL, doplot = TRUE, gourl = NULL, outdir = "") { if (length(dim(x)) != 2) stop("'x' must be a 2-d contingency table") if (any(x < 0) || any(is.na(x))) stop("all entries of 'x' must be nonnegative and finite") if ((n <- sum(x)) == 0) stop("at least one entry of 'x' must be positive") f <- x[, rev(1:NCOL(x))] e <- outer(rowSums(f), colSums(f), "*")/n d <- (f - e)/sqrt(e) e <- sqrt(e) CST <- chisq.test( f, correct = FALSE,) # F not X chires <- ((CST$expected - CST$observed)^2)/CST$expected d[is.nan(d)] <- 0 xall.w <- apply(e, 1, max) yall.h <- apply(d, 2, max) - apply(d, 2, min) ## need ymaxmax param here ... nr <- ncol(f) nc <- nrow(f) labc <- rownames(f) labr <- colnames(f) headf <- paste( "assoc-header.png",sep="") showwidth <- 400 # draw 600? if(!is.null(htmlheader)) cat(htmlheader) cat('\n\n', '\n', '\n', sep="" ) # cat('\n\n') # cat('\n',sep="") # cat('\n') # cat('\n') cat( colhead) last_goclass <- NULL headimg <- headf roworder <- nr:1 ir <- 0 for (r in roworder) { ir <- ir+1 href <- ""; goid <- "null" goid <- labr[r] if(charmatch("GO:",goid,nomatch = 0)==0 && exists("gocats")) { gid <- gocats[ gocats[,"term"] == goid,"goid"] if(!is.null(gid)) goid <- gid } if(!is.null(goid)) href <- paste("\n ",sep="") godet <- godetails( goid) if(is.list(godet) && !is.na(godet$class)) { if(!is.null(last_goclass) && (godet$class != last_goclass)) cat(colhead) last_goclass <- godet$class } cat('') cat('\n',sep="") imgf <- paste( "assoc-", sub(":","",goid),".png",sep="") # gvar/ngenes- ? boxht <- 50 # what ? leave to drawplot ? if( doplot) { drow <- as.matrix( d[,r]) erow <- as.matrix( e[,r]) chirow <- as.matrix( chires[,r]) if(!is.null(headimg)) { dassocrowplot( drow, erow, chirow, x.w=xall.w, y.h=yall.h[r], doheader = TRUE, file= paste( outdir, headimg, sep=""), inheight=0.40) headimg <- NULL } dassocrowplot( drow, erow, chirow, x.w=xall.w, y.h=yall.h[r], file= paste( outdir, imgf, sep=""), inheight=0.50) # change to pixht,pixwidth } cat('\n',sep="") cat('\n',sep="") ## add GO details on right of pic cat('\n') cat('\n') } cat('\n') cat('\n',sep="") cat('') cat('\n') cat('
\n') cat('\n') colh <- paste("
", paste(labc,collapse="",sep=""), "
\n", sep="") colhead <- paste( '
GO ID   ', colh, '', 'Term & Grouping
GO ID   ', # colh, # paste(labc,collapse=" "), "
\n", # '', # '
Term & Grouping
',href,labr[r],' ',' ') # godet <- godetails( goid) if(is.list(godet)) { cat(godet$term,"",godet$class,":",godet$grandparent.term,"") } cat('
  ','', #"
\n", colh, # "
\n", paste(labc,collapse=" "), '
\n') cat('\n') } #--------------- # library("gmodels") # for crosstable goassocplot <- function ( dxtabs, crossfile = NULL, htmlheader = "

Gene matches x Species association

\n", gourl = url.GOlookup, chilimit = 1.5, GOgrouporder = TRUE, docrosstab = TRUE ) { outdevice <- .Device # "PNG" is good default outdir <- "" # dxtabs <- xtabs(n_protid ~ goterm + species, dpgocp5, drop.unused.levels=TRUE) aspp <- colnames(dxtabs)[1] if (! is.null(crossfile)) { plotfile <- crossfile # fixme outdir <- sub("/[^/]+$","/",crossfile) } else { plotfile <- paste(aspp,"-GOassociation",sep="") } # screen out indifferent items CST <- chisq.test(as.matrix(dxtabs), correct = FALSE,) chires <- ((CST$expected - CST$observed)^2)/CST$expected keepall <- (chilimit <= 0) rkeep <- c() for (i in 1:nrow(CST$residuals)) { chir <- chires[i,] if(keepall) { rkeep <- c(rkeep, all(!is.na(chir))) ### keep all but NaN's } else { rkeep <- c(rkeep, all(!is.na(chir)) & sum(chir>chilimit) > 1) } } aplotx <- dxtabs[rkeep,] rowlist <- row.names(aplotx) noshowlist <- row.names(dxtabs[!rkeep,]) ## reorder here by GO grouping; see below ## GOroworder <- NULL if (GOgrouporder) { ordset <- c() for (goid in rowlist) { goord <- goid godet <- godetails( goid) if(is.list(godet)) { gc <- ifelse(godet$class=="C","Z",godet$class) goord <- paste(gc,":",godet$grandparent.term,":",goid,sep="") } ordset <- c(ordset,goord) } GOroworder <- order(ordset, na.last=TRUE) aplotx <- aplotx[GOroworder,] # note: t(aplotx) used below } ## add some more stats here? or rewrite CrossTable to do that? ## e.g. chi-sum per species if(docrosstab) { capture.output( CrossTable(as.matrix(aplotx), expected = TRUE, dnn=c("goterm","species"), sresid = TRUE, prop.r = FALSE, prop.c = FALSE, prop.t = FALSE, format="SPSS"), cat("\n","No sig. difference for: ",paste(noshowlist,collapse=", "),"\n"), file = paste(plotfile,"-crosstab.txt",sep="") ) } # #--- add TEXT summary table of groups x species # -- make from CrossTable output; perl script # Species: # dmel dana dpse dmoj # # + # + # GO:11111 ..+.....+.....-.....- # - # - # + # + # GO:22222 ..-.....-.....+.....+ # - - # - #--- HTML table of groups x species, with assoc. plot per group row htmlout <- capture.output( dassochtml( t(aplotx), outdir=outdir, htmlheader = htmlheader, gourl = gourl, ) ) hfile <- paste(plotfile,".html",sep="") cat('\n',file=hfile) cat( htmlout, sep="\n", file=hfile, append=TRUE) if(docrosstab) { lnk <- paste( sub("^.*/","",plotfile), "-crosstab.txt", sep="") cat('
Find
crosstable statistics here.
', file=hfile, append=TRUE) } if(length(noshowlist)>0) { cat("
\n","No significant deviations for n=",length(noshowlist)," categories.
\n ", ## paste(noshowlist,collapse=", "),"
\n", # list can be loooooooong .... file=hfile, append=TRUE) } cat('\n', file=hfile, append=TRUE) }