Drosophila species Gene Ontology matches
",
"",title,"
",
"Legend:
Blue bars: Mean gene count +/- 90% confidence interval
Red circle: expected value (population mean)
" )
}
itemrefs <-
function(gidlist) {
hrefs <- c("
",
"")
nr <- 0
gord <- c()
for (gi in gidlist) {
gilab <- gi # paste(gid,v)
if(exists("gocats"))
gilab <- paste( as.character(gocats[gocats[,1] == gi,3]),collapse=" ")
gord <- rbind(gord, c(gilab=gilab, gi=gi))
}
for (i in order(gord[,"gilab"])) {
gi <- gord[ i, "gi"]
gilab <- gord[ i, "gilab"]
hrefs <- c(hrefs, paste("| ",gilab," | ",sep="") )
nr <- nr+1
if(nr %% 5 == 0) hrefs <- c(hrefs," ")
}
hrefs <- c(hrefs," "," |
")
hrefs
}
bestgenes <-
function (gid, v, na.rm = FALSE)
{
xobs <- gop1[ gop1$GOID == gid & !is.na(gop1$GOID), c("species","query",v)]
if(nrow(xobs)==0) return (list(ids=c(), qdev=c(), xobs=c(), dbs=c()) )
# qs <- levels(factor(xobs$query))
# ## patch for missing data (should be there...) for v = ngenes
# ## for (qs) { for (species) { if xobs[spp,q] is missing, xobs.v <- 0 } }
# for (q in qs) { for (s in levels(gop1$species)) {
# if(sum(xobs$query == q & xobs$species == s)) xobs <- rbind(xobs, c(s,q,0))
# } }
qs <- levels(factor(xobs$query))
xobs <- xobs[order(xobs$species),]
gsmean <- aggregate(gop1[gop1$GOID == gid,v], list(Species = gop1[gop1$GOID == gid,"species"]),
FUN=mean, na.rm = TRUE)
gidmean <- gidmeans[gidmeans$Goid == gid,v]
expect <- sppmeans[,v] + (gidmean - mean(sppmeans[,v]))
expdif <- gsmean$x - expect
# qdev <- data.frame( data=matrix(nrow=length(qs),ncol=4, ), row.names=qs)
nr <- length(qs)
qdev <- data.frame( mindev=rep(0,nr), obs=rep(0,nr), gsdev=rep(0,nr),expdev=rep(0,nr), row.names=qs)
for (q in qs) {
xo <- xobs[ xobs$query == q,v]
## want to minimize sum((xo - gsmean$x)^2) < sum((xo - expect)^2)
xogs <- sum((xo - gsmean$x)^2)
xoexp <- sum((xo - expect)^2)
# gsexp <- sum( ((xo - gidmean) - (gsmean$x - expect))^2)
# gsexp <- xogs * (xogs / xoexp)^2
gsexp <- xogs / xoexp ## this is it **
qdev[q,] <- list( mindev=gsexp, obs=sum(xo), gsdev=xogs, expdev=xoexp)
}
## err here ? qdev?
##names(qdev)<-c("mindev","obs","gsdev","expdev")
qd <- qdev[order(qdev$mindev),]
ids <- row.names( qd)
list(ids=ids, qdev=qd, xobs=xobs, dbs=levels(factor(gop1$DB)) )
}
genetable <-
function (gid, v, qdevbest=NULL, na.rm = FALSE)
{
## ? check bids? may be many proteomes
idurl <- idurls[[proteome]] #cgidurl
listurl <- listurls[[proteome]] #cglisturl
tabc <- "" #"\t"
if(!is.null(qdevbest)){
bids <- qdevbest$ids
xobs <- qdevbest$xobs
qdev <- qdevbest$qdev
dbs <- qdevbest$dbs
ng <- length(bids)
nq <- 0
for (q in bids) {
#if(nq > 10 || qdev[q,"mindev"]>0.95) break
if(nq > 10 || qdev[q,"mindev"]>1.0) break
nq <- nq+1
}
cat(gid, "Significant ",nq," of ",ng," total genes, contributing to ",v," deviation "," \n")
# add url to list of all gene reports ...
bq <- paste(bids[1:nq],sep="+",collapse="+")
bq <- gsub("-P.","",bq)
if (!is.null(listurl)) cat( hreflink(paste(listurl,bq,sep=""), bq), " \n")
cat(tablehead())
nq <- 0
for (q in bids) {
#if(nq > 10 || qdev[q,"mindev"]>0.95) break
if(nq > 10 || qdev[q,"mindev"]>1.0) break
qset <- (xobs$query == q)
bq <- gsub("-P.","",q) # only for fly CG id
gbq <- bq
if(nq==0) cat( " | | ","Query",tabc,paste( xobs[qset,"species"],collapse=tabc)," |
","\n",sep="" )
cat("| ")
aidurl <- idurl
if(charmatch("WBGene",q,nomatch = 0)==1) aidurl <- idurls["worm"] else {
if(charmatch("MGI:",q,nomatch = 0)==1) {
aidurl <- idurls["mouse"];
gbq <- sub("MGI:","",bq) # wildcard search in gbrowse with 'mgi:' fails, only does exact find
}
else {
if(charmatch("S0",q,nomatch = 0)==1) aidurl <- idurls["yeast"] else {
if(charmatch("CG",q,nomatch = 0)==1) aidurl <- idurls["fruitfly"]
}
}
}
cat( hreflink(paste(aidurl,bq,sep=""), bq))
# cat( tabc,paste(xobs[qset,v],collapse=tabc),"\n",sep="" )
## add map link to numbers ****
# | $1
hvals <- paste( '',
xobs[qset,v],"",sep="")
cat( tabc,paste(hvals,collapse=tabc),"\n",sep="" )
cat(" |
")
nq <- nq + 1
}
cat(tablefoot())
}
}
tablehead <-
function(title=NULL,preface="") {
tt <- ""
if(!is.null(title)) {
tshort <- sub(" *[({;\.].*","",title)
tt <- paste("\n",
"", tshort, "\n",
"\n",
"",title,"
", preface)
}
paste(tt,"\n")
}
tablefoot <-
function(footnote=NULL) {
if(is.null(footnote)) { paste("
\n") }
else { paste("\n",footnote,"\n\n") }
}
plotgenes <-
function(gid, v, qdevbest=NULL, noplot = FALSE, na.rm = FALSE)
{
gsmean <- aggregate(gop1[gop1$GOID == gid,v], list(Species = gop1[gop1$GOID == gid,"species"]), FUN=mean, na.rm = TRUE)
#gsciw <- aggregate(gop1[gop1$GOID == gid,v], list(Species = gop1[gop1$GOID == gid,"species"]),
# FUN=cim, ci=0.95, na.rm = TRUE)
expect <- sppmeans[,v] + (gidmeans[gidmeans$Goid == gid,v] - mean(sppmeans[,v]))
ispoisson <- FALSE ## (v == "ngenes"|v == "nparalog")
if(ispoisson) {
gsli <- aggregate(gop1[gop1$GOID == gid,v], list(Species = gop1[gop1$GOID == gid,"species"]),
FUN=cipoisson, ci=0.95, tlower=TRUE, na.rm = TRUE)
gsui <- aggregate(gop1[gop1$GOID == gid,v], list(Species = gop1[gop1$GOID == gid,"species"]),
FUN=cipoisson, ci=0.95, tlower=FALSE, na.rm = TRUE)
} else {
gsli <- aggregate(gop1[gop1$GOID == gid,v], list(Species = gop1[gop1$GOID == gid,"species"]),
FUN=cim, ci=0.95, na.rm = TRUE)
gsui <- gsli
}
if(noplot != TRUE) {
par(mar=c(4,4,1,1))
#plotCI( x=gsmean[,2], uiw=gsciw[,2], ylab=paste(v,gid,sep="-"), xlab="Species", gap=FALSE, col="black", barcol="blue", xaxt="n")
plotCI( x=gsmean[,2], uiw=gsui[,2], liw=gsli[,2], ylab=v, xlab="Species", gap=FALSE, col="black", barcol="blue", xaxt="n")
points(expect,pch=23,col="red")
axis(side=1, at=1:nrow(sppmeans), labels=as.character(sppmeans$Species), cex.axis=1.5)
plab <- gid # paste(gid,v)
if(exists("gocats")){
plab <-paste( gid, as.character(gocats[gocats[,1] == gid,3]),collapse=" ")
}
mtext( plab, side=3, line = -2 )
}
diff <- 1.1 * abs(gsmean[,2] - expect)
# sigdif <- max( 1.1 * diff - gsui[,2])
sigdif <- max( diff - gsli[,2], diff - gsli[,2])
sigdif
}
count <-
function (x, na.rm = FALSE)
{
if (is.matrix(x))
apply(x, 2, count, na.rm = na.rm)
else if (is.vector(x))
ifelse(na.rm, sum(!is.na(x)), length(x))
else if (is.data.frame(x))
sapply(x, count, na.rm = na.rm)
else count(as.vector(x), na.rm = na.rm)
}
cim <-
function (x, ci = 0.975, na.rm = FALSE)
{
if (is.matrix(x))
apply(x, 2, cim, ci = ci, na.rm = na.rm)
else if (is.vector(x)) {
nd <- ifelse(na.rm, sum(!is.na(x)), length(x))
qt(ci, nd) * sqrt(var(x, na.rm = na.rm) / nd)
}
else if (is.data.frame(x))
sapply(x, cim, ci = ci, na.rm = na.rm)
else
sapply(as.vector(x), cim, ci = ci, na.rm = na.rm)
}
cilog <-
function (x, ci = 0.975, na.rm = FALSE)
{
if (is.matrix(x))
apply(x, 2, cilog, ci = ci, na.rm = na.rm)
else if (is.vector(x)) {
nd <- ifelse(na.rm, sum(!is.na(x)), length(x))
qt(ci, nd) * sqrt(var(log(x), na.rm = na.rm) / nd)
}
else if (is.data.frame(x))
sapply(x, cilog, ci = ci, na.rm = na.rm)
else
sapply(as.vector(x), cilog, ci = ci, na.rm = na.rm)
}
meanlog <-
function (x, ci = 0.975, na.rm = FALSE)
{
if (is.matrix(x))
apply(x, 2, meanlog, ci = ci, na.rm = na.rm)
else if (is.vector(x)) {
mean(log(x))
}
else if (is.data.frame(x))
sapply(x, meanlog, ci = ci, na.rm = na.rm)
else
sapply(as.vector(x), meanlog, ci = ci, na.rm = na.rm)
}
#see cim: conf.intervals for mean of poisson distr., e.g. gene counts
cipoisson <-
function (x, ci = 0.975, tlower=TRUE, na.rm = FALSE)
{
if (is.matrix(x))
apply(x, 2, cipoisson, ci = ci, na.rm = na.rm)
else if (is.vector(x)) {
nd <- ifelse(na.rm, sum(!is.na(x)), length(x))
mn <- mean(x, na.rm = na.rm)
#? which is it? # psd <- sqrt(mn)
psd <- sqrt(mn/nd)
qt(ci, nd) * ppois(psd, mn, lower = tlower)
# psem <- sqrt(mn/nd)
}
else if (is.data.frame(x))
sapply(x, cipoisson, ci = ci, na.rm = na.rm)
else
sapply(as.vector(x), cipoisson, ci = ci, na.rm = na.rm)
}
sem <-
function (x, na.rm = FALSE)
{
if (is.matrix(x))
apply(x, 2, sem, na.rm = na.rm)
else if (is.vector(x)) {
nd <- ifelse(na.rm, sum(!is.na(x)), length(x))
sqrt(var(x, na.rm = na.rm) / nd)
}
else if (is.data.frame(x))
sapply(x, sem, na.rm = na.rm)
else
sapply(as.vector(x), sem, na.rm = na.rm)
}
# =========================================