apply - R: applying a function that returns a list across multiple columns of a data frame -
I am trying to apply a function that takes 2 inputs to every combination of the following list:
> c('eas_maf', 'amr_maf', 'afr_maf', 'eur_maf', 'sas_maf') [1] "eas_maf" "amr_maf" "afr_maf" "eur_maf" "sas_maf"
To arrange the values into each combination of 2, I am using the combn function:
> list <- combn(c('eas_maf', 'amr_maf', 'afr_maf', 'eur_maf', 'sas_maf'),2) > list [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [1,] "eas_maf" "eas_maf" "eas_maf" "eas_maf" "amr_maf" "amr_maf" "amr_maf" "afr_maf" "afr_maf" "eur_maf" [2,] "amr_maf" "afr_maf" "eur_maf" "sas_maf" "afr_maf" "eur_maf" "sas_maf" "eur_maf" "sas_maf" "sas_maf"
The function calculates the number of rows that meet the criteria and returns a list:
#' Count variants shared by two populations, split by consequence
#'
#' A row counts as shared when the values in both the `pop1` and `pop2`
#' columns are greater than 0. Shared rows are tallied separately for rows
#' whose `consequence` equals 'synonymous snv' and for every other row.
#'
#' @param pop1 Name of the first population column in `table`. Must be a
#'   single column name: the function is not vectorised over names, so call
#'   it once per pair (e.g. via `mapply()`), not through `outer()`.
#' @param pop2 Name of the second population column in `table` (single name).
#' @param table Data frame holding the population columns and a
#'   `consequence` column. Defaults to the `varianttable` object, which is
#'   only looked up when this argument is not supplied.
#' @return A list with elements `ns` (shared rows that are not
#'   'synonymous snv'), `s` (shared rows that are 'synonymous snv') and
#'   `ns/s` (their ratio; `Inf` when `s` is 0 and `ns` is positive, `NaN`
#'   when both are 0).
sharedcalc.func <- function(pop1, pop2, table = varianttable) {
  # Single-bracket indexing yields one-column logical matrices, so the
  # masks below combine element-wise, row by row.
  shared <- table[pop1] > 0 & table[pop2] > 0
  is_synonymous <- table["consequence"] == "synonymous snv"

  s_count <- sum(shared & is_synonymous)
  ns_count <- sum(shared & !is_synonymous)

  list("ns" = ns_count, "s" = s_count, "ns/s" = ns_count / s_count)
}
Here is an example of the output of the function:
> sharedcalc.func('eas_maf', 'amr_maf') $ns [1] 59325 $s [1] 43434 $`ns/s` [1] 1.365865
To run the function across the list, I assumed the apply function would be appropriate. However, it returns a non-conformable arrays error:
> apply(list, 2, sharedcalc.func) error in fun(newx[, i], ...) : binary operation on non-conformable arrays
I also tried the outer function and received the same error:
> outer(list[1,], list[2,], sharedcalc.func) error in fun(x, y, ...) : binary operation on non-conformable arrays
I am not sure why I am getting this error. Is it possibly due to returning a list from the function? I have also tried using lapply to return a list, but that does not work either. Below is the dput of the head of the data:
> dput(head(varianttable)) structure(list(chrom = c("1", "1", "1", "1", "1", "1"), pos = c(69224l, 69428l, 69486l, 69487l, 69496l, 69521l), id = c("rs568964432", "rs140739101", "rs548369610", "rs568226429", "rs150690004", "rs553724620" ), ref = c("a", "t", "c", "g", "g", "t"), alt = c("t", "g", "t", "a", "a", "a"), af = c(0.000399361, 0.0189696, 0.000199681, 0.000399361, 0.000998403, 0.000399361), ac = c(2l, 95l, 1l, 2l, 5l, 2l), = c(5008l, 5008l, 5008l, 5008l, 5008l, 5008l), consequence = c("nonsynonymous snv", "nonsynonymous snv", "synonymous snv", "nonsynonymous snv", "nonsynonymous snv", "nonsynonymous snv"), gene = c("or4f5", "or4f5", "or4f5", "or4f5", "or4f5", "or4f5"), refgene_id = c("nm_001005484", "nm_001005484", "nm_001005484", "nm_001005484", "nm_001005484", "nm_001005484" ), aa_change = c("('d', 'v')", "('f', 'c')", "('n', 'n')", "('a', 't')", "('g', 's')", "('i', 'n')"), x0.fold_count = c(572l, 572l, 572l, 572l, 572l, 572l), x4.fold_count = c(141l, 141l, 141l, 141l, 141l, 141l), eas_maf = c(0, 0.003, 0.001, 0, 0, 0), amr_maf = c(0.0029, 0.036, 0, 0, 0.0014, 0.0029), afr_maf = c(0, 0.0015, 0, 0.0015, 0.003, 0), eur_maf = c(0, 0.0497, 0, 0, 0, 0), sas_maf = c(0, 0.0153, 0, 0, 0, 0), nonafr_n = c(309227l, 1128036l, 262551l, 0l, 309227l, 309227l), nonafr_weighted = c(0.0029, 0.0261704282487438, 0.001, 0, 0.0014, 0.0029)), .names = c("chrom", "pos", "id", "ref", "alt", "af", "ac", "an", "consequence", "gene", "refgene_id", "aa_change", "x0.fold_count", "x4.fold_count", "eas_maf", "amr_maf", "afr_maf", "eur_maf", "sas_maf", "nonafr_n", "nonafr_weighted" ), row.names = c(na, 6l), class = "data.frame")
Try the following:
l <- combn(c('eas_maf', 'amr_maf', 'afr_maf', 'eur_maf', 'sas_maf'),2) l [,1] [,2] [,3] [,4] [,5] [,6] [1,] "eas_maf" "eas_maf" "eas_maf" "eas_maf" "amr_maf" "amr_maf" [2,] "amr_maf" "afr_maf" "eur_maf" "sas_maf" "afr_maf" "eur_maf" [,7] [,8] [,9] [,10] [1,] "amr_maf" "afr_maf" "afr_maf" "eur_maf" [2,] "sas_maf" "eur_maf" "sas_maf" "sas_maf" mapply(sharedcalc.func, l[1,], l[2,]) eas_maf eas_maf eas_maf eas_maf amr_maf amr_maf amr_maf afr_maf ns 1 1 1 1 2 1 1 1 s 0 0 0 0 0 0 0 0 ns/s inf inf inf inf inf inf inf inf afr_maf eur_maf ns 1 1 s 0 0 ns/s inf inf
mapply is a multivariate version of sapply; it is used if you want to traverse multiple lists simultaneously.
As a side remark: it is a bad idea to overwrite built-in R functionality with your own objects. So, calling an object list is a bad idea, which is why I changed it to l in the above code.
To keep the column names, one can do this:
out <- mapply(sharedcalc.func, l[1,], l[2,]) setnames(data.frame(out), mapply(paste, l[1,], l[2,], sep="-")) eas_maf-amr_maf eas_maf-afr_maf eas_maf-eur_maf eas_maf-sas_maf ns 1 1 1 1 s 0 0 0 0 ns/s inf inf inf inf amr_maf-afr_maf amr_maf-eur_maf amr_maf-sas_maf afr_maf-eur_maf ns 2 1 1 1 s 0 0 0 0 ns/s inf inf inf inf afr_maf-sas_maf eur_maf-sas_maf ns 1 1 s 0 0 ns/s inf inf
Comments
Post a Comment