apply - R: applying a function that returns a list across multiple columns of a data frame -


I am trying to apply a function that takes 2 inputs to every combination of two elements from this list:

> c('eas_maf', 'amr_maf', 'afr_maf', 'eur_maf', 'sas_maf') [1] "eas_maf" "amr_maf" "afr_maf" "eur_maf" "sas_maf" 

To arrange the values into every combination of 2, I am using the combn function:

> list <- combn(c('eas_maf', 'amr_maf', 'afr_maf', 'eur_maf', 'sas_maf'),2) > list      [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]      [,8]      [,9]      [,10]     [1,] "eas_maf" "eas_maf" "eas_maf" "eas_maf" "amr_maf" "amr_maf" "amr_maf" "afr_maf" "afr_maf" "eur_maf" [2,] "amr_maf" "afr_maf" "eur_maf" "sas_maf" "afr_maf" "eur_maf" "sas_maf" "eur_maf" "sas_maf" "sas_maf" 

The function calculates the number of rows that meet the criteria and returns a list:

#' Count variants shared by two populations, split by consequence
#'
#' A row counts as shared when its value in both the `pop1` and `pop2`
#' columns is greater than zero. Shared rows are then split into those whose
#' `consequence` equals "synonymous snv" and those whose `consequence` does not.
#'
#' @param pop1,pop2 A single column name (or index) in `table`. Vectors of
#'   several names are rejected; iterate with `mapply()` instead.
#' @param table Data frame holding the population columns and a
#'   `consequence` column. Defaults to `varianttable`, which is looked up in
#'   the enclosing environment when the argument is not supplied.
#' @return A list with elements `ns` (non-synonymous count), `s` (synonymous
#'   count) and `ns/s` (their ratio; `Inf` or `NaN` when `s` is zero).
#'   Counts are `NA` if any compared value is `NA`, because `sum()` is
#'   called without `na.rm`.
sharedcalc.func <- function(pop1, pop2, table = varianttable) {
  # Fail early with a readable message instead of the
  # "non-conformable arrays" error that vector inputs used to trigger.
  check_column <- function(col, arg) {
    if (length(col) != 1L) {
      stop(arg, " must be a single column name, not a vector of length ",
           length(col), "; use mapply() to process several pairs",
           call. = FALSE)
    }
    if (is.character(col) && !col %in% names(table)) {
      stop("column '", col, "' not found in table", call. = FALSE)
    }
  }
  check_column(pop1, "pop1")
  check_column(pop2, "pop2")
  if (!"consequence" %in% names(table)) {
    stop("table must contain a 'consequence' column", call. = FALSE)
  }

  # `[[` extracts plain vectors, so the comparisons below are ordinary
  # elementwise vector operations rather than data-frame/matrix ones.
  shared <- table[[pop1]] > 0 & table[[pop2]] > 0
  synonymous <- table[["consequence"]] == "synonymous snv"

  s_count <- sum(shared & synonymous)
  ns_count <- sum(shared & !synonymous)

  list("ns" = ns_count, "s" = s_count, "ns/s" = ns_count / s_count)
}

Here is an example of the function's output:

> sharedcalc.func('eas_maf', 'amr_maf') $ns [1] 59325  $s [1] 43434  $`ns/s` [1] 1.365865 

To run the function across the list, I assumed the apply function would be appropriate. However, it returns a non-conformable arrays error:

> apply(list, 2, sharedcalc.func) error in fun(newx[, i], ...) : binary operation on non-conformable arrays 

I also tried the outer function and received the same error:

> outer(list[1,], list[2,], sharedcalc.func) error in fun(x, y, ...) : binary operation on non-conformable arrays 

I am not sure why I am getting this error. Is it possibly due to returning a list from the function? I have tried using lapply to return a list, but that does not work either. Below is the dput of my data:

> dput(head(varianttable)) structure(list(chrom = c("1", "1", "1", "1", "1", "1"), pos = c(69224l,  69428l, 69486l, 69487l, 69496l, 69521l), id = c("rs568964432",  "rs140739101", "rs548369610", "rs568226429", "rs150690004", "rs553724620" ), ref = c("a", "t", "c", "g", "g", "t"), alt = c("t", "g", "t",  "a", "a", "a"), af = c(0.000399361, 0.0189696, 0.000199681, 0.000399361,  0.000998403, 0.000399361), ac = c(2l, 95l, 1l, 2l, 5l, 2l), = c(5008l,  5008l, 5008l, 5008l, 5008l, 5008l), consequence = c("nonsynonymous snv",  "nonsynonymous snv", "synonymous snv", "nonsynonymous snv", "nonsynonymous snv",  "nonsynonymous snv"), gene = c("or4f5", "or4f5", "or4f5", "or4f5",  "or4f5", "or4f5"), refgene_id = c("nm_001005484", "nm_001005484",  "nm_001005484", "nm_001005484", "nm_001005484", "nm_001005484" ), aa_change = c("('d', 'v')", "('f', 'c')", "('n', 'n')", "('a', 't')",  "('g', 's')", "('i', 'n')"), x0.fold_count = c(572l, 572l, 572l,  572l, 572l, 572l), x4.fold_count = c(141l, 141l, 141l, 141l,  141l, 141l), eas_maf = c(0, 0.003, 0.001, 0, 0, 0), amr_maf = c(0.0029,  0.036, 0, 0, 0.0014, 0.0029), afr_maf = c(0, 0.0015, 0, 0.0015,  0.003, 0), eur_maf = c(0, 0.0497, 0, 0, 0, 0), sas_maf = c(0,  0.0153, 0, 0, 0, 0), nonafr_n = c(309227l, 1128036l, 262551l,  0l, 309227l, 309227l), nonafr_weighted = c(0.0029, 0.0261704282487438,  0.001, 0, 0.0014, 0.0029)), .names = c("chrom", "pos", "id",  "ref", "alt", "af", "ac", "an", "consequence", "gene", "refgene_id",  "aa_change", "x0.fold_count", "x4.fold_count", "eas_maf", "amr_maf",  "afr_maf", "eur_maf", "sas_maf", "nonafr_n", "nonafr_weighted" ), row.names = c(na, 6l), class = "data.frame") 

Try the following:

l <- combn(c('eas_maf', 'amr_maf', 'afr_maf', 'eur_maf', 'sas_maf'),2) l      [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [1,] "eas_maf" "eas_maf" "eas_maf" "eas_maf" "amr_maf" "amr_maf" [2,] "amr_maf" "afr_maf" "eur_maf" "sas_maf" "afr_maf" "eur_maf"      [,7]      [,8]      [,9]      [,10]     [1,] "amr_maf" "afr_maf" "afr_maf" "eur_maf" [2,] "sas_maf" "eur_maf" "sas_maf" "sas_maf"  mapply(sharedcalc.func, l[1,], l[2,])      eas_maf eas_maf eas_maf eas_maf amr_maf amr_maf amr_maf afr_maf ns   1       1       1       1       2       1       1       1       s    0       0       0       0       0       0       0       0       ns/s inf     inf     inf     inf     inf     inf     inf     inf          afr_maf eur_maf ns   1       1       s    0       0       ns/s inf     inf     

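mapply is the multivariate version of sapply, to be used if you want to traverse multiple lists simultaneously. The earlier attempts fail because apply hands each column of the matrix to the function as one length-2 vector, and outer calls the function once with whole vectors, whereas the function expects a single column name for each of its two arguments.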

As a side remark: it is a bad idea to overwrite built-in R functionality with your own objects. So, calling your object list is a bad idea, which is why I changed it to l in the above code.


To keep the column names, one can do this:

out <- mapply(sharedcalc.func, l[1,], l[2,]) setnames(data.frame(out), mapply(paste, l[1,], l[2,], sep="-"))      eas_maf-amr_maf eas_maf-afr_maf eas_maf-eur_maf eas_maf-sas_maf ns                 1               1               1               1 s                  0               0               0               0 ns/s             inf             inf             inf             inf      amr_maf-afr_maf amr_maf-eur_maf amr_maf-sas_maf afr_maf-eur_maf ns                 2               1               1               1 s                  0               0               0               0 ns/s             inf             inf             inf             inf      afr_maf-sas_maf eur_maf-sas_maf ns                 1               1 s                  0               0 ns/s             inf             inf 

Comments

Popular posts from this blog

angular - Ionic slides - dynamically add slides before and after -

minify - Minimizing css files -

Add a dynamic header in angular 2 http provider -