TwoSampleMR: 孟德尔分析 (II)
最编程
2024-03-02 16:09:43
...
又是代码跟练哦~
示例一:以BMI和冠心病为例
加载包
library(TwoSampleMR)
library(ggplot2)
获取暴露和结局数据
bmi_exp_dat <- extract_instruments(outcomes = 'ieu-a-2')
chd_out_dat <- extract_outcome_data(snps = bmi_exp_dat$SNP, outcomes = 'ieu-a-7')
extract_instruments
函数参数如下:
extract_instruments(
outcomes,
p1 = 5e-08,
clump = TRUE,
p2 = 5e-08,
r2 = 0.001,
kb = 10000,
access_token = ieugwasr::check_access_token(),
force_server = FALSE
)
可以根据自己的需要调p值和r2、kb。
F值和R^2值
至于F值和R^2值的计算,之前已经说过今天为了系统复现MR分析的所有步骤,再放一下下:
把hyperten_liberal
换成自己的暴露数据即可
# Calculate R2 and F stat for exposure data
# Liberal hypertension F stat
hyperten_liberal$r2 <- (2 * (hyperten_liberal$beta.exposure^2) * hyperten_liberal$eaf.exposure * (1 - hyperten_liberal$eaf.exposure)) /
(2 * (hyperten_liberal$beta.exposure^2) * hyperten_liberal$eaf.exposure * (1 - hyperten_liberal$eaf.exposure) +
2 * hyperten_liberal$N * hyperten_liberal$eaf.exposure * (1 - hyperten_liberal$eaf.exposure) * hyperten_liberal$se.exposure^2)
hyperten_liberal$F <- hyperten_liberal$r2 * (hyperten_liberal$N - 2) / (1 - hyperten_liberal$r2)
hyperten_liberal_meanF <- mean(hyperten_liberal$F)
# Stringent hypertension F stat
hyperten_stringent$r2 <- (2 * (hyperten_stringent$beta.exposure^2) * hyperten_stringent$eaf.exposure * (1 - hyperten_stringent$eaf.exposure)) /
(2 * (hyperten_stringent$beta.exposure^2) * hyperten_stringent$eaf.exposure * (1 - hyperten_stringent$eaf.exposure) +
2 * hyperten_stringent$N * hyperten_stringent$eaf.exposure * (1 - hyperten_stringent$eaf.exposure) * hyperten_stringent$se.exposure^2)
hyperten_stringent$F <- hyperten_stringent$r2 * (hyperten_stringent$N - 2) / (1 - hyperten_stringent$r2)
hyperten_stringent_meanF <- mean(hyperten_stringent$F)
harmonise
dat <- harmonise_data(bmi_exp_dat, chd_out_dat)
孟德尔随机化
一旦暴露和结果数据被harmonised
之后,我们就有了暴露和结果性状中每个工具 SNP 的效应值和标准误差。我们可以利用这些信息进行孟德尔随机化啦~
res <- mr(dat)
res
查看MR方法清单
methods <- mr_method_list()
可以看到,可以做异质性检验的方法有5种。
此外,经过mr
那一步之后,我们就得到了b、se和p值,但是如果要画森林图,我们还差置信区间的数值。怎么办呢?
生成带有 95% 置信区间的几率比
generate_odds_ratios(res)
敏感性分析
异质性统计
mr_heterogeneity(dat)
执行 MR Egger 并返回截距值
mr_pleiotropy_test(dat)
返回结果如下:
id.exposure id.outcome outcome
1 ieu-a-2 ieu-a-7 Coronary heart disease || id:ieu-a-7
exposure egger_intercept se pval
1 Body mass index || id:ieu-a-2 -0.001719304 0.003985962 0.6674266
MR Egger 回归中的截距项是 MR 分析结果是否受定向水平多效性影响的有用指标。
单 SNP 分析
要利用每个 SNPs 单独获得 MR 估计值,我们可以采取以下方法:
res_single <- mr_singlesnp(dat)
固定效应元分析法
res_single <- mr_singlesnp(dat, single_method = "mr_meta_fixed")
系数比率法
res_single <- mr_singlesnp(dat, all_method = "mr_two_sample_ml")
mr_singlesnp()
函数也使用所有可用的 SNP 计算完整的 MR,默认情况下使用 IVW 和 MR Egger 方法。可以这样指定:
mr_singlesnp(
dat,
parameters = default_parameters(),
single_method = "mr_wald_ratio",
all_method = c("mr_ivw", "mr_egger_regression")
)
Leave-one-out 分析
res_loo <- mr_leaveoneout(dat)
默认情况下,使用的是反方差加权法,但可以通过使用 method
参数进行更改。
画图
散点图
res <- mr(dat)
p1 <- mr_scatter_plot(res, dat)
保存:
ggsave(p1[[1]], file = "filename.pdf", width = 7, height = 7)
ggsave(p1[[1]], file = "filename.png", width = 7, height = 7)
森林图
res_single <- mr_singlesnp(dat)
p2 <- mr_forest_plot(res_single)
p2[[1]]
Leave-one-out图
res_loo <- mr_leaveoneout(dat)
p3 <- mr_leaveoneout_plot(res_loo)
p3[[1]]
漏斗图
res_single <- mr_singlesnp(dat)
p4 <- mr_funnel_plot(res_single)
p4[[1]]
示例二:多个暴露对应一个结局
获取暴露和结局数据
exp_dat <- extract_instruments(outcomes = c(2, 100, 1032, 104, 1, 72, 999))
table(exp_dat$exposure)
chd_out_dat <- extract_outcome_data(
snps = exp_dat$SNP,
outcomes = 7
)
数字对应的ID:
2 -> ieu-a-2
100 -> ieu-a-100
1032 -> ieu-a-1032
104 -> ieu-a-104
1 -> ieu-a-1
72 -> ieu-a-72
999 -> ieu-a-999
table(exp_dat$exposure)
Adiponectin || id:ieu-a-1 Body fat || id:ieu-a-999
14 10
Body mass index || id:ieu-a-2 Hip circumference || id:ieu-a-100
79 2
Waist-to-hip ratio || id:ieu-a-72 Waist circumference || id:ieu-a-104
31 1
如果这里用的是自己的数据,
expdat <- mv_extract_exposures_local(
filenames_exposure,
sep = " ",
phenotype_col = "Phenotype",
snp_col = "SNP",
beta_col = "beta",
se_col = "se",
eaf_col = "eaf",
effect_allele_col = "effect_allele",
other_allele_col = "other_allele",
pval_col = "pval",
units_col = "units",
ncase_col = "ncase",
ncontrol_col = "ncontrol",
samplesize_col = "samplesize",
gene_col = "gene",
id_col = "id",
min_pval = 1e-200,
log_pval = FALSE,
pval_threshold = 5e-08,
clump_r2 = 0.001,
clump_kb = 10000,
harmonise_strictness = 2
)
harmonise&mr
dat2 <- harmonise_data(
exposure_dat = exp_dat,
outcome_dat = chd_out_dat
)
res <- mr(dat2)
这里用多变量MR分析可以这样:
mvdat <- mv_harmonise_data(exposure_dat, outcome_dat)
res <- mv_multiple(mvdat)
画图
res <- subset_on_method(res) # 默认情况下,根据 IVW 方法(>1 个工具 SNP)或 Wald 比率方法(1 个工具 SNP)进行子集。
res <- sort_1_to_many(res, b = "b", sort_action = 4) # 这将按效应大小的递减对结果进行排序(图中顶部的效应最大)。
res <- split_exposure(res) # 为了保持 Y 轴标签的整洁,我们将暴露 ID 标签排除在曝光列之外
res$weight <- 1/res$se
min(exp(res$b - 1.96*res$se)) # identify value for 'lo' in forest_plot_1_to_many
max(exp(res$b + 1.96*res$se)) # identify value for 'up' in forest_plot_1_to_many
森林图
好丑的图!!!
看能不能改进一下~
res <- mr(dat2)
tmp <- generate_odds_ratios(res)
tmp$` ` <- paste(rep(" ", 22), collapse = " ") ##生成一个空行
# 生成OR (95% CI)
tmp$`OR (95% CI)` <- sprintf("%.2f (%.2f to %.2f)",#sprintF返回字符和可变量组合
tmp$or,tmp$or_lci95,tmp$or_uci95)
森林图改进版
p1 <- forest(tmp[,c(4:6,14,13,9)], ##这里根据自己想要的内容选择
est = tmp$or, #效应值
lower = tmp$or_lci95, #可信区间下限
upper = tmp$or_uci95, #可信区间上限
sizes = tmp$se,
ci_column = 5, #在那一列画森林图,要选空的那一列
ref_line = 1,
arrow_lab = c("No CHD", "CHD"),
xlim = c(0, 4),
ticks_at = c(0.5, 1, 2, 3),
footnote = "")
p1 ##样子还不错啊
保存图片:
require(ggplotify)
p2 <- as.ggplot(p2)
class(p2)
ggsave(p2,file="forest.pdf",width = 10,height = 10)
示例三:一个暴露对应多个结局
获取暴露和结局数据
exp_dat <- extract_instruments(outcomes = 2) # extract instruments for BMI
ao <- available_outcomes()
ao <- ao[ao$category == "Disease", ] # identify diseases
ao <- ao[which(ao$ncase > 100), ]
dis_dat <- extract_outcome_data(
snps = exp_dat$SNP,
outcomes = ao$id
)
dat3 <- harmonise_data(
exposure_dat = exp_dat,
outcome_dat = dis_dat
)
res <- mr(dat3, method_list = c("mr_wald_ratio", "mr_ivw"))
res <- split_outcome(res)
后续绘图思路可参考示例二~
无缝衔接MendelianRandomization包
# Convert to the `MRInput` format for the MendelianRandomization package:
dat2 <- dat_to_MRInput(dat)
# This produces a list of `MRInput` objects that can be used with the MendelianRandomization functions, e.g.
MendelianRandomization::mr_ivw(dat2[[1]])
# Alternatively, convert to the `MRInput` format but also obtaining the LD matrix for the instruments
dat2 <- dat_to_MRInput(dat, get_correlation = TRUE)
MendelianRandomization::mr_ivw(dat2[[1]], correl = TRUE)
MR-MoE:机器学习方法
# Extact instruments for BMI
exposure_dat <- extract_instruments("ieu-a-2")
# Get corresponding effects for CHD
outcome_dat <- extract_outcome_data(exposure_dat$SNP, "ieu-a-7")
# Harmonise
dat <- harmonise_data(exposure_dat, outcome_dat)
# Load the downloaded RData object. This loads the rf object
load("rf.rdata") ##这个数据下载➡dropbox.com/s/5la7y38od95swcf
# Obtain estimates from all methods, and generate data metrics
res <- mr_wrapper(dat)
# MR-MoE - predict the performance of each method
res_moe <- mr_moe(res, rf)
参数说明:
- `estimates` (results from each MR)
- `heterogeneity` (results from heterogeneity for different filtering approaches)
- `directional_pleiotropy` (egger intercepts)
- `info` (metrics used to generate MOE)
下一篇: 群体遗传学的统计指标--群体核苷酸多样性