Introduction

This document describes an estimate of the positive predictive value (PPV) of variant discovery for all variants in the gene KCNH2 on long QT syndrome type 2 (LQT2). Additional details on the methods used are published Kroncke et al. 2020 PLOS Genetics and at the following website: (https://oates.app.vumc.org/vancart/SCN5A/SCN5A-report.html). We use observed and estimated probability of LQT2 diagnosis for all known KCNH2 variants as a way to assess the per variant PPV for variant discovery. Our objective is to develope a prior estimate of the per variant PPV on LQT2 which incorporates structure, function, and in silico predictors. We use these in silico and in vitro data to generate a Bayesian prior estimate of the per variant PPV since these data can be generated in a lab setting, unlike heterozygotes/carriers of KCNH2 variants which may or may not exist. The final posterior estimate combines this derived prior and clinically phenotyped heterozygotes/carriers.

Part 1: Calculate probability of LQT2 diagnosis and LQT2 Probability Density using Various Subsets of the Literature and Cohort Data

All Literature Variants

# mut_type has includes type and isoform for all variants in the literature and in the cohort.
mut_type <- mut_type[mut_type$mut_type == "missense",]

# Cohort carrier/heterozygote counts and variant IDs. 
# Here we select only the missense variants (in-frame indels are also included as "missense")
cohort.data <- cohort.data[cohort.data$mut_type == "missense" & cohort.data$total_carriers > 0,]

# Literature dataset where potentially overlapping carriers/heterozygotes are removed
d <- lit.nonoverlap.data[lit.nonoverlap.data$mut_type == "missense",]

# Here, heterozygotes/carriers from gnomAD are removed to test their influence on performance
# Remove comments in next two lines and complete subsequent evaluation to assess influence of gnomAD on
# calculations.
#d$total_carriers <- d$total_carriers - d$gnomAD # test influence of gnomAD
#d$unaff <- d$total_carriers - d$lqt2 # test influence of gnomAD

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 1,"weight"] <- 0.000 # This is changed to "< 2" when evaluating ROC-AUC of n=1 variants from the literature

LQT2 empirical LQTS diagnosis probability prior

Use observed LQT2 diagnosis probability to calculate “LQTS probability density” as described in previous publication. Plot probability density versus residue

# Mean squared error
mse <- function(sm) {
  mean((sm$residuals)^2*(sm$weights))
}

# Weighted mean to determine LQT2 penetrance empirical prior
newdata = data.frame(wt=1)
model <- lm(penetrance_lqt2 ~ 1, data=d, weights = d$weight)
summary(model)
## 
## Call:
## lm(formula = penetrance_lqt2 ~ 1, data = d, weights = d$weight)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2329 -0.1653 -0.0232  0.0763  0.7561 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.2332     0.0135   17.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2323 on 789 degrees of freedom
##   (467 observations deleted due to missingness)
p<-predict(model, newdata)
dev<- mse(model)#p*(1-p)

# Derive alpha and beta from weighted mean and MSE (estimated variance)
estBetaParams <- function(mu, var) {
  alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2
  beta <- alpha * (1 / mu - 1)
  return(params = list(alpha = alpha, beta = beta))
}

# Estimated shape parameters for LQT2 empirical prior
alpha0 = estBetaParams(p,dev)$alpha
beta0 = estBetaParams(p,dev)$beta
print(paste("alpha0 = ", alpha0, "  beta0 = ", beta0))
## [1] "alpha0 =  0.54041284982238   beta0 =  1.77720039413882"
# Bayesian LQT2 penetrance estimates from empirical priors 
# and observed affected/unaffected counts:
d$lqt2_penetranceBayesian_initial <- (alpha0 + d[,"lqt2"])/((alpha0 + beta0 + d[,"total_carriers"]))
d$lqt2_penetranceBayesian<-d$lqt2_penetranceBayesian_initial

# Plot literature observed LQT2 penetrance versus residue number
m<- d %>% 
  select(resnum, pmean = penetrance_lqt2) %>% 
  mutate(p_mean_smooth = rollmean(pmean, k=20, fill = NA))

fit <- loess(d[,"penetrance_lqt2"]~as.numeric(d[,"resnum"]), span = 0.15)
plot(d$resnum, d$penetrance_lqt2, xlab ="Residue", ylab = "LQT2 Penetrance Estimate")
xrange <- seq(min(fit$x), max(fit$x), length.out = 100)
ps <- predict(fit, xrange, se=T)
lines(xrange, ps$fit*1, lwd=5)
lines(xrange, (ps$fit+1.96*ps$se.fit)*1, lty=2, lwd=4)
lines(xrange, (ps$fit-1.96*ps$se.fit)*1, lty=2, lwd=4)

Calculate LQTS probability densities

With the updated empirical priors applied to carrier counts, calculate “LQTS probability density” as described in previous publication.

# NOTE: adjust 3rd argument given to funcdist, "d,", to d[d$total_carriers>1,] when 
# evaluating ROC of total_carriers == 1

h2.covariates[, "lqt2_dist"]<-NA
h2.covariates[, "lqt2_dist_weight"]<-NA
mut_type <- mut_type[mut_type$mut_type == "missense" & mut_type$isoform == "A",]
for(rec in 1:nrow(mut_type)){
  if (!is.na(mut_type[rec, "resnum"])) {
    l<-length(h2.covariates[h2.covariates$var == mut_type[rec, "var"],"var"])
    for (m in 1:l){
    h2.covariates[h2.covariates$var == mut_type[rec, "var"], 
                  c("lqt2_dist", "lqt2_dist_weight", "resnum")][m,] <- c(funcdist(mut_type[rec, "resnum"], mut_type[rec, "var"], d, h2dist, "penetrance_lqt2", "sigmoid", 7), mut_type[rec, "resnum"])
    }
  }
}

# Plot lqt2_dist versus residue number
h2.covariates$resnum<-as.integer(h2.covariates$resnum)
h2.covariates <- h2.covariates[order(h2.covariates$resnum),]
h2.covariates <- h2.covariates[!is.na(h2.covariates$resnum),]
m<- h2.covariates %>% 
  select(resnum, pmean = lqt2_dist) %>% 
  mutate(p_mean_smooth = rollmean(pmean, k=20, fill = NA))

fit <- loess(h2.covariates[,"lqt2_dist"]~as.numeric(h2.covariates[,"resnum"]), span = 0.15)
plot(h2.covariates$resnum, h2.covariates$lqt2_dist, xlab ="Residue", ylab = "LQT2 Penetrance Density", xlim=c(0,1160))
xrange <- seq(min(fit$x), max(fit$x), length.out = 100)
ps <- predict(fit, xrange, se=T)
lines(xrange, ps$fit*1, lwd=5)
lines(xrange, (ps$fit+1.96*ps$se.fit)*1, lty=2, lwd=4)
lines(xrange, (ps$fit-1.96*ps$se.fit)*1, lty=2, lwd=4)

Calculate Weighted Spearman Correlation Coefficients

Evaluate weighted Spearman correlations coefficients between observed LQT2 diagnosis probability in the literature and various potential predictors

# Merge "d" with full variant list and set carrier counts to 0. 
# This is done for convenience so we can estimate LQT2 diagnosis probability for all variants including 
# those witheld during model construction. This will make validation easier.
d <- merge(d, h2.covariates, all = TRUE)
d <- unique(d) 

calcPval=function(xName,yName,weightName,nPerms,new.mat2){
  # Pulls out variables

  x=new.mat2[,xName] 
  y=new.mat2[,yName] 
  w=new.mat2[,weightName]
  x2=x[!is.na(x)]
  y2=y[!is.na(x)]
  w2=w[!is.na(x)]

  # Calculate the real correlation
  realCorr=weightedCorr(x2,y2,method='spearman',weights=w2)
  # Do permutations, calculate fake correlations
  permutedCorrList=c()
  for(permNum in 1:nPerms){
    permutedX=sample(x2,length(x2),replace=FALSE)
    wCorrSim=weightedCorr(permutedX,y2,method='spearman',weights=w2)
    permutedCorrList=c(permutedCorrList,wCorrSim)
  }
  permutedCorrList2=abs(permutedCorrList)
  realCorr2=abs(realCorr)
  
  # Calculate pvalue
  summ=sum(realCorr2<permutedCorrList2)
  pValue=summ/nPerms
  return(list(realCorr,pValue,length(x2)))
}

calcAllPvals=function(yList,xList,nPerms,weightName,new.mat2){
  i=0
  resultTable=data.frame()
  for(yName in yList){
    for(xName in xList){
      i=i+1
      result=calcPval(xName,yName,weightName,nPerms,new.mat2)
      resultTable[i,'x']=xName
      resultTable[i,'y']=yName
      resultTable[i,'nPerms']=nPerms
      resultTable[i,'weightedCorr']=result[[1]]
      resultTable[i,'pValue']=result[[2]]
      resultTable[i,'n']=result[[3]]
      #print(resultTable[i,'pValue'])
    }
  }
  print(resultTable)
  return(resultTable)
}


yList=c('penetrance_lqt2')
xList=c('hm_ssPeak','hm_tailPeak','hm_vhalfact','hm_vhalfinact','hm_recovfrominact', 'hm_taudeact_fast', 
        'ht_ssPeak','ht_tailPeak','ht_vhalfact','ht_vhalfinact','ht_recovfrominact', 'ht_taudeact_fast',
        'pph2_prob', 'provean_score', "blast_pssm",
        'pamscore', 'aasimilaritymat', "lqt2_dist", "revel_score")
tmp<-d[!is.na(d$penetrance_lqt2) & !is.na(d$revel_score),]
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue   n
## 1          hm_ssPeak penetrance_lqt2   1000  0.319565106  0.264  20
## 2        hm_tailPeak penetrance_lqt2   1000 -0.564758635  0.000 100
## 3        hm_vhalfact penetrance_lqt2   1000  0.147612664  0.293  68
## 4      hm_vhalfinact penetrance_lqt2   1000  0.669666858  0.001  29
## 5  hm_recovfrominact penetrance_lqt2   1000 -0.734693865  0.007  12
## 6   hm_taudeact_fast penetrance_lqt2   1000 -0.375296727  0.025  42
## 7          ht_ssPeak penetrance_lqt2   1000 -0.464626144  0.025  34
## 8        ht_tailPeak penetrance_lqt2   1000 -0.616133062  0.000  83
## 9        ht_vhalfact penetrance_lqt2   1000 -0.061932733  0.710  41
## 10     ht_vhalfinact penetrance_lqt2   1000 -0.173738913  0.475  26
## 11 ht_recovfrominact penetrance_lqt2   1000 -0.407537689  0.407   6
## 12  ht_taudeact_fast penetrance_lqt2   1000  0.009703229  0.975  14
## 13         pph2_prob penetrance_lqt2   1000  0.392919452  0.000 741
## 14     provean_score penetrance_lqt2   1000 -0.585053878  0.000 741
## 15        blast_pssm penetrance_lqt2   1000 -0.250452543  0.000 741
## 16          pamscore penetrance_lqt2   1000 -0.116758403  0.033 750
## 17   aasimilaritymat penetrance_lqt2   1000  0.018401459  0.719 750
## 18         lqt2_dist penetrance_lqt2   1000  0.583417065  0.000 748
## 19       revel_score penetrance_lqt2   1000  0.664409462  0.000 750
tmp<-d[!is.na(d$ht_tailPeak) & !is.na(d$penetrance_lqt2) & !is.na(d$revel_score),]
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue  n
## 1          hm_ssPeak penetrance_lqt2   1000  0.690504615  0.097 10
## 2        hm_tailPeak penetrance_lqt2   1000 -0.428867090  0.004 66
## 3        hm_vhalfact penetrance_lqt2   1000 -0.093938286  0.659 35
## 4      hm_vhalfinact penetrance_lqt2   1000  0.629956576  0.049 15
## 5  hm_recovfrominact penetrance_lqt2   1000 -0.772690799  0.294  4
## 6   hm_taudeact_fast penetrance_lqt2   1000 -0.228864954  0.368 17
## 7          ht_ssPeak penetrance_lqt2   1000 -0.464626144  0.022 34
## 8        ht_tailPeak penetrance_lqt2   1000 -0.616133062  0.000 83
## 9        ht_vhalfact penetrance_lqt2   1000 -0.061096836  0.748 40
## 10     ht_vhalfinact penetrance_lqt2   1000 -0.173348992  0.438 25
## 11 ht_recovfrominact penetrance_lqt2   1000 -0.407537689  0.424  6
## 12  ht_taudeact_fast penetrance_lqt2   1000  0.009703229  0.980 14
## 13         pph2_prob penetrance_lqt2   1000  0.332107233  0.008 81
## 14     provean_score penetrance_lqt2   1000 -0.455185289  0.000 81
## 15        blast_pssm penetrance_lqt2   1000  0.055910704  0.653 81
## 16          pamscore penetrance_lqt2   1000  0.058560727  0.655 83
## 17   aasimilaritymat penetrance_lqt2   1000  0.063049344  0.610 83
## 18         lqt2_dist penetrance_lqt2   1000  0.755839461  0.000 83
## 19       revel_score penetrance_lqt2   1000  0.612183017  0.000 83

Scale all covariates

Calculate EM priors and posteriors for all variants

Use an EM algorithm to estimate LQT2 PPV for each variant (or LQT2 diagnosis probability)

Literature Variants Where N = 1 Variants are removed

# Literature dataset where potentially overlapping carriers/heterozygotes are removed
d <- lit.nonoverlap.data[lit.nonoverlap.data$mut_type == "missense",]

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 2,"weight"] <- 0.000 # This is changed to "< 2" here to evaluate ROC-AUC of n=1 variants from the literature

LQT2 empirical diagnosis probability prior

Use observed LQT2 diagnosis probability to calculate “LQTS probability density” as described in previous publication. Plot LQTS probability density versus residue

# Weighted mean to determine LQT2 penetrance empirical prior
newdata = data.frame(wt=1)
model <- lm(penetrance_lqt2 ~ 1, data=d, weights = d$weight)
summary(model)
## 
## Call:
## lm(formula = penetrance_lqt2 ~ 1, data = d, weights = d$weight)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2323 -0.1648  0.0000  0.0000  0.7567 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.23253    0.01893   12.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3236 on 399 degrees of freedom
##   (467 observations deleted due to missingness)
p<-predict(model, newdata)
dev<- mse(model) #p*(1-p)

# Estimated shape parameters for LQT2 empirical prior
alpha0 = estBetaParams(p,dev)$alpha
beta0 = estBetaParams(p,dev)$beta
print(paste("alpha0 = ", alpha0, "  beta0 = ", beta0))
## [1] "alpha0 =  0.552007535426229   beta0 =  1.82190517994969"
# Bayesian LQT2 penetrance estimates from empirical priors 
# and observed affected/unaffected counts:
d$lqt2_penetranceBayesian_initial <- (alpha0 + d[,"lqt2"])/((alpha0 + beta0 + d[,"total_carriers"]))
d$lqt2_penetranceBayesian<-d$lqt2_penetranceBayesian_initial

Calculate LQTS probability densities

With the updated empirical priors applied to carrier counts, calculate “LQTS probability density” as described in previous publication.

h2.covariates[, "lqt2_dist"]<-NA
h2.covariates[, "lqt2_dist_weight"]<-NA
mut_type <- mut_type[mut_type$mut_type == "missense" & mut_type$isoform == "A",]
for(rec in 1:nrow(mut_type)){
  if (!is.na(mut_type[rec, "resnum"])) {
    h2.covariates[h2.covariates$var == mut_type[rec, "var"], 
                  c("lqt2_dist", "lqt2_dist_weight", "resnum")] <- c(funcdist(mut_type[rec, "resnum"], mut_type[rec, "var"], d[d$total_carriers>1,], h2dist, "penetrance_lqt2", "sigmoid", 7), mut_type[rec, "resnum"])
  }
}

# Plot lqt2_dist versus residue number
h2.covariates$resnum<-as.integer(h2.covariates$resnum)
h2.covariates <- h2.covariates[order(h2.covariates$resnum),]
h2.covariates <- h2.covariates[!is.na(h2.covariates$resnum),]

# Merge "d" with full variant list and set carrier counts to 0. 
# This is done for convenience so  we can estimate LQT2 penetrance for all variants including 
# those witheld during model construction. This will make validation easier.
d <- merge(d, h2.covariates, all = TRUE)
d <- unique(d) 

Scale all covariates

Calculate EM priors and posteriors for all variants

Use an EM algorithm to estimate LQT2 PPV for each variant (or LQT2 diagnosis probability)

Literature and Cohort Combined (for final predictions)

# Literature dataset where potentially overlapping carriers/heterozygotes are removed
d <- lit.cohort.data[lit.cohort.data$mut_type == "missense" & lit.cohort.data$isoform=="A",]

# add all possible variants
allvariants<-data.frame(unlist(h2.covariates[!is.na(h2.covariates$cardiacboost),"var"]), stringsAsFactors = F)
names(allvariants)<-"var"
allvariants$isoform<-unlist(h2.covariates[!is.na(h2.covariates$cardiacboost),"isoform"])
allvariants$mut_type<-unlist(h2.covariates[!is.na(h2.covariates$cardiacboost),"mut_type"])
a<-merge(d,allvariants,all = T)
a[is.na(a$total_carriers),"total_carriers"] <- 0
a[is.na(a$lqt2),"lqt2"] <- 0
d<-a

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 1,"weight"] <- 0.000 # This is changed to "< 2" here to evaluate ROC-AUC of n=1 variants from the literature

LQT2 empirical diagnosis probability prior

Use observed LQT2 diagnosis probability to calculate “LQTS probability density” as described in previous publication. Plot diagnosis probability density versus residue

# Mean squared error
mse <- function(sm) {
  mean((sm$residuals)^2*(sm$weights))
}

# Derive alpha and beta from weighted mean and MSE (estimated variance)
estBetaParams <- function(mu, var) {
  alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2
  beta <- alpha * (1 / mu - 1)
  return(params = list(alpha = alpha, beta = beta))
}

# Weighted mean to determine LQT2 penetrance empirical prior
newdata = data.frame(wt=1)
model <- lm(penetrance_lqt2 ~ 1, data=d, weights = d$weight)
summary(model)
## 
## Call:
## lm(formula = penetrance_lqt2 ~ 1, data = d, weights = d$weight)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36142 -0.25639 -0.03599  0.06351  0.61328 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.36170    0.01309   27.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2574 on 950 degrees of freedom
##   (6240 observations deleted due to missingness)
p<-predict(model, newdata)
dev<- mse(model) #p*(1-p)

# Estimated shape parameters for LQT2 empirical prior
alpha0 = estBetaParams(p,dev)$alpha
beta0 = estBetaParams(p,dev)$beta
print(paste("alpha0 = ", alpha0, "  beta0 = ", beta0))
## [1] "alpha0 =  0.899678647955565   beta0 =  1.58771266711751"
# Bayesian LQT2 penetrance estimates from empirical priors 
# and observed affected/unaffected counts:
d$lqt2_penetranceBayesian_initial <- (alpha0 + d[,"lqt2"])/((alpha0 + beta0 + d[,"total_carriers"]))
d$lqt2_penetranceBayesian<-d$lqt2_penetranceBayesian_initial

Calculate LQTS probability densities and annotate function and structural location

With the updated empirical priors applied to carrier counts, calculate “LQTS probability density” as described in previous publication. !!! NOTE: since these data are truly the “best estimates” we include all variants in the calculation such that unique scores are by residue not by variant.

h2.covariates[, "lqt2_dist"]<-NA
h2.covariates[, "lqt2_dist_weight"]<-NA
h2.covariates<-h2.covariates[h2.covariates$isoform=="A",]
ld<-0
for(rec in seq(2,1159,1)){
  #print(rec)
  ld <- funcdist(rec, "var", d[!is.na(d$total_carriers) & d$total_carriers>0 & d$isoform == "A" & d$mut_type != "nonsense",], h2dist, "penetrance_lqt2", "sigmoid", 7)
  h2.covariates[!is.na(h2.covariates$isoform) & !is.na(h2.covariates$resnum) & h2.covariates$isoform=="A" & h2.covariates$resnum == rec & !is.na(h2.covariates$var) & !is.na(h2.covariates$mut_type) & h2.covariates$mut_type == "missense","lqt2_dist"] <- ld[1]
  h2.covariates[!is.na(h2.covariates$isoform) & !is.na(h2.covariates$resnum) & h2.covariates$isoform=="A" & h2.covariates$resnum == rec & !is.na(h2.covariates$var)& !is.na(h2.covariates$mut_type) & h2.covariates$mut_type == "missense","lqt2_dist_weight"] <-ld[2] 
}

# Plot lqt2_dist versus residue number
h2.covariates$resnum<-as.integer(h2.covariates$resnum)
h2.covariates <- h2.covariates[order(h2.covariates$resnum),]
h2.covariates <- h2.covariates[!is.na(h2.covariates$resnum),]

# Merge "d" with full variant list and set carrier counts to 0. 
# This is done for convenience so  we can estimate LQT2 penetrance for all variants including 
# those witheld during model construction. This will make validation easier.
d <- merge(d, h2.covariates, all = TRUE)
d <- unique(d) 

# annotate structural location (hotspot) 
d$Structure<-NA
d[!is.na(d$lqt2_dist) & d$lqt2_dist<0.1,"Structure"]<-"Non_Hotspot"
d[!is.na(d$lqt2_dist) & d$lqt2_dist>=0.1 & d$lqt2_dist<0.4,"Structure"]<-"Mild_Hotspot"
d[!is.na(d$lqt2_dist) & d$lqt2_dist>=0.4,"Structure"]<-"Hotspot"

# annotate functional perturbation
d$Function<-NA
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak<0.25,"Function"]<-"Severe Dominant LOF"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak<0.5 & d$ht_tailPeak>=0.25,"Function"]<-"Dominant LOF"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak>=0.5 & d$ht_tailPeak<0.75,"Function"]<-"LOF"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak>=0.75 & d$ht_tailPeak<1.25,"Function"]<-"Normal"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak>=1.25,"Function"]<-"GOF"

Calculate EM priors and posteriors for all variants

Use an EM algorithm to estimate LQT2 PPV for each variant (or LQT2 diagnosis probability)

LQT2 diagnosis probability in Cohort Data for validation

load("prepared_data.RData")
load("lit_all_data_checkpoint.RData")

cohort.data<-cohort.data[!is.na(cohort.data$resnum) & cohort.data$resnum<2000,]

cohort.data$weight = 1-1/(0.01+cohort.data$total_carriers)
cohort.data$penetrance_lqt2 <- cohort.data$lqt2/cohort.data$total_carriers

for (variant in cohort.data$var) {
  if (!is.na(match(variant, d$var))) {
     cohort.data[cohort.data$var == variant, c("p_mean_w","alpha", "beta", "lqt2_lit", "unaff_lit", "total_carriers_lit", "lqt2_dist")] <- d[match(variant, d$var), c("p_mean_w", "alpha", "beta", "lqt2", "unaff", "total_carriers", "lqt2_dist")]
  }
}

m<- cohort.data %>% 
  select(resnum, pmean = penetrance_lqt2) %>% 
  mutate(p_mean_smooth = rollmean(pmean, k=20, fill = NA))

fit <- loess(cohort.data[,"penetrance_lqt2"]~as.numeric(cohort.data[,"resnum"]), span = 0.15)
plot(cohort.data$resnum, cohort.data$penetrance_lqt2, xlab ="Residue", ylab = "LQT2 diagnosis probability", col = "red", pch=19)
xrange <- seq(min(fit$x), max(fit$x), length.out = 100)
ps <- predict(fit, xrange, se=T)
lines(xrange, ps$fit*1, lwd=5)
lines(xrange, (ps$fit+1.96*ps$se.fit)*1, lty=2, lwd=4)
lines(xrange, (ps$fit-1.96*ps$se.fit)*1, lty=2, lwd=4)

# Add covariates to cohort dataset
cohort.data <- merge(cohort.data, h2.covariates, all = TRUE)
cohort.data <- unique(cohort.data) 


# Save cohort data
save(cohort.data, file = "cohort_checkpoint.RData")

Part 2: Coverage plots

Bootstrap and get the coverage rate

  1. Use the observed diagnosis probability from as the TRUE diagnosis probability, generate n binomial observations

  2. Use the final EM algorithm posterior as the prior for Beta-Binomial, incorporate data from step (1), generate the posterior distribution, and get 95% credible interval.

  3. Check whether the interval cover the true diagnosis probability from Step 1.

  4. Repeat Step 1 to Step 3 N times to get the coverage rate.

Bootstrap function

BootsCoverage <- function(var,n=100,N=1000,true){
  
  # var: variant name
  # n: number of subjects in the new data
  # N: number of Bootstrap

  # extract the "true" diagnosis probability
  true.p <- d[d$var==var,true]
  
  # generate binomial data 
  event <- rbinom(N,n,true.p)

  # get the posterior credible interval
  alpha <- d$alpha[which(d$var==var)] 
  beta <- d$beta[which(d$var==var)] 

  new.alpha <- alpha + event
  new.beta <- beta + n - event

  lb <- qbeta(0.025,new.alpha,new.beta)
  ub <- qbeta(0.975,new.alpha,new.beta)

  # change lb to floor of nearest 0.1
  lb <- floor(lb*20)/20
  ub <- ceiling(ub*20)/20
  
  return(sum(lb < true.p & ub > true.p)/N) 
}

Plot coverage

Observed diagnosis probability as the “true” diagnosis probability

The coverage plot where observed diagnosis probability is the “true” diagnosis probability and one hundred new observations are added is shown below.

Part 3: Variance explained

Pearson R^2 and Spearman Rho Against EM Posterior from Cohort

# load data, literature + gnomAD + cohort
load("lit_all_data_checkpoint.RData")
load("cohort_checkpoint.RData")

calcPval=function(xName,yName,weightName,nPerms,new.mat2){
  # Pulls out variables

  x=new.mat2[,xName] 
  y=new.mat2[,yName] 
  w=new.mat2[,weightName]
  x2=x[!is.na(x)]
  y2=y[!is.na(x)]
  w2=w[!is.na(x)]

  # Calculate the real correlation
  realCorr=weightedCorr(x2,y2,method='spearman',weights=w2)
  # Do permutations, calculate fake correlations
  permutedCorrList=c()
  for(permNum in 1:nPerms){
    permutedX=sample(x2,length(x2),replace=FALSE)
    wCorrSim=weightedCorr(permutedX,y2,method='spearman',weights=w2)
    permutedCorrList=c(permutedCorrList,wCorrSim)
  }
  permutedCorrList2=abs(permutedCorrList)
  realCorr2=abs(realCorr)
  
  # Calculate pvalue
  summ=sum(realCorr2<permutedCorrList2)
  pValue=summ/nPerms
  return(list(realCorr,pValue,length(x2)))
}

calcAllPvals=function(yList,xList,nPerms,weightName,new.mat2){
  i=0
  resultTable=data.frame()
  for(yName in yList){
    for(xName in xList){
      i=i+1
      result=calcPval(xName,yName,weightName,nPerms,new.mat2)
      resultTable[i,'x']=xName
      resultTable[i,'y']=yName
      resultTable[i,'nPerms']=nPerms
      resultTable[i,'weightedCorr']=result[[1]]
      resultTable[i,'pValue']=result[[2]]
      resultTable[i,'n']=result[[3]]
      #print(resultTable[i,'pValue'])
    }
  }
  print(resultTable)
  return(resultTable)
}

# Select covariates from isoform "A" in cohort dataset
cohort.data <- cohort.data[!is.na(cohort.data$total_carriers) & cohort.data$isoform == "A" & cohort.data$mut_type == "missense",]
cohort.data <- cohort.data[,!names(cohort.data) %in%  "cardiacboost"]
cohort.data <- unique(cohort.data)

tmp<-cohort.data[!is.na(cohort.data$provean_score) & !is.na(cohort.data$revel_score) & !is.na(cohort.data$pamscore),]
yList=c('penetrance_lqt2')
xList=c('hm_ssPeak','hm_tailPeak','hm_vhalfact','hm_vhalfinact','hm_recovfrominact', 'hm_taudeact_fast', 
        'ht_ssPeak','ht_tailPeak','ht_vhalfact','ht_vhalfinact','ht_recovfrominact', 'ht_taudeact_fast',
        'pph2_prob', 'provean_score', "blast_pssm",
        'pamscore', 'aasimilaritymat', "lqt2_dist", "revel_score", 'p_mean_w')
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue   n
## 1          hm_ssPeak penetrance_lqt2   1000  -0.13782555  0.755   8
## 2        hm_tailPeak penetrance_lqt2   1000  -0.45322142  0.011  46
## 3        hm_vhalfact penetrance_lqt2   1000   0.17406867  0.431  34
## 4      hm_vhalfinact penetrance_lqt2   1000   0.58824513  0.104  13
## 5  hm_recovfrominact penetrance_lqt2   1000  -0.01793348  0.978   8
## 6   hm_taudeact_fast penetrance_lqt2   1000   0.09551958  0.749  26
## 7          ht_ssPeak penetrance_lqt2   1000  -0.72712827  0.033  10
## 8        ht_tailPeak penetrance_lqt2   1000  -0.74007175  0.000  38
## 9        ht_vhalfact penetrance_lqt2   1000  -0.06920233  0.774  21
## 10     ht_vhalfinact penetrance_lqt2   1000  -0.42544340  0.180  13
## 11 ht_recovfrominact penetrance_lqt2   1000   0.31291231  0.511   4
## 12  ht_taudeact_fast penetrance_lqt2   1000   0.40856426  0.177  13
## 13         pph2_prob penetrance_lqt2   1000   0.29946329  0.000 255
## 14     provean_score penetrance_lqt2   1000  -0.40683895  0.000 255
## 15        blast_pssm penetrance_lqt2   1000  -0.08826634  0.232 255
## 16          pamscore penetrance_lqt2   1000  -0.03980324  0.612 255
## 17   aasimilaritymat penetrance_lqt2   1000   0.09491418  0.207 255
## 18         lqt2_dist penetrance_lqt2   1000   0.48764962  0.000 255
## 19       revel_score penetrance_lqt2   1000   0.46266065  0.000 255
## 20          p_mean_w penetrance_lqt2   1000   0.53708337  0.000 255
rm(tmp)
rm(t)
i=0
tmp<-data.frame()
for (x in xList){
  i=i+2
  tmp[i-1,"Feature"]<-x
  t<-d[!is.na(d[,x]) & d$total_carriers>0,]
  t<-t[!is.na(t[,"var"]),]
  tmp[i,"Feature"]<-paste(x,"_cohort")
  t<-d[!is.na(d[,x]) & d$total_carriers>0,]
  t<-t[!is.na(t[,"var"]),]
  foo <- boot(t, function(data,indices)
  weightedCorr(t[indices,x],t$penetrance_lqt2[indices], method="spearman", weights = t$weight[indices]), R=1000)
  tmp[i-1,"Spearman"]<-foo$t0
  tmp[i-1,"Spearman_low"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[1][[1]]
  tmp[i-1,"Spearman_high"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[2][[1]]
  tmp[i-1,"n"]<-length(t[,x])
  t<-cohort.data[!is.na(cohort.data[,x]) & cohort.data$total_carriers>0,]
  t<-t[!is.na(t[,"var"]),]
  foo <- boot(t, function(data,indices)
  weightedCorr(t[indices,x],t$penetrance_lqt2[indices], method="spearman", weights = t$weight[indices]), R=1000)
  tmp[i,"Spearman"]<-foo$t0
  tmp[i,"Spearman_low"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[1][[1]]
  tmp[i,"Spearman_high"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[2][[1]]
  tmp[i,"n"]<-length(t[,x])
}

forestplot(tmp$Feature,tmp$Spearman,tmp$Spearman_low,tmp$Spearman_high)

# reset "tmp" variable
tmp<-cohort.data[!is.na(cohort.data$provean_score) & !is.na(cohort.data$revel_score) & !is.na(cohort.data$pamscore),]
# Weighted R2 between observed LQT2 penetrance and post-test probability
foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 diagnosis probability versus observed cohort LQT2 diagnosis probability")
## [1] "EM estimated LQT2 diagnosis probability versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.3281077
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2155762 0.4505238
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 diagnosis probability density versus observed cohort LQT2 diagnosis probability")
## [1] "LQT2 diagnosis probability density versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.2513792
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1458213 0.3669153
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL score versus observed cohort LQT2 diagnosis probability")
## [1] "REVEL score versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.2316343
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1338631 0.3560617
# Evaluate only variants with Heterozygous peak tail current measured.
tmp<-cohort.data[!is.na(cohort.data$ht_tailPeak),]
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue  n
## 1          hm_ssPeak penetrance_lqt2   1000  0.972864918  0.300  4
## 2        hm_tailPeak penetrance_lqt2   1000 -0.251150899  0.258 28
## 3        hm_vhalfact penetrance_lqt2   1000 -0.005613182  0.988 14
## 4      hm_vhalfinact penetrance_lqt2   1000 -0.587879495  0.300  6
## 5  hm_recovfrominact penetrance_lqt2   1000 -1.000000000  0.000  2
## 6   hm_taudeact_fast penetrance_lqt2   1000  0.154267674  0.797  6
## 7          ht_ssPeak penetrance_lqt2   1000 -0.752439389  0.022 11
## 8        ht_tailPeak penetrance_lqt2   1000 -0.750024456  0.000 39
## 9        ht_vhalfact penetrance_lqt2   1000  0.107075664  0.690 20
## 10     ht_vhalfinact penetrance_lqt2   1000 -0.499958288  0.088 14
## 11 ht_recovfrominact penetrance_lqt2   1000  0.312912311  0.494  4
## 12  ht_taudeact_fast penetrance_lqt2   1000  0.558320083  0.085 11
## 13         pph2_prob penetrance_lqt2   1000  0.366819707  0.040 38
## 14     provean_score penetrance_lqt2   1000 -0.292846391  0.125 38
## 15        blast_pssm penetrance_lqt2   1000  0.172315938  0.356 38
## 16          pamscore penetrance_lqt2   1000 -0.002273073  0.987 39
## 17   aasimilaritymat penetrance_lqt2   1000  0.247567799  0.171 39
## 18         lqt2_dist penetrance_lqt2   1000  0.666753766  0.000 39
## 19       revel_score penetrance_lqt2   1000  0.709843622  0.000 39
## 20          p_mean_w penetrance_lqt2   1000  0.763687875  0.000 39
foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 diagnosis probability versus observed cohort LQT2 diagnosis probability")
## [1] "EM estimated LQT2 diagnosis probability versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.5640682
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2843941 0.7730929
model <- lm(penetrance_lqt2~ht_tailPeak, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("Heterozygously measured peak tail current versus observed cohort LQT2 diagnosis probability")
## [1] "Heterozygously measured peak tail current versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.4953738
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2951170 0.6951972
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 probability density versus observed cohort LQT2 diagnosis probability")
## [1] "LQT2 probability density versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.4031363
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1663152 0.6468372
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL score versus observed cohort LQT2 diagnosis probability")
## [1] "REVEL score versus observed cohort LQT2 diagnosis probability"
foo$t0
## [1] 0.4568439
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2465301 0.7131307

Variance explained from literature dataset

load("lit_all_data_checkpoint.RData")

tmp<-d[!is.na(d$provean_score) & !is.na(d$penetrance_lqt2) & !is.na(d$lqt2_dist) & !is.na(d$revel_score),]

foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 diagnosis probability versus observed literature LQT2 diagnosis probability")
## [1] "EM estimated LQT2 diagnosis probability versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.8168311
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.7677140 0.8596818
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 probability density versus observed literature LQT2 diagnosis probability")
## [1] "LQT2 probability density versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.4902899
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.3887100 0.5914084
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL versus observed literature LQT2 diagnosis probability")
## [1] "REVEL versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.3920345
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.3249512 0.4562831
# Evaluate only variants with Heterozygous peak tail current measured.
tmp<-d[!is.na(d$ht_tailPeak) & !is.na(d$provean_score) & !is.na(d$penetrance_lqt2) & !is.nan(d$penetrance_lqt2) & !is.na(d$lqt2_dist),]

foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 diagnosis probability versus observed literature LQT2 diagnosis probability")
## [1] "EM estimated LQT2 diagnosis probability versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.8884971
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.8053722 0.9438262
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 probability density versus observed literature LQT2 diagnosis probability")
## [1] "LQT2 probability density versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.5945224
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.4220788 0.7419627
model <- lm(penetrance_lqt2~ht_tailPeak, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("Heterozygous peak tail current versus observed literature LQT2 diagnosis probability")
## [1] "Heterozygous peak tail current versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.3390998
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1814097 0.5573245
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL versus observed literature LQT2 diagnosis probability")
## [1] "REVEL versus observed literature LQT2 diagnosis probability"
foo$t0
## [1] 0.4286368
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2418773 0.6251751

Part 4: ROC and AUC plots

ROCs of observed cohort LQT2 diagnosis probability with 0.5 as cutoff all variants.

AUCs from ROCs observed cohort LQT2 diagnosis probability at multiple cutoffs

AUCs from ROCs predicting EM posteriors

## [1] "263   218"
## [1] "263   216"
## [1] "263   214"
## [1] "263   212"
## [1] "263   205"
## [1] "263   193"
## [1] "263   193"
## [1] "263   190"
## [1] "263   190"
## [1] "263   160"
## [1] "263   158"
## [1] "263   153"
## [1] "263   135"
## [1] "263   135"
## [1] "263   127"

ROCs of observed cohort LQT2 diagnosis probability with single observation variants.

## Area under the curve: 0.6648
## Area under the curve: 0.5209
## Area under the curve: 0.7175
## Area under the curve: 0.5836
## Area under the curve: 0.8093
## Area under the curve: 0.8228

ROCs of observed literature LQT2 diagnosis probability with 0.5 as cutoff.

AUCs from ROCs observed literature LQT2 diagnosis probability at multiple cutoffs

## [1] "741   234"
## [1] "741   231"
## [1] "741   229"
## [1] "741   223"
## [1] "741   218"
## [1] "741   211"
## [1] "741   208"
## [1] "741   205"
## [1] "741   205"
## [1] "741   193"
## [1] "741   192"
## [1] "741   190"
## [1] "741   183"
## [1] "741   179"
## [1] "741   174"

ROC and AUC for N = 1 variants from the literature.

AUC for N = 1 variants from the literature.

## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"
## [1] "338   94"

Part 5: Forest plots

The code to produce the forest plots is shown in the code chunk.

rm(d)

# Loading combined literature, gnomAD, and cohort dataset 
load("lit_all_data_checkpoint.RData")
d<-d[d$total_carriers>0 & d$mut_type!="nonsense" & d$isoform=="A",]
d<-d[!is.na(d$var),]

mean.post <- (d$alpha + d$lqt2)/(d$alpha+d$beta+d$total_carriers)
mean.prior <- (d$alpha)/(d$alpha+d$beta)

lower.prior <- qbeta(0.025,d$alpha,d$beta)
higher.prior <- qbeta(0.975,d$alpha,d$beta) 

lower.post <- qbeta(0.025,d$alpha+d$lqt2,d$beta+d$total_carriers-d$lqt2)
higher.post <- qbeta(0.975,d$alpha+d$lqt2,d$beta+d$total_carriers-d$lqt2) 

forest.data.post <- data.frame(variant = d$var, mean=mean.post,
                          lower=lower.post, higher=higher.post, resnum=d$resnum, tc=d$total_carriers, lqt2=d$lqt2)
forest.data.post$group<-"posterior"

forest.data.prior <- data.frame(variant = d$var, mean=mean.prior,
                          lower=lower.prior, higher=higher.prior, resnum=d$resnum, tc=d$total_carriers, lqt2=d$lqt2)
forest.data.prior$group<-"prior"

forest.data<-rbind(forest.data.post, forest.data.prior)
forest.data<-forest.data[order(forest.data$resnum, forest.data$variant),]
forest.data$label<-""
forest.data[forest.data$group=="posterior","label"]<-paste(forest.data[forest.data$group=="posterior","lqt2"], "/", forest.data[forest.data$group=="posterior","tc"])

#define colours for dots and bars
dotCOLS = c("#866D4B","#000000")
barCOLS = c("#FFFFFF","#FFFFFF")

plotg <- function(a,b){
  fd<-forest.data[a:b,]
  png( paste("Plots/", a, "-",b,"pics.png",sep=""),res=300,height=10,width=10,units="in")
  p<-ggplot(fd, aes(x=reorder(variant,-resnum), y=mean, ymin=lower, ymax=higher, col=group, fill=group)) + 
  geom_text(data=fd, aes(x=reorder(variant,-resnum), label=label)) +
#specify position here
  geom_linerange(size=2,position=position_dodge(width = 1)) +
  geom_hline(yintercept=1, lty=1) +
  geom_hline(yintercept=0, lty=1) +
#specify position here too
  geom_point(size=2, shape=21, colour="white", stroke = 0.5,position=position_dodge(width = 1)) +
  scale_fill_manual(values=barCOLS)+
  scale_color_manual(values=dotCOLS)+
  scale_y_continuous(name="LQT2 Diagnosis Probability", limits = c(0, 1)) +
  coord_flip() +
  theme_minimal()
  print(p)
  dev.off() 
}

sapply(0:30*50+1,function(x) plotg(x,x+49) )

The forests plots are pasted below, where gold bars are posteriors, and black bars are EM priors. Variant name is given to the left and the number of LQT2 affected / total heterozygote count is given above the dot indicating the posterior mean.