#m6A peak length distribution
quantile(as.numeric(peaks$end) - as.numeric(peaks$start))

[1] "82 TWAS genes nearby dsRNAs predicted with antisense approach:"

[1] "9 TWAS genes nearby dsRNA predicted with editing clusters:"


twas_dsRNA[, "trait_type"] <- "blood"
twas_dsRNA$trait_type[twas_dsRNA$trait %in% other_traits]<- "non-blood"
ggplot(data.frame(twas_dsRNA), aes(x = m6aQTL_Z, y = gwas_Z)) +

geom_point() + facet_grid(.~trait_type) +
theme(axis.text=element_text(size=16), 
                axis.title=element_text(size=16),
                strip.text.x=element_text(size = 18, face="bold"),
                panel.background = element_rect(fill="white"), 
                panel.spacing.x = unit(2, "lines"),
                panel.spacing.y = unit(1, "lines"),
                panel.grid.major.y=element_line(colour="grey", linetype = "dotted"),
                panel.grid.major.x=element_line(colour="grey", linetype = "dotted"),
                panel.border = element_rect(fill = NA, 
                                            colour = "black"))


twas_sub<-twas_dsRNA[twas_dsRNA$trait_type == "immune", ]
model <-lm(twas_sub$gwas_Z ~ twas_sub$m6aQTL_Z)
summary(model)

Call:
lm(formula = twas_sub$gwas_Z ~ twas_sub$m6aQTL_Z)

Residuals:
   Min     1Q Median     3Q    Max 
-9.491 -4.359  2.479  4.657 12.854 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)
(Intercept)         0.7041     1.7989   0.391    0.702
twas_sub$m6aQTL_Z   0.3802     0.4104   0.926    0.371

Residual standard error: 6.957 on 13 degrees of freedom
Multiple R-squared:  0.06193,	Adjusted R-squared:  -0.01023 
F-statistic: 0.8582 on 1 and 13 DF,  p-value: 0.3711

[1] "The total number prioritized TWAS genes potentially form dsRNAs:"

[1] "Proportion of all the genes that can form dsRNAs: "


quantile(resultsTWAS$queryL)
hist(resultsTWAS$queryL, main = "", breaks=20, 
     xlab = "Gene lengths for all genes with overlapped targets",
     cex = 1.5, cex.axis = 1.5, cex.lab = 1.5)


head(overlaps)

DataFrame with 6 rows and 9 columns
                         gr        gene   m6aQTL_ID  m6aQTL_Z
                  <GRanges> <character> <character> <numeric>
1 chr16:31128534-31138618:+        KAT8   rs4527034      5.72
2 chr16:31128534-31138618:+        KAT8   rs4527034      5.72
3 chr21:45642428-45647025:-      ICOSLG   rs2070554     -3.48
4 chr21:45642428-45647025:-      ICOSLG   rs2070554     -3.48
5 chr19:19739832-19745177:-        GMIP    rs873870     -4.58
6 chr19:19739832-19745177:-        GMIP    rs873870     -4.58
                   trait     gwas_ID    gwas_Z                 gr.double
             <character> <character> <numeric>                 <GRanges>
1        Body Mass Index    rs749767    -6.079 chr16:31132570-31135107:+
2        Body Mass Index    rs749767    -6.079 chr16:31127726-31128688:-
3 Inflammatory Bowel D..   rs2838520   -10.110 chr21:45643735-45644567:-
4   Rheumatoid Arthritis   rs4819388    -5.050 chr21:45643735-45644567:-
5 Low Density Lipoprot..  rs16996148   -14.150 chr19:19741468-19744826:-
6      Total Cholesterol  rs16996148   -16.690 chr19:19741468-19744826:-
    gene_name
  <character>
1        KAT8
2           .
3      ICOSLG
4      ICOSLG
5       LPAR2
6       LPAR2


colnames(twas)

matchedAS	knownAS	prop
<int>	<int>	<dbl>
523	814	0.64

queryTranscript.gene_name	ref.gene_name	targets.gene_name
<chr>	<chr>	<chr>
PINK1	PINK1-AS	naPINK1
SIX3	SIX3-AS1	Six3OS
VAX2	ENSG00000258881	Vax2OS
VAX2	ATP6V1B1-AS1	Vax2OS
ZEB2	ZEB2-AS1	Zeb2 NAT
MKRN2	MKRN2OS	RAF1
MKRN2	RAF1	RAF1
SMAD5	SMAD5-AS1	DAMS
CDYL	CDYL-AS1	CDYL-AS
EMX2	EMX2OS	EMX2OS

	num_dsRNA	num_TWAS_genes	enrichment
	<dbl>	<dbl>	<dbl>
Allergy	0	7	0.00000
Allergy Eczema	5	20	0.78125
Alzheimers	0	1	0.00000
Asthma	0	2	0.00000
Autoimmune Traits (Sure)	0	1	0.00000
Bipolar Disorder	2	2	3.12500

	trait	dsRNA_genes
	<chr>	<chr>
1	Body Mass Index	TRUB2
2	Body Mass Index	KAT8
5	Rheumatoid Arthritis	RUFY3
6	Rheumatoid Arthritis	TXNDC11
7	Rheumatoid Arthritis	ICOSLG
15	Crohn's Disease	GON4L
16	Crohn's Disease	ICOSLG
17	High Density Lipoprotein	TGOLN2
18	High Density Lipoprotein	BAZ1B
20	Triglycerides	BAZ1B
21	Triglycerides	PCSK7
22	Triglycerides	KAT8
23	White Blood Cell Count	THEMIS2
24	White Blood Cell Count	GNA12
25	White Blood Cell Count	DENND3
26	White Blood Cell Count	WAC-AS1
27	White Blood Cell Count	DDX55
28	White Blood Cell Count	PHF11
29	White Blood Cell Count	RABEP1
30	White Blood Cell Count	SLC9A3R1
35	Hemoglobin Concentration	GTPBP2
36	Hemoglobin Concentration	ZNF839
37	Hemoglobin Concentration	SLC7A5
38	Hemoglobin Concentration	SMG9
39	Platelet Count	THEMIS2
40	Platelet Count	GNA12
41	Platelet Count	OGDH
42	Platelet Count	BAZ1B
43	Platelet Count	LAMTOR4
44	Platelet Count	MEPCE
45	Platelet Count	FERMT3
46	Platelet Count	NEAT1
47	Platelet Count	ZNF839
48	Platelet Count	HM13
57	Granulocyte Count	TGOLN2
58	Granulocyte Count	DENND3
59	Granulocyte Count	WAC-AS1
60	Granulocyte Count	RABEP1
61	Granulocyte Count	SLC9A3R1
62	Neutrophil Count	TGOLN2
63	Neutrophil Count	DENND3
64	Neutrophil Count	WAC-AS1
65	Neutrophil Count	RABEP1
76	Menopause Age	NSUN4
77	Menopause Age	MEPCE
78	Menopause Age	RABEP1

Identifying transcripts that can form double strand RNAs¶

Procedure¶

Validate with labeled antisense transcripts from previous GENCODE release¶

Validate with the three pairs of cis-NATs tested in Qin Li's nature paper¶

Validate with reported antisense transcripts and their targets in the review paper¶

Testing on TWAS prioritized genes with two approaches¶

Examine whether dsRNA transcripts have m6A peaks nearby?¶

Directional effects of m6A QTL on targeted genes that have dsRNAs nearby¶

Repeat the analysis with genes that show evidence of colocalization¶

Enrichment for dsRNA genes among TWAS prioritized genes¶

Length distribution for dsRNA genes¶

An alternative approach for dsRNA predictions¶

Using genes associated with predicted dsRNAs from the review paper¶

Distribution for the predicted folding free energy per nucleotide¶

Enrichment of TWAS genes predicted to form dsRNA¶

Directional effects of m6A QTL on genes associated with dsRNA¶