We retrieve pathway and gene annotation data from the KEGG website (via its
REST API) for downstream gene or gene-set enrichment analysis.
setwd("D:/R_wordir/API_ret/KEGG_API/")

Add packages:
library("httr")
library("readr")
library("curl")
## Using libcurl 7.64.1 with Schannel
##
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
##
##     parse_date
## The following object is masked from 'package:httr':
##
##     handle_reset

Build API syntax
# Download the three KEGG REST tables used below. Every request is built from
# `baseurl` so the endpoint is defined in exactly one place.
# Lines starting with `##` are the console output recorded when the original
# document was rendered; they are kept for reference only.
baseurl <- "https://rest.kegg.jp/"

# Obtain the organism abbreviations used in the KEGG database.
# `header = FALSE` because the KEGG tables have no header row; spelled out in
# full because `F` is an ordinary variable that can be reassigned.
KEGG_org <- read.delim(paste0(baseurl, "list/organism"), header = FALSE)
class(KEGG_org)
## [1] "data.frame"
head(KEGG_org)
##       V1  V2 V3
## 1 T01001 hsa Homo sapiens (human)
## 2 T01005 ptr Pan troglodytes (chimpanzee)
## 3 T02283 pps Pan paniscus (bonobo)
## 4 T02442 ggo Gorilla gorilla gorilla (western lowland gorilla)
## 5 T01416 pon Pongo abelii (Sumatran orangutan)
## 6 T03265 nle Nomascus leucogenys (northern white-cheeked gibbon)
##                                       V4
## 1 Eukaryotes;Animals;Vertebrates;Mammals
## 2 Eukaryotes;Animals;Vertebrates;Mammals
## 3 Eukaryotes;Animals;Vertebrates;Mammals
## 4 Eukaryotes;Animals;Vertebrates;Mammals
## 5 Eukaryotes;Animals;Vertebrates;Mammals
## 6 Eukaryotes;Animals;Vertebrates;Mammals

# Obtain the names of all KEGG pathways for human (organism code "hsa").
hsa_pathway <- read.delim(paste0(baseurl, "list/pathway/hsa"), header = FALSE)
class(hsa_pathway)
## [1] "data.frame"
head(hsa_pathway)
##              V1 V2
## 1 path:hsa00010 Glycolysis / Gluconeogenesis - Homo sapiens (human)
## 2 path:hsa00020 Citrate cycle (TCA cycle) - Homo sapiens (human)
## 3 path:hsa00030 Pentose phosphate pathway - Homo sapiens (human)
## 4 path:hsa00040 Pentose and glucuronate interconversions - Homo sapiens (human)
## 5 path:hsa00051 Fructose and mannose metabolism - Homo sapiens (human)
## 6 path:hsa00052 Galactose metabolism - Homo sapiens (human)

# Obtain the pathway-to-gene links for human (one row per pathway/gene pair).
PATHWAYID2GENEID <- read.delim(paste0(baseurl, "link/hsa/pathway"),
                               header = FALSE)
class(PATHWAYID2GENEID)
## [1] "data.frame"
head(PATHWAYID2GENEID)
##              V1        V2
## 1 path:hsa00010 hsa:10327
## 2 path:hsa00010   hsa:124
## 3 path:hsa00010   hsa:125
## 4 path:hsa00010   hsa:126
## 5 path:hsa00010   hsa:127
## 6 path:hsa00010   hsa:128

# Integrate the pathway names with the pathway-to-gene-ID table and transform
# the result to list format (one gene-ID vector per pathway) ----
# Build one character vector of gene IDs per pathway, in the order in which
# the pathways first appear in the link table.
# Lines starting with `##` are recorded console output, kept for reference.
pathIDs <- unique(PATHWAYID2GENEID$V1)
PATHWAYIDs_GENEIDs_list <- lapply(pathIDs, function(x) {
  gene_ids <- as.vector(PATHWAYID2GENEID[PATHWAYID2GENEID$V1 == x, "V2"])
  # Drop the leading 4-character "hsa:" prefix, keeping the bare gene ID.
  substring(gene_ids, 5)
})
head(PATHWAYIDs_GENEIDs_list)
## [[1]]
##  [1] "10327"  "124"    "125"    "126"    "127"    "128"    "130"    "130589"
##  [9] "131"    "160287" "1737"   "1738"   "2023"   "2026"   "2027"   "217"
## [17] "218"    "219"    "2203"   "221"    "222"    "223"    "224"    "226"
## [25] "229"    "230"    "2538"   "2597"   "26330"  "2645"   "2821"   "3098"
## [33] "3099"   "3101"   "387712" "3939"   "3945"   "3948"   "441531" "501"
## [41] "5105"   "5106"   "5160"   "5161"   "5162"   "5211"   "5213"   "5214"
## [49] "5223"   "5224"   "5230"   "5232"   "5236"   "5313"   "5315"   "55276"
## [57] "55902"  "57818"  "669"    "7167"   "80201"  "83440"  "84532"  "8789"
## [65] "92483"  "92579"  "9562"
##
## [[2]]
##  [1] "1431"  "1737"  "1738"  "1743"  "2271"  "3417"  "3418"  "3419"  "3420"
## [10] "3421"  "4190"  "4191"  "47"    "48"    "4967"  "50"    "5091"  "5105"
## [19] "5106"  "5160"  "5161"  "5162"  "55753" "6389"  "6390"  "6391"  "6392"
## [28] "8801"  "8802"  "8803"
##
## [[3]]
##  [1] "132158" "2203"   "221823" "226"    "229"    "22934"  "230"    "2539"
##  [9] "25796"  "2821"   "414328" "51071"  "5211"   "5213"   "5214"   "5226"
## [17] "5236"   "55276"  "5631"   "5634"   "6120"   "64080"  "6888"   "7086"
## [25] "729020" "8277"   "84076"  "8789"   "9104"   "9563"
##
## [[4]]
##  [1] "10327"  "10720"  "10941"  "231"    "27294"  "2990"   "51084"  "51181"
##  [9] "54490"  "54575"  "54576"  "54577"  "54578"  "54579"  "54600"  "54657"
## [17] "54658"  "54659"  "55277"  "57016"  "574537" "6120"   "6652"   "729020"
## [25] "729920" "7358"   "7360"   "7363"   "7364"   "7365"   "7366"   "7367"
## [33] "79799"  "9365"   "9942"
##
## [[5]]
##  [1] "197258" "2203"   "226"    "229"    "230"    "231"    "26007"  "2762"
##  [9] "29925"  "29926"  "3098"   "3099"   "3101"   "3795"   "4351"   "5207"
## [17] "5208"   "5209"   "5210"   "5211"   "5213"   "5214"   "5372"   "5373"
## [25] "55556"  "57016"  "57103"  "6652"   "7167"   "7264"   "80201"  "8789"
## [33] "8790"
##
## [[6]]
##  [1] "130589" "231"    "2538"   "2548"   "2582"   "2584"   "2592"   "2595"
##  [9] "2645"   "2683"   "2717"   "2720"   "3098"   "3099"   "3101"   "3906"
## [17] "3938"   "5211"   "5213"   "5214"   "5236"   "55276"  "57016"  "57818"
## [25] "6476"   "7360"   "80201"  "8704"   "8972"   "92579"  "93432"

# Check that the pathway IDs from the link table line up, in order, with the
# pathway name table; the names below are assigned by position.
identical(pathIDs, hsa_pathway$V1)
## [1] TRUE

# Add names to the list.
# Strip only the FINAL " - <organism>" suffix from each pathway title. The
# greedy `(.*)` keeps everything up to the last " - ", so titles that contain
# an internal " - " (e.g. "Glycosaminoglycan biosynthesis - heparan sulfate /
# heparin - Homo sapiens (human)") keep their full, distinct name instead of
# being cut at the first dash, and no trailing space is left on the name.
# Titles without " - " are left unchanged.
names(PATHWAYIDs_GENEIDs_list) <- sub("^(.*) - .*$", "\\1", hsa_pathway$V2)
head(PATHWAYIDs_GENEIDs_list)
## (recorded output, with the names shown as the corrected code produces them,
##  i.e. without the trailing space)
## $`Glycolysis / Gluconeogenesis`
##  [1] "10327"  "124"    "125"    "126"    "127"    "128"    "130"    "130589"
##  [9] "131"    "160287" "1737"   "1738"   "2023"   "2026"   "2027"   "217"
## [17] "218"    "219"    "2203"   "221"    "222"    "223"    "224"    "226"
## [25] "229"    "230"    "2538"   "2597"   "26330"  "2645"   "2821"   "3098"
## [33] "3099"   "3101"   "387712" "3939"   "3945"   "3948"   "441531" "501"
## [41] "5105"   "5106"   "5160"   "5161"   "5162"   "5211"   "5213"   "5214"
## [49] "5223"   "5224"   "5230"   "5232"   "5236"   "5313"   "5315"   "55276"
## [57] "55902"  "57818"  "669"    "7167"   "80201"  "83440"  "84532"  "8789"
## [65] "92483"  "92579"  "9562"
##
## $`Citrate cycle (TCA cycle)`
##  [1] "1431"  "1737"  "1738"  "1743"  "2271"  "3417"  "3418"  "3419"  "3420"
## [10] "3421"  "4190"  "4191"  "47"    "48"    "4967"  "50"    "5091"  "5105"
## [19] "5106"  "5160"  "5161"  "5162"  "55753" "6389"  "6390"  "6391"  "6392"
## [28] "8801"  "8802"  "8803"
##
## $`Pentose phosphate pathway`
##  [1] "132158" "2203"   "221823" "226"    "229"    "22934"  "230"    "2539"
##  [9] "25796"  "2821"   "414328" "51071"  "5211"   "5213"   "5214"   "5226"
## [17] "5236"   "55276"  "5631"   "5634"   "6120"   "64080"  "6888"   "7086"
## [25] "729020" "8277"   "84076"  "8789"   "9104"   "9563"
##
## $`Pentose and glucuronate interconversions`
##  [1] "10327"  "10720"  "10941"  "231"    "27294"  "2990"   "51084"  "51181"
##  [9] "54490"  "54575"  "54576"  "54577"  "54578"  "54579"  "54600"  "54657"
## [17] "54658"  "54659"  "55277"  "57016"  "574537" "6120"   "6652"   "729020"
## [25] "729920" "7358"   "7360"   "7363"   "7364"   "7365"   "7366"   "7367"
## [33] "79799"  "9365"   "9942"
##
## $`Fructose and mannose metabolism`
##  [1] "197258" "2203"   "226"    "229"    "230"    "231"    "26007"  "2762"
##  [9] "29925"  "29926"  "3098"   "3099"   "3101"   "3795"   "4351"   "5207"
## [17] "5208"   "5209"   "5210"   "5211"   "5213"   "5214"   "5372"   "5373"
## [25] "55556"  "57016"  "57103"  "6652"   "7167"   "7264"   "80201"  "8789"
## [33] "8790"
##
## $`Galactose metabolism`
##  [1] "130589" "231"    "2538"   "2548"   "2582"   "2584"   "2592"   "2595"
##  [9] "2645"   "2683"   "2717"   "2720"   "3098"   "3099"   "3101"   "3906"
## [17] "3938"   "5211"   "5213"   "5214"   "5236"   "55276"  "57016"  "57818"
## [25] "6476"   "7360"   "80201"  "8704"   "8972"   "92579"  "93432"

# Generate a GMT-format file for GSEA or other gene-set analyses ----
We build a function, write_gmt, that writes the pathway-to-gene-ID list
(PATHWAYIDs_GENEIDs_list) to a file in GMT format.
# Write a named list of gene sets to a file in GMT format: one line per set,
# holding the set name, a description and then the member genes, all separated
# by tab characters.
#
# set_ls: gene sets as a named list; each element is a vector of gene IDs.
#         Elements are written in list order. If the list has no names,
#         nothing is written and an empty file is created.
# out:    output file name (including directory).
# desc:   optional list of descriptions keyed by gene-set name (e.g. a URL).
#         Sets without an entry get the placeholder "KEGG_website".
# Returns NULL invisibly; called for its side effect of writing `out`.
write_gmt <- function(set_ls, out, desc = list()) {
  set_names <- names(set_ls)
  gmt_lines <- vapply(seq_along(set_names), function(i) {
    set_name <- set_names[[i]]
    entry <- desc[[set_name]]
    # A plain scalar if/else: `ifelse()` would strip attributes and turn a
    # factor description into its integer code. Only the first element is
    # used, so a set always has exactly one description field.
    descp <- if (is.null(entry)) "KEGG_website" else as.character(entry)[1L]
    # Look the genes up by position, not by name, so that sets sharing a name
    # each keep their own genes (`set_ls[[name]]` returns only the first match).
    # Fields must be separated by a real tab ("\t"), not the letter "t".
    paste0(c(set_name, descp, set_ls[[i]]), collapse = "\t")
  }, character(1))
  # Given a file name, writeLines() opens and closes the file itself, so no
  # connection is left open if writing fails.
  writeLines(gmt_lines, con = out)
  invisible(NULL)
}

write_gmt(PATHWAYIDs_GENEIDs_list, out = "./KEGG_hsa.gmt")