栏目分类:
子分类:
返回
文库吧用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
文库吧 > IT > 软件开发 > 后端开发 > Java

KEGG

Java 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

KEGG

We retrieve data from the KEGG website (through its REST API) for
downstream gene or gene-set enrichment analysis.

# NOTE(review): hard-coded, machine-specific working directory. Relative paths
# used later in this script (e.g. the GMT output file) resolve against it.
setwd("D:/R_wordir/API_ret/KEGG_API/")
Add package:
library("httr")
library("readr")
library("curl")

## Using libcurl 7.64.1 with Schannel

## 
## Attaching package: 'curl'

## The following object is masked from 'package:readr':
## 
##     parse_date

## The following object is masked from 'package:httr':
## 
##     handle_reset
Build API syntax
# Base URL of the KEGG REST API.
# NOTE(review): not referenced by the visible code below, which spells out the
# full URL in every request.
baseurl <- "https://rest.kegg.jp/"

## Obtain the organism table (T number, abbreviation, name, lineage) from KEGG
# The response has no header row, hence `header = FALSE`. `FALSE` is spelled
# out because `F` is an ordinary variable that can be reassigned.
KEGG_org <- read.delim("https://rest.kegg.jp/list/organism", header = FALSE)
class(KEGG_org)

## [1] "data.frame"

head(KEGG_org)

##       V1  V2                                                  V3
## 1 T01001 hsa                                Homo sapiens (human)
## 2 T01005 ptr                        Pan troglodytes (chimpanzee)
## 3 T02283 pps                               Pan paniscus (bonobo)
## 4 T02442 ggo   Gorilla gorilla gorilla (western lowland gorilla)
## 5 T01416 pon                   Pongo abelii (Sumatran orangutan)
## 6 T03265 nle Nomascus leucogenys (northern white-cheeked gibbon)
##                                       V4
## 1 Eukaryotes;Animals;Vertebrates;Mammals
## 2 Eukaryotes;Animals;Vertebrates;Mammals
## 3 Eukaryotes;Animals;Vertebrates;Mammals
## 4 Eukaryotes;Animals;Vertebrates;Mammals
## 5 Eukaryotes;Animals;Vertebrates;Mammals
## 6 Eukaryotes;Animals;Vertebrates;Mammals

## Obtain all KEGG pathway IDs and titles for human (hsa)
# Column V1 holds the pathway ID and V2 the title followed by the organism
# suffix. `FALSE` is spelled out because `F` can be reassigned.
hsa_pathway <- read.delim(
  "https://rest.kegg.jp/list/pathway/hsa",
  header = FALSE
)
class(hsa_pathway)

## [1] "data.frame"

head(hsa_pathway)

##              V1                                                              V2
## 1 path:hsa00010             Glycolysis / Gluconeogenesis - Homo sapiens (human)
## 2 path:hsa00020                Citrate cycle (TCA cycle) - Homo sapiens (human)
## 3 path:hsa00030                Pentose phosphate pathway - Homo sapiens (human)
## 4 path:hsa00040 Pentose and glucuronate interconversions - Homo sapiens (human)
## 5 path:hsa00051          Fructose and mannose metabolism - Homo sapiens (human)
## 6 path:hsa00052                     Galactose metabolism - Homo sapiens (human)

## Obtain the pathway-to-gene links for human (hsa)
# One row per (pathway, gene) pair: V1 is the pathway ID, V2 the gene ID with
# an "hsa:" prefix. `FALSE` is spelled out because `F` can be reassigned.
PATHWAYID2GENEID <- read.delim(
  "https://rest.kegg.jp/link/hsa/pathway",
  header = FALSE
)
class(PATHWAYID2GENEID)

## [1] "data.frame"

head(PATHWAYID2GENEID)

##              V1        V2
## 1 path:hsa00010 hsa:10327
## 2 path:hsa00010   hsa:124
## 3 path:hsa00010   hsa:125
## 4 path:hsa00010   hsa:126
## 5 path:hsa00010   hsa:127
## 6 path:hsa00010   hsa:128
Integrate the pathway table with the pathway-to-gene-ID table and transform the result into a list: one vector of gene IDs per pathway.
# Distinct pathway IDs, in order of first appearance in the link table.
pathIDs <- unique(PATHWAYID2GENEID$V1)

# For each pathway, collect its gene IDs and drop the first four characters
# (the "hsa:" prefix) so only the bare gene ID remains.
PATHWAYIDs_GENEIDs_list <- lapply(pathIDs, function(path_id) {
  in_pathway <- PATHWAYID2GENEID$V1 == path_id
  gene_ids <- as.vector(PATHWAYID2GENEID[in_pathway, "V2"])
  substring(gene_ids, 5)
})
head(PATHWAYIDs_GENEIDs_list)

## [[1]]
##  [1] "10327"  "124"    "125"    "126"    "127"    "128"    "130"    "130589"
##  [9] "131"    "160287" "1737"   "1738"   "2023"   "2026"   "2027"   "217"   
## [17] "218"    "219"    "2203"   "221"    "222"    "223"    "224"    "226"   
## [25] "229"    "230"    "2538"   "2597"   "26330"  "2645"   "2821"   "3098"  
## [33] "3099"   "3101"   "387712" "3939"   "3945"   "3948"   "441531" "501"   
## [41] "5105"   "5106"   "5160"   "5161"   "5162"   "5211"   "5213"   "5214"  
## [49] "5223"   "5224"   "5230"   "5232"   "5236"   "5313"   "5315"   "55276" 
## [57] "55902"  "57818"  "669"    "7167"   "80201"  "83440"  "84532"  "8789"  
## [65] "92483"  "92579"  "9562"  
## 
## [[2]]
##  [1] "1431"  "1737"  "1738"  "1743"  "2271"  "3417"  "3418"  "3419"  "3420" 
## [10] "3421"  "4190"  "4191"  "47"    "48"    "4967"  "50"    "5091"  "5105" 
## [19] "5106"  "5160"  "5161"  "5162"  "55753" "6389"  "6390"  "6391"  "6392" 
## [28] "8801"  "8802"  "8803" 
## 
## [[3]]
##  [1] "132158" "2203"   "221823" "226"    "229"    "22934"  "230"    "2539"  
##  [9] "25796"  "2821"   "414328" "51071"  "5211"   "5213"   "5214"   "5226"  
## [17] "5236"   "55276"  "5631"   "5634"   "6120"   "64080"  "6888"   "7086"  
## [25] "729020" "8277"   "84076"  "8789"   "9104"   "9563"  
## 
## [[4]]
##  [1] "10327"  "10720"  "10941"  "231"    "27294"  "2990"   "51084"  "51181" 
##  [9] "54490"  "54575"  "54576"  "54577"  "54578"  "54579"  "54600"  "54657" 
## [17] "54658"  "54659"  "55277"  "57016"  "574537" "6120"   "6652"   "729020"
## [25] "729920" "7358"   "7360"   "7363"   "7364"   "7365"   "7366"   "7367"  
## [33] "79799"  "9365"   "9942"  
## 
## [[5]]
##  [1] "197258" "2203"   "226"    "229"    "230"    "231"    "26007"  "2762"  
##  [9] "29925"  "29926"  "3098"   "3099"   "3101"   "3795"   "4351"   "5207"  
## [17] "5208"   "5209"   "5210"   "5211"   "5213"   "5214"   "5372"   "5373"  
## [25] "55556"  "57016"  "57103"  "6652"   "7167"   "7264"   "80201"  "8789"  
## [33] "8790"  
## 
## [[6]]
##  [1] "130589" "231"    "2538"   "2548"   "2582"   "2584"   "2592"   "2595"  
##  [9] "2645"   "2683"   "2717"   "2720"   "3098"   "3099"   "3101"   "3906"  
## [17] "3938"   "5211"   "5213"   "5214"   "5236"   "55276"  "57016"  "57818" 
## [25] "6476"   "7360"   "80201"  "8704"   "8972"   "92579"  "93432"

## Check that the pathway IDs from the link table match, element for element
## and in the same order, the IDs in the pathway title table. The naming step
## below assigns titles by position, so this must be TRUE.
identical(pathIDs,hsa_pathway$V1)

## [1] TRUE

## Add pathway titles as names of the gene-set list
# Strip the trailing " - <organism>" suffix by cutting at the LAST " - ".
# Cutting at the first " -" (as before) kept a trailing space and truncated
# titles that themselves contain " - " (e.g. "Autophagy - animal"), producing
# duplicate names. Titles with no " - " are kept unchanged.
pathway_titles <- sub("^(.*) - .*$", "\\1", hsa_pathway$V2)

# Align titles to the list by pathway ID rather than by position, ignoring an
# optional "path:" prefix on either side.
title_idx <- match(
  sub("^path:", "", pathIDs),
  sub("^path:", "", hsa_pathway$V1)
)

# Fall back to the pathway ID itself when no title is found for it.
names(PATHWAYIDs_GENEIDs_list) <- ifelse(
  is.na(title_idx),
  as.character(pathIDs),
  pathway_titles[title_idx]
)
head(PATHWAYIDs_GENEIDs_list)

## $`Glycolysis / Gluconeogenesis `
##  [1] "10327"  "124"    "125"    "126"    "127"    "128"    "130"    "130589"
##  [9] "131"    "160287" "1737"   "1738"   "2023"   "2026"   "2027"   "217"   
## [17] "218"    "219"    "2203"   "221"    "222"    "223"    "224"    "226"   
## [25] "229"    "230"    "2538"   "2597"   "26330"  "2645"   "2821"   "3098"  
## [33] "3099"   "3101"   "387712" "3939"   "3945"   "3948"   "441531" "501"   
## [41] "5105"   "5106"   "5160"   "5161"   "5162"   "5211"   "5213"   "5214"  
## [49] "5223"   "5224"   "5230"   "5232"   "5236"   "5313"   "5315"   "55276" 
## [57] "55902"  "57818"  "669"    "7167"   "80201"  "83440"  "84532"  "8789"  
## [65] "92483"  "92579"  "9562"  
## 
## $`Citrate cycle (TCA cycle) `
##  [1] "1431"  "1737"  "1738"  "1743"  "2271"  "3417"  "3418"  "3419"  "3420" 
## [10] "3421"  "4190"  "4191"  "47"    "48"    "4967"  "50"    "5091"  "5105" 
## [19] "5106"  "5160"  "5161"  "5162"  "55753" "6389"  "6390"  "6391"  "6392" 
## [28] "8801"  "8802"  "8803" 
## 
## $`Pentose phosphate pathway `
##  [1] "132158" "2203"   "221823" "226"    "229"    "22934"  "230"    "2539"  
##  [9] "25796"  "2821"   "414328" "51071"  "5211"   "5213"   "5214"   "5226"  
## [17] "5236"   "55276"  "5631"   "5634"   "6120"   "64080"  "6888"   "7086"  
## [25] "729020" "8277"   "84076"  "8789"   "9104"   "9563"  
## 
## $`Pentose and glucuronate interconversions `
##  [1] "10327"  "10720"  "10941"  "231"    "27294"  "2990"   "51084"  "51181" 
##  [9] "54490"  "54575"  "54576"  "54577"  "54578"  "54579"  "54600"  "54657" 
## [17] "54658"  "54659"  "55277"  "57016"  "574537" "6120"   "6652"   "729020"
## [25] "729920" "7358"   "7360"   "7363"   "7364"   "7365"   "7366"   "7367"  
## [33] "79799"  "9365"   "9942"  
## 
## $`Fructose and mannose metabolism `
##  [1] "197258" "2203"   "226"    "229"    "230"    "231"    "26007"  "2762"  
##  [9] "29925"  "29926"  "3098"   "3099"   "3101"   "3795"   "4351"   "5207"  
## [17] "5208"   "5209"   "5210"   "5211"   "5213"   "5214"   "5372"   "5373"  
## [25] "55556"  "57016"  "57103"  "6652"   "7167"   "7264"   "80201"  "8789"  
## [33] "8790"  
## 
## $`Galactose metabolism `
##  [1] "130589" "231"    "2538"   "2548"   "2582"   "2584"   "2592"   "2595"  
##  [9] "2645"   "2683"   "2717"   "2720"   "3098"   "3099"   "3101"   "3906"  
## [17] "3938"   "5211"   "5213"   "5214"   "5236"   "55276"  "57016"  "57818" 
## [25] "6476"   "7360"   "80201"  "8704"   "8972"   "92579"  "93432"
Generate a GMT file for GSEA or other gene-set analyses

We build a function, write_gmt, that writes the pathway-to-gene-ID list
to a file in GMT format.

# Write a named list of gene sets to a file in GMT format.
#
# Each gene set becomes one line of tab-separated fields:
#   <set name> <TAB> <description> <TAB> <gene 1> <TAB> <gene 2> ...
#
# set_ls: named list of gene sets; each element is a vector of gene IDs.
#         Elements are written in list order. An unnamed list writes nothing.
# out:    path of the output file (created or overwritten).
# desc:   optional list of descriptions keyed by gene-set name. Sets without
#         an entry get the placeholder "KEGG_website". Only the first element
#         of an entry is used.
#
# Returns NULL invisibly; called for its side effect of writing `out`.
write_gmt <- function(set_ls, out, desc = list()) {
  set_names <- names(set_ls)

  # Build every line before opening the file, so a failure while formatting
  # never leaves a half-written file or an open connection behind.
  gmt_lines <- vapply(seq_along(set_names), function(i) {
    name <- set_names[[i]]
    set_desc <- desc[[name]]
    descp <- if (is.null(set_desc)) "KEGG_website" else set_desc[1]
    # Index by position, not by name, so sets sharing a name keep their own
    # genes. Fields are joined with a real tab character.
    paste0(c(name, descp, as.character(set_ls[[i]])), collapse = "\t")
  }, character(1))

  con <- file(description = out, open = "wt")
  # Close the connection even if writing fails.
  on.exit(close(con), add = TRUE)
  writeLines(gmt_lines, con = con)

  invisible(NULL)
}

# Write the named pathway gene-set list to "KEGG_hsa.gmt" in the current
# working directory.
write_gmt(PATHWAYIDs_GENEIDs_list,out = "./KEGG_hsa.gmt")
转载请注明:文章转载自 www.wk8.com.cn
本文地址:https://www.wk8.com.cn/it/1039175.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 wk8.com.cn

ICP备案号:晋ICP备2021003244-6号