Post

Protein-protein interaction network에서 shortest path distance 구하기

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
string_data = read.table("STRING_links_GeneName.txt",sep='\t', header = TRUE)
megena_dir = "/sc/arion/projects/scMultiscale/chos14/Project01_sysFlu/Trial02/04_MEGENA"
megena_data <- read.table(paste0(megena_dir,"/SuperCell_g",g,"/MEGENA_Network_g",g,".txt"), header = TRUE)

# Create a graph from the STRING data
string_graph <- graph_from_data_frame(string_data, directed = FALSE)

# Create a map of gene pairs to edge IDs
# tic()
# edge_map <- setNames(E(string_graph), apply(get.edgelist(string_graph), 1, paste, collapse = "--"))
# toc() #total: 171.267 sec elapsed
# saveRDS(edge_map,"edge_map.rds")
edge_map <- readRDS("edge_map.rds")

# Iterate over the rows in the MEGENA data
tic("total")
for (i in 1:nrow(megena_data)) {
    source_gene <- megena_data$row[i]
    target_gene <- megena_data$col[i]
    pair <- paste(source_gene, target_gene, sep = "--")

    # Check if the gene pair exists in the edge_map
    if (pair %in% names(edge_map)) {
        tic("edge correction"); print(i)
        E(string_graph)[edge_map[pair]]$weight <- 1 - megena_data$weight[i]
        toc()
    }}
toc()
saveRDS(string_graph,paste0("string_graph_g",g,".rds"))

g=2
string_graph <- readRDS(paste0("string_graph_g",g,".rds"))

# Ensure all edges have valid weights
E(string_graph)$weight[is.na(E(string_graph)$weight)] <- Inf # Assign a large number to missing weights
# # Calculate shortest path distances for all pairs
# dist_matrix <- distances(string_graph, v = V(string_graph), weights = E(string_graph)$weight, algorithm="bellman-ford")

# Initialize a vector to store the shortest distances
shortest_distances <- numeric(nrow(megena_data))

# Iterate over the rows in the MEGENA data and calculate shortest path distances
tic()
for (i in 1:nrow(megena_data)) {
    source_gene <- megena_data$row[i]
    target_gene <- megena_data$col[i]
    
    # Check if both genes are in the graph
    if (source_gene %in% V(string_graph)$name && target_gene %in% V(string_graph)$name) {
        # Calculate shortest path distance between the pair of genes
        shortest_distances[i] <- distances(string_graph, v = source_gene, to = target_gene, weights = E(string_graph)$weight, algorithm = "bellman-ford")
    } else {
        # Assign NA if the pair is not in the graph
        shortest_distances[i] <- NA
    }
}
toc()

# Add the shortest distances to the MEGENA data frame
megena_data$shortest_path_distance <- shortest_distances

write.table(megena_data,paste0("MEGENAdata_g",g,".txt"),row.names=FALSE,sep='\t', quote=FALSE)
This post is licensed under CC BY 4.0 by the author.

© Subin Cho. Some rights reserved.

Using the Chirpy theme for Jekyll.