-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot.py
More file actions
69 lines (53 loc) · 2.02 KB
/
plot.py
File metadata and controls
69 lines (53 loc) · 2.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from pathlib import Path
import csv
import matplotlib.pyplot as plt
def create_plot(gProt: dict[str, tuple[float, float]], kegg_pathways: dict[str, list[str]]):
    """Draw one scatter series per pathway and display the figure.

    Every protein named in ``kegg_pathways`` is looked up in ``gProt`` and
    its value pair becomes one point of that pathway's series.

    Args:
        gProt (dict[str, tuple[float, float]]): Protein name mapped to its
            (x, y) value pair.
        kegg_pathways (dict[str, list[str]]): Pathway identifier mapped to
            the names of the proteins annotated to it.

    Raises:
        KeyError: If a pathway lists a protein that is not a key of ``gProt``.
    """
    # Collect all coordinates first so that a failed lookup surfaces before
    # anything is drawn.
    series: dict[str, tuple[list[float], list[float]]] = {}
    for pathway, members in kegg_pathways.items():
        points = [gProt[member] for member in members]
        series[pathway] = (
            [x for x, _ in points],
            [y for _, y in points],
        )

    plt.xlabel("log2 Ratio H/L")
    plt.ylabel("log10 (protein abundance)")

    for pathway, (x_values, y_values) in series.items():
        plt.scatter(x_values, y_values, label=pathway)

    plt.legend(loc="upper right")
    plt.show()
def create_kegg_pathways(read_tsv) -> dict[str, list[str]]:
    """Map each term ID in the KEGG table to the proteins listed for it.

    Args:
        read_tsv: An iterable of rows, each a sequence of strings (for
            example a ``csv.reader`` over the tab-separated KEGG file).
            The first cell of a row is used as the key; the last cell is
            split on commas into the list of protein names.

    Returns:
        A dict from term ID to its list of protein names. The table header
        entry (key ``"#term ID"``) is removed when present.
    """
    kegg_pathways: dict[str, list[str]] = {}
    for row in read_tsv:
        # csv.reader yields an empty list for blank lines; indexing such a
        # row would raise IndexError, so skip it.
        if not row:
            continue
        kegg_pathways[row[0]] = row[-1].split(",")

    # Remove the table header from the dict. pop() with a default keeps
    # header-less input from raising KeyError.
    kegg_pathways.pop("#term ID", None)
    return kegg_pathways
def create_gprot_dict(dataset2) -> dict[str, tuple[float, float]]:
    """Read the protein table and return a value pair for each protein.

    Args:
        dataset2: An object with an ``open()`` method returning a text file
            (for example a ``pathlib.Path``). The file is expected to be
            whitespace-separated with a header on its first line.

    Returns:
        A dict mapping the second column of each data line to a tuple of
        the fourth and fifth columns, both converted to ``float``.

    Raises:
        IndexError: If a non-blank data line has fewer than five columns.
        ValueError: If the fourth or fifth column is not a valid float.
    """
    gProt: dict[str, tuple[float, float]] = {}
    with dataset2.open() as file:
        # Skip the table header; the default avoids StopIteration when the
        # file is completely empty.
        next(file, None)
        for line in file:
            fields = line.split()
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # data and would otherwise raise IndexError below.
            if not fields:
                continue
            name, input_value, property_value = fields[1], fields[3], fields[4]
            gProt[name] = (float(input_value), float(property_value))
    return gProt
def main():
    """Load the two data files next to this script and show the plot.

    Reads ``kegg.tsv`` (tab-separated) and ``log10.txt`` from the directory
    containing this file, builds the pathway and protein dicts, and passes
    them to ``create_plot``.
    """
    data_dir = Path(__file__).parent
    dataset1 = data_dir / "kegg.tsv"
    dataset2 = data_dir / "log10.txt"

    # Open inside a with-block so the handle is closed once the rows have
    # been consumed; newline="" is what the csv module expects of its input.
    with dataset1.open(newline="") as kegg_file:
        tsv_reader = csv.reader(kegg_file, delimiter="\t")
        kegg_pathways = create_kegg_pathways(tsv_reader)

    gProt = create_gprot_dict(dataset2)
    create_plot(gProt, kegg_pathways)
# Run the plotting workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()