Skip to content

Commit

Permalink
tested on various methods
Browse files: browse the repository at this point in the history
ctrlaltaf committed Dec 2, 2024
1 parent 3998107 commit 4e42295
Showing 3 changed files with 18 additions and 20 deletions.
17 changes: 10 additions & 7 deletions Snakefile
Original file line number Diff line number Diff line change
@@ -389,22 +389,21 @@ rule evaluation:
ensemble_file=lambda wildcards: f"{out_dir}{SEP}{get_dataset_label(wildcards)}-ml{SEP}ensemble-pathway.txt",
pca_coordinates_file =lambda wildcards: f"{out_dir}{SEP}{get_dataset_label(wildcards)}-ml{SEP}pca-coordinates.txt"
output:
pr_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "precision-recall-per-pathway.txt"]),
pr_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-per-pathway.png']),
pr_node_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "precision-recall-per-pathway.txt"]),
pr_node_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-per-pathway.png']),
pr_edge_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "precision-recall-per-pathway_edge.txt"]),
pr_edge_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-per-pathway_edge.png']),
pr_curve_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-curve-ensemble-nodes.png']),
pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "precision-recall-pca-chosen-pathway.txt"]),
run:
node_table = Evaluation.from_file(input.gold_standard_file).node_table
edge_table = Evaluation.from_file(input.gold_standard_file).edge_table
Evaluation.precision_and_recall(input.pathways, node_table, algorithms, output.pr_file, output.pr_png)
Evaluation.precision_and_recall_node(input.pathways, node_table, algorithms, output.pr_node_file, output.pr_node_png)
Evaluation.precision_and_recall_edge(input.pathways, edge_table, algorithms, output.pr_edge_file, output.pr_edge_png)
node_ensemble = Evaluation.edge_frequency_node_ensemble(input.ensemble_file)
Evaluation.precision_recall_curve_node_ensemble(node_ensemble, node_table, output.pr_curve_png)
pca_chosen_pathway = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, out_dir)
Evaluation.precision_and_recall(pca_chosen_pathway, node_table, algorithms, output.pca_chosen_pr_file)
# Evaluation.precision_and_recall_edge(pca_chosen_pathway, edge_table, algorithms, output.pca_chosen_pr_file)
Evaluation.precision_and_recall_node(pca_chosen_pathway, node_table, algorithms, output.pca_chosen_pr_file)


# Returns all pathways for a specific algorithm and dataset
@@ -431,9 +430,13 @@ rule evaluation_per_algo_pathways:
output:
pr_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "{algorithm}-precision-recall-per-pathway.txt"]),
pr_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', '{algorithm}-precision-recall-per-pathway.png']),
pr_edge_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "{algorithm}-precision-recall-per-pathway_edge.txt"]),
pr_edge_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', '{algorithm}-precision-recall-per-pathway_edge.png']),
run:
node_table = Evaluation.from_file(input.gold_standard_file).node_table
Evaluation.precision_and_recall(input.pathways, node_table, algorithms, output.pr_file, output.pr_png)
Evaluation.precision_and_recall_node(input.pathways, node_table, algorithms, output.pr_file, output.pr_png)
edge_table = Evaluation.from_file(input.gold_standard_file).edge_table
Evaluation.precision_and_recall_edge(input.pathways, edge_table, algorithms, output.pr_edge_file, output.pr_edge_png)

rule evaluation_per_algo_ensemble_pr_curve:
input:
@@ -455,7 +458,7 @@ rule evaluation_per_algo_pca_chosen:
run:
node_table = Evaluation.from_file(input.gold_standard_file).node_table
pca_chosen_pathway = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, out_dir)
Evaluation.precision_and_recall(pca_chosen_pathway, node_table, algorithms, output.pca_chosen_pr_file)
Evaluation.precision_and_recall_node(pca_chosen_pathway, node_table, algorithms, output.pca_chosen_pr_file)

# Remove the output directory
rule clean:
10 changes: 5 additions & 5 deletions config/synthetic.yaml
Original file line number Diff line number Diff line change
@@ -45,7 +45,7 @@ container_registry:
algorithms:
- name: "pathlinker"
params:
include: false
include: true
run1:
k: range(100,201,100)

@@ -69,26 +69,26 @@ algorithms:

- name: "meo"
params:
include: false
include: true
run1:
max_path_length: [3]
local_search: ["Yes"]
rand_restarts: [10]

- name: "mincostflow"
params:
include: false
include: true
run1:
flow: [1] # The flow must be an int
capacity: [1]

- name: "allpairs"
params:
include: false
include: true

- name: "domino"
params:
include: false
include: true
run1:
slice_threshold: [0.3]
module_threshold: [0.05]
11 changes: 3 additions & 8 deletions spras/evaluation.py
Original file line number Diff line number Diff line change
@@ -84,11 +84,11 @@ def load_files_from_dict(self, gold_standard_dict: Dict):
@staticmethod
def precision_and_recall_edge(file_paths: Iterable[Path], edge_table: pd.DataFrame, algorithms: list, output_file: str, output_png:str=None):
"""
Takes in file paths for a specific dataset and an associated gold standard node table.
Takes in file paths for a specific dataset and an associated gold standard edge table.
Calculates precision and recall for each pathway file
Returns output back to output_file
@param file_paths: file paths of pathway reconstruction algorithm outputs
@param node_table: the gold standard nodes
@param edge_table: the gold standard edges
@param algorithms: list of algorithms used in current run of SPRAS
@param output_file: the filename to save the precision and recall of each pathway
@param output_png (optional): the filename to plot the precision and recall of each pathway (not a PRC)
@@ -99,17 +99,12 @@ def precision_and_recall_edge(file_paths: Iterable[Path], edge_table: pd.DataFra
results = []
for file in file_paths:
df = pd.read_table(file, sep="\t", header=0, usecols=["Node1", "Node2"])
print(file)
print(df)
y_pred = set()
for row in df.itertuples():
y_pred.add((row[1], row[2]))
all_edges = set(gs_edges.union(y_pred))
y_true_binary = [1 if (edge[0], edge[1]) in gs_edges or (edge[1], edge[0]) in gs_edges else 0 for edge in all_edges]
y_pred_binary = [1 if (edge[0], edge[1]) in y_pred or (edge[1], edge[0]) in y_pred else 0 for edge in all_edges]

# # default to 0.0 if there is a divide by 0 error
# # not using precision_recall_curve because thresholds are binary (0 or 1); rather we are directly calculating precision and recall per pathway
precision = precision_score(y_true_binary, y_pred_binary, zero_division=0.0)
recall = recall_score(y_true_binary, y_pred_binary, zero_division=0.0)
results.append({"Pathway": file, "Precision": precision, "Recall": recall})
@@ -152,7 +147,7 @@ def precision_and_recall_edge(file_paths: Iterable[Path], edge_table: pd.DataFra
plt.savefig(output_png)

@staticmethod
def precision_and_recall(file_paths: Iterable[Path], node_table: pd.DataFrame, algorithms: list, output_file: str, output_png:str=None):
def precision_and_recall_node(file_paths: Iterable[Path], node_table: pd.DataFrame, algorithms: list, output_file: str, output_png:str=None):
"""
Takes in file paths for a specific dataset and an associated gold standard node table.
Calculates precision and recall for each pathway file

0 comments on commit 4e42295

Please sign in to comment.