Skip to content

Commit

Permalink
Add edges pointing from higher ATC levels to lower ATC levels (e.g. A…
Browse files Browse the repository at this point in the history
…TC2 --> ATC3) (#10)
  • Loading branch information
DilumAluthge authored Mar 2, 2021
1 parent a080f14 commit 4675522
Show file tree
Hide file tree
Showing 13 changed files with 223 additions and 133 deletions.
1 change: 1 addition & 0 deletions src/PharmaceuticalClassification.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const outneighbors = MetaGraphs.outneighbors

include("types.jl")

include("atc_fix.jl")
include("build.jl")
include("countlines.jl")
include("edge.jl")
Expand Down
74 changes: 74 additions & 0 deletions src/atc_fix.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
    _get_all_atc_nodes(graph::PharmGraph)

Collect every node of `graph` whose `system` string starts with `"ATC"`.

The result is a `Vector{PharmClass}` with duplicates removed and sorted in
ascending order.
"""
function _get_all_atc_nodes(graph::PharmGraph)
    atc_nodes = PharmClass[]

    # `_for_each_node` drives the traversal; the callback keeps only ATC nodes.
    _for_each_node(graph) do node
        node_system = node.system::String
        startswith(node_system, "ATC") && push!(atc_nodes, node)
        return nothing
    end

    return sort!(unique!(atc_nodes))
end

"""
    add_atc_hierarchy_edges!(graph::PharmGraph)

Connect adjacent ATC levels in `graph`.

For each pair of consecutive levels (`"ATC1"`/`"ATC2"` up to `"ATC4"`/`"ATC5"`),
`_add_edge!` is called with `less_specific => more_specific` for every pair of
nodes where the more specific node's `value` starts with the less specific
node's `value`. Nodes whose `system` is not one of the five level names are
ignored. Returns `nothing`.
"""
function add_atc_hierarchy_edges!(graph::PharmGraph)
    # Map each level name to its bucket position.
    level_index = Dict(
        "ATC1" => 1,
        "ATC2" => 2,
        "ATC3" => 3,
        "ATC4" => 4,
        "ATC5" => 5,
    )
    nodes_by_level = [Vector{PharmClass}(undef, 0) for _ in 1:5]

    for node in _get_all_atc_nodes(graph)
        position = get(level_index, node.system, nothing)
        position === nothing || push!(nodes_by_level[position], node)
    end

    foreach(unique!, nodes_by_level)
    foreach(sort!, nodes_by_level)

    # Walk consecutive (upper, lower) level pairs: 1-2, 2-3, 3-4, 4-5.
    upper_levels = nodes_by_level[1:(end - 1)]
    lower_levels = nodes_by_level[2:end]
    for (upper_nodes, lower_nodes) in zip(upper_levels, lower_levels)
        for upper in upper_nodes, lower in lower_nodes
            # A lower-level code belongs under an upper-level code when the
            # upper code is a prefix of it.
            startswith(lower.value, upper.value) || continue
            PharmaceuticalClassification._add_edge!(graph, upper => lower)
        end
    end

    return nothing
end
29 changes: 22 additions & 7 deletions src/build.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import MetaGraphs
import ProgressMeter

@inline function new_graph(::Type{I}, ::Type{F}) where I where F
g = MetaGraphs.MetaDiGraph{I, F}()
MetaGraphs.set_indexing_prop!(g, :class)
Expand All @@ -25,12 +22,17 @@ end
all_numlines = countlines_filename.(all_filenames)
total_numlines = sum(all_numlines)
showprogress::Bool = config.showprogress
wait_time::Float64 = showprogress ? Float64(1.0) : Float64(Inf)
p = ProgressMeter.Progress(total_numlines + 1, wait_time)
wait_time = showprogress ? Float64(1.0) : Float64(Inf)
p = ProgressMeter.Progress(total_numlines + 10, wait_time)

open(config.rxnsat, "r") do io
for line in eachline(io)
ProgressMeter.next!(p; showvalues = [(:Stage, "1 of 2 (\"RXNSAT.RRF\")")])
ProgressMeter.next!(p;
showvalues = [
(:Stage, "1 of 3"),
(:File, "RXNSAT.RRF"),
],
)
elements = split(line, "|")
left_system = "RXCUI"
left_value = elements[1]
Expand Down Expand Up @@ -63,9 +65,22 @@ end
end
end

ProgressMeter.next!(p;
showvalues = [
(:Stage, "2 of 3"),
(:Description, "Add edges for ATC hierarchy"),
],
)
add_atc_hierarchy_edges!(graph)

open(config.rxnrel, "r") do io
for line in eachline(io)
ProgressMeter.next!(p; showvalues = [(:Stage, "2 of 2 (\"RXNREL.RRF\")")])
ProgressMeter.next!(p;
showvalues = [
(:Stage, "3 of 3"),
(:File, "RXNREL.RRF"),
],
)
elements = split(line, "|")
if elements[11] == "RXNORM" && elements[3] == "CUI" && elements[7] == "CUI"
relationship = elements[8]
Expand Down
2 changes: 0 additions & 2 deletions src/iterate.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import MetaGraphs

@inline function Base.iterate(graph::PharmGraph)
iteration = Base.iterate(MetaGraphs.vertices(graph.g))
vertex_integer, next_state = iteration
Expand Down
28 changes: 12 additions & 16 deletions src/system.jl
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
import MetaGraphs
import ProgressMeter
@inline function available_systems(graph::PharmGraph)
all_systems_set = Set{String}()

@inline function available_systems(graph::PharmGraph;
showprogress::Bool = true)
all_systems = Set{String}()
num_vertices = length(MetaGraphs.vertices(graph.g))
wait_time::Float64 = showprogress ? Float64(1.0) : Float64(Inf)
p = ProgressMeter.Progress(num_vertices, wait_time)
for vertex_integer in MetaGraphs.vertices(graph.g)
ProgressMeter.next!(p)
class = _vertex_integer_to_class(graph, vertex_integer)::PharmClass
function f(class)
system = class.system::String
push!(all_systems, system)
push!(all_systems_set, system)
return nothing
end
result = collect(all_systems)
unique!(result)
sort!(result)
return result

_for_each_node(f, graph)

all_systems_vector = collect(all_systems_set)
unique!(all_systems_vector)
sort!(all_systems_vector)
return all_systems_vector
end

@inline function system_matches(class::PharmClass, system::String)
Expand Down
9 changes: 7 additions & 2 deletions src/traversal.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import LightGraphs
import MetaGraphs
# Call `f(class)` once for every vertex of `graph.g`, where `class` is the
# `PharmClass` obtained from `_vertex_integer_to_class` for that vertex.
# Taking `f` first allows `do`-block syntax at call sites. Always returns `nothing`.
@inline function _for_each_node(f::F, graph::PharmGraph) where {F}
    foreach(MetaGraphs.vertices(graph.g)) do vertex
        f(_vertex_integer_to_class(graph, vertex)::PharmClass)
    end
    return nothing
end

@inline function MetaGraphs.outneighbors(graph::PharmGraph,
class::PharmClass;
Expand Down
2 changes: 0 additions & 2 deletions src/types.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import MetaGraphs

struct PharmClass
system::String
value::String
Expand Down
2 changes: 0 additions & 2 deletions src/vertex.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import MetaGraphs

@inline function Base.haskey(graph::PharmGraph,
class::PharmClass;
normalization::Bool = true)
Expand Down
67 changes: 67 additions & 0 deletions test/integration.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# End-to-end checks: build a graph from synthetic RXNREL/RXNSAT files, round-trip
# it through `Serialization`, and run the same assertions against both the
# freshly built graph and the deserialized copy.
@testset "Integration tests" begin
    @testset "Integration tests with synthetic data" begin
        in_temporary_directory() do
            rxnrel = "RXNREL.RRF"
            rxnsat = "RXNSAT.RRF"

            open(generate_synthetic_rxnrel, rxnrel, "w")
            open(generate_synthetic_rxnsat, rxnsat, "w")

            graph_built = build_graph(; showprogress = false, rxnrel, rxnsat)

            serialized_path = "my_filename.serialized"
            rm(serialized_path; force = true, recursive = true)
            Serialization.serialize(serialized_path, graph_built)
            graph_loaded_from_file = Serialization.deserialize(serialized_path)

            # Nodes referenced repeatedly by the assertions below.
            atc1 = PharmClass("ATC1", "A")
            atc2 = PharmClass("ATC2", "A01")
            atc3 = PharmClass("ATC3", "A01B")
            atc4 = PharmClass("ATC4", "A01BC")
            atc5 = PharmClass("ATC5", "A01BC23")
            ndc = PharmClass("NDC", "12345678901")
            rxcui = PharmClass("RXCUI", "1234567")

            for graph in [graph_built, graph_loaded_from_file]
                @test graph isa PharmGraph
                @test available_systems(graph) ==
                    ["ATC1", "ATC2", "ATC3", "ATC4", "ATC5", "NDC", "RXCUI"]

                for normalization in (true, false)
                    @test haskey(graph, atc5; normalization = normalization)
                end
                @test graph[atc5] == atc5
                for normalization in (true, false)
                    @test getindex(graph, atc5; normalization = normalization) == atc5
                end

                for normalization in (true, false)
                    @test equivalent(graph, atc5; normalization = normalization) == [atc5]
                end
                @test equivalent(graph, ndc) == [ndc, rxcui]
                @test equivalent(graph, rxcui) == [ndc, rxcui]

                for normalization in (true, false)
                    @test parents(graph, atc5; normalization = normalization) ==
                        PharmClass[atc1, atc2, atc3, atc4, atc5]
                end
                @test parents(graph, ndc) ==
                    PharmClass[atc1, atc2, atc3, atc4, atc5, ndc, rxcui]
                @test parents(graph, rxcui) ==
                    PharmClass[atc1, atc2, atc3, atc4, atc5, ndc, rxcui]

                for normalization in (true, false)
                    @test children(graph, atc5; normalization = normalization) ==
                        [atc5, ndc, rxcui]
                end
                @test children(graph, ndc) == [ndc, rxcui]
                @test children(graph, rxcui) == [ndc, rxcui]

                for normalization in (true, false)
                    @test all_neighbors(graph, atc5; normalization = normalization) ==
                        PharmClass[atc4, atc5, rxcui]
                end
                for normalization in (true, false)
                    @test inneighbors(graph, atc5; normalization = normalization) ==
                        PharmClass[atc4, atc5]
                end
                for normalization in (true, false)
                    @test outneighbors(graph, atc5; normalization = normalization) ==
                        PharmClass[atc5, rxcui]
                end

                # all_neighbors must be exactly the union of in- and out-neighbors.
                a = sort(unique(all_neighbors(graph, atc5)))
                b = sort(unique(inneighbors(graph, atc5)))
                c = sort(unique(outneighbors(graph, atc5)))
                @test a == sort(unique(vcat(b, c)))
                @test !issubset(a, b)
                @test issubset(b, a)
                @test issubset(c, a)
                @test !issubset(c, b)

                @test length(graph) == 9
                # Iterating the graph must visit as many nodes as `length` reports.
                visited = 0
                for node in graph
                    visited += 1
                end
                @test visited == length(graph)
                @test eltype(graph) == PharmClass
            end
        end
    end
end
95 changes: 4 additions & 91 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,97 +3,10 @@ using Test

import Serialization

include("generate-synthetic-data.jl")
include("test_utils/generate-synthetic-data.jl")
include("test_utils/utils.jl")

@testset "PharmaceuticalClassification.jl" begin
@testset "Unit tests" begin
@testset "iterate.jl" begin
@test Base.IteratorSize(PharmGraph) == Base.HasLength()
@test Base.IteratorEltype(PharmGraph) == Base.HasEltype()
end

@testset "system.jl" begin
@test system_matches(PharmClass("foo", "1"), "foo")
@test system_matches("foo")(PharmClass("foo", "1"))
@test !system_matches(PharmClass("foo", ""), "bar")
@test !system_matches("bar")(PharmClass("foo", ""))
@test system_matches(PharmClass("foo", "1"), r"foo")
@test system_matches(r"foo")(PharmClass("foo", "1"))
@test !system_matches(PharmClass("foo", "1"), r"bar")
@test !system_matches(r"bar")(PharmClass("foo", "1"))
end

@testset "types.jl" begin
@test Base.isless(PharmClass("bar", "1"), PharmClass("foo", "1"))
@test Base.isless(PharmClass("bar", "1"), PharmClass("bar", "2"))
end
end

@testset "Integration tests" begin
@testset "Integration tests with synthetic data" begin
in_temporary_directory() do
rxnrel = "RXNREL.RRF"
rxnsat = "RXNSAT.RRF"

open(rxnrel, "w") do io
generate_synthetic_rxnrel(io)
end
open(rxnsat, "w") do io
generate_synthetic_rxnsat(io)
end

graph_built = build_graph(;
showprogress = false,
rxnrel,
rxnsat,
)
my_filename_for_serialization = "my_filename.serialized"
rm(my_filename_for_serialization; force = true, recursive = true)
Serialization.serialize(my_filename_for_serialization, graph_built)
graph_loaded_from_file = Serialization.deserialize(my_filename_for_serialization)
for graph in [graph_built, graph_loaded_from_file]
@test graph isa PharmGraph
@test available_systems(graph; showprogress = false) == ["ATC5", "NDC", "RXCUI"]
@test haskey(graph, PharmClass("ATC5", "A01BC23"); normalization = true)
@test haskey(graph, PharmClass("ATC5", "A01BC23"); normalization = false)
@test graph[PharmClass("ATC5", "A01BC23")] == PharmClass("ATC5", "A01BC23")
@test getindex(graph, PharmClass("ATC5", "A01BC23"); normalization = true) == PharmClass("ATC5", "A01BC23")
@test getindex(graph, PharmClass("ATC5", "A01BC23"); normalization = false) == PharmClass("ATC5", "A01BC23")
@test equivalent(graph, PharmClass("ATC5", "A01BC23"); normalization = true) == [PharmClass("ATC5", "A01BC23")]
@test equivalent(graph, PharmClass("ATC5", "A01BC23"); normalization = false) == [PharmClass("ATC5", "A01BC23")]
@test equivalent(graph, PharmClass("NDC", "12345678901")) == [PharmClass("NDC", "12345678901"), PharmClass("RXCUI", "1234567")]
@test equivalent(graph, PharmClass("RXCUI", "1234567")) == [PharmClass("NDC", "12345678901"), PharmClass("RXCUI", "1234567")]
@test parents(graph, PharmClass("ATC5", "A01BC23"); normalization = true) == [PharmClass("ATC5", "A01BC23")]
@test parents(graph, PharmClass("ATC5", "A01BC23"); normalization = false) == [PharmClass("ATC5", "A01BC23")]
@test parents(graph, PharmClass("NDC", "12345678901")) == [PharmClass("ATC5", "A01BC23"), PharmClass("NDC", "12345678901"), PharmClass("RXCUI", "1234567")]
@test parents(graph, PharmClass("RXCUI", "1234567")) == [PharmClass("ATC5", "A01BC23"), PharmClass("NDC", "12345678901"), PharmClass("RXCUI", "1234567")]
@test children(graph, PharmClass("ATC5", "A01BC23"); normalization = true) == [PharmClass("ATC5", "A01BC23"), PharmClass("NDC", "12345678901"), PharmClass("RXCUI", "1234567")]
@test children(graph, PharmClass("ATC5", "A01BC23"); normalization = false) == [PharmClass("ATC5", "A01BC23"), PharmClass("NDC", "12345678901"), PharmClass("RXCUI", "1234567")]
@test children(graph, PharmClass("NDC", "12345678901")) == [PharmClass("NDC", "12345678901"), PharmClass("RXCUI", "1234567")]
@test children(graph, PharmClass("RXCUI", "1234567")) == [PharmClass("NDC", "12345678901"), PharmClass("RXCUI", "1234567")]
@test all_neighbors(graph, PharmClass("ATC5", "A01BC23"); normalization = true) == [PharmClass("ATC5", "A01BC23"), PharmClass("RXCUI", "1234567")]
@test all_neighbors(graph, PharmClass("ATC5", "A01BC23"); normalization = false) == [PharmClass("ATC5", "A01BC23"), PharmClass("RXCUI", "1234567")]
@test inneighbors(graph, PharmClass("ATC5", "A01BC23"); normalization = true) == PharmClass[PharmClass("ATC5", "A01BC23")]
@test inneighbors(graph, PharmClass("ATC5", "A01BC23"); normalization = false) == PharmClass[PharmClass("ATC5", "A01BC23")]
@test outneighbors(graph, PharmClass("ATC5", "A01BC23"); normalization = true) == PharmClass[PharmClass("ATC5", "A01BC23"), PharmClass("RXCUI", "1234567")]
@test outneighbors(graph, PharmClass("ATC5", "A01BC23"); normalization = false) == PharmClass[PharmClass("ATC5", "A01BC23"), PharmClass("RXCUI", "1234567")]
a = sort(unique(all_neighbors(graph, PharmClass("ATC5", "A01BC23"))))
b = sort(unique(inneighbors(graph, PharmClass("ATC5", "A01BC23"))))
c = sort(unique(outneighbors(graph, PharmClass("ATC5", "A01BC23"))))
@test a == sort(unique(vcat(b, c)))
@test !issubset(a, b)
@test issubset(b, a)
@test issubset(c, a)
@test !issubset(c, b)
@test length(graph) == 5
count = 0
for node in graph
count += 1
end
@test count == length(graph)
@test eltype(graph) == PharmClass
end
end
end
end
include("unit.jl")
include("integration.jl")
end
Original file line number Diff line number Diff line change
@@ -1,14 +1,3 @@
"""
    in_temporary_directory(f::Function)

Run `f()` with the current working directory set to a fresh temporary directory.

The original working directory is restored and the temporary directory is
deleted once `f` finishes, whether it returns normally or throws. Any exception
raised by `f` propagates to the caller. Returns `nothing`.
"""
@inline function in_temporary_directory(f::Function)::Nothing
    # `cleanup = true` already removes the directory at process exit,
    # so no separate `atexit` hook is needed as a fallback.
    temporary_directory = mktempdir(; cleanup = true)
    try
        # `cd(f, dir)` switches back to the previous directory even if `f` throws.
        cd(f, temporary_directory)
    finally
        # Runs after the working directory has been restored, so the directory
        # being deleted is never the current one.
        rm(temporary_directory; force = true, recursive = true)
    end
    return nothing
end

@inline function generate_synthetic_rxnrel(io::IO)::Nothing
lines = String[
"123:456||CUI||789||CUI|has_ingredient|||RXNORM||||||",
Expand All @@ -26,6 +15,10 @@ end
lines = String[
"1234567||||||||NDC||12345678901|||",
"1234567|||||A01BC23|||ATC_LEVEL|ATC|5|||",
"1234567|||||A01BC|||ATC_LEVEL|ATC|4|||",
"1234567|||||A01B|||ATC_LEVEL|ATC|3|||",
"1234567|||||A01|||ATC_LEVEL|ATC|2|||",
"1234567|||||A|||ATC_LEVEL|ATC|1|||",
"1234567||||||||FAKE_ENTRY|||||",
]
for line in lines
Expand Down
Loading

0 comments on commit 4675522

Please sign in to comment.