Skip to content

Commit

Permalink
Added to_json writer and from_dict and from_json readers. (#57)
Browse files Browse the repository at this point in the history
* Added to/from json and dict interfaces along with tests.

* fix spelling

* No longer testing thicket compatibility

* black reformatting

Co-authored-by: Stephanie Brink <[email protected]>
  • Loading branch information
cscully-allison and slabasan authored Sep 12, 2022
1 parent c82e153 commit aaead67
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 0 deletions.
38 changes: 38 additions & 0 deletions hatchet/graphframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pandas as pd
import numpy as np
import multiprocess as mp
import json

from .node import Node
from .graph import Graph
Expand Down Expand Up @@ -300,6 +301,12 @@ def from_lists(*lists):
gf.update_inclusive_columns()
return gf

@staticmethod
def from_json(json_spec, **kwargs):
    """Build a GraphFrame from a JSON string specification.

    Arguments:
        json_spec (str): JSON text handed to ``JsonReader``.
        **kwargs: forwarded unchanged to ``JsonReader.read``.

    Return:
        The object produced by ``JsonReader(json_spec).read(**kwargs)``.
    """
    # The reader is imported at call time rather than at module import time.
    from .readers.json_reader import JsonReader

    reader = JsonReader(json_spec)
    return reader.read(**kwargs)

@staticmethod
def from_hdf(filename, **kwargs):
# import this lazily to avoid circular dependencies
Expand Down Expand Up @@ -1084,6 +1091,37 @@ def add_nodes(hnode):

return graph_literal

def to_dict(self):
    """Return a plain-dictionary description of this GraphFrame.

    The returned dictionary has these keys, in this order:

    * ``"graph"``: one dict per root in ``self.graph.roots``. Each maps a
      node's ``_hatchet_nid`` to ``{"data": <copy of the frame attrs>,
      "children": [<child _hatchet_nid>, ...]}`` for every node yielded
      by ``root.traverse()``.
    * ``"dataframe_indices"``: the names of the dataframe index levels.
    * ``"dataframe"``: the dataframe as a list of row records, with the
      index reset to columns, every entry of the ``"node"`` column
      replaced by that node's ``_hatchet_nid`` and NaN replaced by
      ``None``.
    * ``"inclusive_metrics"``: ``self.inc_metrics``.
    * ``"exclusive_metrics"``: ``self.exc_metrics``.

    Return:
        (dict): dictionary representation of the graph and dataframe
    """
    graphs = []
    for root in self.graph.roots:
        graphs.append(
            {
                node._hatchet_nid: {
                    # Copy the attrs so that editing the exported dict
                    # cannot modify the frames of the live graph.
                    "data": dict(node.frame.attrs),
                    "children": [child._hatchet_nid for child in node.children],
                }
                for node in root.traverse()
            }
        )

    # Move the index levels into ordinary columns so they show up in the
    # row records, then swap node objects for their integer ids.
    records_frame = self.dataframe.reset_index()
    records_frame["node"] = records_frame["node"].apply(lambda n: n._hatchet_nid)

    return {
        "graph": graphs,
        "dataframe_indices": list(self.dataframe.index.names),
        "dataframe": records_frame.replace({np.nan: None}).to_dict("records"),
        "inclusive_metrics": self.inc_metrics,
        "exclusive_metrics": self.exc_metrics,
    }

def to_json(self):
    """Serialize this GraphFrame to a JSON string.

    Return:
        (str): ``json.dumps`` applied to the dictionary from ``to_dict()``
    """
    as_dict = self.to_dict()
    return json.dumps(as_dict)

def _operator(self, other, op):
"""Generic function to apply operator to two dataframes and store
result in self.
Expand Down
63 changes: 63 additions & 0 deletions hatchet/readers/json_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2017-2022 Lawrence Livermore National Security, LLC and other
# Hatchet Project Developers. See the top-level LICENSE file for details.
#
# SPDX-License-Identifier: MIT

import json

import pandas as pd

import hatchet.graphframe
from hatchet.node import Node
from hatchet.graph import Graph
from hatchet.frame import Frame


class JsonReader:
    """Create a GraphFrame from a json string.

    The json document is an object with the following keys, all of which
    are read by ``read()``:

    * ``"graph"``: list of objects, one per graph. Each object maps a node
      id (as a string key) to ``{"data": <frame attributes>,
      "children": [<child node id>, ...]}``.
    * ``"dataframe"``: list of row records; the ``"node"`` entry of each
      record is a node id.
    * ``"dataframe_indices"``: column name(s) used as the dataframe index.
    * ``"exclusive_metrics"`` and ``"inclusive_metrics"``: passed through
      to the GraphFrame constructor.

    Return:
        (GraphFrame): graphframe containing data from dictionaries
    """

    def __init__(self, json_spec):
        """Read from a json string specification of a graphframe

        json (string): Json specification of a graphframe.
        """
        self.spec_dict = json.loads(json_spec)

    def read(self):
        """Build the graph and dataframe described by the parsed spec.

        The parsed spec in ``self.spec_dict`` is only read, never modified,
        so ``read()`` can be called more than once on the same reader.

        Return:
            (GraphFrame): graphframe built from the json specification
        """
        roots = []
        # Node id (string) -> Node, used to rewrite the dataframe's "node"
        # column. If an id occurs in several graphs the first one wins.
        node_lookup = {}

        for graph_spec in self.spec_dict["graph"]:
            # turn frames into nodes; the attrs are copied in case Frame
            # keeps a reference to the dict it is given
            nodes = {
                nid: Node(Frame(dict(value["data"])), hnid=int(nid))
                for nid, value in graph_spec.items()
            }

            # connect nodes; child ids are stored as ints in the spec while
            # the node keys are strings
            for nid, value in graph_spec.items():
                parent = nodes[nid]
                for child_id in value["children"]:
                    child = nodes[str(child_id)]
                    parent.add_child(child)
                    child.add_parent(parent)

            # a node that nobody listed as a child is a root
            roots.extend(node for node in nodes.values() if not node.parents)

            for nid, node in nodes.items():
                node_lookup.setdefault(nid, node)

        grph = Graph(roots)

        # make the dataframe, replacing node ids with the Node objects;
        # ids not present in any graph are left unchanged
        dataframe = pd.DataFrame(self.spec_dict["dataframe"])
        dataframe["node"] = dataframe["node"].map(
            lambda n: node_lookup.get(str(n), n)
        )
        dataframe.set_index(self.spec_dict["dataframe_indices"], inplace=True)

        return hatchet.graphframe.GraphFrame(
            grph,
            dataframe,
            self.spec_dict["exclusive_metrics"],
            self.spec_dict["inclusive_metrics"],
        )
11 changes: 11 additions & 0 deletions hatchet/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1089,3 +1089,14 @@ def sw4_caliper_cuda_activity_profile_summary_cali(data_dir, tmpdir):
tmpfile = os.path.join(str(tmpdir), "caliper_cuda_activity_profile_summary_v2.cali")

return tmpfile


@pytest.fixture
def json_graphframe_specification(data_dir, tmpdir):
    """Copy the JSON graphframe fixture into ``tmpdir`` and return its path.

    The source file is ``<data_dir>/json/hatchet-graph-literal.json``; the
    returned path points at the copy inside the temporary directory.
    """
    filename = "hatchet-graph-literal.json"
    source_path = os.path.join(data_dir, "json", filename)
    destination_dir = str(tmpdir)

    shutil.copy(source_path, destination_dir)

    return os.path.join(destination_dir, filename)
1 change: 1 addition & 0 deletions hatchet/tests/data/json/hatchet-graph-literal.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"graph":[{"0":{"data":{"name":"foo","type":"function"},"children":[1,4,12]},"1":{"data":{"name":"bar","type":"None"},"children":[2,3]},"2":{"data":{"name":"baz","type":"function"},"children":[]},"3":{"data":{"name":"grault","type":"None"},"children":[]},"4":{"data":{"name":"qux","type":"function"},"children":[5]},"5":{"data":{"name":"quux","type":"None"},"children":[6]},"6":{"data":{"name":"corge","type":"function"},"children":[7,11,10]},"7":{"data":{"name":"bar","type":"None"},"children":[8,9]},"8":{"data":{"name":"baz","type":"function"},"children":[]},"9":{"data":{"name":"grault","type":"None"},"children":[]},"10":{"data":{"name":"garply","type":"function"},"children":[]},"11":{"data":{"name":"grault","type":"None"},"children":[]},"12":{"data":{"name":"waldo","type":"function"},"children":[13,19]},"13":{"data":{"name":"fred","type":"function"},"children":[14,15]},"14":{"data":{"name":"plugh","type":"function"},"children":[]},"15":{"data":{"name":"xyzzy","type":"function"},"children":[16]},"16":{"data":{"name":"thud","type":"function"},"children":[17,18]},"17":{"data":{"name":"baz","type":"function"},"children":[]},"18":{"data":{"name":"garply","type":"function"},"children":[]},"19":{"data":{"name":"garply","type":"function"},"children":[]}},{"20":{"data":{"name":"waldo","type":"function"},"children":[21]},"21":{"data":{"name":"bar","type":"None"},"children":[22,23]},"22":{"data":{"name":"baz","type":"function"},"children":[]},"23":{"data":{"name":"grault","type":"None"},"children":[]}}],"dataframe_indices":["node"],"dataframe":[{"node":0,"name":"foo","time (inc)":135,"time":0},{"node":1,"name":"bar","time (inc)":20,"time":5},{"node":2,"name":"baz","time (inc)":5,"time":5},{"node":3,"name":"grault","time (inc)":10,"time":10},{"node":4,"name":"qux","time (inc)":60,"time":0},{"node":5,"name":"quux","time (inc)":60,"time":5},{"node":6,"name":"corge","time (inc)":55,"time":10},{"node":7,"name":"bar","time (inc)":20,"time":5},{"node":8,"name":"baz","time 
(inc)":5,"time":5},{"node":9,"name":"grault","time (inc)":10,"time":10},{"node":10,"name":"garply","time (inc)":15,"time":15},{"node":11,"name":"grault","time (inc)":10,"time":10},{"node":12,"name":"waldo","time (inc)":55,"time":0},{"node":13,"name":"fred","time (inc)":40,"time":5},{"node":14,"name":"plugh","time (inc)":5,"time":5},{"node":15,"name":"xyzzy","time (inc)":30,"time":5},{"node":16,"name":"thud","time (inc)":25,"time":5},{"node":17,"name":"baz","time (inc)":5,"time":5},{"node":18,"name":"garply","time (inc)":15,"time":15},{"node":19,"name":"garply","time (inc)":15,"time":15},{"node":20,"name":"waldo","time (inc)":30,"time":10},{"node":21,"name":"bar","time (inc)":20,"time":5},{"node":22,"name":"baz","time (inc)":5,"time":5},{"node":23,"name":"grault","time (inc)":10,"time":10}],"inclusive_metrics":["time (inc)"],"exclusive_metrics":["time"]}
29 changes: 29 additions & 0 deletions hatchet/tests/json_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright 2017-2022 Lawrence Livermore National Security, LLC and other
# Hatchet Project Developers. See the top-level LICENSE file for details.
#
# SPDX-License-Identifier: MIT

from hatchet import GraphFrame


def test_read_json(json_graphframe_specification):
    """Reading the JSON fixture gives 24 rows, 24 nodes and a "foo" root."""
    with open(json_graphframe_specification, "r") as spec_file:
        spec_text = spec_file.read()

    graphframe = GraphFrame.from_json(spec_text)

    assert len(graphframe.dataframe) == 24
    assert len(graphframe.graph) == 24
    assert graphframe.graph.roots[0].frame["name"] == "foo"
assert gf.graph.roots[0].frame["name"] == "foo"


def test_write_json(json_graphframe_specification):
    """Round-trip the JSON fixture through a GraphFrame and compare documents."""
    # Imported here so this test does not depend on the module's import block.
    import json

    with open(json_graphframe_specification, "r") as spec_file:
        spec_text = spec_file.read()

    json_out = GraphFrame.from_json(spec_text).to_json()

    # Compare the parsed documents rather than sorted characters: a
    # character-multiset comparison would also accept any anagram of the
    # expected output (e.g. two swapped metric values), whereas this check
    # ignores whitespace and object key order but requires equal content.
    assert json.loads(json_out) == json.loads(spec_text)

0 comments on commit aaead67

Please sign in to comment.