Added tojson writer and from_dict and from_json readers. (#57)

* Added to/from json and dict interfaces along with tests.
* fix spelling
* No longer testing thicket compatibility
* black reformatting

Co-authored-by: Stephanie Brink <[email protected]>
LLNL · Sep 12, 2022 · aaead67 · aaead67
1 parent c82e153
commit aaead67
Show file tree

Hide file tree

Showing 5 changed files with 142 additions and 0 deletions.
diff --git a/hatchet/graphframe.py b/hatchet/graphframe.py
@@ -12,6 +12,7 @@
 import pandas as pd
 import numpy as np
 import multiprocess as mp
+import json
 
 from .node import Node
 from .graph import Graph
@@ -300,6 +301,12 @@ def from_lists(*lists):
         gf.update_inclusive_columns()
         return gf
 
+    @staticmethod
+    def from_json(json_spec, **kwargs):
+        """Build a GraphFrame from a json string specification.
+
+        Arguments:
+            json_spec (str): json text handed to JsonReader
+            **kwargs: forwarded unchanged to JsonReader.read()
+
+        Return:
+            the result of JsonReader.read() for the given specification
+        """
+        # import this lazily to avoid circular dependencies
+        from .readers.json_reader import JsonReader
+
+        reader = JsonReader(json_spec)
+        return reader.read(**kwargs)
+
     @staticmethod
     def from_hdf(filename, **kwargs):
         # import this lazily to avoid circular dependencies
@@ -1084,6 +1091,37 @@ def add_nodes(hnode):
 
         return graph_literal
 
+    def to_dict(self):
+        """Convert this GraphFrame into a dictionary of plain containers.
+
+        The result has the following keys:
+
+        - "graph": one dictionary per graph root, of the form
+          {hatchet_nid: {"data": frame attributes, "children": [child nids]}}
+        - "dataframe_indices": names of the dataframe index levels
+        - "dataframe": the dataframe rows as a list of records, with node
+          objects replaced by their hatchet nid and NaN replaced by None
+        - "inclusive_metrics" and "exclusive_metrics": the metric lists
+
+        Return:
+            (dict): dictionary representation of the graphframe
+        """
+        graphs = []
+        for root in self.graph.roots:
+            formatted_graph_dict = {}
+            for n in root.traverse():
+                formatted_graph_dict[n._hatchet_nid] = {
+                    # copy so that edits to the result cannot alter the graph
+                    "data": dict(n.frame.attrs),
+                    "children": [c._hatchet_nid for c in n.children],
+                }
+            graphs.append(formatted_graph_dict)
+
+        # swap node objects for their ids so the records hold plain values
+        ef = self.dataframe.reset_index()
+        ef["node"] = ef["node"].apply(lambda n: n._hatchet_nid)
+
+        return {
+            "graph": graphs,
+            "dataframe_indices": list(self.dataframe.index.names),
+            "dataframe": ef.replace({np.nan: None}).to_dict("records"),
+            "inclusive_metrics": self.inc_metrics,
+            "exclusive_metrics": self.exc_metrics,
+        }
+
+    def to_json(self):
+        """Serialize this GraphFrame to a json string.
+
+        Return:
+            (str): json.dumps() of the dictionary produced by to_dict()
+        """
+        as_dict = self.to_dict()
+        return json.dumps(as_dict)
+
     def _operator(self, other, op):
         """Generic function to apply operator to two dataframes and store
         result in self.

diff --git a/hatchet/readers/json_reader.py b/hatchet/readers/json_reader.py
@@ -0,0 +1,63 @@
+# Copyright 2017-2022 Lawrence Livermore National Security, LLC and other
+# Hatchet Project Developers. See the top-level LICENSE file for details.
+#
+# SPDX-License-Identifier: MIT
+
+import json
+
+import pandas as pd
+
+import hatchet.graphframe
+from hatchet.node import Node
+from hatchet.graph import Graph
+from hatchet.frame import Frame
+
+
+class JsonReader:
+    """Create a GraphFrame from a json string specification.
+
+    The json text must decode to an object with these keys:
+
+    - "graph": list of graphs, each an object mapping a node id to
+      {"data": frame attributes, "children": [child node ids]}
+    - "dataframe": list of row records, each with a "node" entry holding a
+      node id
+    - "dataframe_indices": column name(s) used as the dataframe index
+    - "inclusive_metrics" and "exclusive_metrics": the metric lists
+
+    Return:
+        (GraphFrame): graphframe containing data from dictionaries
+    """
+
+    def __init__(self, json_spec):
+        """Read from a json string specification of a graphframe
+
+        json_spec (string): Json specification of a graphframe.
+        """
+        self.spec_dict = json.loads(json_spec)
+
+    def read(self):
+        """Build the graph and dataframe described by the specification.
+
+        Node objects are kept in local lookups instead of being written back
+        into the parsed specification, so read() can be called repeatedly on
+        the same reader.
+
+        Return:
+            (GraphFrame): graphframe built from the specification
+        """
+        roots = []
+        # node id (as a string) -> Node, across every graph in the spec
+        all_nodes = {}
+        for graph_spec in self.spec_dict["graph"]:
+            # turn frames into nodes; the attributes are copied so the frame
+            # does not share a dictionary with the parsed specification
+            nodes = {
+                nid: Node(Frame(dict(value["data"])), hnid=int(nid))
+                for nid, value in graph_spec.items()
+            }
+
+            # connect nodes
+            for nid, value in graph_spec.items():
+                parent = nodes[nid]
+                for child in value["children"]:
+                    child_node = nodes[str(child)]
+                    parent.add_child(child_node)
+                    child_node.add_parent(parent)
+
+            # nodes that no other node lists as a child are the roots
+            roots.extend(n for n in nodes.values() if len(n.parents) == 0)
+
+            # on a duplicated id the node from the earliest graph is kept
+            for nid, node in nodes.items():
+                all_nodes.setdefault(nid, node)
+
+        grph = Graph(roots)
+
+        # make the dataframe, swapping node ids for Node objects in a single
+        # pass; ids that appear in no graph are left as they are
+        dataframe = pd.DataFrame(self.spec_dict["dataframe"])
+        dataframe["node"] = dataframe["node"].map(lambda n: all_nodes.get(str(n), n))
+        dataframe.set_index(self.spec_dict["dataframe_indices"], inplace=True)
+
+        return hatchet.graphframe.GraphFrame(
+            grph,
+            dataframe,
+            self.spec_dict["exclusive_metrics"],
+            self.spec_dict["inclusive_metrics"],
+        )
diff --git a/hatchet/tests/conftest.py b/hatchet/tests/conftest.py
@@ -1089,3 +1089,14 @@ def sw4_caliper_cuda_activity_profile_summary_cali(data_dir, tmpdir):
     tmpfile = os.path.join(str(tmpdir), "caliper_cuda_activity_profile_summary_v2.cali")
 
     return tmpfile
+
+
+@pytest.fixture
+def json_graphframe_specification(data_dir, tmpdir):
+    """Copy the json graphframe specification into tmpdir; return its path."""
+    json_name = "hatchet-graph-literal.json"
+    source = os.path.join(data_dir, "json", json_name)
+
+    shutil.copy(source, str(tmpdir))
+
+    return os.path.join(str(tmpdir), json_name)
diff --git a/hatchet/tests/data/json/hatchet-graph-literal.json b/hatchet/tests/data/json/hatchet-graph-literal.json
@@ -0,0 +1 @@
+{"graph":[{"0":{"data":{"name":"foo","type":"function"},"children":[1,4,12]},"1":{"data":{"name":"bar","type":"None"},"children":[2,3]},"2":{"data":{"name":"baz","type":"function"},"children":[]},"3":{"data":{"name":"grault","type":"None"},"children":[]},"4":{"data":{"name":"qux","type":"function"},"children":[5]},"5":{"data":{"name":"quux","type":"None"},"children":[6]},"6":{"data":{"name":"corge","type":"function"},"children":[7,11,10]},"7":{"data":{"name":"bar","type":"None"},"children":[8,9]},"8":{"data":{"name":"baz","type":"function"},"children":[]},"9":{"data":{"name":"grault","type":"None"},"children":[]},"10":{"data":{"name":"garply","type":"function"},"children":[]},"11":{"data":{"name":"grault","type":"None"},"children":[]},"12":{"data":{"name":"waldo","type":"function"},"children":[13,19]},"13":{"data":{"name":"fred","type":"function"},"children":[14,15]},"14":{"data":{"name":"plugh","type":"function"},"children":[]},"15":{"data":{"name":"xyzzy","type":"function"},"children":[16]},"16":{"data":{"name":"thud","type":"function"},"children":[17,18]},"17":{"data":{"name":"baz","type":"function"},"children":[]},"18":{"data":{"name":"garply","type":"function"},"children":[]},"19":{"data":{"name":"garply","type":"function"},"children":[]}},{"20":{"data":{"name":"waldo","type":"function"},"children":[21]},"21":{"data":{"name":"bar","type":"None"},"children":[22,23]},"22":{"data":{"name":"baz","type":"function"},"children":[]},"23":{"data":{"name":"grault","type":"None"},"children":[]}}],"dataframe_indices":["node"],"dataframe":[{"node":0,"name":"foo","time (inc)":135,"time":0},{"node":1,"name":"bar","time (inc)":20,"time":5},{"node":2,"name":"baz","time (inc)":5,"time":5},{"node":3,"name":"grault","time (inc)":10,"time":10},{"node":4,"name":"qux","time (inc)":60,"time":0},{"node":5,"name":"quux","time (inc)":60,"time":5},{"node":6,"name":"corge","time (inc)":55,"time":10},{"node":7,"name":"bar","time (inc)":20,"time":5},{"node":8,"name":"baz","time 
(inc)":5,"time":5},{"node":9,"name":"grault","time (inc)":10,"time":10},{"node":10,"name":"garply","time (inc)":15,"time":15},{"node":11,"name":"grault","time (inc)":10,"time":10},{"node":12,"name":"waldo","time (inc)":55,"time":0},{"node":13,"name":"fred","time (inc)":40,"time":5},{"node":14,"name":"plugh","time (inc)":5,"time":5},{"node":15,"name":"xyzzy","time (inc)":30,"time":5},{"node":16,"name":"thud","time (inc)":25,"time":5},{"node":17,"name":"baz","time (inc)":5,"time":5},{"node":18,"name":"garply","time (inc)":15,"time":15},{"node":19,"name":"garply","time (inc)":15,"time":15},{"node":20,"name":"waldo","time (inc)":30,"time":10},{"node":21,"name":"bar","time (inc)":20,"time":5},{"node":22,"name":"baz","time (inc)":5,"time":5},{"node":23,"name":"grault","time (inc)":10,"time":10}],"inclusive_metrics":["time (inc)"],"exclusive_metrics":["time"]}
diff --git a/hatchet/tests/json_test.py b/hatchet/tests/json_test.py
@@ -0,0 +1,29 @@
+# Copyright 2017-2022 Lawrence Livermore National Security, LLC and other
+# Hatchet Project Developers. See the top-level LICENSE file for details.
+#
+# SPDX-License-Identifier: MIT
+
+from hatchet import GraphFrame
+
+
+def test_read_json(json_graphframe_specification):
+    """Reading the json fixture yields the expected graph and dataframe."""
+    with open(json_graphframe_specification, "r") as spec_file:
+        gf = GraphFrame.from_json(spec_file.read())
+
+    assert len(gf.dataframe) == 24
+    assert len(gf.graph) == 24
+    first_root = gf.graph.roots[0]
+    assert first_root.frame["name"] == "foo"
+
+
+def test_write_json(json_graphframe_specification):
+    """Writing a graphframe read from json reproduces the original data."""
+    import json
+
+    with open(json_graphframe_specification, "r") as f:
+        jgs = f.read()
+    gf = GraphFrame.from_json(jgs)
+    json_out = gf.to_json()
+
+    # Compare the decoded documents: a comparison of sorted characters would
+    # also accept output whose values are attached to the wrong keys.
+    assert json.loads(json_out) == json.loads(jgs)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"graph":[{"0":{"data":{"name":"foo","type":"function"},"children":[1,4,12]},"1":{"data":{"name":"bar","type":"None"},"children":[2,3]},"2":{"data":{"name":"baz","type":"function"},"children":[]},"3":{"data":{"name":"grault","type":"None"},"children":[]},"4":{"data":{"name":"qux","type":"function"},"children":[5]},"5":{"data":{"name":"quux","type":"None"},"children":[6]},"6":{"data":{"name":"corge","type":"function"},"children":[7,11,10]},"7":{"data":{"name":"bar","type":"None"},"children":[8,9]},"8":{"data":{"name":"baz","type":"function"},"children":[]},"9":{"data":{"name":"grault","type":"None"},"children":[]},"10":{"data":{"name":"garply","type":"function"},"children":[]},"11":{"data":{"name":"grault","type":"None"},"children":[]},"12":{"data":{"name":"waldo","type":"function"},"children":[13,19]},"13":{"data":{"name":"fred","type":"function"},"children":[14,15]},"14":{"data":{"name":"plugh","type":"function"},"children":[]},"15":{"data":{"name":"xyzzy","type":"function"},"children":[16]},"16":{"data":{"name":"thud","type":"function"},"children":[17,18]},"17":{"data":{"name":"baz","type":"function"},"children":[]},"18":{"data":{"name":"garply","type":"function"},"children":[]},"19":{"data":{"name":"garply","type":"function"},"children":[]}},{"20":{"data":{"name":"waldo","type":"function"},"children":[21]},"21":{"data":{"name":"bar","type":"None"},"children":[22,23]},"22":{"data":{"name":"baz","type":"function"},"children":[]},"23":{"data":{"name":"grault","type":"None"},"children":[]}}],"dataframe_indices":["node"],"dataframe":[{"node":0,"name":"foo","time (inc)":135,"time":0},{"node":1,"name":"bar","time (inc)":20,"time":5},{"node":2,"name":"baz","time (inc)":5,"time":5},{"node":3,"name":"grault","time (inc)":10,"time":10},{"node":4,"name":"qux","time (inc)":60,"time":0},{"node":5,"name":"quux","time (inc)":60,"time":5},{"node":6,"name":"corge","time (inc)":55,"time":10},{"node":7,"name":"bar","time (inc)":20,"time":5},{"node":8,"name":"baz","time 
(inc)":5,"time":5},{"node":9,"name":"grault","time (inc)":10,"time":10},{"node":10,"name":"garply","time (inc)":15,"time":15},{"node":11,"name":"grault","time (inc)":10,"time":10},{"node":12,"name":"waldo","time (inc)":55,"time":0},{"node":13,"name":"fred","time (inc)":40,"time":5},{"node":14,"name":"plugh","time (inc)":5,"time":5},{"node":15,"name":"xyzzy","time (inc)":30,"time":5},{"node":16,"name":"thud","time (inc)":25,"time":5},{"node":17,"name":"baz","time (inc)":5,"time":5},{"node":18,"name":"garply","time (inc)":15,"time":15},{"node":19,"name":"garply","time (inc)":15,"time":15},{"node":20,"name":"waldo","time (inc)":30,"time":10},{"node":21,"name":"bar","time (inc)":20,"time":5},{"node":22,"name":"baz","time (inc)":5,"time":5},{"node":23,"name":"grault","time (inc)":10,"time":10}],"inclusive_metrics":["time (inc)"],"exclusive_metrics":["time"]}