TheJacksonLaboratory · mellertd · Nov 11, 2022 · Aug 30, 2022 · Aug 30, 2022 · Aug 30, 2022
diff --git a/.github/workflows/run_tests_pr.yml b/.github/workflows/run_tests_pr.yml
@@ -16,7 +16,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install flake8 pytest
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+          pip install .[tables]
       - name: Test with pytest
         run: |
           pytest tests/

diff --git a/.github/workflows/run_tests_push.yml b/.github/workflows/run_tests_push.yml
@@ -1,5 +1,7 @@
 name: Run Tests on push
-on: push
+on: 
+  push:
+    branches: [ main ]
 jobs:
   test:
     runs-on: ubuntu-latest
@@ -16,7 +18,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install flake8 pytest
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+          pip install .[tables]
       - name: Test with pytest and generate coverage report/badge
         run: |
           pip install coverage coverage-badge

diff --git a/README.md b/README.md
@@ -6,7 +6,9 @@ A module with convenience functions for writing Python code that interacts with
 
 # Installation
 
-Just `pip install ezomero` and you should be good to go! The repo contains a `requirements.txt` file with the specific package versions we test `ezomero` with, but any Python>=3.8 and latest `omero-py` and `numpy` _should_ work -  note that this package is in active development!
+Just `pip install ezomero` and you should be good to go! The repo contains the specific package versions we test `ezomero` with in `setup.py`, but any Python>=3.8 and latest `omero-py` and `numpy` _should_ work -  note that this package is in active development!
+
+If you want `get_table` and `post_table` to work with Pandas dataframes, install `ezomero[tables]`, which pulls in the optional `pandas` dependency. Without it, `get_table` and `post_table` fall back to lists of row lists.
 
 # Usage
 

diff --git a/coverage.svg b/coverage.svg
diff --git a/ezomero/__init__.py b/ezomero/__init__.py
@@ -19,7 +19,8 @@
                      post_file_annotation,
                      post_project,
                      post_screen,
-                     post_roi)
+                     post_roi,
+                     post_table)
 from ._gets import (get_image,
                     get_image_ids,
                     get_project_ids,
@@ -37,6 +38,7 @@
                     get_user_id,
                     get_original_filepaths,
                     get_pyramid_levels,
+                    get_table,
                     get_shape)
 
 __all__ = ['post_dataset',
@@ -46,6 +48,7 @@
            'post_project',
            'post_screen',
            'post_roi',
+           'post_table',
            'get_image',
            'get_image_ids',
            'get_project_ids',
@@ -63,6 +66,7 @@
            'get_user_id',
            'get_original_filepaths',
            'get_pyramid_levels',
+           'get_table',
            'get_shape',
            'put_map_annotation',
            'filter_by_filename',

diff --git a/ezomero/_gets.py b/ezomero/_gets.py
@@ -9,6 +9,13 @@
 from omero.sys import Parameters
 from omero.model import enums as omero_enums
 from .rois import Point, Line, Rectangle, Ellipse, Polygon, Polyline, Label
+import importlib.util
+# try importing pandas
+if (importlib.util.find_spec('pandas')):
+    import pandas as pd
+    has_pandas = True
+else:
+    has_pandas = False
 
 
 # gets
@@ -863,7 +870,7 @@ def get_file_annotation(conn, file_ann_id, folder_path=None,
     conn : ``omero.gateway.BlitzGateway`` object
         OMERO connection.
     file_ann_id : int
-        ID of map annotation to get.
+        ID of file annotation to get.
     folder_path : str
         Path where file annotation will be saved. Defaults to local script
         directory.
@@ -1077,6 +1084,43 @@ def get_pyramid_levels(conn, image_id, across_groups=True):
     return levels
 
 
+@do_across_groups
+def get_table(conn, file_ann_id, across_groups=True):
+    """Get a table from its FileAnnotation object.
+
+    Parameters
+    ----------
+    conn : ``omero.gateway.BlitzGateway`` object
+        OMERO connection.
+    file_ann_id : int
+        ID of FileAnnotation table to get.
+    across_groups : bool, optional
+        Defines cross-group behavior of function - set to
+        ``False`` to disable it.
+
+    Returns
+    -------
+    table : object
+        Object containing the actual table. It can be either a list of
+        row-lists or a pandas Dataframe in case the optional pandas dependency
+        was installed. ``None`` is returned (and a warning is logged) when
+        the FileAnnotation cannot be retrieved.
+
+    Raises
+    ------
+    TypeError
+        If ``file_ann_id`` is not an integer.
+
+    Examples
+    --------
+    >>> table = get_table(conn, 62)
+    >>> print(table[0])
+    ['ID', 'X', 'Y']
+    """
+    # Imported locally so this function does not depend on what the rest of
+    # the module happens to import.
+    import logging
+
+    if type(file_ann_id) is not int:
+        raise TypeError('File annotation ID must be an integer')
+    ann = conn.getObject('FileAnnotation', file_ann_id)
+    if ann is None:
+        # Without this guard the lookup below fails with an AttributeError
+        # on ``ann.getFile()``, which hides the real problem.
+        logging.warning(f'File annotation {file_ann_id} could not be found '
+                        '(check if you have permissions to it)')
+        return None
+    orig_table_file = conn.getObject('OriginalFile', ann.getFile().id)
+    resources = conn.c.sf.sharedResources()
+    table_obj = resources.openTable(orig_table_file._obj)
+    try:
+        table = _create_table(table_obj)
+    finally:
+        # Release the table handle even when reading/conversion fails.
+        table_obj.close()
+    return table
+
+
 @do_across_groups
 def get_shape(conn, shape_id, across_groups=True):
     """Get an ezomero shape object from an OMERO Shape id
@@ -1112,6 +1156,42 @@ def get_shape(conn, shape_id, across_groups=True):
     return _omero_shape_to_shape(omero_shape)
 
 
+def _create_table(table_obj):
+    """Read an open OMERO table into a DataFrame or a list of row lists.
+
+    Parameters
+    ----------
+    table_obj : OMERO table handle
+        Open table object; only its ``getHeaders``, ``getNumberOfRows`` and
+        ``read`` methods are used here.
+
+    Returns
+    -------
+    table : ``pandas.DataFrame`` or list of lists
+        When pandas is available, a DataFrame with one column per table
+        column. Otherwise a list whose first item is the list of column
+        names, followed by one list per table row.
+    """
+    # Column names and cell data are needed for both output formats, so they
+    # are read once up front instead of once per branch.
+    names = [col.name for col in table_obj.getHeaders()]
+    row_count = table_obj.getNumberOfRows()
+    data = table_obj.read(list(range(len(names))), 0, row_count)
+    col_values = [list(col.values) for col in data.columns]
+
+    # Use the module-level flag computed at import time rather than probing
+    # for pandas again on every call.
+    if has_pandas:
+        table = pd.DataFrame(columns=names)
+        for col, values in zip(data.columns, col_values):
+            table[col.name] = values
+        return table
+
+    # Data comes back column-major: transpose it into rows and prepend the
+    # header row.
+    return [names] + [list(row) for row in zip(*col_values)]
+
+
 def _omero_shape_to_shape(omero_shape):
     """ Helper function to convert ezomero shapes into omero shapes"""
     shape_type = omero_shape.ice_id().split("::omero::model::")[1]

diff --git a/ezomero/_posts.py b/ezomero/_posts.py
@@ -1,16 +1,25 @@
 import logging
 import mimetypes
 import numpy as np
+from uuid import uuid4
 from ._ezomero import do_across_groups, set_group
 from ._misc import link_datasets_to_project
 from omero.model import RoiI, PointI, LineI, RectangleI, EllipseI
 from omero.model import PolygonI, PolylineI, LabelI, LengthI, enums
 from omero.model import DatasetI, ProjectI, ScreenI
+from omero.grid import BoolColumn, LongColumn, StringColumn, DoubleColumn
 from omero.gateway import ProjectWrapper, DatasetWrapper
-from omero.gateway import ScreenWrapper
-from omero.gateway import MapAnnotationWrapper
+from omero.gateway import ScreenWrapper, FileAnnotationWrapper
+from omero.gateway import MapAnnotationWrapper, OriginalFileWrapper
 from omero.rtypes import rstring, rint, rdouble
 from .rois import Point, Line, Rectangle, Ellipse, Polygon, Polyline, Label
+import importlib.util
+# try importing pandas
+if (importlib.util.find_spec('pandas')):
+    import pandas as pd
+    has_pandas = True
+else:
+    has_pandas = False
 
 
 def post_dataset(conn, dataset_name, project_id=None, description=None,
@@ -237,7 +246,7 @@ def post_map_annotation(conn, object_type, object_id, kv_dict, ns,
     >>> post_map_annotation(conn, "Image", 56, d, ns)
     234
     """
-
+    print("group at begin", conn.getGroupFromContext().getId())
     if type(kv_dict) is not dict:
         raise TypeError('kv_dict must be of type `dict`')
 
@@ -246,13 +255,13 @@ def post_map_annotation(conn, object_type, object_id, kv_dict, ns,
         k = str(k)
         v = str(v)
         kv_pairs.append([k, v])
-
     obj = None
     if object_id is not None:
         if type(object_id) is not int:
             raise TypeError('object_ids must be integer')
         obj = conn.getObject(object_type, object_id)
         if obj is not None:
+            print("object group", obj.getDetails().group.id.val)
             ret = set_group(conn, obj.getDetails().group.id.val)
             if ret is False:
                 logging.warning('Cannot change into group '
@@ -264,7 +273,7 @@ def post_map_annotation(conn, object_type, object_id, kv_dict, ns,
             return None
     else:
         raise TypeError('Object ID cannot be empty')
-
+    print("group after", conn.getGroupFromContext().getId())
     map_ann = MapAnnotationWrapper(conn)
     map_ann.setNs(str(ns))
     map_ann.setValue(kv_pairs)
@@ -522,6 +531,135 @@ def post_roi(conn, image_id, shapes, name=None, description=None,
     return roi.getId().getValue()
 
 
+def post_table(conn, table, object_type, object_id, title="", headers=True):
+    """Create new table and link it to an OMERO object.
+
+    Parameters
+    ----------
+    conn : ``omero.gateway.BlitzGateway`` object
+        OMERO connection.
+    table : object
+        Object containing the actual table. It can be either a list of
+        row-lists or a pandas Dataframe in case the optional pandas dependency
+        was installed. Note that each column should be of a single type;
+        mixed-type columns will be ignored. Types supported: int, string,
+        float, boolean.
+    object_type : str
+       OMERO object type, passed to ``BlitzGateway.getObject``
+    object_id : int
+        ID of object to which the new Table will be linked.
+    title : str, optional
+        Title for the table. If none is specified, a `Table:ID` name is picked,
+        with a random UUID. Note that table names need to be unique!
+    headers : bool, optional
+        Whether the first line of the `table` object should be interpreted
+        as column headers or not. Defaults to `True` and is ignored for pandas
+        Dataframes.
+
+
+    Returns
+    -------
+    TableFile_id : int
+        ID of newly created FileAnnotation containing the new Table.
+        ``None`` is returned (and a warning is logged) when the target
+        object cannot be found or its group cannot be switched into.
+
+
+    Raises
+    ------
+    TypeError
+        If ``object_id`` is ``None`` or not an integer, or if ``table`` is
+        neither a list nor a pandas Dataframe.
+
+
+    Notes
+    -----
+    Currently not working with `across_groups` - the `OriginalFile` seems to
+    ignore setting groups dynamically and always does it on the original
+    connection group, causing issues.
+
+    Examples
+    --------
+    >>> columns = ['ID', 'X', 'Y']
+    >>> table = [columns, [1, 10, 20], [2, 30, 40]]
+    >>> post_table(conn, table, "Image", 99, title='My Table', headers=True)
+    234
+    """
+    # Validate the target ID before doing any server round trip.
+    if object_id is None:
+        raise TypeError('Object ID cannot be empty')
+    if type(object_id) is not int:
+        raise TypeError('object_ids must be integer')
+
+    table_name = title if title else f"Table:{uuid4()}"
+
+    obj = conn.getObject(object_type, object_id)
+    if obj is None:
+        logging.warning(f'Object {object_id} could not be found '
+                        '(check if you have permissions to it)')
+        return None
+    # The table is created in the group of the object it is linked to.
+    if set_group(conn, obj.getDetails().group.id.val) is False:
+        logging.warning('Cannot change into group '
+                        f'where object {object_id} is.')
+        return None
+
+    columns = create_columns(table, headers)
+    resources = conn.c.sf.sharedResources()
+    repository_id = resources.repositories().descriptions[0].getId().getValue()
+    # Kept under its own name so the ``table`` argument is not shadowed by
+    # the server-side handle.
+    table_obj = resources.newTable(repository_id, table_name)
+    try:
+        table_obj.initialize(columns)
+        table_obj.addData(columns)
+        orig_file = table_obj.getOriginalFile()
+    finally:
+        # Release the table handle even when initialisation or upload fails.
+        table_obj.close()
+
+    file_ann = FileAnnotationWrapper(conn)
+    file_obj = OriginalFileWrapper(conn, orig_file)
+    file_obj.save()
+    file_ann.setFile(file_obj)
+    file_ann = obj.linkAnnotation(file_ann)
+    return file_ann.id
+
+
+def create_columns(table, headers):
+    """Helper function to create the correct column types from a table.
+
+    Parameters
+    ----------
+    table : list of lists or ``pandas.DataFrame``
+        Table data. A list is interpreted as a list of rows.
+    headers : bool
+        For list input, whether the first row holds the column titles. When
+        ``False``, titles ``"column 0"``, ``"column 1"``, ... are generated.
+        Not used for DataFrames.
+
+    Returns
+    -------
+    cols : list
+        ``BoolColumn``/``LongColumn``/``DoubleColumn``/``StringColumn``
+        objects filled with the column data. List columns holding more than
+        one Python type are skipped. An empty or header-only list yields an
+        empty result.
+
+    Raises
+    ------
+    TypeError
+        If ``table`` is neither a list nor a pandas Dataframe.
+    """
+    cols = []
+    if isinstance(table, list):
+        if not table:
+            # No rows at all: nothing to infer titles or types from.
+            return cols
+        if headers:
+            titles = table[0]
+            data = table[1:]
+        else:
+            titles = [f"column {i}" for i in range(len(table[0]))]
+            data = table
+        # transposing data matrix to have columns as first dimension
+        data = [list(i) for i in zip(*data)]
+        # zip() stops at the shorter sequence, so a header-only table (no
+        # data columns after transposition) simply produces no columns.
+        for title, values in zip(titles, data):
+            types = {type(v) for v in values}
+            if len(types) != 1:
+                # mixed-type columns are ignored
+                continue
+            col_type = types.pop()
+            # Exact type identity keeps bool and int columns apart.
+            if col_type is bool:
+                cols.append(BoolColumn(title, '', values))
+            elif col_type is int:
+                cols.append(LongColumn(title, '', values))
+            elif col_type is float:
+                cols.append(DoubleColumn(title, '', values))
+            elif col_type is str:
+                # Clamp to 1 so a column of empty strings does not end up
+                # with a zero width.
+                max_size = max(1, len(max(values, key=len)))
+                cols.append(StringColumn(title, '', max_size, values))
+    elif has_pandas and isinstance(table, pd.DataFrame):
+        # ``pd`` only exists when pandas could be imported; checking the
+        # flag first lets non-list input fall through to the TypeError below
+        # instead of failing with a NameError.
+        df = table.convert_dtypes()
+        # NOTE: columns are emitted grouped by dtype (ints, floats, strings,
+        # bools), not in the DataFrame's own column order.
+        ints = df.select_dtypes(include='int')
+        for col in ints:
+            cols.append(LongColumn(col, '', df[col].tolist()))
+        floats = df.select_dtypes(include='float')
+        for col in floats:
+            cols.append(DoubleColumn(col, '', df[col].tolist()))
+        strings = df.select_dtypes(include='string')
+        for col in strings:
+            max_size = max(1, df[col].map(len).max())
+            cols.append(StringColumn(col, '', max_size,
+                                     df[col].tolist()))
+        bools = df.select_dtypes(include='bool')
+        for col in bools:
+            cols.append(BoolColumn(col, '', df[col].tolist()))
+    else:
+        raise TypeError("Table must be a list of row lists or "
+                        "pandas Dataframe")
+    return cols
+
+
 def _shape_to_omero_shape(shape, fill_color, stroke_color, stroke_width):
     """ Helper function to convert ezomero shapes into omero shapes"""
     if isinstance(shape, Point):

diff --git a/requirements.txt b/requirements.txt