From 28597fab30571ddd488ecc3c57a5c8f3f52b1824 Mon Sep 17 00:00:00 2001 From: astanley Date: Mon, 6 Jan 2025 11:43:54 -0400 Subject: [PATCH 1/6] Added field for Name module --- workbench | 23 +++ workbench_fields.py | 448 +++++++++++++++++++++++++++++++------------- workbench_utils.py | 12 +- 3 files changed, 353 insertions(+), 130 deletions(-) diff --git a/workbench b/workbench index 05f3330..98e336a 100755 --- a/workbench +++ b/workbench @@ -342,6 +342,12 @@ def create(): node = link_field.create( config, field_definitions, node, row, custom_field ) + # name fields. + elif field_definitions[custom_field]["field_type"] == "name": + name_field = workbench_fields.NameField() + node = name_field.create( + config, field_definitions, node, row, custom_field + ) # For non-entity reference and non-typed relation fields (text, integer, boolean etc.). else: @@ -839,6 +845,11 @@ def update(): custom_field, node_field_values[custom_field], ) + elif field_definitions[custom_field]["field_type"] == "name": + name_field = workbench_fields.NameField() + node = name_field.update( + config, field_definitions, node, row, custom_field, node_field_values[custom_field] + ) # For non-entity reference and non-typed relation fields (text, etc.). else: @@ -2018,6 +2029,18 @@ def update_media() -> None: media_field_values[custom_field], ) + # Name fields. + elif field_definitions[custom_field]["field_type"] == "name": + link_field = workbench_fields.NameField() + patch_request_json = link_field.update( + config, + field_definitions, + patch_request_json, + row, + custom_field, + media_field_values[custom_field], + ) + # For non-entity reference and non-typed relation fields (text, etc.). 
else: simple_field = workbench_fields.SimpleField() diff --git a/workbench_fields.py b/workbench_fields.py index f95e1b8..6e6c7b3 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -81,8 +81,8 @@ def create(self, config, field_definitions, entity, row, field_name): field_name, id_field, field_definitions[field_name], subvalue ) if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True ): field_values.append({"value": subvalue, "format": text_format}) else: @@ -101,7 +101,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -163,8 +163,8 @@ def update( subvalue, ) if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True ): entity[field_name].append( {"value": subvalue, "format": text_format} @@ -205,8 +205,8 @@ def update( subvalue, ) if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True ): field_values.append({"value": subvalue, "format": text_format}) else: @@ -221,7 +221,7 @@ def update( field_values.append({"value": subvalue}) field_values = self.dedupe_values(field_values) entity[field_name] = field_values - + print(entity) return entity def 
dedupe_values(self, values): @@ -265,11 +265,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid EDTF field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid EDTF field value.' ) logging.warning(message) return valid_values @@ -280,11 +280,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid integer field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid integer field value.' ) logging.warning(message) return valid_values @@ -295,13 +295,13 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid ' - + field_definitions[field_name]["field_type"] - + " field value." + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid ' + + field_definitions[field_name]["field_type"] + + " field value." ) logging.warning(message) return valid_values @@ -312,11 +312,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + "\" is not in the field's list of allowed values." + 'Value "' + + subvalue + + '" in field "' + + field_name + + "\" is not in the field's list of allowed values." 
) logging.warning(message) return valid_values @@ -415,7 +415,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -524,11 +524,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid Geolocation field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Geolocation field value.' ) logging.warning(message) return valid_values @@ -615,7 +615,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -722,11 +722,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid Link field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Link field value.' 
) logging.warning(message) return valid_values @@ -755,9 +755,9 @@ def serialize(self, config, field_definitions, field_name, field_data): subvalues = list() for subvalue in field_data: if ( - "title" in subvalue - and subvalue["title"] is not None - and subvalue["title"] != "" + "title" in subvalue + and subvalue["title"] is not None + and subvalue["title"] != "" ): subvalues.append(subvalue["uri"] + "%%" + subvalue["title"]) else: @@ -843,7 +843,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -1010,8 +1010,8 @@ def serialize(self, config, field_definitions, field_name, field_data): subvalues = list() for subvalue in field_data: if ( - config["export_csv_term_mode"] == "name" - and subvalue["target_type"] == "taxonomy_term" + config["export_csv_term_mode"] == "name" + and subvalue["target_type"] == "taxonomy_term" ): # Output term names, with vocab IDs (aka namespaces). 
vocab_id = get_term_vocab(config, subvalue["target_id"]) @@ -1089,7 +1089,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -1262,6 +1262,198 @@ def serialize(self, config, field_definitions, field_name, field_data): return subvalues[0] +class NameField: + """Functions for handling fields with 'name' Drupal field data type. + All functions return an "entity" dictionary that is passed to Requests' "json" + parameter. + + Note: this class assumes that the entity has the field identified in 'field_name'. + Callers should pre-emptively confirm that. For an example, see code near the top + of workbench.update(). + """ + + def __init__(self): + self.field_names = field_names = ['title', 'given', 'middle', 'family', 'generational', 'credentials'] + self.sf = SimpleField() + + def create(self, config, field_definitions, entity, row, field_name): + """Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). + entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. + field_name : string + The Drupal fieldname/CSV column header. + Returns + ------- + dictionary + A dictionary representing the entity that is POSTed to Drupal as JSON. 
+ """ + if not row[field_name]: + return entity + field_values = [] + id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") + all_names = row[field_name].split(config["subdelimiter"]) + for name in all_names: + name_parts = name.split(':') + field_value = {self.field_names[i]: name_parts[i] for i in range(len(name_parts))} + field_values.append(field_value) + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if -1 < cardinality < len(field_values): + log_field_cardinality_violation(field_name, id_field, str(cardinality)) + field_values = field_values[:cardinality] + entity[field_name] = field_values + return entity + + def update( + self, config, field_definitions, entity, row, field_name, entity_field_values + ): + """Note: this method appends incoming CSV values to existing values, replaces existing field + values with incoming values, or deletes all values from fields, depending on whether + config['update_mode'] is 'append', 'replace', or 'delete'. It doesn not replace individual + values within fields. + """ + """Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). + entity : dict + The dict that will be POSTed to Drupal as JSON. + row : OrderedDict. + The current CSV record. + field_name : string + The Drupal fieldname/CSV column header. + entity_field_values : list + List of dictionaries containing existing value(s) for field_name in the entity being updated. + Returns + ------- + dictionary + A dictionary represeting the entity that is PATCHed to Drupal as JSON. 
+ """ + if config["update_mode"] == "delete": + entity[field_name] = [] + return entity + + if not row[field_name]: + return entity + + if field_name not in entity: + entity[field_name] = [] + + if config["task"] == "update_terms": + entity_id_field = "term_id" + if config["task"] == "update": + entity_id_field = "node_id" + if config["task"] == "update_media": + entity_id_field = "media_id" + + cardinality = int(field_definitions[field_name].get("cardinality", -1)) + if config["update_mode"] == "append": + subvalues = str(row[field_name]).split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + for subvalue in subvalues: + subvalue = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + subvalue, + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + entity[field_name].append( + {"value": subvalue, "format": text_format} + ) + else: + if field_definitions[field_name][ + "field_type" + ] == "integer" and value_is_numeric(subvalue): + subvalue = int(subvalue) + if field_definitions[field_name][ + "field_type" + ] == "float" and value_is_numeric(subvalue, allow_decimals=True): + subvalue = float(subvalue) + entity[field_name].append({"value": subvalue}) + entity[field_name] = self.dedupe_values(entity[field_name]) + if -1 < cardinality < len(entity[field_name]): + log_field_cardinality_violation( + field_name, row[entity_id_field], str(cardinality) + ) + entity[field_name] = entity[field_name][:cardinality] + if config["update_mode"] == "replace": + field_values = [] + subvalues = str(row[field_name]).split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + subvalues = self.dedupe_values(subvalues) + name_list = [] + if -1 < cardinality < len(subvalues): + log_field_cardinality_violation( + field_name, 
row[entity_id_field], str(cardinality) + ) + subvalues = subvalues[:cardinality] + + for subvalue in subvalues: + subvalue = truncate_csv_value( + field_name, + row[entity_id_field], + field_definitions[field_name], + subvalue, + ) + name_parts = subvalue.split(':') + name_dict = {self.field_names[i]: name_parts[i] for i in range(len(name_parts))} + field_values.append(name_dict) + field_values = self.dedupe_values(field_values) + entity[field_name] = field_values + return entity + + def dedupe_values(self, values): + return self.sf.dedupe_values(values) + + def remove_invalid_values(self, config, field_definitions, field_name, values): + """Removes invalid entries from 'values'.""" + """Parameters + ---------- + config : dict + The configuration settings defined by workbench_config.get_config(). + field_definitions : dict + The field definitions object defined by get_field_definitions(). + field_name : string + The Drupal fieldname/CSV column header. + values : list + List containing strings split from CSV values. + Returns + ------- + list + A list of valid field values. + """ + valid_values = list() + for subvalue in values: + if subvalue.count(':') == 5: + valid_values.append(subvalue) + else: + message = (f"Value '{subvalue}' in field '{field_name}' requires exactly 6 values, any of which can be blank") + logging.warning(message) + return valid_values + + def serialize(self, config, field_definitions, field_name, field_data): + serialized_data = [] + for name in field_data: + serialized_data.append(':'.join(str(value) for value in name.values())) + return '|'.join(serialized_data) + + class AuthorityLinkField: """Functions for handling fields with 'authority_link' Drupal field data type. 
All functions return a "entity" dictionary that is passed to Requests' "json" @@ -1306,7 +1498,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -1410,19 +1602,19 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values = list() for subvalue in values: if ( - validate_authority_link_value( - subvalue, field_definitions[field_name]["authority_sources"] - ) - is True + validate_authority_link_value( + subvalue, field_definitions[field_name]["authority_sources"] + ) + is True ): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid Authority Link field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Authority Link field value.' 
) logging.warning(message) return valid_values @@ -1516,7 +1708,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -1612,19 +1804,19 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values = list() for subvalue in values: if ( - validate_media_track_value( - subvalue, field_definitions[field_name]["authority_sources"] - ) - is True + validate_media_track_value( + subvalue, field_definitions[field_name]["authority_sources"] + ) + is True ): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid Authority Link field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Authority Link field value.' 
) logging.warning(message) return valid_values @@ -1653,14 +1845,14 @@ def serialize(self, config, field_definitions, field_name, field_data): subvalues = list() for subvalue in field_data: if all( - "label" in subvalue, - subvalue["label"] is not None, - "kind" in subvalue, - subvalue["kind"] is not None, - "srclang" in subvalue, - subvalue["srclang"] is not None, - "url" in subvalue, - subvalue["url"] is not None, + "label" in subvalue, + subvalue["label"] is not None, + "kind" in subvalue, + subvalue["kind"] is not None, + "srclang" in subvalue, + subvalue["srclang"] is not None, + "url" in subvalue, + subvalue["url"] is not None, ): serialized = f"{subvalue['label']}:{subvalue['kind']}:{subvalue['srclang']}:{os.path.basename(subvalue['url'])}" subvalues.append(serialized) @@ -1723,7 +1915,7 @@ def create(self, config, field_definitions, entity, row, field_name): .get(field_name, {}) ) subdelimiter = ( - paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] + paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] ) subvalues = str(row[field_name]).split(subdelimiter) @@ -1771,10 +1963,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Entity reference fields (taxonomy_term and node). if ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "entity_reference" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "entity_reference" ): entity_reference_field = EntityReferenceField() paragraph = entity_reference_field.create( @@ -1787,10 +1979,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Entity reference revision fields (paragraphs). 
elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "entity_reference_revisions" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "entity_reference_revisions" ): entity_reference_revisions_field = EntityReferenceRevisionsField() paragraph = entity_reference_field.create( @@ -1803,10 +1995,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Typed relation fields. elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "typed_relation" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "typed_relation" ): typed_relation_field = TypedRelationField() paragraph = typed_relation_field.create( @@ -1819,10 +2011,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Geolocation fields. elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "geolocation" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "geolocation" ): geolocation_field = GeolocationField() paragraph = geolocation_field.create( @@ -1835,10 +2027,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Link fields. elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "link" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "link" ): link_field = LinkField() paragraph = link_field.create( @@ -1851,10 +2043,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Authority Link fields. 
elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "authority_link" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "authority_link" ): link_field = AuthorityLinkField() paragraph = link_field.create( @@ -1915,7 +2107,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -2011,7 +2203,7 @@ def serialize(self, config, field_definitions, field_name, field_data): .get(field_name, {}) ) subdelimiter = ( - paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] + paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] ) # Cache paragraph field definitions @@ -2045,10 +2237,10 @@ def serialize(self, config, field_definitions, field_name, field_data): continue # Entity reference fields (taxonomy term and node). if ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "entity_reference" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "entity_reference" ): serialized_field = EntityReferenceField() paragraph_parts.append( @@ -2061,10 +2253,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Entity reference revision fields (mostly paragraphs). 
elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "entity_reference_revisions" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "entity_reference_revisions" ): serialized_field = EntityReferenceRevisionsField() paragraph_parts.append( @@ -2077,10 +2269,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Typed relation fields (currently, only taxonomy term) elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "typed_relation" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "typed_relation" ): serialized_field = TypedRelationField() paragraph_parts.append( @@ -2093,10 +2285,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Geolocation fields. elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "geolocation" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "geolocation" ): serialized_field = GeolocationField() paragraph_parts.append( @@ -2109,10 +2301,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Link fields. elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "link" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "link" ): serialized_field = LinkField() paragraph_parts.append( @@ -2125,10 +2317,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Authority Link fields. 
elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "authority_link" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "authority_link" ): serialized_field = AuthorityLinkField() paragraph_parts.append( diff --git a/workbench_utils.py b/workbench_utils.py index d0ee378..124c1cc 100644 --- a/workbench_utils.py +++ b/workbench_utils.py @@ -1024,9 +1024,10 @@ def ping_remote_file(config, url): """ sections = urllib.parse.urlparse(url) + headers = {'user-agent': config["user_agent"]} try: response = requests.head( - url, allow_redirects=True, verify=config["secure_ssl_only"] + url, allow_redirects=True, verify=config["secure_ssl_only"], headers=headers ) return response.status_code except requests.exceptions.Timeout as err_timeout: @@ -9305,9 +9306,10 @@ def check_file_exists(config, filename): """ # It's a remote file. if filename.startswith("http"): + headers = {'user-agent': config["user_agent"]} try: head_response = requests.head( - filename, allow_redirects=True, verify=config["secure_ssl_only"] + filename, allow_redirects=True, verify=config["secure_ssl_only"], headers=headers ) if head_response.status_code == 200: return True @@ -10214,6 +10216,12 @@ def serialize_field_json(config, field_definitions, field_name, field_data): csv_field_data = serialized_field.serialize( config, field_definitions, field_name, field_data ) + # Name fields. + elif field_definitions[field_name]["field_type"] == "name": + serialized_field = workbench_fields.NameField() + csv_field_data = serialized_field.serialize( + config, field_definitions, field_name, field_data + ) # Simple fields. 
else: serialized_field = workbench_fields.SimpleField() From 693628f049eef8be26de9be8103272dfd5aa5fe5 Mon Sep 17 00:00:00 2001 From: astanley Date: Mon, 6 Jan 2025 12:24:58 -0400 Subject: [PATCH 2/6] Reformatted --- workbench_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/workbench_utils.py b/workbench_utils.py index 124c1cc..3847815 100644 --- a/workbench_utils.py +++ b/workbench_utils.py @@ -1024,7 +1024,7 @@ def ping_remote_file(config, url): """ sections = urllib.parse.urlparse(url) - headers = {'user-agent': config["user_agent"]} + headers = {"user-agent": config["user_agent"]} try: response = requests.head( url, allow_redirects=True, verify=config["secure_ssl_only"], headers=headers @@ -9306,10 +9306,13 @@ def check_file_exists(config, filename): """ # It's a remote file. if filename.startswith("http"): - headers = {'user-agent': config["user_agent"]} + headers = {"user-agent": config["user_agent"]} try: head_response = requests.head( - filename, allow_redirects=True, verify=config["secure_ssl_only"], headers=headers + filename, + allow_redirects=True, + verify=config["secure_ssl_only"], + headers=headers, ) if head_response.status_code == 200: return True From ebd52a1374d25e8b4327687d9778f1a656143e0f Mon Sep 17 00:00:00 2001 From: astanley Date: Mon, 6 Jan 2025 12:46:07 -0400 Subject: [PATCH 3/6] Removed debug line --- workbench_fields.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/workbench_fields.py b/workbench_fields.py index 6e6c7b3..afbca7e 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -221,7 +221,6 @@ def update( field_values.append({"value": subvalue}) field_values = self.dedupe_values(field_values) entity[field_name] = field_values - print(entity) return entity def dedupe_values(self, values): @@ -1301,7 +1300,6 @@ def create(self, config, field_definitions, entity, row, field_name): all_names = row[field_name].split(config["subdelimiter"]) for name in all_names: name_parts = name.split(':') 
- field_value = {self.field_names[i]: name_parts[i] for i in range(len(name_parts))} field_values.append(field_value) cardinality = int(field_definitions[field_name].get("cardinality", -1)) if -1 < cardinality < len(field_values): From 99d636b89a34a4f4d668c2c4150236acc8a7d769 Mon Sep 17 00:00:00 2001 From: astanley Date: Mon, 6 Jan 2025 14:09:25 -0400 Subject: [PATCH 4/6] Black Formatting --- workbench | 7 +- workbench_fields.py | 285 +++++++++++++++++++++++--------------------- 2 files changed, 153 insertions(+), 139 deletions(-) diff --git a/workbench b/workbench index 98e336a..c796eb6 100755 --- a/workbench +++ b/workbench @@ -848,7 +848,12 @@ def update(): elif field_definitions[custom_field]["field_type"] == "name": name_field = workbench_fields.NameField() node = name_field.update( - config, field_definitions, node, row, custom_field, node_field_values[custom_field] + config, + field_definitions, + node, + row, + custom_field, + node_field_values[custom_field], ) # For non-entity reference and non-typed relation fields (text, etc.). 
diff --git a/workbench_fields.py b/workbench_fields.py index afbca7e..a3523ab 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -81,8 +81,8 @@ def create(self, config, field_definitions, entity, row, field_name): field_name, id_field, field_definitions[field_name], subvalue ) if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True ): field_values.append({"value": subvalue, "format": text_format}) else: @@ -101,7 +101,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -163,8 +163,8 @@ def update( subvalue, ) if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True ): entity[field_name].append( {"value": subvalue, "format": text_format} @@ -205,8 +205,8 @@ def update( subvalue, ) if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True ): field_values.append({"value": subvalue, "format": text_format}) else: @@ -264,11 +264,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid EDTF field 
value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid EDTF field value.' ) logging.warning(message) return valid_values @@ -279,11 +279,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid integer field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid integer field value.' ) logging.warning(message) return valid_values @@ -294,13 +294,13 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid ' - + field_definitions[field_name]["field_type"] - + " field value." + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid ' + + field_definitions[field_name]["field_type"] + + " field value." ) logging.warning(message) return valid_values @@ -311,11 +311,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + "\" is not in the field's list of allowed values." + 'Value "' + + subvalue + + '" in field "' + + field_name + + "\" is not in the field's list of allowed values." 
) logging.warning(message) return valid_values @@ -414,7 +414,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -523,11 +523,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid Geolocation field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Geolocation field value.' ) logging.warning(message) return valid_values @@ -614,7 +614,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -721,11 +721,11 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid Link field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Link field value.' 
) logging.warning(message) return valid_values @@ -754,9 +754,9 @@ def serialize(self, config, field_definitions, field_name, field_data): subvalues = list() for subvalue in field_data: if ( - "title" in subvalue - and subvalue["title"] is not None - and subvalue["title"] != "" + "title" in subvalue + and subvalue["title"] is not None + and subvalue["title"] != "" ): subvalues.append(subvalue["uri"] + "%%" + subvalue["title"]) else: @@ -842,7 +842,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -1009,8 +1009,8 @@ def serialize(self, config, field_definitions, field_name, field_data): subvalues = list() for subvalue in field_data: if ( - config["export_csv_term_mode"] == "name" - and subvalue["target_type"] == "taxonomy_term" + config["export_csv_term_mode"] == "name" + and subvalue["target_type"] == "taxonomy_term" ): # Output term names, with vocab IDs (aka namespaces). 
vocab_id = get_term_vocab(config, subvalue["target_id"]) @@ -1088,7 +1088,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -1272,7 +1272,14 @@ class NameField: """ def __init__(self): - self.field_names = field_names = ['title', 'given', 'middle', 'family', 'generational', 'credentials'] + self.field_names = field_names = [ + "title", + "given", + "middle", + "family", + "generational", + "credentials", + ] self.sf = SimpleField() def create(self, config, field_definitions, entity, row, field_name): @@ -1299,7 +1306,7 @@ def create(self, config, field_definitions, entity, row, field_name): id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") all_names = row[field_name].split(config["subdelimiter"]) for name in all_names: - name_parts = name.split(':') + name_parts = name.split(":") field_values.append(field_value) cardinality = int(field_definitions[field_name].get("cardinality", -1)) if -1 < cardinality < len(field_values): @@ -1309,7 +1316,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -1366,8 +1373,8 @@ def update( subvalue, ) if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True + 
"formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True ): entity[field_name].append( {"value": subvalue, "format": text_format} @@ -1409,8 +1416,10 @@ def update( field_definitions[field_name], subvalue, ) - name_parts = subvalue.split(':') - name_dict = {self.field_names[i]: name_parts[i] for i in range(len(name_parts))} + name_parts = subvalue.split(":") + name_dict = { + self.field_names[i]: name_parts[i] for i in range(len(name_parts)) + } field_values.append(name_dict) field_values = self.dedupe_values(field_values) entity[field_name] = field_values @@ -1438,18 +1447,18 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): """ valid_values = list() for subvalue in values: - if subvalue.count(':') == 5: + if subvalue.count(":") == 5: valid_values.append(subvalue) else: - message = (f"Value '{subvalue}' in field '{field_name}' requires exactly 6 values, any of which can be blank") + message = f"Value '{subvalue}' in field '{field_name}' requires exactly 6 values, any of which can be blank" logging.warning(message) return valid_values def serialize(self, config, field_definitions, field_name, field_data): serialized_data = [] for name in field_data: - serialized_data.append(':'.join(str(value) for value in name.values())) - return '|'.join(serialized_data) + serialized_data.append(":".join(str(value) for value in name.values())) + return "|".join(serialized_data) class AuthorityLinkField: @@ -1496,7 +1505,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -1600,19 +1609,19 @@ def 
remove_invalid_values(self, config, field_definitions, field_name, values): valid_values = list() for subvalue in values: if ( - validate_authority_link_value( - subvalue, field_definitions[field_name]["authority_sources"] - ) - is True + validate_authority_link_value( + subvalue, field_definitions[field_name]["authority_sources"] + ) + is True ): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid Authority Link field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Authority Link field value.' ) logging.warning(message) return valid_values @@ -1706,7 +1715,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -1802,19 +1811,19 @@ def remove_invalid_values(self, config, field_definitions, field_name, values): valid_values = list() for subvalue in values: if ( - validate_media_track_value( - subvalue, field_definitions[field_name]["authority_sources"] - ) - is True + validate_media_track_value( + subvalue, field_definitions[field_name]["authority_sources"] + ) + is True ): valid_values.append(subvalue) else: message = ( - 'Value "' - + subvalue - + '" in field "' - + field_name - + '" is not a valid Authority Link field value.' + 'Value "' + + subvalue + + '" in field "' + + field_name + + '" is not a valid Authority Link field value.' 
) logging.warning(message) return valid_values @@ -1843,14 +1852,14 @@ def serialize(self, config, field_definitions, field_name, field_data): subvalues = list() for subvalue in field_data: if all( - "label" in subvalue, - subvalue["label"] is not None, - "kind" in subvalue, - subvalue["kind"] is not None, - "srclang" in subvalue, - subvalue["srclang"] is not None, - "url" in subvalue, - subvalue["url"] is not None, + "label" in subvalue, + subvalue["label"] is not None, + "kind" in subvalue, + subvalue["kind"] is not None, + "srclang" in subvalue, + subvalue["srclang"] is not None, + "url" in subvalue, + subvalue["url"] is not None, ): serialized = f"{subvalue['label']}:{subvalue['kind']}:{subvalue['srclang']}:{os.path.basename(subvalue['url'])}" subvalues.append(serialized) @@ -1913,7 +1922,7 @@ def create(self, config, field_definitions, entity, row, field_name): .get(field_name, {}) ) subdelimiter = ( - paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] + paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] ) subvalues = str(row[field_name]).split(subdelimiter) @@ -1961,10 +1970,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Entity reference fields (taxonomy_term and node). if ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "entity_reference" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "entity_reference" ): entity_reference_field = EntityReferenceField() paragraph = entity_reference_field.create( @@ -1977,10 +1986,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Entity reference revision fields (paragraphs). 
elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "entity_reference_revisions" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "entity_reference_revisions" ): entity_reference_revisions_field = EntityReferenceRevisionsField() paragraph = entity_reference_field.create( @@ -1993,10 +2002,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Typed relation fields. elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "typed_relation" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "typed_relation" ): typed_relation_field = TypedRelationField() paragraph = typed_relation_field.create( @@ -2009,10 +2018,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Geolocation fields. elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "geolocation" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "geolocation" ): geolocation_field = GeolocationField() paragraph = geolocation_field.create( @@ -2025,10 +2034,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Link fields. elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "link" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "link" ): link_field = LinkField() paragraph = link_field.create( @@ -2041,10 +2050,10 @@ def create(self, config, field_definitions, entity, row, field_name): # Authority Link fields. 
elif ( - self.paragraph_field_definitions[paragraph_type][p_field][ - "field_type" - ] - == "authority_link" + self.paragraph_field_definitions[paragraph_type][p_field][ + "field_type" + ] + == "authority_link" ): link_field = AuthorityLinkField() paragraph = link_field.create( @@ -2105,7 +2114,7 @@ def create(self, config, field_definitions, entity, row, field_name): return entity def update( - self, config, field_definitions, entity, row, field_name, entity_field_values + self, config, field_definitions, entity, row, field_name, entity_field_values ): """Note: this method appends incoming CSV values to existing values, replaces existing field values with incoming values, or deletes all values from fields, depending on whether @@ -2201,7 +2210,7 @@ def serialize(self, config, field_definitions, field_name, field_data): .get(field_name, {}) ) subdelimiter = ( - paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] + paragraph_configs.get("subdelimiter", None) or config["subdelimiter"] ) # Cache paragraph field definitions @@ -2235,10 +2244,10 @@ def serialize(self, config, field_definitions, field_name, field_data): continue # Entity reference fields (taxonomy term and node). if ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "entity_reference" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "entity_reference" ): serialized_field = EntityReferenceField() paragraph_parts.append( @@ -2251,10 +2260,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Entity reference revision fields (mostly paragraphs). 
elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "entity_reference_revisions" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "entity_reference_revisions" ): serialized_field = EntityReferenceRevisionsField() paragraph_parts.append( @@ -2267,10 +2276,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Typed relation fields (currently, only taxonomy term) elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "typed_relation" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "typed_relation" ): serialized_field = TypedRelationField() paragraph_parts.append( @@ -2283,10 +2292,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Geolocation fields. elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "geolocation" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "geolocation" ): serialized_field = GeolocationField() paragraph_parts.append( @@ -2299,10 +2308,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Link fields. elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "link" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "link" ): serialized_field = LinkField() paragraph_parts.append( @@ -2315,10 +2324,10 @@ def serialize(self, config, field_definitions, field_name, field_data): ) # Authority Link fields. 
elif ( - self.paragraph_field_definitions[paragraph_type][field][ - "field_type" - ] - == "authority_link" + self.paragraph_field_definitions[paragraph_type][field][ + "field_type" + ] + == "authority_link" ): serialized_field = AuthorityLinkField() paragraph_parts.append( From 82c73412d61d712b1aa2b061d923d4bcf6ebaa39 Mon Sep 17 00:00:00 2001 From: Rosie Le Faive Date: Tue, 14 Jan 2025 12:03:51 -0400 Subject: [PATCH 5/6] Add name fields to update_terms. --- workbench | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/workbench b/workbench index c796eb6..db31e62 100755 --- a/workbench +++ b/workbench @@ -3307,7 +3307,20 @@ def update_terms(): term_field_values[custom_field], ) - # For non-entity reference and non-typed relation fields (text, etc.). + # Name fields. + elif field_definitions[custom_field]["field_type"] == "name": + link_field = workbench_fields.NameField() + term = link_field.update( + config, + field_definitions, + term, + row, + custom_field, + term_field_values[custom_field], + ) + + + # For non-entity reference and non-typed relation fields (text, etc.). else: simple_field = workbench_fields.SimpleField() term = simple_field.update( From fb8c20d6c2e3e7d51e502c7e380f782463ebc5ff Mon Sep 17 00:00:00 2001 From: Rosie Le Faive Date: Tue, 14 Jan 2025 12:06:17 -0400 Subject: [PATCH 6/6] Formatting. --- workbench | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/workbench b/workbench index db31e62..cc18e5b 100755 --- a/workbench +++ b/workbench @@ -342,7 +342,8 @@ def create(): node = link_field.create( config, field_definitions, node, row, custom_field ) - # name fields. + + # Name fields. elif field_definitions[custom_field]["field_type"] == "name": name_field = workbench_fields.NameField() node = name_field.create( @@ -845,6 +846,8 @@ def update(): custom_field, node_field_values[custom_field], ) + + # Name fields. 
elif field_definitions[custom_field]["field_type"] == "name": name_field = workbench_fields.NameField() node = name_field.update( @@ -3319,8 +3322,7 @@ def update_terms(): term_field_values[custom_field], ) - - # For non-entity reference and non-typed relation fields (text, etc.). + # For non-entity reference and non-typed relation fields (text, etc.). else: simple_field = workbench_fields.SimpleField() term = simple_field.update(