Source code for flowstrider.converters.metadata_xsxl_converter

# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR)
#
# SPDX-License-Identifier: BSD-3-Clause

import json
import os
import pathlib
import typing

from openpyxl import Workbook, load_workbook, styles
from openpyxl.cell.rich_text import CellRichText, TextBlock
from openpyxl.cell.text import InlineFont
from openpyxl.formatting.rule import FormulaRule
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.cell_range import CellRange
from openpyxl.worksheet.datavalidation import DataValidation

from flowstrider import settings
from flowstrider.models import common_models, dataflowdiagram
from flowstrider.rules import attributes_dict


[docs] def determine_type(entity) -> str: if isinstance(entity, common_models.Node): if "STRIDE:Interactor" in entity.tags: return "Node: Interactor" if "STRIDE:DataStore" in entity.tags: return "Node: DataStore" if "STRIDE:Process" in entity.tags: return "Node: Process" if isinstance(entity, common_models.Edge): return "Edge: Dataflow" if isinstance(entity, common_models.Cluster): return "TrustBoundary" if isinstance(entity, dataflowdiagram.DataflowDiagram): return "DataflowDiagram" return "Not defined"
# Generate metadata overview
[docs] def metadata_check(dfd: dataflowdiagram.DataflowDiagram, output_path: pathlib.Path): """Generates an xlsx file highlighting missing metadata that would be helpful to add to a dataflowdiagram Args: dfd: the dataflowdiagram for which the metadata overview should be generated """ MAX_COLUMN_WIDTH = 40 _ = settings.lang_out.gettext # Define formatting header_format = { "font": styles.Font(bold=True), "fill": styles.PatternFill( start_color="E0DEDE", end_color="E0DEDE", fill_type="solid" ), } entity_info_format = { "fill": styles.PatternFill( start_color="E0DEDE", end_color="E0DEDE", fill_type="solid" ) } # Format for when an attribute value has not been defined missing_format = { "fill": styles.PatternFill( start_color="FFDDDD", end_color="FFDDDD", fill_type="solid" ) } # Format for when an attribute value is present but not valid invalid_format = { "fill": styles.PatternFill( start_color="FF9999", end_color="FF9999", fill_type="solid" ) } # Format for when an attribute value is valid valid_format = { "fill": styles.PatternFill( start_color="D9EAD3", end_color="D9EAD3", fill_type="solid" ) } default_format = {"alignment": styles.Alignment(wrap_text=True, vertical="top")} # Extract all attribute keys that are needed for the wanted rule_sets temp_keys = [] for key, value in attributes_dict.attributes.items(): for tag in value.corresponding_rule_sets: if tag in dfd.tags: temp_keys.append(key) break # Creates a sorted list of the metadata keys, # that will be used as the second, hidden row in the xlsx file all_metadata_keys = sorted(temp_keys) all_explanations = [] for key in all_metadata_keys: all_explanations.append(attributes_dict.attributes[key].explanation) all_display_names = [] for key in all_metadata_keys: all_display_names.append(attributes_dict.attributes[key].display_name) header = [_("Entity Type"), _("Entity ID"), _("Entity Name")] + all_display_names hidden_keys = ["", "", ""] + all_metadata_keys explanations = ["", "", ""] + all_explanations # Create workbook and sheets workbook = Workbook() worksheet = workbook.active worksheet.title = _("Attributes") # Add a ReadMe sheet with explanation for the usage worksheet_readMe = workbook.create_sheet("ReadMe") inline_bold = InlineFont(b=True) readMe_text = CellRichText( TextBlock( inline_bold, _("Generated by FlowStrider for the data flow diagram '{id}'.").format( id=dfd.id ), ), _( "\n\nThe 'Attributes' sheet contains an overview of the attributes " "currently assigned to each entity in the data flow diagram.\n\nThe three " "left most columns indicate each entity with type, ID and name. The other " "columns mark the different attributes with a short explanation for each." "\n\nEach row indicates one entity with the corresponding values for the " "attributes it currently has. 'Missing' means that the value for the " "respective attribute hasn't been defined yet. Those cells are colored in " "a light red. Cells in bright red mean that the given value is invalid " "for this attribute and will most likely lead to a threat being detected " "in the elicit phase. If the cell is colored green, the value corresponds " "to one of the valid ones and will most likely not lead to a threat. " "Cells that are left blank mean that the attribute cannot be defined for " "the entity.\n\nFor changing the values of the attributes, click in one " "of the not blank cells and either input a custom value or use the drop " "down menu to select one of the valid options. Afterwards, the .xlsx " "file can be saved and can now be used to update the data flow diagram " "with the 'update' command.\n\n" ), TextBlock( inline_bold, _( "Please note that the changes made in this file will not be applied to " "the data flow diagram automatically. For changes in this file to be " "applied, use the 'update' command with your data flow diagram and " "this file after saving it." ), ), ) cell_curr = worksheet_readMe.cell(row=1, column=1, value=readMe_text) worksheet_readMe.merge_cells(start_row=1, end_row=20, start_column=1, end_column=15) cell_curr.alignment = styles.Alignment(wrap_text=True, vertical="top") cell_curr = worksheet_readMe.cell(row=22, column=2, value="Missing") cell_curr.font = styles.Font(italic=True) cell_curr.fill = missing_format["fill"] cell_curr = worksheet_readMe.cell(row=22, column=4, value="Valid") cell_curr.fill = valid_format["fill"] cell_curr = worksheet_readMe.cell(row=22, column=6, value="Invalid") cell_curr.font = styles.Font(bold=True) cell_curr.fill = invalid_format["fill"] worksheet_readMe.protection.enable() # Lock entire worksheet from editing worksheet.protection.enable() # Freeze the left three columns worksheet.freeze_panes = "D4" # Filling out the worksheet # Setting the headers for col_num, column_name in enumerate(header, start=1): cell = worksheet.cell(row=1, column=col_num, value=column_name) cell.font = header_format["font"] cell.fill = header_format["fill"] cell.alignment = styles.Alignment(wrap_text=True) # Setting the hidden attribute keys for col_num, key in enumerate(hidden_keys, start=1): cell = worksheet.cell(row=2, column=col_num, value=key) # The attribute keys should be hidden and are only for letting the update function # ...parse more easily worksheet.row_dimensions[2].hidden = True # Adding the explanations for col_num, explanation in enumerate(explanations, start=1): cell = worksheet.cell(row=3, column=col_num, value=explanation) cell.alignment = default_format["alignment"] cell.border = styles.Border(bottom=styles.borders.Side(style="medium")) worksheet.row_dimensions[3].height = 70 worksheet.cell(1, 3).border = styles.Border( right=styles.borders.Side(style="medium") ) worksheet.cell(3, 3).border = styles.Border( bottom=styles.borders.Side(style="medium"), right=styles.borders.Side(style="medium"), ) # Starting from row 4, the entities of the dfd are added row_num = 4 all_entities = [] for dict in [dfd.nodes, dfd.edges, dfd.clusters]: all_entities.extend(dict.values()) all_entities.append(dfd) previous_type = "" for entity in all_entities: draw_type_border = False # Border dividing the entity types if not isinstance( entity, ( common_models.Node, common_models.Edge, common_models.Cluster, dataflowdiagram.DataflowDiagram, ), ): print( settings.lang_sys.gettext( "Error: found unknown entity when creating metadata" ) ) else: if row_num > 4 and determine_type(entity) != previous_type: draw_type_border = True previous_type = determine_type(entity) # Entity information to fill out first three columns entity_id = entity.id entity_name = entity.name if isinstance(entity, common_models.Edge): src_name = dfd.get_node_by_id(entity.source_id).name if len(src_name) == 0: src_name = entity.source_id sink_name = dfd.get_node_by_id(entity.sink_id).name if len(sink_name) == 0: sink_name = entity.sink_id entity_name += f" ({src_name} -> {sink_name})" entity_type = determine_type(entity) # Metadata attributes = entity.attributes row = [entity_type, entity_id, entity_name] # Check existing attributes of entity against all relevant attribute keys for key in all_metadata_keys: if entity_type in attributes_dict.attributes[key].applicable_entities: value = attributes.get(key, "Missing") if isinstance(value, str) and ( value.strip() == "" or value.casefold() == "none" ): row.append("Missing") elif isinstance(value, list): # Serialize list to JSON string row.append(json.dumps(value)) elif isinstance(value, str): row.append(value) else: row.append(str(value)) else: row.append("") # TODO Should entities with no missing metadata or no metadata at all # ...be included (shown in the table)? # if "Missing" not in row: # continue # Filling the row and formatting cells # (color code and unlocking Missing cells) for col_num, cell_value in enumerate(row, start=1): cell = worksheet.cell(row=row_num, column=col_num, value=cell_value) # Left three entity-info columns if col_num <= 3: cell.fill = entity_info_format["fill"] if col_num == 3: cell.border = styles.Border( right=styles.borders.Side(style="medium") ) else: # Data validation (drop down menu) curr_key = all_metadata_keys[col_num - 4] accepted_values = attributes_dict.attributes[ curr_key ].accepted_values accepted_values_str = '"' for val in accepted_values: accepted_values_str += str(val) + ", " accepted_values_str += '"' data_val = DataValidation( type="list", formula1=accepted_values_str, showDropDown=False ) # showDropDown=False actually shows the drop down menu cell_range = CellRange( min_col=col_num, max_col=col_num, min_row=row_num, max_row=row_num, ) # Cell empty if cell_value == "": default_format["alignment"] # Cell with modifiable attribute else: data_val.add(cell_range) cell.protection = styles.Protection(locked=False) # Add conditional formatting to automatically color the cells # ...directly in the xlsx file column_letter = get_column_letter(col_num) range_string = ( column_letter + str(row_num) + ":" + column_letter + str(row_num) ) # Add missing data formatting formula_string_missing = ( '=OR(ISNUMBER(SEARCH("missing",{col}{row}))' + ",ISBLANK({col}{row}))" ).format(col=column_letter, row=row_num) worksheet.conditional_formatting.add( range_string, FormulaRule( formula=[formula_string_missing], fill=missing_format["fill"], font=styles.Font(italic=True), ), ) # Define all accepted strings for conditional formatting accepted_str_values_temp = [] accepted_str_values = [] for val in accepted_values: accepted_str_values_temp.append(str(val)) for val in accepted_str_values_temp: accepted_str_values.append(val) if " " in val: accepted_str_values.append(val.replace(" ", "")) accepted_str_values.append(val.replace(" ", "-")) accepted_str_values.append(val.replace(" ", "_")) elif "-" in val: accepted_str_values.append(val.replace("-", "")) accepted_str_values.append(val.replace("-", " ")) accepted_str_values.append(val.replace("-", "_")) elif "_" in val: accepted_str_values.append(val.replace("_", "")) accepted_str_values.append(val.replace("_", "-")) accepted_str_values.append(val.replace("_", " ")) elif "True" in val: accepted_str_values.append("1") accepted_str_values.append("yes") elif "False" in val: accepted_str_values.append("0") accepted_str_values.append("no") # Add valid or invalid formatting if len(accepted_str_values) >= 1: if len(accepted_str_values) > 1: formula_string = "=OR(" else: formula_string = "=" for j in range(len(accepted_str_values)): formula_string += ( 'ISNUMBER(SEARCH("' + accepted_str_values[j] + '",' + column_letter + str(row_num) + "))" ) if j != len(accepted_str_values) - 1: formula_string += "," if len(accepted_str_values) > 1: formula_string += ")" worksheet.conditional_formatting.add( range_string, FormulaRule( formula=[formula_string], fill=valid_format["fill"] ), ) formula_string_neg = "=NOT(" + formula_string[1:] + ")" worksheet.conditional_formatting.add( range_string, FormulaRule( formula=[formula_string_neg], fill=invalid_format["fill"], font=styles.Font(bold=True), ), ) else: formula_string = ("=NOT(ISBLANK({col}{row}))").format( col=column_letter, row=row_num ) worksheet.conditional_formatting.add( range_string, FormulaRule( formula=[formula_string], fill=valid_format["fill"], ), ) # Add drop down menu if there are values for the attribute if len(accepted_values): worksheet.add_data_validation(data_val) # Add borders if draw_type_border: if col_num == 3: cell.border = styles.Border( top=styles.borders.Side(style="dashed"), right=styles.borders.Side(style="medium"), ) else: cell.border = styles.Border( top=styles.borders.Side(style="dashed") ) # Iterate over all columns and adjust their dimensions for column in worksheet.columns: max_length = 0 column_letter = column[0].column_letter for cell in column: if cell.value: max_length = max(max_length, len(str(cell.value))) adjusted_width = min(max_length + 2, MAX_COLUMN_WIDTH) worksheet.column_dimensions[column_letter].width = adjusted_width row_num += 1 # Save the file if not os.path.exists("output"): os.mkdir("output") workbook.save(output_path)
[docs] def parse_value(value): """Helper method when updating a dfd.json file from an xlsx file. Is given the value from a cell and attempts to parse it. Especially relevant to parse lists properly. Args: value: the value from a specific cell Returns: the parsed value where more complex parsing is necessary. Otherwise returns unparsed value """ if isinstance(value, str): lower_value = value.strip().lower() # Parse booleans if lower_value in ["true", "yes", "1"]: return True elif lower_value in ["false", "no", "0"]: return False try: return json.loads(value) except (json.JSONDecodeError, TypeError): pass # Parse lists (if not handled by JSON) if "," in value: return [item.strip() for item in value.split(",")] # Return value as is for other types elif isinstance(value, int): if value == 0: return False elif value == 1: return True return value
# Add information from xslx file to dfd
[docs] def update_dfd_json_from_xlsx( dfd: dataflowdiagram.DataflowDiagram, xlsx_file ) -> dataflowdiagram.DataflowDiagram: """Updates the metadata of a given dfd from a given xlsx file Changes to the dfd are saved to the json file of it Args: dfd: the dfd that should be updated with new metadata xlsx_file: file from which metadata is taken Returns: updated dfd """ _ = settings.lang_sys.gettext # Opens the metadata xlsx file workbook = load_workbook(filename=xlsx_file) sheet = workbook.active # Read headers from hidden attribute key row headers = [cell.value for cell in sheet[2]] # Going through all entities in the spreadsheet # (starting row 4, since first 3 rows should be headers + explanations) for row in sheet.iter_rows(min_row=4, values_only=True): entity_type = row[0] entity_id = row[1] attribute_dict = { headers[i]: (parse_value(value)) for i, value in enumerate(row[3:], start=3) if isinstance(value, int) or (value and value.lower() not in ["missing", ""]) } if "Node" in entity_type: target_dict: typing.Dict[str, common_models.Node] = dfd.nodes elif "Edge" in entity_type: target_dict: typing.Dict[str, common_models.Edge] = dfd.edges elif "TrustBoundary" in entity_type: target_dict: typing.Dict[str, common_models.Cluster] = dfd.clusters elif "DataflowDiagram" in entity_type: dfd.attributes.update(attribute_dict) continue else: print( _( "Entity {id} could not be identified. \ Unknown entity type: {type}" ).format(id=entity_id, type=entity_type) ) continue if entity_id in target_dict: entity = target_dict[entity_id] entity.attributes.update(attribute_dict) else: print(_("Entity {id} could not be found").format(id=entity_id)) return dfd