Source code for flowstrider.converters.metadata_xsxl_converter

# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR)
#
# SPDX-License-Identifier: BSD-3-Clause

import json
import os
import pathlib
import typing

from openpyxl import Workbook, load_workbook, styles
from openpyxl.cell.rich_text import CellRichText, TextBlock
from openpyxl.cell.text import InlineFont
from openpyxl.formatting.rule import FormulaRule
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.cell_range import CellRange
from openpyxl.worksheet.datavalidation import DataValidation

from flowstrider import settings
from flowstrider.models import common_models, dataflowdiagram
from flowstrider.rules import attributes_dict



[docs]
def determine_type(entity) -> str:
    if isinstance(entity, common_models.Node):
        if "STRIDE:Interactor" in entity.tags:
            return "Node: Interactor"
        if "STRIDE:DataStore" in entity.tags:
            return "Node: DataStore"
        if "STRIDE:Process" in entity.tags:
            return "Node: Process"
    if isinstance(entity, common_models.Edge):
        return "Edge: Dataflow"
    if isinstance(entity, common_models.Cluster):
        return "TrustBoundary"
    if isinstance(entity, dataflowdiagram.DataflowDiagram):
        return "DataflowDiagram"

    return "Not defined"



# Generate metadata overview

[docs]
def metadata_check(dfd: dataflowdiagram.DataflowDiagram, output_path: pathlib.Path):
    """Generates an xlsx file highlighting missing metadata
        that would be helpful to add to a dataflowdiagram

    Args:
        dfd: the dataflowdiagram for which the metadata overview should be generated

    """
    MAX_COLUMN_WIDTH = 40

    _ = settings.lang_out.gettext

    # Define formatting
    header_format = {
        "font": styles.Font(bold=True),
        "fill": styles.PatternFill(
            start_color="E0DEDE", end_color="E0DEDE", fill_type="solid"
        ),
    }
    entity_info_format = {
        "fill": styles.PatternFill(
            start_color="E0DEDE", end_color="E0DEDE", fill_type="solid"
        )
    }
    # Format for when an attribute value has not been defined
    missing_format = {
        "fill": styles.PatternFill(
            start_color="FFDDDD", end_color="FFDDDD", fill_type="solid"
        )
    }
    # Format for when an attribute value is present but not valid
    invalid_format = {
        "fill": styles.PatternFill(
            start_color="FF9999", end_color="FF9999", fill_type="solid"
        )
    }
    # Format for when an attribute value is valid
    valid_format = {
        "fill": styles.PatternFill(
            start_color="D9EAD3", end_color="D9EAD3", fill_type="solid"
        )
    }
    default_format = {"alignment": styles.Alignment(wrap_text=True, vertical="top")}

    # Extract all attribute keys that are needed for the wanted rule_sets
    temp_keys = []
    for key, value in attributes_dict.attributes.items():
        for tag in value.corresponding_rule_sets:
            if tag in dfd.tags:
                temp_keys.append(key)
                break

    # Creates a sorted list of the metadata keys,
    # that will be used as the second, hidden row in the xlsx file
    all_metadata_keys = sorted(temp_keys)
    all_explanations = []
    for key in all_metadata_keys:
        all_explanations.append(attributes_dict.attributes[key].explanation)
    all_display_names = []
    for key in all_metadata_keys:
        all_display_names.append(attributes_dict.attributes[key].display_name)

    header = [_("Entity Type"), _("Entity ID"), _("Entity Name")] + all_display_names
    hidden_keys = ["", "", ""] + all_metadata_keys
    explanations = ["", "", ""] + all_explanations

    # Create workbook and sheets
    workbook = Workbook()
    worksheet = workbook.active
    worksheet.title = _("Attributes")

    # Add a ReadMe sheet with explanation for the usage
    worksheet_readMe = workbook.create_sheet("ReadMe")
    inline_bold = InlineFont(b=True)
    readMe_text = CellRichText(
        TextBlock(
            inline_bold,
            _("Generated by FlowStrider for the data flow diagram '{id}'.").format(
                id=dfd.id
            ),
        ),
        _(
            "\n\nThe 'Attributes' sheet contains an overview of the attributes "
            "currently assigned to each entity in the data flow diagram.\n\nThe three "
            "left most columns indicate each entity with type, ID and name. The other "
            "columns mark the different attributes with a short explanation for each."
            "\n\nEach row indicates one entity with the corresponding values for the "
            "attributes it currently has. 'Missing' means that the value for the "
            "respective attribute hasn't been defined yet. Those cells are colored in "
            "a light red. Cells in bright red mean that the given value is invalid "
            "for this attribute and will most likely lead to a threat being detected "
            "in the elicit phase. If the cell is colored green, the value corresponds "
            "to one of the valid ones and will most likely not lead to a threat. "
            "Cells that are left blank mean that the attribute cannot be defined for "
            "the entity.\n\nFor changing the values of the attributes, click in one "
            "of the not blank cells and either input a custom value or use the drop "
            "down menu to select one of the valid options. Afterwards, the .xlsx "
            "file can be saved and can now be used to update the data flow diagram "
            "with the 'update' command.\n\n"
        ),
        TextBlock(
            inline_bold,
            _(
                "Please note that the changes made in this file will not be applied to "
                "the data flow diagram automatically. For changes in this file to be "
                "applied, use the 'update' command with your data flow diagram and "
                "this file after saving it."
            ),
        ),
    )
    cell_curr = worksheet_readMe.cell(row=1, column=1, value=readMe_text)
    worksheet_readMe.merge_cells(start_row=1, end_row=20, start_column=1, end_column=15)
    cell_curr.alignment = styles.Alignment(wrap_text=True, vertical="top")

    cell_curr = worksheet_readMe.cell(row=22, column=2, value="Missing")
    cell_curr.font = styles.Font(italic=True)
    cell_curr.fill = missing_format["fill"]
    cell_curr = worksheet_readMe.cell(row=22, column=4, value="Valid")
    cell_curr.fill = valid_format["fill"]
    cell_curr = worksheet_readMe.cell(row=22, column=6, value="Invalid")
    cell_curr.font = styles.Font(bold=True)
    cell_curr.fill = invalid_format["fill"]
    worksheet_readMe.protection.enable()

    # Lock entire worksheet from editing
    worksheet.protection.enable()

    # Freeze the left three columns
    worksheet.freeze_panes = "D4"

    # Filling out the worksheet

    # Setting the headers
    for col_num, column_name in enumerate(header, start=1):
        cell = worksheet.cell(row=1, column=col_num, value=column_name)
        cell.font = header_format["font"]
        cell.fill = header_format["fill"]
        cell.alignment = styles.Alignment(wrap_text=True)

    # Setting the hidden attribute keys
    for col_num, key in enumerate(hidden_keys, start=1):
        cell = worksheet.cell(row=2, column=col_num, value=key)
    # The attribute keys should be hidden and are only for letting the update function
    # ...parse more easily
    worksheet.row_dimensions[2].hidden = True

    # Adding the explanations
    for col_num, explanation in enumerate(explanations, start=1):
        cell = worksheet.cell(row=3, column=col_num, value=explanation)
        cell.alignment = default_format["alignment"]
        cell.border = styles.Border(bottom=styles.borders.Side(style="medium"))
    worksheet.row_dimensions[3].height = 70

    worksheet.cell(1, 3).border = styles.Border(
        right=styles.borders.Side(style="medium")
    )
    worksheet.cell(3, 3).border = styles.Border(
        bottom=styles.borders.Side(style="medium"),
        right=styles.borders.Side(style="medium"),
    )

    # Starting from row 4, the entities of the dfd are added
    row_num = 4
    all_entities = []
    for dict in [dfd.nodes, dfd.edges, dfd.clusters]:
        all_entities.extend(dict.values())
    all_entities.append(dfd)
    previous_type = ""
    for entity in all_entities:
        draw_type_border = False  # Border dividing the entity types
        if not isinstance(
            entity,
            (
                common_models.Node,
                common_models.Edge,
                common_models.Cluster,
                dataflowdiagram.DataflowDiagram,
            ),
        ):
            print(
                settings.lang_sys.gettext(
                    "Error: found unknown entity when creating metadata"
                )
            )

        else:
            if row_num > 4 and determine_type(entity) != previous_type:
                draw_type_border = True
            previous_type = determine_type(entity)

            # Entity information to fill out first three columns
            entity_id = entity.id
            entity_name = entity.name
            if isinstance(entity, common_models.Edge):
                src_name = dfd.get_node_by_id(entity.source_id).name
                if len(src_name) == 0:
                    src_name = entity.source_id
                sink_name = dfd.get_node_by_id(entity.sink_id).name
                if len(sink_name) == 0:
                    sink_name = entity.sink_id
                entity_name += f" ({src_name} -> {sink_name})"

            entity_type = determine_type(entity)
            # Metadata
            attributes = entity.attributes
            row = [entity_type, entity_id, entity_name]

            # Check existing attributes of entity against all relevant attribute keys
            for key in all_metadata_keys:
                if entity_type in attributes_dict.attributes[key].applicable_entities:
                    value = attributes.get(key, "Missing")
                    if isinstance(value, str) and (
                        value.strip() == "" or value.casefold() == "none"
                    ):
                        row.append("Missing")
                    elif isinstance(value, list):
                        # Serialize list to JSON string
                        row.append(json.dumps(value))
                    elif isinstance(value, str):
                        row.append(value)
                    else:
                        row.append(str(value))
                else:
                    row.append("")

            # TODO Should entities with no missing metadata or no metadata at all
            # ...be included (shown in the table)?
            # if "Missing" not in row:
            #     continue

            # Filling the row and formatting cells
            # (color code and unlocking Missing cells)
            for col_num, cell_value in enumerate(row, start=1):
                cell = worksheet.cell(row=row_num, column=col_num, value=cell_value)

                # Left three entity-info columns
                if col_num <= 3:
                    cell.fill = entity_info_format["fill"]
                    if col_num == 3:
                        cell.border = styles.Border(
                            right=styles.borders.Side(style="medium")
                        )
                else:
                    # Data validation (drop down menu)
                    curr_key = all_metadata_keys[col_num - 4]
                    accepted_values = attributes_dict.attributes[
                        curr_key
                    ].accepted_values
                    accepted_values_str = '"'
                    for val in accepted_values:
                        accepted_values_str += str(val) + ", "
                    accepted_values_str += '"'
                    data_val = DataValidation(
                        type="list", formula1=accepted_values_str, showDropDown=False
                    )  # showDropDown=False actually shows the drop down menu
                    cell_range = CellRange(
                        min_col=col_num,
                        max_col=col_num,
                        min_row=row_num,
                        max_row=row_num,
                    )

                    # Cell empty
                    if cell_value == "":
                        default_format["alignment"]
                    # Cell with modifiable attribute
                    else:
                        data_val.add(cell_range)
                        cell.protection = styles.Protection(locked=False)

                        # Add conditional formatting to automatically color the cells
                        # ...directly in the xlsx file
                        column_letter = get_column_letter(col_num)
                        range_string = (
                            column_letter
                            + str(row_num)
                            + ":"
                            + column_letter
                            + str(row_num)
                        )

                        # Add missing data formatting
                        formula_string_missing = (
                            '=OR(ISNUMBER(SEARCH("missing",{col}{row}))'
                            + ",ISBLANK({col}{row}))"
                        ).format(col=column_letter, row=row_num)
                        worksheet.conditional_formatting.add(
                            range_string,
                            FormulaRule(
                                formula=[formula_string_missing],
                                fill=missing_format["fill"],
                                font=styles.Font(italic=True),
                            ),
                        )

                        # Define all accepted strings for conditional formatting
                        accepted_str_values_temp = []
                        accepted_str_values = []
                        for val in accepted_values:
                            accepted_str_values_temp.append(str(val))
                        for val in accepted_str_values_temp:
                            accepted_str_values.append(val)
                            if " " in val:
                                accepted_str_values.append(val.replace(" ", ""))
                                accepted_str_values.append(val.replace(" ", "-"))
                                accepted_str_values.append(val.replace(" ", "_"))
                            elif "-" in val:
                                accepted_str_values.append(val.replace("-", ""))
                                accepted_str_values.append(val.replace("-", " "))
                                accepted_str_values.append(val.replace("-", "_"))
                            elif "_" in val:
                                accepted_str_values.append(val.replace("_", ""))
                                accepted_str_values.append(val.replace("_", "-"))
                                accepted_str_values.append(val.replace("_", " "))
                            elif "True" in val:
                                accepted_str_values.append("1")
                                accepted_str_values.append("yes")
                            elif "False" in val:
                                accepted_str_values.append("0")
                                accepted_str_values.append("no")

                        # Add valid or invalid formatting
                        if len(accepted_str_values) >= 1:
                            if len(accepted_str_values) > 1:
                                formula_string = "=OR("
                            else:
                                formula_string = "="

                            for j in range(len(accepted_str_values)):
                                formula_string += (
                                    'ISNUMBER(SEARCH("'
                                    + accepted_str_values[j]
                                    + '",'
                                    + column_letter
                                    + str(row_num)
                                    + "))"
                                )
                                if j != len(accepted_str_values) - 1:
                                    formula_string += ","

                            if len(accepted_str_values) > 1:
                                formula_string += ")"

                            worksheet.conditional_formatting.add(
                                range_string,
                                FormulaRule(
                                    formula=[formula_string], fill=valid_format["fill"]
                                ),
                            )
                            formula_string_neg = "=NOT(" + formula_string[1:] + ")"
                            worksheet.conditional_formatting.add(
                                range_string,
                                FormulaRule(
                                    formula=[formula_string_neg],
                                    fill=invalid_format["fill"],
                                    font=styles.Font(bold=True),
                                ),
                            )
                        else:
                            formula_string = ("=NOT(ISBLANK({col}{row}))").format(
                                col=column_letter, row=row_num
                            )
                            worksheet.conditional_formatting.add(
                                range_string,
                                FormulaRule(
                                    formula=[formula_string],
                                    fill=valid_format["fill"],
                                ),
                            )

                    # Add drop down menu if there are values for the attribute
                    if len(accepted_values):
                        worksheet.add_data_validation(data_val)

                # Add borders
                if draw_type_border:
                    if col_num == 3:
                        cell.border = styles.Border(
                            top=styles.borders.Side(style="dashed"),
                            right=styles.borders.Side(style="medium"),
                        )
                    else:
                        cell.border = styles.Border(
                            top=styles.borders.Side(style="dashed")
                        )

            # Iterate over all columns and adjust their dimensions
            for column in worksheet.columns:
                max_length = 0
                column_letter = column[0].column_letter
                for cell in column:
                    if cell.value:
                        max_length = max(max_length, len(str(cell.value)))

                adjusted_width = min(max_length + 2, MAX_COLUMN_WIDTH)
                worksheet.column_dimensions[column_letter].width = adjusted_width

            row_num += 1

    # Save the file
    if not os.path.exists("output"):
        os.mkdir("output")

    workbook.save(output_path)




[docs]
def parse_value(value):
    """Helper method when updating a dfd.json file from an xlsx file.
        Is given the value from a cell and attempts to parse it.
        Especially relevant to parse lists properly.

    Args:
        value: the value from a specific cell
    Returns:
        the parsed value where more complex parsing is necessary.
        Otherwise returns unparsed value

    """
    if isinstance(value, str):
        lower_value = value.strip().lower()

        # Parse booleans
        if lower_value in ["true", "yes", "1"]:
            return True
        elif lower_value in ["false", "no", "0"]:
            return False

        try:
            return json.loads(value)
        except (json.JSONDecodeError, TypeError):
            pass

        # Parse lists (if not handled by JSON)
        if "," in value:
            return [item.strip() for item in value.split(",")]

    # Return value as is for other types
    elif isinstance(value, int):
        if value == 0:
            return False
        elif value == 1:
            return True

    return value



# Add information from xslx file to dfd

[docs]
def update_dfd_json_from_xlsx(
    dfd: dataflowdiagram.DataflowDiagram, xlsx_file
) -> dataflowdiagram.DataflowDiagram:
    """Updates the metadata of a given dfd from a given xlsx file
        Changes to the dfd are saved to the json file of it

    Args:
        dfd: the dfd that should be updated with new metadata
        xlsx_file: file from which metadata is taken
    Returns:
        updated dfd

    """

    _ = settings.lang_sys.gettext

    # Opens the metadata xlsx file
    workbook = load_workbook(filename=xlsx_file)
    sheet = workbook.active

    # Read headers from hidden attribute key row
    headers = [cell.value for cell in sheet[2]]

    # Going through all entities in the spreadsheet
    # (starting row 4, since first 3 rows should be headers + explanations)
    for row in sheet.iter_rows(min_row=4, values_only=True):
        entity_type = row[0]
        entity_id = row[1]
        attribute_dict = {
            headers[i]: (parse_value(value))
            for i, value in enumerate(row[3:], start=3)
            if isinstance(value, int)
            or (value and value.lower() not in ["missing", ""])
        }

        if "Node" in entity_type:
            target_dict: typing.Dict[str, common_models.Node] = dfd.nodes
        elif "Edge" in entity_type:
            target_dict: typing.Dict[str, common_models.Edge] = dfd.edges
        elif "TrustBoundary" in entity_type:
            target_dict: typing.Dict[str, common_models.Cluster] = dfd.clusters
        elif "DataflowDiagram" in entity_type:
            dfd.attributes.update(attribute_dict)
            continue
        else:
            print(
                _(
                    "Entity {id} could not be identified. \
                    Unknown entity type: {type}"
                ).format(id=entity_id, type=entity_type)
            )
            continue

        if entity_id in target_dict:
            entity = target_dict[entity_id]
            entity.attributes.update(attribute_dict)
        else:
            print(_("Entity {id} could not be found").format(id=entity_id))

    return dfd
Source code for flowstrider.converters.metadata_xsxl_converter

FlowStrider

Navigation

Related Topics