Source code for flowstrider.converters.metadata_xsxl_converter
# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR)
#
# SPDX-License-Identifier: BSD-3-Clause
import json
import os
import pathlib
import typing
from openpyxl import Workbook, load_workbook, styles
from openpyxl.cell.rich_text import CellRichText, TextBlock
from openpyxl.cell.text import InlineFont
from openpyxl.formatting.rule import FormulaRule
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.cell_range import CellRange
from openpyxl.worksheet.datavalidation import DataValidation
from flowstrider import settings
from flowstrider.models import common_models, dataflowdiagram
from flowstrider.rules import attributes_dict
[docs]
def determine_type(entity) -> str:
if isinstance(entity, common_models.Node):
if "STRIDE:Interactor" in entity.tags:
return "Node: Interactor"
if "STRIDE:DataStore" in entity.tags:
return "Node: DataStore"
if "STRIDE:Process" in entity.tags:
return "Node: Process"
if isinstance(entity, common_models.Edge):
return "Edge: Dataflow"
if isinstance(entity, common_models.Cluster):
return "TrustBoundary"
if isinstance(entity, dataflowdiagram.DataflowDiagram):
return "DataflowDiagram"
return "Not defined"
# Generate metadata overview
[docs]
def metadata_check(dfd: dataflowdiagram.DataflowDiagram, output_path: pathlib.Path):
"""Generates an xlsx file highlighting missing metadata
that would be helpful to add to a dataflowdiagram
Args:
dfd: the dataflowdiagram for which the metadata overview should be generated
"""
MAX_COLUMN_WIDTH = 40
_ = settings.lang_out.gettext
# Define formatting
header_format = {
"font": styles.Font(bold=True),
"fill": styles.PatternFill(
start_color="E0DEDE", end_color="E0DEDE", fill_type="solid"
),
}
entity_info_format = {
"fill": styles.PatternFill(
start_color="E0DEDE", end_color="E0DEDE", fill_type="solid"
)
}
# Format for when an attribute value has not been defined
missing_format = {
"fill": styles.PatternFill(
start_color="FFDDDD", end_color="FFDDDD", fill_type="solid"
)
}
# Format for when an attribute value is present but not valid
invalid_format = {
"fill": styles.PatternFill(
start_color="FF9999", end_color="FF9999", fill_type="solid"
)
}
# Format for when an attribute value is valid
valid_format = {
"fill": styles.PatternFill(
start_color="D9EAD3", end_color="D9EAD3", fill_type="solid"
)
}
default_format = {"alignment": styles.Alignment(wrap_text=True, vertical="top")}
# Extract all attribute keys that are needed for the wanted rule_sets
temp_keys = []
for key, value in attributes_dict.attributes.items():
for tag in value.corresponding_rule_sets:
if tag in dfd.tags:
temp_keys.append(key)
break
# Creates a sorted list of the metadata keys,
# that will be used as the second, hidden row in the xlsx file
all_metadata_keys = sorted(temp_keys)
all_explanations = []
for key in all_metadata_keys:
all_explanations.append(attributes_dict.attributes[key].explanation)
all_display_names = []
for key in all_metadata_keys:
all_display_names.append(attributes_dict.attributes[key].display_name)
header = [_("Entity Type"), _("Entity ID"), _("Entity Name")] + all_display_names
hidden_keys = ["", "", ""] + all_metadata_keys
explanations = ["", "", ""] + all_explanations
# Create workbook and sheets
workbook = Workbook()
worksheet = workbook.active
worksheet.title = _("Attributes")
# Add a ReadMe sheet with explanation for the usage
worksheet_readMe = workbook.create_sheet("ReadMe")
inline_bold = InlineFont(b=True)
readMe_text = CellRichText(
TextBlock(
inline_bold,
_("Generated by FlowStrider for the data flow diagram '{id}'.").format(
id=dfd.id
),
),
_(
"\n\nThe 'Attributes' sheet contains an overview of the attributes "
"currently assigned to each entity in the data flow diagram.\n\nThe three "
"left most columns indicate each entity with type, ID and name. The other "
"columns mark the different attributes with a short explanation for each."
"\n\nEach row indicates one entity with the corresponding values for the "
"attributes it currently has. 'Missing' means that the value for the "
"respective attribute hasn't been defined yet. Those cells are colored in "
"a light red. Cells in bright red mean that the given value is invalid "
"for this attribute and will most likely lead to a threat being detected "
"in the elicit phase. If the cell is colored green, the value corresponds "
"to one of the valid ones and will most likely not lead to a threat. "
"Cells that are left blank mean that the attribute cannot be defined for "
"the entity.\n\nFor changing the values of the attributes, click in one "
"of the not blank cells and either input a custom value or use the drop "
"down menu to select one of the valid options. Afterwards, the .xlsx "
"file can be saved and can now be used to update the data flow diagram "
"with the 'update' command.\n\n"
),
TextBlock(
inline_bold,
_(
"Please note that the changes made in this file will not be applied to "
"the data flow diagram automatically. For changes in this file to be "
"applied, use the 'update' command with your data flow diagram and "
"this file after saving it."
),
),
)
cell_curr = worksheet_readMe.cell(row=1, column=1, value=readMe_text)
worksheet_readMe.merge_cells(start_row=1, end_row=20, start_column=1, end_column=15)
cell_curr.alignment = styles.Alignment(wrap_text=True, vertical="top")
cell_curr = worksheet_readMe.cell(row=22, column=2, value="Missing")
cell_curr.font = styles.Font(italic=True)
cell_curr.fill = missing_format["fill"]
cell_curr = worksheet_readMe.cell(row=22, column=4, value="Valid")
cell_curr.fill = valid_format["fill"]
cell_curr = worksheet_readMe.cell(row=22, column=6, value="Invalid")
cell_curr.font = styles.Font(bold=True)
cell_curr.fill = invalid_format["fill"]
worksheet_readMe.protection.enable()
# Lock entire worksheet from editing
worksheet.protection.enable()
# Freeze the left three columns
worksheet.freeze_panes = "D4"
# Filling out the worksheet
# Setting the headers
for col_num, column_name in enumerate(header, start=1):
cell = worksheet.cell(row=1, column=col_num, value=column_name)
cell.font = header_format["font"]
cell.fill = header_format["fill"]
cell.alignment = styles.Alignment(wrap_text=True)
# Setting the hidden attribute keys
for col_num, key in enumerate(hidden_keys, start=1):
cell = worksheet.cell(row=2, column=col_num, value=key)
# The attribute keys should be hidden and are only for letting the update function
# ...parse more easily
worksheet.row_dimensions[2].hidden = True
# Adding the explanations
for col_num, explanation in enumerate(explanations, start=1):
cell = worksheet.cell(row=3, column=col_num, value=explanation)
cell.alignment = default_format["alignment"]
cell.border = styles.Border(bottom=styles.borders.Side(style="medium"))
worksheet.row_dimensions[3].height = 70
worksheet.cell(1, 3).border = styles.Border(
right=styles.borders.Side(style="medium")
)
worksheet.cell(3, 3).border = styles.Border(
bottom=styles.borders.Side(style="medium"),
right=styles.borders.Side(style="medium"),
)
# Starting from row 4, the entities of the dfd are added
row_num = 4
all_entities = []
for dict in [dfd.nodes, dfd.edges, dfd.clusters]:
all_entities.extend(dict.values())
all_entities.append(dfd)
previous_type = ""
for entity in all_entities:
draw_type_border = False # Border dividing the entity types
if not isinstance(
entity,
(
common_models.Node,
common_models.Edge,
common_models.Cluster,
dataflowdiagram.DataflowDiagram,
),
):
print(
settings.lang_sys.gettext(
"Error: found unknown entity when creating metadata"
)
)
else:
if row_num > 4 and determine_type(entity) != previous_type:
draw_type_border = True
previous_type = determine_type(entity)
# Entity information to fill out first three columns
entity_id = entity.id
entity_name = entity.name
if isinstance(entity, common_models.Edge):
src_name = dfd.get_node_by_id(entity.source_id).name
if len(src_name) == 0:
src_name = entity.source_id
sink_name = dfd.get_node_by_id(entity.sink_id).name
if len(sink_name) == 0:
sink_name = entity.sink_id
entity_name += f" ({src_name} -> {sink_name})"
entity_type = determine_type(entity)
# Metadata
attributes = entity.attributes
row = [entity_type, entity_id, entity_name]
# Check existing attributes of entity against all relevant attribute keys
for key in all_metadata_keys:
if entity_type in attributes_dict.attributes[key].applicable_entities:
value = attributes.get(key, "Missing")
if isinstance(value, str) and (
value.strip() == "" or value.casefold() == "none"
):
row.append("Missing")
elif isinstance(value, list):
# Serialize list to JSON string
row.append(json.dumps(value))
elif isinstance(value, str):
row.append(value)
else:
row.append(str(value))
else:
row.append("")
# TODO Should entities with no missing metadata or no metadata at all
# ...be included (shown in the table)?
# if "Missing" not in row:
# continue
# Filling the row and formatting cells
# (color code and unlocking Missing cells)
for col_num, cell_value in enumerate(row, start=1):
cell = worksheet.cell(row=row_num, column=col_num, value=cell_value)
# Left three entity-info columns
if col_num <= 3:
cell.fill = entity_info_format["fill"]
if col_num == 3:
cell.border = styles.Border(
right=styles.borders.Side(style="medium")
)
else:
# Data validation (drop down menu)
curr_key = all_metadata_keys[col_num - 4]
accepted_values = attributes_dict.attributes[
curr_key
].accepted_values
accepted_values_str = '"'
for val in accepted_values:
accepted_values_str += str(val) + ", "
accepted_values_str += '"'
data_val = DataValidation(
type="list", formula1=accepted_values_str, showDropDown=False
) # showDropDown=False actually shows the drop down menu
cell_range = CellRange(
min_col=col_num,
max_col=col_num,
min_row=row_num,
max_row=row_num,
)
# Cell empty
if cell_value == "":
default_format["alignment"]
# Cell with modifiable attribute
else:
data_val.add(cell_range)
cell.protection = styles.Protection(locked=False)
# Add conditional formatting to automatically color the cells
# ...directly in the xlsx file
column_letter = get_column_letter(col_num)
range_string = (
column_letter
+ str(row_num)
+ ":"
+ column_letter
+ str(row_num)
)
# Add missing data formatting
formula_string_missing = (
'=OR(ISNUMBER(SEARCH("missing",{col}{row}))'
+ ",ISBLANK({col}{row}))"
).format(col=column_letter, row=row_num)
worksheet.conditional_formatting.add(
range_string,
FormulaRule(
formula=[formula_string_missing],
fill=missing_format["fill"],
font=styles.Font(italic=True),
),
)
# Define all accepted strings for conditional formatting
accepted_str_values_temp = []
accepted_str_values = []
for val in accepted_values:
accepted_str_values_temp.append(str(val))
for val in accepted_str_values_temp:
accepted_str_values.append(val)
if " " in val:
accepted_str_values.append(val.replace(" ", ""))
accepted_str_values.append(val.replace(" ", "-"))
accepted_str_values.append(val.replace(" ", "_"))
elif "-" in val:
accepted_str_values.append(val.replace("-", ""))
accepted_str_values.append(val.replace("-", " "))
accepted_str_values.append(val.replace("-", "_"))
elif "_" in val:
accepted_str_values.append(val.replace("_", ""))
accepted_str_values.append(val.replace("_", "-"))
accepted_str_values.append(val.replace("_", " "))
elif "True" in val:
accepted_str_values.append("1")
accepted_str_values.append("yes")
elif "False" in val:
accepted_str_values.append("0")
accepted_str_values.append("no")
# Add valid or invalid formatting
if len(accepted_str_values) >= 1:
if len(accepted_str_values) > 1:
formula_string = "=OR("
else:
formula_string = "="
for j in range(len(accepted_str_values)):
formula_string += (
'ISNUMBER(SEARCH("'
+ accepted_str_values[j]
+ '",'
+ column_letter
+ str(row_num)
+ "))"
)
if j != len(accepted_str_values) - 1:
formula_string += ","
if len(accepted_str_values) > 1:
formula_string += ")"
worksheet.conditional_formatting.add(
range_string,
FormulaRule(
formula=[formula_string], fill=valid_format["fill"]
),
)
formula_string_neg = "=NOT(" + formula_string[1:] + ")"
worksheet.conditional_formatting.add(
range_string,
FormulaRule(
formula=[formula_string_neg],
fill=invalid_format["fill"],
font=styles.Font(bold=True),
),
)
else:
formula_string = ("=NOT(ISBLANK({col}{row}))").format(
col=column_letter, row=row_num
)
worksheet.conditional_formatting.add(
range_string,
FormulaRule(
formula=[formula_string],
fill=valid_format["fill"],
),
)
# Add drop down menu if there are values for the attribute
if len(accepted_values):
worksheet.add_data_validation(data_val)
# Add borders
if draw_type_border:
if col_num == 3:
cell.border = styles.Border(
top=styles.borders.Side(style="dashed"),
right=styles.borders.Side(style="medium"),
)
else:
cell.border = styles.Border(
top=styles.borders.Side(style="dashed")
)
# Iterate over all columns and adjust their dimensions
for column in worksheet.columns:
max_length = 0
column_letter = column[0].column_letter
for cell in column:
if cell.value:
max_length = max(max_length, len(str(cell.value)))
adjusted_width = min(max_length + 2, MAX_COLUMN_WIDTH)
worksheet.column_dimensions[column_letter].width = adjusted_width
row_num += 1
# Save the file
if not os.path.exists("output"):
os.mkdir("output")
workbook.save(output_path)
[docs]
def parse_value(value):
"""Helper method when updating a dfd.json file from an xlsx file.
Is given the value from a cell and attempts to parse it.
Especially relevant to parse lists properly.
Args:
value: the value from a specific cell
Returns:
the parsed value where more complex parsing is necessary.
Otherwise returns unparsed value
"""
if isinstance(value, str):
lower_value = value.strip().lower()
# Parse booleans
if lower_value in ["true", "yes", "1"]:
return True
elif lower_value in ["false", "no", "0"]:
return False
try:
return json.loads(value)
except (json.JSONDecodeError, TypeError):
pass
# Parse lists (if not handled by JSON)
if "," in value:
return [item.strip() for item in value.split(",")]
# Return value as is for other types
elif isinstance(value, int):
if value == 0:
return False
elif value == 1:
return True
return value
# Add information from xslx file to dfd
[docs]
def update_dfd_json_from_xlsx(
dfd: dataflowdiagram.DataflowDiagram, xlsx_file
) -> dataflowdiagram.DataflowDiagram:
"""Updates the metadata of a given dfd from a given xlsx file
Changes to the dfd are saved to the json file of it
Args:
dfd: the dfd that should be updated with new metadata
xlsx_file: file from which metadata is taken
Returns:
updated dfd
"""
_ = settings.lang_sys.gettext
# Opens the metadata xlsx file
workbook = load_workbook(filename=xlsx_file)
sheet = workbook.active
# Read headers from hidden attribute key row
headers = [cell.value for cell in sheet[2]]
# Going through all entities in the spreadsheet
# (starting row 4, since first 3 rows should be headers + explanations)
for row in sheet.iter_rows(min_row=4, values_only=True):
entity_type = row[0]
entity_id = row[1]
attribute_dict = {
headers[i]: (parse_value(value))
for i, value in enumerate(row[3:], start=3)
if isinstance(value, int)
or (value and value.lower() not in ["missing", ""])
}
if "Node" in entity_type:
target_dict: typing.Dict[str, common_models.Node] = dfd.nodes
elif "Edge" in entity_type:
target_dict: typing.Dict[str, common_models.Edge] = dfd.edges
elif "TrustBoundary" in entity_type:
target_dict: typing.Dict[str, common_models.Cluster] = dfd.clusters
elif "DataflowDiagram" in entity_type:
dfd.attributes.update(attribute_dict)
continue
else:
print(
_(
"Entity {id} could not be identified. \
Unknown entity type: {type}"
).format(id=entity_id, type=entity_type)
)
continue
if entity_id in target_dict:
entity = target_dict[entity_id]
entity.attributes.update(attribute_dict)
else:
print(_("Entity {id} could not be found").format(id=entity_id))
return dfd