Source code for biosimdb_interface.form.utils
#!/usr/bin/env python
import copy
import re
from biosimdb_interface.schema.invenio import INVENIO_DSMD_TEMPLATE, INVENIO_FORM_EMPTY
from biosimdb_interface.schema.webform import get_simulation_metadata
[docs]
def fill_invenio_metadata(form_data):
    """Build an Invenio record dictionary from the blank record skeleton.

    NOTE: ``form_data`` is accepted but not yet written into the record;
    the ``dsmd`` custom field is temporarily filled with a placeholder
    template for testing.

    Args:
        form_data: Dictionary of parsed form values from form_to_json().
            Currently unused.

    Returns:
        dict: A fresh record dictionary whose ``custom_fields["dsmd"]`` is a
        one-element list holding a copy of the DSMD template. The returned
        structure shares no mutable state with the module-level templates.
    """
    invenio_data = copy.deepcopy(INVENIO_FORM_EMPTY)
    # TODO: switch to the real payload once the template is no longer needed:
    #   invenio_data["custom_fields"]["dsmd"] = [form_data]
    # Temporarily use the dsmd template for testing. It is deep-copied so that
    # callers mutating the returned record cannot corrupt the shared template.
    invenio_data["custom_fields"]["dsmd"] = [copy.deepcopy(INVENIO_DSMD_TEMPLATE)]
    # TODO: add generated keywords
    # TODO: add generated subjects
    return invenio_data
def _set_nested(d, parts, value):
"""Recursively set a value in a nested dictionary using a list of key parts.
Numeric parts are treated as 1-based list indices. Non-numeric parts
are treated as dictionary keys.
Args:
d: Dictionary to set the value in (modified in place).
parts: List of string key parts representing the path to the value.
value: Value to set at the specified path.
"""
key = parts[0]
if len(parts) == 1:
d[key] = value
return
next_part = parts[1]
if next_part.isdigit():
idx = int(next_part) - 1
d.setdefault(key, [])
while len(d[key]) <= idx:
d[key].append({})
_set_nested(d[key][idx], parts[2:], value) if len(parts) > 2 else d[
key
].__setitem__(idx, value)
else:
d.setdefault(key, {})
_set_nested(d[key], parts[1:], value)
def _build_typehint_map(fields, path_parts, result):
"""Recursively build a map of field path tuples to their typehint.
Args:
fields (dict): Dictionary of field definitions from the webform schema.
path_parts (list[str]): Accumulated path segments leading to the current fields.
result (dict): Dictionary modified in place mapping path tuples to typehint strings.
"""
for field_name, field in fields.items():
current = path_parts + [field_name]
if field.get("typehint"):
result[tuple(current)] = field["typehint"]
if field.get("fields"):
_build_typehint_map(field["fields"], current, result)
if field.get("multiple"):
_build_typehint_map(field.get("fields", {}), current + ["*"], result)
def _get_typehint_map():
    """Collect the typehints of every section of the webform schema.

    Fetches the schema with ``get_simulation_metadata()`` and walks each
    section's ``fields`` with :func:`_build_typehint_map`, using the section
    key as the first path segment.

    Returns:
        dict: Mapping of field path tuples
        (e.g. ``('composition', 'molecule_ID', '*', 'atom_count')``) to the
        typehint declared for that field. Paths below a repeatable field are
        also registered with a ``'*'`` segment in place of the list index.
    """
    typehints = {}
    for section_name, section in get_simulation_metadata().items():
        section_fields = section.get("fields", {})
        _build_typehint_map(section_fields, [section_name], typehints)
    return typehints
[docs]
# Lower-cased form strings that a boolean-typed field interprets as False.
_FALSE_FORM_STRINGS = frozenset({"false", "0", "no", "off"})


def _coerce_form_value(value, typehint):
    """Convert one non-empty form value according to its schema typehint.

    Values that cannot be converted to the requested numeric type are
    returned unchanged, as are values with an unrecognised typehint.
    """
    if typehint == "integer":
        try:
            return int(value)
        except (ValueError, TypeError):
            return value
    if typehint == "float":
        try:
            return float(value)
        except (ValueError, TypeError):
            return value
    if typehint == "boolean":
        # bool("false") is True, so string values need explicit parsing.
        if isinstance(value, str):
            return value.strip().lower() not in _FALSE_FORM_STRINGS
        return bool(value)
    return value


def _parse_vector_value(raw):
    """Split a comma-separated string into floats; return ``raw`` if not possible."""
    if not (isinstance(raw, str) and raw):
        return raw
    try:
        return [float(piece.strip()) for piece in raw.split(",") if piece.strip()]
    except ValueError:
        # Keep the raw text, mirroring how malformed integer/float fields
        # are left unconverted instead of aborting the whole form.
        return raw


def form_to_json(form):
    """Convert flat HTML form data into a nested dictionary.

    Parses bracket-notation field names (e.g. section[field][1][subfield])
    into nested dicts and lists. Keys named exactly ``save`` or ``submit``
    and keys containing a ``TEMPLATE`` segment are skipped.

    Values are converted according to the typehint found for the field in the
    webform schema (``integer``, ``float`` or ``boolean``); empty strings and
    values that fail numeric conversion are kept as they are. For boolean
    fields the strings ``false``, ``0``, ``no`` and ``off`` (case-insensitive)
    become ``False`` and any other non-empty string becomes ``True``.

    Fields whose last segment is ``vector_value`` are split from a
    comma-separated string into a list of floats; if a component is not a
    valid float the raw string is kept.

    Args:
        form: Flat form data (ImmutableMultiDict or dict) from a POST request.

    Returns:
        dict: Nested dictionary of form values. A key submitted more than once
        yields a list of its non-empty values; a key submitted once yields a
        scalar.
    """
    typehint_map = _get_typehint_map()
    data = {}

    # Use lists() so repeated keys (e.g. multiselect name="x[]") are preserved.
    if hasattr(form, "lists"):
        key_values_iter = form.lists()
    else:
        # Fallback for plain dict-like input
        key_values_iter = (
            (k, v if isinstance(v, list) else [v]) for k, v in form.items()
        )

    for key, values in key_values_iter:
        if key in ("save", "submit"):
            continue

        parts = re.findall(r"\w+", key)
        if not parts or "TEMPLATE" in parts:
            continue

        # Vector quantity field stays scalar-like text input; parse first value only.
        if parts[-1] == "vector_value":
            raw = values[0] if values else ""
            _set_nested(data, parts, _parse_vector_value(raw))
            continue

        # Build schema lookup key replacing numeric indices with '*'
        lookup = tuple("*" if p.isdigit() else p for p in parts)
        typehint = typehint_map.get(lookup) or typehint_map.get(tuple(parts))

        converted_values = [
            _coerce_form_value(v, typehint) if typehint and v != "" else v
            for v in values
        ]

        # For repeated keys keep list (after dropping empty placeholders);
        # for single keys keep scalar to preserve existing behavior.
        if len(converted_values) > 1:
            value_out = [v for v in converted_values if v != ""]
        else:
            value_out = converted_values[0] if converted_values else ""

        _set_nested(data, parts, value_out)

    return data
[docs]
def remove_empty_fields(d):
    """Return a copy of a nested dict with its empty values pruned.

    Dropped are: ``None`` and ``""`` values, dictionaries that end up empty
    after pruning, and lists that end up empty after their items are pruned
    and their ``None`` / ``""`` / ``{}`` / ``[]`` items removed. Other falsy
    values such as ``0`` and ``False`` are kept.

    Args:
        d: Nested dictionary to clean. Any non-dict input is returned as is.

    Returns:
        dict: New dictionary without the empty values.
    """
    if not isinstance(d, dict):
        return d

    result = {}
    for key, value in d.items():
        if isinstance(value, dict):
            pruned = remove_empty_fields(value)
            keep = bool(pruned)
        elif isinstance(value, list):
            pruned = [
                item
                for item in map(remove_empty_fields, value)
                if item not in (None, "", {}, [])
            ]
            keep = bool(pruned)
        else:
            pruned = value
            keep = value not in (None, "")

        if keep:
            result[key] = pruned
    return result