Source code for biosimdb_interface.form.extract
#!/usr/bin/env python
"""
Metadata extraction endpoint.
Receives uploaded topology and trajectory files, extracts simulation metadata
using :class:`biosim_extractor.metadata.populatemetadata.MetadataPopulator`, and
optionally validates the result against the BioSim schema.
"""
import os
import tempfile
from biosim_extractor.metadata.populatemetadata import MetadataPopulator
from flask import jsonify, request, session
from . import form_bp
[docs]
def extract_files_validate(top_file, traj_file):
    """Extract metadata from simulation files and validate it against the schema.

    A :class:`MetadataPopulator` is built from the mapping schema located at
    the ``ENGINE_MAPPING_SCHEMA_PATH`` environment variable, and its populated
    result is validated against the schema at ``BIOSIM_SCHEMA_PATH``. Both
    variables default to an empty string when unset.

    Args:
        top_file (str): Path to the topology file.
        traj_file (str or list[str]): Path or list of paths to the trajectory
            file(s).

    Returns:
        tuple: A tuple containing:

        - result (dict): The extracted and populated metadata dictionary.
        - validation_errors (list[str]): One entry per line of the validation
          error message; empty if validation succeeded.
    """
    populator = MetadataPopulator(
        schema_path=os.getenv("ENGINE_MAPPING_SCHEMA_PATH", ""),
        top_file=top_file,
        traj_file=traj_file,
    )
    result = populator.populate()

    biosimschema_path = os.getenv("BIOSIM_SCHEMA_PATH", "")
    validation_errors = []
    try:
        populator.validate(result, biosimschema_path, strict=True)
    except ValueError as exc:
        # A ValueError from validate() is treated as "schema validation
        # failed"; its message is split so each line becomes one entry.
        validation_errors = str(exc).splitlines()
        # An empty or blank message would otherwise produce no usable entry
        # and the failure would be indistinguishable from success for callers
        # that test the list for emptiness.
        if not any(line.strip() for line in validation_errors):
            validation_errors = ["Schema validation failed."]
    return result, validation_errors
[docs]
def _sanitize_upload_name(filename):
    """Reduce a client-supplied file name to a bare base name.

    Args:
        filename (str or None): File name as sent by the client.

    Returns:
        str: The final path component of ``filename``, or an empty string when
        nothing usable remains.
    """
    # The name is untrusted input: treat both separator styles as directory
    # separators so "../x" or "..\\x" cannot point outside the upload directory.
    name = os.path.basename((filename or "").replace("\\", "/"))
    if name in (".", "..") or "\x00" in name:
        return ""
    return name


def _store_upload(upload, dest_dir):
    """Save one uploaded file inside ``dest_dir`` and return the path used.

    Args:
        upload: Uploaded file object exposing ``filename`` and ``save(path)``.
        dest_dir (str): Directory the file is written to.

    Returns:
        str: Path the upload was saved to.

    Raises:
        ValueError: If the upload has no usable file name.
    """
    name = _sanitize_upload_name(upload.filename)
    if not name:
        raise ValueError("Uploaded file has no usable file name.")
    path = os.path.join(dest_dir, name)
    # Two uploads may share a name (e.g. two trajectories called "traj.dcd").
    # Prefix a counter instead of silently overwriting the earlier file; the
    # original name is kept as the suffix so the extension is preserved.
    counter = 0
    while os.path.lexists(path):
        counter += 1
        path = os.path.join(dest_dir, f"{counter}_{name}")
    upload.save(path)
    return path


@form_bp.route("/extract_metadata", methods=["POST"])
def extract_metadata():
    """Extract simulation metadata from uploaded topology and trajectory files.

    Expects a multipart POST with:

    - ``topology``: a single topology file.
    - ``trajectory[]``: one or more trajectory files.

    Files are saved under a temporary directory (using only the base name of
    each client-supplied file name), passed to :func:`extract_files_validate`,
    and the extracted result is stored in ``session["extracted_metadata"]``.

    Returns:
        JSON response with one of:

        - ``{"simulation_metadata": ..., "message": "..."}`` on success.
        - ``{"simulation_metadata": ..., "validation_errors": [...]}`` if
          schema validation fails.
        - ``{"error": "..."}`` with status 400 if files are missing or carry
          an unusable file name, or 500 on unexpected error.
    """
    try:
        topology = request.files.get("topology")
        trajectories = request.files.getlist("trajectory[]")
        # A part submitted without a selected file arrives with an empty
        # filename; treat that the same as the part being absent.
        if (
            not topology
            or not trajectories
            or not all(traj.filename for traj in trajectories)
        ):
            return jsonify({"error": "Simulation files are missing."}), 400

        # Reject unusable names up front so they yield a 400, not a 500.
        uploads = [topology, *trajectories]
        if not all(_sanitize_upload_name(item.filename) for item in uploads):
            return jsonify({"error": "Uploaded file name is invalid."}), 400

        with tempfile.TemporaryDirectory() as temp_dir:
            topo_path = _store_upload(topology, temp_dir)
            traj_files = [_store_upload(traj, temp_dir) for traj in trajectories]
            result, validation_errors = extract_files_validate(topo_path, traj_files)

        # Keep authoritative extracted payload on the server.
        # NOTE(review): with Flask's default cookie-based session this value is
        # serialized into the client cookie — confirm a server-side session
        # backend is configured.
        session["extracted_metadata"] = result

        payload = {"simulation_metadata": result}
        if validation_errors:
            payload["validation_errors"] = validation_errors
        else:
            payload["message"] = "Metadata extracted successfully."
        return jsonify(payload)
    except Exception as e:
        import logging

        # Top-level boundary of the endpoint: record the full traceback and
        # answer with a JSON error instead of an unhandled exception.
        logging.getLogger(__name__).exception("Metadata extraction failed: %s", e)
        # NOTE(review): str(e) may expose internal details (paths, library
        # messages) to the client; kept because callers receive it today.
        return jsonify({"error": str(e)}), 500