mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-19 12:57:58 +00:00
75 lines
2.9 KiB
Python
75 lines
2.9 KiB
Python
import logging
|
|
from typing import Dict, Set, Tuple, Union
|
|
|
|
# Module-level logger named after this module; get_pdb_chain_details emits its parse warnings through it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def get_pdb_chain_details(
    pdb_content: str, preview_lines: int = 10
) -> Tuple[Dict[str, Dict[str, int]], str]:
    """
    Parses PDB content to extract detailed information for each chain.

    Only ``ATOM`` records contribute to the chain statistics; ``HEADER``,
    ``TITLE`` and ``COMPND`` records are collected for the preview, and every
    other record type is ignored. Fields are read from fixed columns of each
    ``ATOM`` line: atom name ``[12:16]``, chain ID ``[21:22]``, residue number
    ``[22:26]`` and insertion code ``[26:27]``.

    Args:
        pdb_content: Full text of a PDB file.
        preview_lines: Maximum number of lines to place in the preview. Up to
            half of the budget goes to header lines and the rest to ATOM
            lines. Values below zero are treated as zero.

    Returns:
        A tuple containing:
        - chain_details (Dict[str, Dict[str, int]]):
            A dictionary where keys are chain IDs (e.g., "A"); a blank chain
            ID column is reported as " ".
            Each value is another dictionary:
            {
                "min_residue": int,  # Smallest residue number found for this chain
                "max_residue": int,  # Largest residue number found for this chain
                "length": int        # Count of unique C-alpha atoms (residues) in this chain
            }
            A residue is identified by (residue number, insertion code), so
            alternate-location copies or repeated models of the same C-alpha
            are counted once. If a chain has no C-alpha atoms at all, the
            number of distinct residues seen is used instead.
        - pdb_preview (str): A string preview of the PDB content, followed by
            a "..." line when the content has more lines than the budget.
    """
    lines = pdb_content.splitlines()
    # A negative budget would turn into negative slice bounds below and leak
    # most of the file into the preview, so clamp it.
    preview_budget = max(preview_lines, 0)

    # Per chain: every residue seen, and the subset of residues owning a CA atom.
    seen_residues: Dict[str, Set[Tuple[int, str]]] = {}
    ca_residues: Dict[str, Set[Tuple[int, str]]] = {}
    atom_lines = []
    header_lines = []

    for line in lines:
        if line.startswith("ATOM"):
            atom_lines.append(line)
            chain_id = line[21:22].strip() or " "
            # Register the chain before parsing so that a chain whose records
            # are all malformed is still reported by the warning further down.
            chain_seen = seen_residues.setdefault(chain_id, set())
            try:
                residue_num = int(line[22:26])
            except ValueError:
                logger.warning(
                    "Could not parse residue number from PDB line: %s", line
                )
                continue
            residue_key = (residue_num, line[26:27].strip())
            chain_seen.add(residue_key)
            if line[12:16].strip() == "CA":
                # A set collapses duplicate CA records of the same residue.
                ca_residues.setdefault(chain_id, set()).add(residue_key)
        elif line.startswith(("HEADER", "TITLE", "COMPND")):
            header_lines.append(line)

    chain_details: Dict[str, Dict[str, int]] = {}
    for chain_id, residue_keys in seen_residues.items():
        if not residue_keys:
            logger.warning(
                "Chain %s had no parseable ATOM residue numbers.", chain_id
            )
            continue
        residue_numbers = [number for number, _ in residue_keys]
        ca_keys = ca_residues.get(chain_id)
        chain_details[chain_id] = {
            "min_residue": min(residue_numbers),
            "max_residue": max(residue_numbers),
            "length": len(ca_keys) if ca_keys else len(residue_keys),
        }

    # Slicing already caps at the list length, and produces a fresh list, so
    # extending the preview does not mutate header_lines.
    preview_parts = header_lines[: preview_budget // 2]
    preview_parts.extend(atom_lines[: preview_budget - len(preview_parts)])
    pdb_preview = "\n".join(preview_parts)
    if len(lines) > preview_budget:
        pdb_preview += "\n..."
    return chain_details, pdb_preview
|