mirror of
https://github.com/NousResearch/atropos.git
synced 2026-04-22 16:48:57 +00:00
refactor, full run
This commit is contained in:
parent
de9dfff221
commit
1ee67de035
12 changed files with 1039 additions and 1127 deletions
65
environments/hack0/protein_design_env/utils/pdb_utils.py
Normal file
65
environments/hack0/protein_design_env/utils/pdb_utils.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
import logging
|
||||
from typing import Dict, Tuple, List, Set, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def get_pdb_chain_details(pdb_content: str, preview_lines: int = 10) -> Tuple[Dict[str, Dict[str, int]], str]:
|
||||
"""
|
||||
Parses PDB content to extract detailed information for each chain.
|
||||
|
||||
Returns:
|
||||
A tuple containing:
|
||||
- chain_details (Dict[str, Dict[str, int]]):
|
||||
A dictionary where keys are chain IDs (e.g., "A").
|
||||
Each value is another dictionary:
|
||||
{
|
||||
"min_residue": int, # Smallest residue number found for this chain
|
||||
"max_residue": int, # Largest residue number found for this chain
|
||||
"length": int # Count of unique C-alpha atoms (residues) in this chain
|
||||
}
|
||||
- pdb_preview (str): A string preview of the PDB content.
|
||||
"""
|
||||
chain_info_temp: Dict[str, Dict[str, Union[Set[int], int]]] = {}
|
||||
atom_lines = []
|
||||
header_lines = []
|
||||
|
||||
for line in pdb_content.splitlines():
|
||||
if line.startswith("ATOM"):
|
||||
atom_lines.append(line)
|
||||
chain_id = line[21:22].strip()
|
||||
if not chain_id: chain_id = " "
|
||||
atom_name = line[12:16].strip()
|
||||
try:
|
||||
residue_num = int(line[22:26].strip())
|
||||
if chain_id not in chain_info_temp:
|
||||
chain_info_temp[chain_id] = {"residues": set(), "ca_count": 0}
|
||||
chain_info_temp[chain_id]["residues"].add(residue_num)
|
||||
if atom_name == "CA":
|
||||
chain_info_temp[chain_id]["ca_count"] += 1
|
||||
except ValueError:
|
||||
logger.warning(f"Could not parse residue number from PDB line: {line}")
|
||||
continue
|
||||
elif line.startswith("HEADER") or line.startswith("TITLE") or line.startswith("COMPND"):
|
||||
header_lines.append(line)
|
||||
|
||||
chain_details: Dict[str, Dict[str, int]] = {}
|
||||
for chain_id, data in chain_info_temp.items():
|
||||
if data["residues"]:
|
||||
min_res = min(data["residues"])
|
||||
max_res = max(data["residues"])
|
||||
length = data["ca_count"] if data["ca_count"] > 0 else len(data["residues"])
|
||||
chain_details[chain_id] = {
|
||||
"min_residue": min_res,
|
||||
"max_residue": max_res,
|
||||
"length": length
|
||||
}
|
||||
else:
|
||||
logger.warning(f"Chain {chain_id} had no parseable ATOM residue numbers.")
|
||||
|
||||
preview_str_parts = header_lines[:min(len(header_lines), preview_lines // 2)]
|
||||
remaining_preview_lines = preview_lines - len(preview_str_parts)
|
||||
preview_str_parts.extend(atom_lines[:min(len(atom_lines), remaining_preview_lines)])
|
||||
pdb_preview = "\n".join(preview_str_parts)
|
||||
if len(pdb_content.splitlines()) > preview_lines:
|
||||
pdb_preview += "\n..."
|
||||
return chain_details, pdb_preview
|
||||
Loading…
Add table
Add a link
Reference in a new issue