Source code for modifications

from __future__ import annotations

import enum
from dataclasses import dataclass
from abc import ABC, abstractmethod
from typing import Any, Dict, AbstractSet

import scadnano as sc
from nuad.json_noindent_serializer import JSONSerializable, NoIndent

# Sentinel stored in ``Modification.id`` when the caller supplies no id;
# ``Modification.__post_init__`` replaces it with the vendor code.
_default_modification_id = "WARNING: no id assigned to modification"
default_connector_length = 4

# ---- JSON key used at the design level ----
design_modifications_key = "modifications_in_design"

# ---- JSON keys used at the strand level ----
modification_5p_key = "5prime_modification"
modification_3p_key = "3prime_modification"
modifications_int_key = "internal_modifications"

# ---- JSON keys used inside a serialized modification ----
mod_location_key = "location"
mod_display_text_key = "display_text"
mod_id_key = "id"
mod_vendor_code_key = "vendor_code"
mod_font_size_key = "font_size"
mod_display_connector_key = "display_connector"
mod_allowed_bases_key = "allowed_bases"
mod_connector_length_key = "connector_length"


@enum.unique
class ModificationType(enum.Enum):
    """
    Type of modification (5', 3', or internal).

    Each member's value is a distinct string. ``enum.Enum`` treats a member
    whose value duplicates an earlier member's value as an *alias* of that
    earlier member, so the values must differ for ``five_prime`` and
    ``three_prime`` to be distinguishable; ``@enum.unique`` enforces this at
    class-creation time.
    """

    five_prime = "5'"
    """5' modification type"""

    three_prime = "3'"
    """3' modification type"""

    internal = "internal"
    """internal modification type"""
@dataclass(frozen=True, eq=True)
class Modification(JSONSerializable, ABC):
    """Abstract base class of modifications (to DNA sequences, e.g., biotin or Cy3).

    Use concrete subclasses :any:`Modification3Prime`, :any:`Modification5Prime`, or
    :any:`ModificationInternal` to instantiate.

    If :data:`Modification.id` is not specified, then :data:`Modification.vendor_code` is used
    as the unique ID. Each :data:`Modification.id` must be unique. For example if you create a
    5' "modification" to represent 6 T bases:
    ``t6_5p = Modification5Prime(vendor_code='TTTTTT')``
    (this is a useful hack for putting single-stranded extensions on strands until loopouts on the
    end of a strand are supported; see
    https://github.com/UC-Davis-molecular-computing/scadnano-python-package/issues/2),
    then this would clash with a similar 3' modification unless unique IDs are given to them:
    ``t6_3p = Modification3Prime(vendor_code='TTTTTT')  # ERROR: same id as t6_5p``.

    In general it is recommended to create a single :any:`Modification` object for each *type* of
    modification in the design. For example, if many strands have a 5' biotin, then it is
    recommended to create a single :any:`Modification` object and re-use it on each strand with a
    5' biotin:

    .. code-block:: python

        biotin_5p = Modification5Prime(vendor_code='/5Biosg/')
        # ... attach the same biotin_5p object to every strand that carries a 5' biotin
    """

    vendor_code: str
    """Text string specifying this modification (e.g., '/5Biosg/' for 5' biotin)."""

    id: str = _default_modification_id
    """
    Representation as a string; used to write in :any:`Strand` json representation,
    while the full description of the modification is written under a global key in the
    :any:`Design`. If not specified, it is set equal to :data:`Modification.vendor_code`.
    """

    def __post_init__(self) -> None:
        """Fall back to :data:`Modification.vendor_code` as the id when none was given."""
        if self.id != _default_modification_id:
            return
        # The dataclass is frozen, so ordinary attribute assignment raises;
        # go through object.__setattr__ to fill in the default.
        object.__setattr__(self, 'id', self.vendor_code)

    def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> Dict[str, Any]:
        """Return a dict holding the vendor code and id of this modification.

        ``suppress_indent`` and ``kwargs`` are accepted but not used here.
        """
        return {
            mod_vendor_code_key: self.vendor_code,
            mod_id_key: self.id,
        }

    @staticmethod
    def from_json(
            json_map: Dict[str, Any]) -> 'Modification':  # remove quotes when Py3.6 support dropped
        """Build the concrete subclass selected by the location entry of ``json_map``.

        :raises KeyError: if ``json_map`` has no location entry
        :raises ValueError: if the location is not ``"5'"``, ``"3'"``, or ``"internal"``
        """
        where = json_map[mod_location_key]
        if where == "5'":
            return Modification5Prime.from_json(json_map)
        if where == "3'":
            return Modification3Prime.from_json(json_map)
        if where == "internal":
            return ModificationInternal.from_json(json_map)
        raise ValueError(f'unknown Modification location "{where}"')

    @staticmethod
    @abstractmethod
    def modification_type() -> ModificationType:
        """Return the :any:`ModificationType` of the concrete subclass."""
        pass
@dataclass(frozen=True, eq=True)
class Modification5Prime(Modification):
    """5' modification of DNA sequence, e.g., biotin or Cy3."""

    def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> Dict[str, Any]:
        """Return the base-class dict extended with the location ``"5'"``."""
        json_map = super().to_json_serializable(suppress_indent)
        json_map[mod_location_key] = "5'"
        return json_map

    # remove quotes when Py3.6 support dropped
    @staticmethod
    def from_json(json_map: Dict[str, Any]) -> 'Modification5Prime':
        """Build a :any:`Modification5Prime` from its dict representation.

        The id and location entries are required (``KeyError`` if absent);
        the vendor code entry is read with ``dict.get`` and may be missing.
        """
        mod_id = json_map[mod_id_key]
        where = json_map[mod_location_key]
        assert where == "5'"
        code = json_map.get(mod_vendor_code_key)
        return Modification5Prime(vendor_code=code, id=mod_id)

    @staticmethod
    def modification_type() -> ModificationType:
        """Return :data:`ModificationType.five_prime`."""
        return ModificationType.five_prime

    def to_scadnano_modification(self) -> sc.Modification5Prime:
        """Return the scadnano 5' modification; the vendor code doubles as display text."""
        code = self.vendor_code
        return sc.Modification5Prime(display_text=code, idt_text=code, id=self.id)
@dataclass(frozen=True, eq=True)
class Modification3Prime(Modification):
    """3' modification of DNA sequence, e.g., biotin or Cy3."""

    def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> Dict[str, Any]:
        """Return the base-class dict extended with the location ``"3'"``."""
        json_map = super().to_json_serializable(suppress_indent)
        json_map[mod_location_key] = "3'"
        return json_map

    # remove quotes when Py3.6 support dropped
    @staticmethod
    def from_json(json_map: Dict[str, Any]) -> 'Modification3Prime':
        """Build a :any:`Modification3Prime` from its dict representation.

        The id and location entries are required (``KeyError`` if absent);
        the vendor code entry is read with ``dict.get`` and may be missing.
        """
        mod_id = json_map[mod_id_key]
        where = json_map[mod_location_key]
        assert where == "3'"
        code = json_map.get(mod_vendor_code_key)
        return Modification3Prime(vendor_code=code, id=mod_id)

    @staticmethod
    def modification_type() -> ModificationType:
        """Return :data:`ModificationType.three_prime`."""
        return ModificationType.three_prime

    def to_scadnano_modification(self) -> sc.Modification3Prime:
        """Return the scadnano 3' modification; the vendor code doubles as display text."""
        code = self.vendor_code
        return sc.Modification3Prime(display_text=code, idt_text=code, id=self.id)
@dataclass(frozen=True, eq=True)
class ModificationInternal(Modification):
    """Internal modification of DNA sequence, e.g., biotin or Cy3."""

    allowed_bases: AbstractSet[str] | None = None
    """If None, then this is an internal modification that goes between bases.
    If instead it is a set of bases, then this is an internal modification that attaches to a base,
    and this gives the allowed bases for this internal modification to be placed at.
    For example, internal biotins for IDT must be at a T. If any base is allowed, it should be
    ``{'A','C','G','T'}``. Any non-frozenset iterable given here is converted to a frozenset."""

    def __post_init__(self) -> None:
        """Apply the base-class id default, then normalize ``allowed_bases`` to a frozenset."""
        super().__post_init__()
        if self.allowed_bases is not None and not isinstance(self.allowed_bases, frozenset):
            # Frozen dataclass: assignment must go through object.__setattr__.
            object.__setattr__(self, 'allowed_bases', frozenset(self.allowed_bases))

    def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> Dict[str, Any]:
        """Return the base-class dict extended with location and, if set, allowed bases.

        The allowed bases are written in sorted order. Iteration order of a set of
        strings can differ between interpreter runs, so emitting the set directly
        would make the serialized output non-reproducible.

        :param suppress_indent: if True, wrap the list of bases in :any:`NoIndent`
        """
        ret = super().to_json_serializable(suppress_indent)
        ret[mod_location_key] = "internal"
        if self.allowed_bases is not None:
            bases = sorted(self.allowed_bases)
            ret[mod_allowed_bases_key] = NoIndent(bases) if suppress_indent else bases
        return ret

    # remove quotes when Py3.6 support dropped
    @staticmethod
    def from_json(json_map: Dict[str, Any]) -> 'ModificationInternal':
        """Build a :any:`ModificationInternal` from its dict representation.

        The id and location entries are required (``KeyError`` if absent);
        the vendor code and allowed-bases entries are optional.
        """
        id_ = json_map[mod_id_key]
        location = json_map[mod_location_key]
        assert location == "internal"
        vendor_code = json_map.get(mod_vendor_code_key)
        allowed_bases_list = json_map.get(mod_allowed_bases_key)
        allowed_bases = frozenset(allowed_bases_list) if allowed_bases_list is not None else None
        return ModificationInternal(vendor_code=vendor_code, id=id_, allowed_bases=allowed_bases)

    @staticmethod
    def modification_type() -> ModificationType:
        """Return :data:`ModificationType.internal`."""
        return ModificationType.internal

    def to_scadnano_modification(self) -> sc.ModificationInternal:
        """Return the scadnano internal modification; the vendor code doubles as display text."""
        return sc.ModificationInternal(display_text=self.vendor_code, idt_text=self.vendor_code,
                                       id=self.id, allowed_bases=self.allowed_bases)