Source code for archledger.ids
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
DEFAULT_ID_PREFIX = "al"
DEFAULT_ID_WIDTH = 4
DEFAULT_ID_SEGMENT_MODE = "none"
VALID_ID_SEGMENT_MODES: frozenset[str] = frozenset({"none", "type"})
ID_PREFIX_PATTERN = re.compile(r"^[a-z][a-z0-9]{1,15}$")
ID_SEGMENT_PATTERN = re.compile(r"^[a-z][a-z0-9-]{1,31}$")
_WORD_CHAR_CLASS = "A-Za-z0-9"
@dataclass(frozen=True, slots=True)
class ParsedLedgerId:
number: int
segment: str | None = None
[docs]
@dataclass(frozen=True, slots=True)
class LedgerIdFormat:
prefix: str = DEFAULT_ID_PREFIX
width: int = DEFAULT_ID_WIDTH
segment_mode: str = DEFAULT_ID_SEGMENT_MODE
def __post_init__(self) -> None:
validate_id_prefix(self.prefix)
validate_id_width(self.width)
validate_id_segment_mode(self.segment_mode)
@property
def pattern_text(self) -> str:
escaped = re.escape(self.prefix)
if self.segment_mode == "none":
return rf"^{escaped}_(?P<number>\d{{{self.width},}})$"
return (
rf"^{escaped}_"
rf"(?P<segment>[a-z][a-z0-9-]{{1,31}})_"
rf"(?P<number>\d{{{self.width},}})$"
)
@property
def reference_pattern_text(self) -> str:
escaped = re.escape(self.prefix)
if self.segment_mode == "none":
return (
rf"(?<![{_WORD_CHAR_CLASS}])"
rf"{escaped}_(?P<number>\d{{{self.width},}})"
rf"(?![{_WORD_CHAR_CLASS}])"
)
return (
rf"(?<![{_WORD_CHAR_CLASS}])"
rf"{escaped}_"
rf"(?P<segment>[a-z][a-z0-9-]{{1,31}})_"
rf"(?P<number>\d{{{self.width},}})"
rf"(?![{_WORD_CHAR_CLASS}])"
)
[docs]
def reference_pattern(self) -> re.Pattern[str]:
return re.compile(self.reference_pattern_text)
[docs]
def format(self, number: int, *, segment: str | None = None) -> str:
if isinstance(number, bool) or not isinstance(number, int) or number < 1:
raise ValueError("Ledger ID number must be a positive integer.")
if self.segment_mode == "none":
return f"{self.prefix}_{number:0{self.width}d}"
if segment is None:
raise ValueError("Segmented ledger IDs require a segment.")
validated_segment = validate_id_segment(segment)
return f"{self.prefix}_{validated_segment}_{number:0{self.width}d}"
[docs]
def parse_parts(self, record_id: str) -> ParsedLedgerId:
match = self.pattern().fullmatch(record_id)
if match is None:
raise ValueError(f"Invalid ledger ID: {record_id!r}")
number = int(match.group("number"))
if number < 1:
raise ValueError(f"Invalid ledger ID: {record_id!r}")
segment = match.groupdict().get("segment")
return ParsedLedgerId(
number=number,
segment=None if segment is None else validate_id_segment(segment),
)
[docs]
def is_id(self, value: object) -> bool:
return is_ledger_id(
value,
prefix=self.prefix,
width=self.width,
segment_mode=self.segment_mode,
)
def validate_id_prefix(prefix: str) -> str:
normalized = prefix.strip()
if not ID_PREFIX_PATTERN.fullmatch(normalized):
raise ValueError("Ledger ID prefix must match ^[a-z][a-z0-9]{1,15}$.")
return normalized
def validate_id_width(width: int) -> int:
if isinstance(width, bool) or not isinstance(width, int) or not 2 <= width <= 12:
raise ValueError("Ledger ID width must be an integer from 2 to 12.")
return width
def validate_id_segment_mode(segment_mode: str) -> str:
normalized = segment_mode.strip().lower()
if normalized not in VALID_ID_SEGMENT_MODES:
raise ValueError("Ledger ID segment mode must be one of: none, type.")
return normalized
def validate_id_segment(segment: str) -> str:
normalized = segment.strip().lower()
if not ID_SEGMENT_PATTERN.fullmatch(normalized):
raise ValueError("Ledger ID segment must match ^[a-z][a-z0-9-]{1,31}$.")
return normalized
def format_ledger_id(
number: int,
*,
prefix: str = DEFAULT_ID_PREFIX,
width: int = DEFAULT_ID_WIDTH,
segment_mode: str = DEFAULT_ID_SEGMENT_MODE,
segment: str | None = None,
) -> str:
return LedgerIdFormat(
prefix=prefix,
width=width,
segment_mode=segment_mode,
).format(number, segment=segment)
def parse_ledger_id_parts(
record_id: str,
*,
prefix: str = DEFAULT_ID_PREFIX,
width: int = DEFAULT_ID_WIDTH,
segment_mode: str = DEFAULT_ID_SEGMENT_MODE,
) -> ParsedLedgerId:
return LedgerIdFormat(
prefix=prefix,
width=width,
segment_mode=segment_mode,
).parse_parts(record_id)
def parse_ledger_id(
record_id: str,
*,
prefix: str = DEFAULT_ID_PREFIX,
width: int = DEFAULT_ID_WIDTH,
segment_mode: str = DEFAULT_ID_SEGMENT_MODE,
) -> int:
return parse_ledger_id_parts(
record_id,
prefix=prefix,
width=width,
segment_mode=segment_mode,
).number
def is_ledger_id(
value: object,
*,
prefix: str = DEFAULT_ID_PREFIX,
width: int = DEFAULT_ID_WIDTH,
segment_mode: str = DEFAULT_ID_SEGMENT_MODE,
) -> bool:
if not isinstance(value, str):
return False
try:
parse_ledger_id(
value,
prefix=prefix,
width=width,
segment_mode=segment_mode,
)
except ValueError:
return False
return True
def filename_for_ledger_id(
record_id: str,
extension: str = ".md",
*,
prefix: str = DEFAULT_ID_PREFIX,
width: int = DEFAULT_ID_WIDTH,
segment_mode: str = DEFAULT_ID_SEGMENT_MODE,
) -> str:
parse_ledger_id(
record_id,
prefix=prefix,
width=width,
segment_mode=segment_mode,
)
return f"{record_id}{extension}"
def ledger_id_from_filename(
path: Path,
*,
prefix: str = DEFAULT_ID_PREFIX,
width: int = DEFAULT_ID_WIDTH,
segment_mode: str = DEFAULT_ID_SEGMENT_MODE,
) -> str:
record_id = path.stem
parse_ledger_id(
record_id,
prefix=prefix,
width=width,
segment_mode=segment_mode,
)
return record_id