from __future__ import annotations
import sys
from pathlib import Path
from typing import cast
from archledger.config.model import (
DEFAULT_ID_SEGMENT,
DEFAULT_ID_SEGMENT_MAP,
DEFAULT_TRACKING_EXCLUDE,
DEFAULT_TRACKING_INCLUDE,
VALID_BUILD_CONVERTERS,
VALID_DIAGRAM_IMAGE_FORMATS,
VALID_DIAGRAM_RENDERERS,
VALID_DIAGRAM_TYPES,
VALID_TRACKING_HASH_ALGORITHMS,
VALID_TRACKING_SCANNERS,
ProjectConfig,
normalize_project_name,
validate_uuid,
)
from archledger.config.schema import FieldSpec, TableSpec, parse_table_from_spec
from archledger.errors import ConfigError
from archledger.ids import (
DEFAULT_ID_PREFIX,
DEFAULT_ID_SEGMENT_MODE,
DEFAULT_ID_WIDTH,
validate_id_prefix,
validate_id_segment,
validate_id_segment_mode,
validate_id_width,
)
from archledger.model import (
CURRENT_SOURCE_SCHEMA_VERSION,
VALID_OUTPUT_FORMATS,
VALID_RECORD_TYPES,
VALID_SOURCE_FORMATS,
default_document_filename_for_output_format,
default_extension_for_source_format,
infer_output_format_from_path,
)
from archledger.storage.common import read_text
if sys.version_info >= (3, 11):
import tomllib
else: # pragma: no cover
import tomli as tomllib
# Keys accepted at the top level of the config file. load_project_config
# raises ConfigError for any top-level key outside this set.
_ALLOWED_TOP_LEVEL_KEYS = {
"config_version",
"archledger_dir",
"project_uuid",
"project_name",
"ids",
"source",
"build",
"arc42",
"skill",
"tracking",
"diagrams",
}
# Keys accepted inside the [build] table. "outputs" and "diagrams" are nested
# tables that get their own key checks further down in this module.
_ALLOWED_BUILD_KEYS = {
"default_output",
"default_format",
"default_output_dir",
"include_draft",
"include_superseded",
"strict",
"keep_intermediate",
"converter",
"pdf_engine",
"reference_docx",
"outputs",
"diagrams",
}
# Keys accepted inside each [build.outputs.<format>] table.
_ALLOWED_BUILD_OUTPUT_KEYS = {"enabled", "pdf_engine", "reference_docx", "tool"}
# Keys accepted inside the [ids] table.
_ALLOWED_IDS_KEYS = {
"prefix",
"width",
"segment_mode",
"default_segment",
"segment_map",
}
# Keys accepted inside the [arc42] table.
_ALLOWED_ARC42_KEYS = {"template_version", "language", "title", "include_help"}
# Keys accepted inside the [skill] table.
_ALLOWED_SKILL_KEYS = {"installed", "path"}
# Keys accepted inside the [tracking] table.
_ALLOWED_TRACKING_KEYS = {
"enabled",
"state_file",
"scanner",
"include",
"exclude",
"max_file_bytes",
"hash_algorithm",
}
# Keys accepted inside the [source] table.
_ALLOWED_SOURCE_KEYS = {
"format",
"front_matter",
"section_extension",
"record_extension",
"schema_version",
}
# Keys accepted inside [diagrams]; also used to check [build.diagrams].
_ALLOWED_DIAGRAM_KEYS = {
"enabled",
"renderer",
"default_type",
"output_dir",
"image_format",
"kroki_url",
}
# Module-local aliases for the choice sets imported from
# archledger.config.model; they are passed to _require_choice below.
_ALLOWED_BUILD_CONVERTERS = VALID_BUILD_CONVERTERS
_ALLOWED_TRACKING_SCANNERS = VALID_TRACKING_SCANNERS
_ALLOWED_TRACKING_HASH_ALGORITHMS = VALID_TRACKING_HASH_ALGORITHMS
_ALLOWED_DIAGRAM_RENDERERS = VALID_DIAGRAM_RENDERERS
_ALLOWED_DIAGRAM_TYPES = VALID_DIAGRAM_TYPES
_ALLOWED_DIAGRAM_IMAGE_FORMATS = VALID_DIAGRAM_IMAGE_FORMATS
# --- Schema-driven table specs ---
def _parse_tracking_enabled(raw: object, field_name: str) -> bool:
    """Field parser for ``tracking.enabled``: accept only a boolean.

    ``field_name`` is forwarded to ``_require_bool`` for its error message.
    """
    enabled = _require_bool(raw, field_name)
    return enabled
def _parse_tracking_state_file(raw: object, field_name: str) -> str:
    """Field parser for ``tracking.state_file``: a non-empty, stripped string.

    ``field_name`` is forwarded to ``_require_non_empty_string`` for its
    error message.
    """
    state_file = _require_non_empty_string(raw, field_name)
    return state_file
def _parse_tracking_scanner(raw: object, field_name: str) -> str:
    """Field parser for ``tracking.scanner``.

    The value must be one of ``_ALLOWED_TRACKING_SCANNERS``; the normalized
    choice produced by ``_require_choice`` is returned.
    """
    scanner = _require_choice(
        raw,
        field_name,
        _ALLOWED_TRACKING_SCANNERS,
    )
    return scanner
def _parse_tracking_include(raw: object, field_name: str) -> tuple[str, ...]:
    """Field parser for ``tracking.include``: a sequence of non-empty strings.

    Validation and stripping are delegated to ``_require_string_tuple``.
    """
    include_patterns = _require_string_tuple(raw, field_name)
    return include_patterns
def _parse_tracking_exclude(raw: object, field_name: str) -> tuple[str, ...]:
    """Field parser for ``tracking.exclude``: a sequence of non-empty strings.

    Validation and stripping are delegated to ``_require_string_tuple``.
    """
    exclude_patterns = _require_string_tuple(raw, field_name)
    return exclude_patterns
def _parse_tracking_max_file_bytes(raw: object, field_name: str) -> int:
    """Field parser for ``tracking.max_file_bytes``: a positive integer.

    ``field_name`` is forwarded to ``_require_positive_int`` for its error
    message.
    """
    byte_limit = _require_positive_int(raw, field_name)
    return byte_limit
def _parse_tracking_hash_algorithm(raw: object, field_name: str) -> str:
    """Field parser for ``tracking.hash_algorithm``.

    The value must be one of ``_ALLOWED_TRACKING_HASH_ALGORITHMS``; the
    normalized choice produced by ``_require_choice`` is returned.
    """
    algorithm = _require_choice(
        raw,
        field_name,
        _ALLOWED_TRACKING_HASH_ALGORITHMS,
    )
    return algorithm
def _make_tracking(
enabled: bool,
state_file: str,
scanner: str,
include: tuple[str, ...],
exclude: tuple[str, ...],
max_file_bytes: int,
hash_algorithm: str,
) -> tuple[bool, str, str, tuple[str, ...], tuple[str, ...], int, str]:
"""Factory for the tracking table row."""
return (
enabled,
state_file,
scanner,
include,
exclude,
max_file_bytes,
hash_algorithm,
)
# Declarative description of the [tracking] table, consumed by
# _parse_tracking_config via parse_table_from_spec. Each FieldSpec is given a
# key, a value, and one of the _parse_tracking_* callables.
# NOTE(review): the second FieldSpec argument looks like the default used when
# the key is absent -- confirm against archledger.config.schema.
_TRACKING_TABLE = TableSpec(
name="tracking",
fields=(
FieldSpec("enabled", True, _parse_tracking_enabled),
FieldSpec("state_file", "source-state.json", _parse_tracking_state_file),
FieldSpec("scanner", "auto", _parse_tracking_scanner),
FieldSpec("include", DEFAULT_TRACKING_INCLUDE, _parse_tracking_include),
FieldSpec("exclude", DEFAULT_TRACKING_EXCLUDE, _parse_tracking_exclude),
FieldSpec("max_file_bytes", 1_000_000, _parse_tracking_max_file_bytes),
FieldSpec("hash_algorithm", "sha256", _parse_tracking_hash_algorithm),
),
# The factory is called with the parsed fields as keyword arguments.
factory=_make_tracking,
)
# --- Public entry point ---
def load_project_config(path: Path) -> ProjectConfig:
    """Read the TOML project config at *path* and build a ``ProjectConfig``.

    The file is parsed with ``tomllib``. Top-level keys and the keys of each
    known sub-table are checked against the module's allow-lists, the scalar
    top-level fields are type-checked, and each sub-table is handed to its
    ``_parse_*_config`` helper.

    Args:
        path: Location of the config file. Only ``path.name`` appears in
            error messages.

    Returns:
        A ``ProjectConfig`` populated from the validated values.
        ``project_uuid`` is passed through ``validate_uuid`` and
        ``project_name`` through ``normalize_project_name``.

    Raises:
        ConfigError: If the file is not valid TOML, contains unknown keys, or
            any value fails validation.
    """
    try:
        raw_data = tomllib.loads(read_text(path))
    except tomllib.TOMLDecodeError as exc:
        raise ConfigError(f"Failed to parse {path.name}: {exc}") from exc
    if not isinstance(raw_data, dict):
        raise ConfigError(f"{path.name} did not parse to a TOML table.")

    unknown_top_level = sorted(set(raw_data) - _ALLOWED_TOP_LEVEL_KEYS)
    if unknown_top_level:
        joined = ", ".join(unknown_top_level)
        raise ConfigError(f"Unknown config keys in {path.name}: {joined}")

    # Sub-table shape and key checks run before the scalar checks, so an
    # unknown nested key is reported ahead of, e.g., a bad config_version.
    build_data = _validate_subtable(
        path,
        raw_data.get("build"),
        _ALLOWED_BUILD_KEYS,
        "build",
    )
    ids_data = _validate_subtable(
        path,
        raw_data.get("ids"),
        _ALLOWED_IDS_KEYS,
        "ids",
    )
    source_data = _validate_subtable(
        path,
        raw_data.get("source"),
        _ALLOWED_SOURCE_KEYS,
        "source",
    )
    arc42_data = _validate_subtable(
        path,
        raw_data.get("arc42"),
        _ALLOWED_ARC42_KEYS,
        "arc42",
    )
    skill_data = _validate_subtable(
        path,
        raw_data.get("skill"),
        _ALLOWED_SKILL_KEYS,
        "skill",
    )
    tracking_data = _validate_subtable(
        path,
        raw_data.get("tracking"),
        _ALLOWED_TRACKING_KEYS,
        "tracking",
    )
    diagrams_data = _validate_subtable(
        path,
        raw_data.get("diagrams"),
        _ALLOWED_DIAGRAM_KEYS,
        "diagrams",
    )

    config_version = _require_supported_config_version(
        raw_data.get("config_version")
    )
    archledger_dir = raw_data.get("archledger_dir")
    if not isinstance(archledger_dir, str) or not archledger_dir.strip():
        raise ConfigError("archledger_dir must be a non-empty string.")
    project_uuid = raw_data.get("project_uuid")
    if not isinstance(project_uuid, str):
        raise ConfigError("project_uuid must be a string.")
    project_name = raw_data.get("project_name")
    if not isinstance(project_name, str):
        raise ConfigError("project_name must be a string.")

    (
        source_format,
        source_schema_version,
        front_matter,
        section_extension,
        record_extension,
    ) = _parse_source_config(source_data, config_version)
    (
        default_output,
        build_default_format,
        output_dir,
        include_draft,
        include_superseded,
        strict,
        keep_intermediate,
        build_converter,
        build_pdf_engine,
        reference_docx,
        build_outputs,
    ) = _parse_build_config(build_data, config_version, source_format)
    template_version, language, title, include_help = _parse_arc42_config(arc42_data)
    (
        id_prefix,
        id_width,
        id_segment_mode,
        id_default_segment,
        id_segment_map,
    ) = _parse_ids_config(ids_data)
    skill_installed, skill_path = _parse_skill_config(skill_data)
    (
        tracking_enabled,
        tracking_state_file,
        tracking_scanner,
        tracking_include,
        tracking_exclude,
        tracking_max_file_bytes,
        tracking_hash_algorithm,
    ) = _parse_tracking_config(tracking_data)
    # build_data is passed as well so [build.diagrams] can serve as a fallback.
    (
        diagram_enabled,
        diagram_renderer,
        diagram_default_type,
        diagram_output_dir,
        diagram_image_format,
        diagram_kroki_url,
    ) = _parse_diagram_config(diagrams_data, build_data)

    return ProjectConfig(
        config_version=config_version,
        archledger_dir=archledger_dir,
        project_uuid=validate_uuid(project_uuid),
        project_name=normalize_project_name(project_name),
        id_prefix=id_prefix,
        id_width=id_width,
        id_segment_mode=id_segment_mode,
        id_default_segment=id_default_segment,
        id_segment_map=id_segment_map,
        source_format=source_format,
        source_schema_version=source_schema_version,
        front_matter=front_matter.strip().lower(),
        section_extension=section_extension,
        record_extension=record_extension,
        build_default_output=default_output,
        build_default_format=build_default_format,
        build_output_dir=output_dir.strip(),
        build_include_draft=include_draft,
        build_include_superseded=include_superseded,
        build_strict=strict,
        build_keep_intermediate=keep_intermediate,
        build_converter=build_converter,
        build_pdf_engine=build_pdf_engine,
        build_reference_docx=reference_docx,
        build_outputs=build_outputs,
        arc42_template_version=template_version,
        arc42_language=language,
        arc42_title=title,
        arc42_include_help=include_help,
        skill_installed=skill_installed,
        skill_path=skill_path,
        tracking_enabled=tracking_enabled,
        tracking_state_file=tracking_state_file,
        tracking_scanner=tracking_scanner,
        tracking_include=tracking_include,
        tracking_exclude=tracking_exclude,
        tracking_max_file_bytes=tracking_max_file_bytes,
        tracking_hash_algorithm=tracking_hash_algorithm,
        diagram_enabled=diagram_enabled,
        diagram_renderer=diagram_renderer,
        diagram_default_type=diagram_default_type,
        diagram_output_dir=diagram_output_dir,
        diagram_image_format=diagram_image_format,
        diagram_kroki_url=diagram_kroki_url,
    )


def _require_supported_config_version(value: object) -> int:
    """Return *value* if it is an integer config version from 1 to 7.

    The type is checked before the range. A plain set-membership test would
    raise ``TypeError`` for unhashable TOML values (arrays, tables) and would
    accept floats such as ``1.0`` because they compare equal to integers.
    ``bool`` is rejected explicitly because it is a subclass of ``int``.

    Raises:
        ConfigError: If *value* is missing, not an integer, or out of range.
    """
    if (
        isinstance(value, bool)
        or not isinstance(value, int)
        or not 1 <= value <= 7
    ):
        raise ConfigError("config_version must be 1, 2, 3, 4, 5, 6, or 7.")
    return value
def _parse_ids_config(
    ids_data: dict[str, object],
) -> tuple[str, int, str, str, dict[str, str]]:
    """Validate the ``[ids]`` table.

    Missing keys fall back to ``DEFAULT_ID_PREFIX``, ``DEFAULT_ID_WIDTH``,
    ``DEFAULT_ID_SEGMENT_MODE`` and ``DEFAULT_ID_SEGMENT``. The segment map
    starts as a copy of ``DEFAULT_ID_SEGMENT_MAP`` and is overlaid with any
    entries from ``ids.segment_map``.

    Args:
        ids_data: The ``[ids]`` table, or an empty dict when it is absent.

    Returns:
        ``(prefix, width, segment_mode, default_segment, segment_map)``.

    Raises:
        ConfigError: If a value has the wrong type, a validator from
            ``archledger.ids`` rejects it, or ``segment_map`` names an
            unknown record type.
    """
    prefix_value = ids_data.get("prefix", DEFAULT_ID_PREFIX)
    width_value = ids_data.get("width", DEFAULT_ID_WIDTH)
    segment_mode_value = ids_data.get("segment_mode", DEFAULT_ID_SEGMENT_MODE)
    default_segment_value = ids_data.get("default_segment", DEFAULT_ID_SEGMENT)
    segment_map_value = ids_data.get("segment_map")

    if not isinstance(prefix_value, str):
        raise ConfigError("ids.prefix must be a string.")
    try:
        prefix = validate_id_prefix(prefix_value)
    except ValueError as exc:
        raise ConfigError(str(exc)) from exc

    # width is handed to the validator without a prior type check, so a
    # non-integer TOML value may surface as TypeError rather than ValueError.
    # Both are reported as ConfigError so callers see one exception type.
    try:
        width = validate_id_width(width_value)  # type: ignore[arg-type]
    except (TypeError, ValueError) as exc:
        raise ConfigError(str(exc)) from exc

    if not isinstance(segment_mode_value, str):
        raise ConfigError("ids.segment_mode must be a string.")
    try:
        segment_mode = validate_id_segment_mode(segment_mode_value)
    except ValueError as exc:
        raise ConfigError(str(exc)) from exc

    if not isinstance(default_segment_value, str):
        raise ConfigError("ids.default_segment must be a string.")
    try:
        default_segment = validate_id_segment(default_segment_value)
    except ValueError as exc:
        raise ConfigError(str(exc)) from exc

    segment_map = dict(DEFAULT_ID_SEGMENT_MAP)
    if segment_map_value is None:
        return prefix, width, segment_mode, default_segment, segment_map

    if not isinstance(segment_map_value, dict):
        raise ConfigError("ids.segment_map must be a TOML table.")
    allowed_segment_keys = set(VALID_RECORD_TYPES) | {
        "section",
        "archive_tombstone",
    }
    unknown_keys = sorted(set(segment_map_value) - allowed_segment_keys)
    if unknown_keys:
        joined = ", ".join(unknown_keys)
        raise ConfigError(
            "ids.segment_map contains unknown record types: " + joined
        )
    for key, value in segment_map_value.items():
        if not isinstance(value, str):
            raise ConfigError(f"ids.segment_map.{key} must be a string.")
        try:
            segment_map[key] = validate_id_segment(value)
        except ValueError as exc:
            raise ConfigError(str(exc)) from exc
    return prefix, width, segment_mode, default_segment, segment_map
def _validate_subtable(
path: Path,
value: object,
allowed_keys: set[str],
table_name: str,
) -> dict[str, object]:
if value is None:
return {}
if not isinstance(value, dict):
raise ConfigError(f"{table_name} in {path.name} must be a TOML table.")
unknown_keys = sorted(set(value) - allowed_keys)
if unknown_keys:
joined = ", ".join(unknown_keys)
raise ConfigError(f"Unknown keys in {table_name}: {joined}")
return dict(value)
def _parse_source_config(
    source_data: dict[str, object],
    config_version: int,
) -> tuple[str, int, str, str, str]:
    """Validate the ``[source]`` table.

    ``source.format`` is mandatory from config version 4 onward. When it is
    omitted in older versions the default is ``"asciidoc"`` for version 3 and
    ``"markdown"`` otherwise. Both extensions default to whatever
    ``default_extension_for_source_format`` returns for the chosen format.

    Returns:
        ``(source_format, schema_version, front_matter, section_extension,
        record_extension)``.

    Raises:
        ConfigError: If a value is missing when required, has the wrong type,
            or is not an accepted choice.
    """
    if "format" not in source_data and config_version >= 4:
        raise ConfigError(
            f"source.format is required for config_version {config_version}."
        )

    fallback_format = "markdown"
    if config_version == 3:
        fallback_format = "asciidoc"
    raw_format = source_data.get("format", fallback_format)
    if not isinstance(raw_format, str):
        raise ConfigError("source.format must be a string.")
    source_format = raw_format.strip().lower()
    if source_format not in VALID_SOURCE_FORMATS:
        choices = ", ".join(sorted(VALID_SOURCE_FORMATS))
        raise ConfigError("source.format must be one of: " + choices + ".")

    # Only YAML front matter is accepted; the comparison ignores case and
    # surrounding whitespace.
    raw_front_matter = source_data.get("front_matter", "yaml")
    front_matter = (
        raw_front_matter.strip().lower()
        if isinstance(raw_front_matter, str)
        else None
    )
    if front_matter != "yaml":
        raise ConfigError('source.front_matter must be the string "yaml".')

    schema_version = source_data.get(
        "schema_version",
        CURRENT_SOURCE_SCHEMA_VERSION,
    )
    # bool is a subclass of int, so it has to be excluded explicitly.
    if not isinstance(schema_version, int) or isinstance(schema_version, bool):
        raise ConfigError("source.schema_version must be an integer.")

    fallback_extension = default_extension_for_source_format(source_format)
    raw_section_extension = source_data.get("section_extension", fallback_extension)
    section_extension = _normalize_extension(
        raw_section_extension,
        "source.section_extension",
    )
    raw_record_extension = source_data.get("record_extension", fallback_extension)
    record_extension = _normalize_extension(
        raw_record_extension,
        "source.record_extension",
    )
    return (
        source_format,
        schema_version,
        front_matter,
        section_extension,
        record_extension,
    )
def _parse_build_config(
    build_data: dict[str, object],
    config_version: int,
    source_format: str,
) -> tuple[
    str,
    str,
    str,
    bool,
    bool,
    bool,
    bool,
    str,
    str,
    str,
    dict[str, dict[str, object]],
]:
    """Validate the ``[build]`` table.

    The default output file and default format are resolved together:

    * If ``default_format`` is omitted, it is inferred from the extension of
      ``default_output`` when possible. Otherwise it falls back to
      *source_format* for config version 3 and ``"markdown"`` for any other
      version.
    * If ``default_output`` is omitted, it is taken from
      ``default_document_filename_for_output_format``.
    * When the output file's extension maps to a format, it must agree with
      the default format.

    Args:
        build_data: The ``[build]`` table, or an empty dict when absent.
        config_version: Already-validated config version.
        source_format: Already-validated ``source.format`` value.

    Returns:
        ``(default_output, default_format, output_dir, include_draft,
        include_superseded, strict, keep_intermediate, converter, pdf_engine,
        reference_docx, outputs)``.

    Raises:
        ConfigError: If any value has the wrong type or is not an accepted
            choice, or the output extension contradicts the format.
    """
    raw_output = build_data.get("default_output")
    if raw_output is None:
        default_output = ""
    elif isinstance(raw_output, str) and raw_output.strip():
        default_output = raw_output.strip()
    else:
        raise ConfigError("build.default_output must be a non-empty string.")

    raw_format = build_data.get("default_format")
    if raw_format is None:
        inferred_default_format = (
            infer_output_format_from_path(default_output) if default_output else None
        )
        if inferred_default_format is not None:
            build_default_format = inferred_default_format
        else:
            build_default_format = source_format if config_version == 3 else "markdown"
    elif isinstance(raw_format, str):
        build_default_format = raw_format.strip().lower()
        if build_default_format not in VALID_OUTPUT_FORMATS:
            raise ConfigError(
                "build.default_format must be one of: "
                + ", ".join(sorted(VALID_OUTPUT_FORMATS))
                + "."
            )
    else:
        raise ConfigError("build.default_format must be a string.")

    if not default_output:
        default_output = default_document_filename_for_output_format(
            build_default_format
        )
    # An extension that does not map to a known format is allowed; only a
    # recognized extension that contradicts the format is rejected.
    inferred_output_format = infer_output_format_from_path(default_output)
    if (
        inferred_output_format is not None
        and inferred_output_format != build_default_format
    ):
        raise ConfigError(
            "build.default_output extension must match build.default_format."
        )

    output_dir = _require_non_empty_string(
        build_data.get("default_output_dir", "build"),
        "build.default_output_dir",
    )
    include_draft = _require_bool(
        build_data.get("include_draft", False),
        "build.include_draft",
    )
    include_superseded = _require_bool(
        build_data.get("include_superseded", False),
        "build.include_superseded",
    )
    strict = _require_bool(build_data.get("strict", False), "build.strict")
    keep_intermediate = _require_bool(
        build_data.get("keep_intermediate", False),
        "build.keep_intermediate",
    )
    converter = _require_choice(
        build_data.get("converter", "auto"),
        "build.converter",
        _ALLOWED_BUILD_CONVERTERS,
    )
    pdf_engine = _require_optional_string(
        build_data.get("pdf_engine", ""),
        "build.pdf_engine",
    )
    # Uses the same helper as pdf_engine and the per-output reference_docx
    # setting, so surrounding whitespace is stripped here too. The error
    # message for a non-string value is unchanged.
    reference_docx = _require_optional_string(
        build_data.get("reference_docx", ""),
        "build.reference_docx",
    )

    outputs_value = build_data.get("outputs", {})
    if not isinstance(outputs_value, dict):
        raise ConfigError("build.outputs must be a TOML table.")
    build_outputs = _normalize_build_outputs(outputs_value)

    return (
        default_output,
        build_default_format,
        output_dir,
        include_draft,
        include_superseded,
        strict,
        keep_intermediate,
        converter,
        pdf_engine,
        reference_docx,
        build_outputs,
    )
def _parse_arc42_config(
    arc42_data: dict[str, object],
) -> tuple[str, str, str, bool]:
    """Validate the ``[arc42]`` table.

    Defaults: ``template_version="9.0-EN"``, ``language="en"``,
    ``title="Architecture Documentation"``, ``include_help=False``.

    Returns:
        ``(template_version, language, title, include_help)``.

    Raises:
        ConfigError: If a string field is empty or not a string, or
            ``include_help`` is not a boolean.
    """
    raw_version = arc42_data.get("template_version", "9.0-EN")
    raw_language = arc42_data.get("language", "en")
    raw_title = arc42_data.get("title", "Architecture Documentation")
    raw_help = arc42_data.get("include_help", False)
    # Tuple elements are evaluated left to right, so the first invalid field
    # in this order is the one reported.
    return (
        _require_non_empty_string(raw_version, "arc42.template_version"),
        _require_non_empty_string(raw_language, "arc42.language"),
        _require_non_empty_string(raw_title, "arc42.title"),
        _require_bool(raw_help, "arc42.include_help"),
    )
def _parse_skill_config(skill_data: dict[str, object]) -> tuple[bool, str]:
    """Validate the ``[skill]`` table.

    Defaults: ``installed=False``, ``path="skills/archledger/SKILL.md"``.

    Returns:
        ``(installed, path)``.

    Raises:
        ConfigError: If ``installed`` is not a boolean or ``path`` is not a
            non-empty string.
    """
    raw_installed = skill_data.get("installed", False)
    raw_path = skill_data.get("path", "skills/archledger/SKILL.md")
    # Evaluated left to right: "installed" is checked before "path".
    return (
        _require_bool(raw_installed, "skill.installed"),
        _require_non_empty_string(raw_path, "skill.path"),
    )
def _parse_tracking_config(
    tracking_data: dict[str, object],
) -> tuple[bool, str, str, tuple[str, ...], tuple[str, ...], int, str]:
    """Validate the ``[tracking]`` table through ``_TRACKING_TABLE``.

    The table is run through ``parse_table_from_spec`` and the result is
    passed as keyword arguments to the spec's factory.

    Returns:
        The factory's result, cast to the seven-element tuple type in the
        signature.
    """
    field_values = parse_table_from_spec(tracking_data, _TRACKING_TABLE)
    row = _TRACKING_TABLE.factory(**field_values)
    return cast(
        tuple[bool, str, str, tuple[str, ...], tuple[str, ...], int, str],
        row,
    )
def _parse_diagram_config(
    diagrams_data: dict[str, object],
    build_data: dict[str, object],
) -> tuple[bool, str, str, str, str, str]:
    """Validate diagram settings from ``[diagrams]`` or ``[build.diagrams]``.

    ``[build.diagrams]`` is always shape- and key-checked when present. Its
    values are only used when *diagrams_data* is empty; the two tables are
    never merged. Field names in error messages always use the ``diagrams.``
    prefix, whichever table supplied the value.

    Returns:
        ``(enabled, renderer, default_type, output_dir, image_format,
        kroki_url)``.

    Raises:
        ConfigError: If ``build.diagrams`` is not a table or has unknown
            keys, or any setting fails validation.
    """
    nested_raw = build_data.get("diagrams")
    nested_data: dict[str, object] = {}
    if nested_raw is not None:
        if not isinstance(nested_raw, dict):
            raise ConfigError("build.diagrams must be a TOML table.")
        unknown_keys = sorted(set(nested_raw) - _ALLOWED_DIAGRAM_KEYS)
        if unknown_keys:
            joined = ", ".join(unknown_keys)
            raise ConfigError(f"Unknown keys in build.diagrams: {joined}")
        nested_data = dict(nested_raw)

    # A non-empty [diagrams] table wins outright over [build.diagrams].
    settings = diagrams_data or nested_data

    enabled = _require_bool(settings.get("enabled", False), "diagrams.enabled")
    renderer = _require_choice(
        settings.get("renderer", "pass-through"),
        "diagrams.renderer",
        _ALLOWED_DIAGRAM_RENDERERS,
    )
    default_type = _require_choice(
        settings.get("default_type", "text"),
        "diagrams.default_type",
        _ALLOWED_DIAGRAM_TYPES,
    )
    output_dir = _require_non_empty_string(
        settings.get("output_dir", "diagrams"),
        "diagrams.output_dir",
    )
    image_format = _require_choice(
        settings.get("image_format", "svg"),
        "diagrams.image_format",
        _ALLOWED_DIAGRAM_IMAGE_FORMATS,
    )
    kroki_url = _require_optional_string(
        settings.get("kroki_url", ""),
        "diagrams.kroki_url",
    )
    return (
        enabled,
        renderer,
        default_type,
        output_dir,
        image_format,
        kroki_url,
    )
def _normalize_extension(value: object, field_name: str) -> str:
if not isinstance(value, str) or not value.strip():
raise ConfigError(f"{field_name} must be a non-empty string.")
normalized = value.strip().lower()
if not normalized.startswith(".") or len(normalized) == 1:
raise ConfigError(f"{field_name} must start with a file extension dot.")
return normalized
def _require_bool(value: object, field_name: str) -> bool:
if not isinstance(value, bool):
raise ConfigError(f"{field_name} must be a boolean.")
return value
def _require_non_empty_string(value: object, field_name: str) -> str:
if not isinstance(value, str) or not value.strip():
raise ConfigError(f"{field_name} must be a non-empty string.")
return value.strip()
def _require_optional_string(value: object, field_name: str) -> str:
if not isinstance(value, str):
raise ConfigError(f"{field_name} must be a string.")
return value.strip()
def _require_choice(value: object, field_name: str, allowed: frozenset[str]) -> str:
if not isinstance(value, str):
raise ConfigError(f"{field_name} must be a string.")
normalized = value.strip().lower()
if normalized not in allowed:
raise ConfigError(
f"{field_name} must be one of: " + ", ".join(sorted(allowed)) + "."
)
return normalized
def _require_string_tuple(value: object, field_name: str) -> tuple[str, ...]:
if not isinstance(value, (list, tuple)):
raise ConfigError(f"{field_name} must be a list of strings.")
items: list[str] = []
for item in value:
if not isinstance(item, str) or not item.strip():
raise ConfigError(f"{field_name} must contain only non-empty strings.")
items.append(item.strip())
return tuple(items)
def _require_positive_int(value: object, field_name: str) -> int:
if isinstance(value, bool) or not isinstance(value, int) or value <= 0:
raise ConfigError(f"{field_name} must be a positive integer.")
return value
def _normalize_build_outputs(value: dict[str, object]) -> dict[str, dict[str, object]]:
normalized: dict[str, dict[str, object]] = {}
for output_name, raw_config in value.items():
normalized_name = str(output_name).strip().lower()
if normalized_name not in VALID_OUTPUT_FORMATS:
raise ConfigError(
f"build.outputs.{output_name} is not a supported output format."
)
if not isinstance(raw_config, dict):
raise ConfigError(f"build.outputs.{output_name} must be a TOML table.")
unknown_keys = sorted(set(raw_config) - _ALLOWED_BUILD_OUTPUT_KEYS)
if unknown_keys:
raise ConfigError(
f"Unknown keys in build.outputs.{output_name}: "
+ ", ".join(unknown_keys)
)
output_config: dict[str, object] = {}
enabled = raw_config.get("enabled")
if enabled is not None:
output_config["enabled"] = _require_bool(
enabled,
f"build.outputs.{normalized_name}.enabled",
)
tool = raw_config.get("tool")
if tool is not None:
output_config["tool"] = _require_choice(
tool,
f"build.outputs.{normalized_name}.tool",
_ALLOWED_BUILD_CONVERTERS,
)
pdf_engine = raw_config.get("pdf_engine")
if pdf_engine is not None:
output_config["pdf_engine"] = _require_optional_string(
pdf_engine,
f"build.outputs.{normalized_name}.pdf_engine",
)
reference_docx = raw_config.get("reference_docx")
if reference_docx is not None:
output_config["reference_docx"] = _require_optional_string(
reference_docx,
f"build.outputs.{normalized_name}.reference_docx",
)
normalized[normalized_name] = output_config
return normalized