From 029488b80d067e6f43b76c0af82d6e9448f0468a Mon Sep 17 00:00:00 2001 From: Ludwig Lehnert Date: Tue, 17 Mar 2026 09:53:56 +0100 Subject: [PATCH] less file shares --- .env.example | 5 +- Dockerfile | 9 +- README.md | 88 +++++----- app/backup_to_destination.py | 322 +++++++++++++++++++++++++++++++++++ app/init.sh | 31 ++-- app/reconcile_shares.py | 306 ++++++++++++++++----------------- docker-compose.yml | 2 - etc/samba/smb.conf | 10 +- setup | 19 +-- 9 files changed, 561 insertions(+), 231 deletions(-) create mode 100644 app/backup_to_destination.py diff --git a/.env.example b/.env.example index b5cda4a..9276539 100644 --- a/.env.example +++ b/.env.example @@ -5,7 +5,6 @@ JOIN_USER=FileShare_ServiceAcc JOIN_PASSWORD=ReplaceWithLongRandomPassword DOMAIN_USERS_SID=S-1-5-21-1111111111-2222222222-3333333333-513 DOMAIN_ADMINS_SID=S-1-5-21-1111111111-2222222222-3333333333-512 -PUBLIC_GROUP_SID=S-1-5-21-1111111111-2222222222-3333333333-513 FSLOGIX_GROUP_SID=S-1-5-21-1111111111-2222222222-3333333333-513 # SAMBA_HOSTNAME=adsambafsrv # NETBIOS_NAME=ADSAMBAFSRV @@ -13,3 +12,7 @@ FSLOGIX_GROUP_SID=S-1-5-21-1111111111-2222222222-3333333333-513 # LDAP_BASE_DN=DC=example,DC=com # PRIVATE_SKIP_USERS=svc_backup,svc_sql # PRIVATE_SKIP_PREFIXES=svc_,sql_ +# BACKUP_DESTINATION=rsync://user:pass@backup.example.com/samba-backups +# BACKUP_DESTINATION=smb://DOMAIN%5Cuser:pass@backup.example.com/Backups/samba +# BACKUP_DESTINATION=davfs://user:pass@webdav.example.com/remote.php/dav/files/backup +# BACKUP_DESTINATION=sftp://user:pass@sftp.example.com/exports/samba diff --git a/Dockerfile b/Dockerfile index eb61c3e..1ec8ce0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,8 @@ RUN apt-get update \ libnss-winbind \ libpam-winbind \ python3 \ + rclone \ + rsync \ samba \ samba-vfs-modules \ sqlite3 \ @@ -19,14 +21,15 @@ RUN apt-get update \ winbind \ && rm -rf /var/lib/apt/lists/* -RUN mkdir -p /app /data/private /data/public /data/fslogix /data/groups /state /etc/samba/generated +RUN mkdir -p /app /data/private /data/fslogix /data/groups/data /data/groups/archive /state COPY app/reconcile_shares.py /app/reconcile_shares.py +COPY app/backup_to_destination.py /app/backup_to_destination.py COPY app/init.sh /app/init.sh COPY etc/samba/smb.conf /app/smb.conf.template -RUN chmod +x /app/init.sh /app/reconcile_shares.py \ - && touch /etc/samba/generated/shares.conf +RUN chmod +x /app/init.sh /app/reconcile_shares.py /app/backup_to_destination.py \ + && true ENTRYPOINT ["/usr/bin/tini", "--"] CMD ["/app/init.sh"] diff --git a/README.md b/README.md index 23ed8ac..da24a49 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,19 @@ # AD-Integrated Containerized Samba File Server -This repository provides a production-oriented Samba file server container that joins an existing Active Directory domain, exposes static and dynamic SMB shares, and persists share identity by AD `objectGUID`. +This repository provides a production-oriented Samba file server container that joins an existing Active Directory domain and exposes three SMB shares: `Privat`, `Data`, and `FSLogix`. ## Architecture - Samba runs in ADS mode with `winbind` identity mapping. - Static shares: - `\\server\Privat` -> `/data/private` - - `\\server\Geteilt` -> `/data/public` + - `\\server\Data` -> `/data/groups/data` - `\\server\FSLogix` -> `/data/fslogix` -- Dynamic shares are generated from AD groups matching `FileShare_*` or `FS_*` and written to `/etc/samba/generated/shares.conf`. -- Dynamic share records are persisted in SQLite at `/state/shares.db`. -- Backing storage is GUID-based and stable across group rename: - - `/data/groups/` +- FS_* groups are projected as folders inside the Data share (`/data/groups/data/`). +- Group records are persisted in SQLite at `/state/shares.db`. +- Group folders are name-based while active and moved to archive on deactivation: + - active: `/data/groups/data/` + - inactive/deleted groups: `/data/groups/archive/` - Samba machine trust/key material is persisted in `/var/lib/samba` to survive container recreation. - Container hostname is fixed (`SAMBA_HOSTNAME`) to keep AD computer identity stable. - NetBIOS name defaults to `ADSAMBAFSRV` and is clamped to 15 characters (`NETBIOS_NAME` override supported). @@ -20,19 +21,23 @@ This repository provides a production-oriented Samba file server container that - `FSLOGIX_GROUP_SID` controls who can access the default FSLogix share (defaults to `DOMAIN_USERS_SID`). - Startup resolves those SIDs to NSS group names via winbind, then uses those resolved groups in Samba `valid users` rules. - Share operations are audited with Samba `full_audit` (connect, list, read, write, create, delete, rename) and written to Samba log files. +- Optional remote backups run when `BACKUP_DESTINATION` is configured. - Private home creation skips well-known/service accounts by default (including `krbtgt`, `msol_*`, `FileShare_ServiceAcc`). - Reconciliation is executed: - once on startup - every 5 minutes via cron +- Backup is executed: + - once on startup (when enabled) + - every 30 minutes via cron -## Dynamic Share Lifecycle (objectGUID-first) +## Data Folder Lifecycle The reconciliation script (`/app/reconcile_shares.py`) enforces these rules: -1. New matching group -> insert DB row, create `/data/groups/`, expose share. -2. Group renamed but still `FileShare_` -> update `samAccountName` and `shareName`, keep same path. -3. Group removed or no longer matches prefix -> set `isActive=0`, remove Samba exposure, keep data. -4. Previously inactive group returns -> set `isActive=1`, reuse existing path and data. +1. New matching `FS_*` group -> insert DB row and create `/data/groups/data/`. +2. Group rename while still matching `FS_*` -> rename/update folder path. +3. Group removed or no longer matching `FS_*` -> set `isActive=0` and move folder to `/data/groups/archive/...`. +4. Previously inactive group returns -> set `isActive=1`, move back into `/data/groups/data/...`. ## SQLite State Database @@ -58,8 +63,8 @@ CREATE TABLE shares ( - Initial admin credentials with rights to create/reset `FileShare_ServiceAccount` during `./setup`. - `FileShare_ServiceAccount` must be allowed to join computers to the domain (`net ads join`) in your AD policy. - Dynamic group discovery primarily uses machine-account LDAP (`net ads search -P`); join credentials are only used as a fallback LDAP bind path. -- Group naming convention for dynamic shares: - - `FileShare_` or `FS_` +- Group naming convention for Data folders: + - `FS_` ## DNS Requirements @@ -84,8 +89,8 @@ Kerberos requires close time alignment. - `README.md` - `app/init.sh` - `app/reconcile_shares.py` +- `app/backup_to_destination.py` - `etc/samba/smb.conf` -- `etc/samba/generated/` ## Setup @@ -102,8 +107,8 @@ Kerberos requires close time alignment. - initial admin credentials (used once for provisioning) - `DOMAIN_USERS_SID` - `DOMAIN_ADMINS_SID` - - optional `PUBLIC_GROUP_SID` (defaults to `DOMAIN_USERS_SID`) - optional `FSLOGIX_GROUP_SID` (defaults to `DOMAIN_USERS_SID`) + - optional `BACKUP_DESTINATION` (empty disables backup) Optional: - `SAMBA_HOSTNAME` (defaults to `adsambafsrv`) @@ -144,31 +149,43 @@ Kerberos requires close time alignment. - mode: `700` - `hide unreadable = yes` + ACLs enforce that users only see their own folder. -### Geteilt +### Data -- Share: `\\server\Geteilt` -- Path: `/data/public` -- Read/write for authenticated users in configurable `PUBLIC_GROUP_SID` (default: `DOMAIN_USERS_SID`, resolved through winbind). +- Share: `\\server\Data` +- Path: `/data/groups/data` +- Contains one folder per active `FS_*` AD group. +- Root is discoverable as one share, while access to each group folder is enforced via POSIX/ACL group permissions. - No guest access. -- Permissions are reconciled recursively so all descendants remain homogeneous (dirs `2770`, files `0660`, shared group/admin ACLs). ### FSLogix - Share: `\\server\FSLogix` - Path: `/data/fslogix` - Access for authenticated users in configurable `FSLOGIX_GROUP_SID` (default: `DOMAIN_USERS_SID`, resolved through winbind). -- Semantics intentionally differ from `Geteilt`: only the share root is reconciled (`03770` + ACL defaults), while user-created profile container folders/files are not recursively normalized. +- Semantics intentionally differ from `Data`: only the share root is reconciled (`03770` + ACL defaults), while user-created profile container folders/files are not recursively normalized. - Samba masks are profile-container oriented (`create mask = 0600`, `directory mask = 0700`) so profile payload stays user-private by default. -### Dynamic Group Shares +## Backups -- AD groups: `FileShare_*` and `FS_*` -- Share name: group title from AD (`displayName` -> `name`/`cn` fallback). Prefix stripping from `sAMAccountName` is only a fallback when no title exists. -- Backing path: `/data/groups/` -- Share exposure generated in `/etc/samba/generated/shares.conf` -- Dynamic share names are validated for SMB compatibility and deduplicated case-insensitively. -- Group membership changes are refreshed continuously via winbind cache updates (`winbind cache time = 60`) and Samba config reload during reconciliation. -- Dynamic share trees are reconciled recursively so all descendants keep homogeneous permissions. +- Backups are enabled only if `BACKUP_DESTINATION` is non-empty. +- Sources synced to destination on each run: + - `/data/private` -> `data/private` + - `/data/groups` -> `data/groups` + - `/data/fslogix` -> `data/fslogix` + - `/state` -> `state` + - `/var/lib/samba/private` -> `samba/private` +- Supported destination schemes: + - `rsync://user:pass@host/module/path` + - `smb://user:pass@host/share/path` (domain user example: `smb://DOMAIN%5Cuser:pass@host/share/path`) + - `davfs://user:pass@host/path` (WebDAV over HTTPS) + - `sftp://user:pass@host/path` +- Username/password components should be URL-encoded when they contain reserved characters (`@`, `:`, `/`, `\`, `%`, `#`, `?`). + +- Example: + + ```env + BACKUP_DESTINATION=sftp://backupuser:StrongPassword@sftp.example.com/exports/samba + ``` ## Useful Commands @@ -178,6 +195,7 @@ docker compose exec samba python3 /app/reconcile_shares.py docker compose exec samba sqlite3 /state/shares.db 'SELECT * FROM shares;' docker compose exec samba testparm -s docker compose exec samba sh -lc 'tail -n 200 /var/log/samba/log.*' +docker compose exec samba sh -lc 'tail -n 200 /var/log/backup.log' ``` ## Troubleshooting @@ -213,9 +231,9 @@ docker compose exec samba sh -lc 'tail -n 200 /var/log/samba/log.*' docker compose exec samba wbinfo -g ``` -### Dynamic shares not appearing +### Data folders not appearing -- Confirm AD groups match `FileShare_*` or `FS_*`. +- Confirm AD groups match `FS_*`. - Run manual reconciliation and inspect logs: ```bash @@ -223,12 +241,6 @@ docker compose exec samba sh -lc 'tail -n 200 /var/log/samba/log.*' docker compose exec samba tail -n 100 /var/log/reconcile.log ``` -- Validate generated config: - - ```bash - docker compose exec samba cat /etc/samba/generated/shares.conf - ``` - ### `acl_xattr.so` or `full_audit.so` module load error - If logs show `Error loading module .../vfs/acl_xattr.so` (or `full_audit.so`), your running image is missing Samba VFS modules. @@ -262,5 +274,5 @@ docker compose exec samba sh -lc 'tail -n 200 /var/log/samba/log.*' ## Notes - User data is never automatically deleted. -- Inactive/deleted groups only stop being exposed as shares. +- Inactive/deleted FS_* groups are moved to `/data/groups/archive`. - Data and state survive container restarts via named Docker volumes (`/data/*`, `/state`, `/var/lib/samba`). diff --git a/app/backup_to_destination.py b/app/backup_to_destination.py new file mode 100644 index 0000000..f33742c --- /dev/null +++ b/app/backup_to_destination.py @@ -0,0 +1,322 @@ +#!/usr/bin/env python3 + +import fcntl +import os +import subprocess +import sys +import tempfile +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple +from urllib.parse import SplitResult, unquote, urlsplit + + +LOCK_PATH = "/state/backup.lock" +BACKUP_SOURCES: List[Tuple[str, str]] = [ + ("/data/private", "data/private"), + ("/data/groups", "data/groups"), + ("/data/fslogix", "data/fslogix"), + ("/state", "state"), + ("/var/lib/samba/private", "samba/private"), +] + +RCLONE_SCHEME_MAP = { + "sftp": "sftp", + "smb": "smb", + "cifs": "smb", + "davfs": "webdav", + "dav": "webdav", + "webdav": "webdav", + "http": "webdav", + "https": "webdav", +} + + +@dataclass +class Destination: + raw_url: str + scheme: str + parts: SplitResult + username: str + password: str + hostname: str + port: Optional[int] + path: str + + +def log(message: str) -> None: + print(f"[backup] {message}", flush=True) + + +def run_command( + command: List[str], + *, + env: Optional[Dict[str, str]] = None, + check: bool = True, +) -> subprocess.CompletedProcess: + result = subprocess.run(command, capture_output=True, text=True, env=env) + if check and result.returncode != 0: + output = result.stderr.strip() or result.stdout.strip() + raise RuntimeError(f"Command failed ({command[0]}): {output}") + return result + + +def parse_destination(raw_url: str) -> Destination: + parts = urlsplit(raw_url) + scheme = parts.scheme.lower() + if not scheme: + raise RuntimeError("BACKUP_DESTINATION must include a URL scheme") + + hostname = parts.hostname or "" + if not hostname: + raise RuntimeError("BACKUP_DESTINATION must include a hostname") + + return Destination( + raw_url=raw_url, + scheme=scheme, + parts=parts, + username=unquote(parts.username or ""), + password=unquote(parts.password or ""), + hostname=hostname, + port=parts.port, + path=unquote(parts.path or ""), + ) + + +def redact_destination(raw_url: str) -> str: + parts = urlsplit(raw_url) + host = parts.hostname or "" + if ":" in host and not host.startswith("["): + host = f"[{host}]" + if parts.port is not None: + host = f"{host}:{parts.port}" + + if parts.username: + username = unquote(parts.username) + if parts.password is not None: + auth = f"{username}:***" + else: + auth = username + netloc = f"{auth}@{host}" + else: + netloc = host + + return f"{parts.scheme}://{netloc}{parts.path}" + + +def available_sources() -> List[Tuple[str, str]]: + sources: List[Tuple[str, str]] = [] + for source_path, destination_path in BACKUP_SOURCES: + if os.path.isdir(source_path): + sources.append((source_path, destination_path)) + else: + log(f"Skipping missing source: {source_path}") + return sources + + +def format_host(hostname: str) -> str: + if ":" in hostname and not hostname.startswith("["): + return f"[{hostname}]" + return hostname + + +def join_path(prefix: str, suffix: str) -> str: + left = prefix.strip("/") + right = suffix.strip("/") + if left and right: + return f"{left}/{right}" + return left or right + + +def obscure_secret(secret: str) -> str: + result = run_command(["rclone", "obscure", secret]) + value = result.stdout.strip() + if not value: + raise RuntimeError("rclone obscure returned an empty value") + return value + + +def parse_smb_identity(username: str) -> Tuple[str, str]: + if not username: + return "", "" + if ";" in username: + domain, user = username.split(";", 1) + return domain, user + if "\\" in username: + domain, user = username.split("\\", 1) + return domain, user + return "", username + + +def sync_with_rsync(destination: Destination, sources: List[Tuple[str, str]]) -> None: + module_path = destination.path.lstrip("/") + if not module_path: + raise RuntimeError( + "rsync destinations must include a module path (example: rsync://user:pass@host/module/path)" + ) + + host = format_host(destination.hostname) + if destination.port is not None: + host = f"{host}:{destination.port}" + + user_prefix = f"{destination.username}@" if destination.username else "" + remote_base = f"rsync://{user_prefix}{host}/{module_path.rstrip('/')}" + + command_env = os.environ.copy() + if destination.password: + command_env["RSYNC_PASSWORD"] = destination.password + + for source_path, destination_path in sources: + remote_path = f"{remote_base}/{destination_path.strip('/')}/" + log(f"Syncing {source_path} to rsync destination") + run_command( + ["rsync", "-a", "--delete", f"{source_path}/", remote_path], + env=command_env, + ) + + +def build_rclone_remote(destination: Destination) -> Tuple[Dict[str, str], str]: + backend = RCLONE_SCHEME_MAP.get(destination.scheme) + if backend is None: + supported = ", ".join(["rsync", *sorted(RCLONE_SCHEME_MAP.keys())]) + raise RuntimeError( + f"Unsupported BACKUP_DESTINATION scheme '{destination.scheme}'. Supported schemes: {supported}" + ) + + options: Dict[str, str] = {"type": backend} + remote_prefix = "" + + if backend == "sftp": + options["host"] = destination.hostname + if destination.port is not None: + options["port"] = str(destination.port) + if destination.username: + options["user"] = destination.username + if destination.password: + options["pass"] = obscure_secret(destination.password) + remote_prefix = destination.path.strip("/") + return options, remote_prefix + + if backend == "smb": + path_segments = [segment for segment in destination.path.split("/") if segment] + if not path_segments: + raise RuntimeError( + "smb destinations must include a share name in the path (example: smb://user:pass@host/share/path)" + ) + + share_name = path_segments[0] + remote_prefix = "/".join(path_segments[1:]) + + options["host"] = destination.hostname + options["share"] = share_name + if destination.port is not None: + options["port"] = str(destination.port) + + domain, user = parse_smb_identity(destination.username) + if user: + options["user"] = user + if domain: + options["domain"] = domain + if destination.password: + options["pass"] = obscure_secret(destination.password) + + return options, remote_prefix + + webdav_scheme = ( + destination.scheme if destination.scheme in {"http", "https"} else "https" + ) + host = format_host(destination.hostname) + if destination.port is not None: + host = f"{host}:{destination.port}" + webdav_path = destination.path or "/" + + options["url"] = f"{webdav_scheme}://{host}{webdav_path}" + options["vendor"] = "other" + if destination.username: + options["user"] = destination.username + if destination.password: + options["pass"] = obscure_secret(destination.password) + + return options, remote_prefix + + +def write_rclone_config(options: Dict[str, str]) -> str: + with tempfile.NamedTemporaryFile("w", encoding="utf-8", delete=False) as handle: + handle.write("[backup]\n") + for key, value in options.items(): + handle.write(f"{key} = {value}\n") + config_path = handle.name + + os.chmod(config_path, 0o600) + return config_path + + +def sync_with_rclone(destination: Destination, sources: List[Tuple[str, str]]) -> None: + options, remote_prefix = build_rclone_remote(destination) + config_path = write_rclone_config(options) + + try: + for source_path, destination_path in sources: + remote_path = join_path(remote_prefix, destination_path) + log(f"Syncing {source_path} to {destination.scheme} destination") + run_command( + [ + "rclone", + "sync", + f"{source_path}/", + f"backup:{remote_path}", + "--config", + config_path, + "--create-empty-src-dirs", + ] + ) + finally: + if os.path.exists(config_path): + os.remove(config_path) + + +def run_backup() -> int: + destination_url = os.getenv("BACKUP_DESTINATION", "").strip() + if not destination_url: + log("BACKUP_DESTINATION is unset, skipping backup") + return 0 + + sources = available_sources() + if not sources: + log("No backup sources are available, skipping backup") + return 0 + + destination = parse_destination(destination_url) + log(f"Starting backup to {redact_destination(destination.raw_url)}") + + if destination.scheme == "rsync": + sync_with_rsync(destination, sources) + else: + sync_with_rclone(destination, sources) + + log("Backup completed") + return 0 + + +def with_lock() -> int: + lock_dir = os.path.dirname(LOCK_PATH) + if lock_dir and not os.path.isdir(lock_dir): + os.makedirs(lock_dir, exist_ok=True) + with open(LOCK_PATH, "w", encoding="utf-8") as lock_file: + try: + fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB) + except BlockingIOError: + log("Another backup process is already running; skipping this cycle") + return 0 + return run_backup() + + +def main() -> int: + try: + return with_lock() + except Exception as exc: # pylint: disable=broad-except + log(f"ERROR: {exc}") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/app/init.sh b/app/init.sh index 25b7ec3..0ed9520 100755 --- a/app/init.sh +++ b/app/init.sh @@ -112,15 +112,11 @@ resolve_share_groups_from_sids() { export DOMAIN_ADMINS_GROUP DOMAIN_ADMINS_GROUP="$(resolve_sid_to_group "$DOMAIN_ADMINS_SID")" - export PUBLIC_GROUP - PUBLIC_GROUP="$(resolve_sid_to_group "$PUBLIC_GROUP_SID")" - export FSLOGIX_GROUP FSLOGIX_GROUP="$(resolve_sid_to_group "$FSLOGIX_GROUP_SID")" log "Resolved DOMAIN_USERS_SID to '${DOMAIN_USERS_GROUP}'" log "Resolved DOMAIN_ADMINS_SID to '${DOMAIN_ADMINS_GROUP}'" - log "Resolved PUBLIC_GROUP_SID to '${PUBLIC_GROUP}'" log "Resolved FSLOGIX_GROUP_SID to '${FSLOGIX_GROUP}'" } @@ -159,12 +155,13 @@ write_runtime_env_file() { printf 'export NETBIOS_NAME=%q\n' "$NETBIOS_NAME" printf 'export DOMAIN_USERS_SID=%q\n' "$DOMAIN_USERS_SID" printf 'export DOMAIN_ADMINS_SID=%q\n' "$DOMAIN_ADMINS_SID" - printf 'export PUBLIC_GROUP_SID=%q\n' "$PUBLIC_GROUP_SID" printf 'export FSLOGIX_GROUP_SID=%q\n' "$FSLOGIX_GROUP_SID" printf 'export DOMAIN_USERS_GROUP=%q\n' "$DOMAIN_USERS_GROUP" printf 'export DOMAIN_ADMINS_GROUP=%q\n' "$DOMAIN_ADMINS_GROUP" - printf 'export PUBLIC_GROUP=%q\n' "$PUBLIC_GROUP" printf 'export FSLOGIX_GROUP=%q\n' "$FSLOGIX_GROUP" + if [[ -n "${BACKUP_DESTINATION:-}" ]]; then + printf 'export BACKUP_DESTINATION=%q\n' "$BACKUP_DESTINATION" + fi if [[ -n "${JOIN_USER:-}" ]]; then printf 'export JOIN_USER=%q\n' "$JOIN_USER" fi @@ -217,6 +214,13 @@ SHELL=/bin/bash PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin */5 * * * * root source /app/runtime.env && /usr/bin/python3 /app/reconcile_shares.py >> /var/log/reconcile.log 2>&1 EOF + + if [[ -n "${BACKUP_DESTINATION:-}" ]]; then + cat >> /etc/cron.d/reconcile-shares <<'EOF' +*/30 * * * * root source /app/runtime.env && /usr/bin/python3 /app/backup_to_destination.py >> /var/log/backup.log 2>&1 +EOF + fi + chmod 0644 /etc/cron.d/reconcile-shares } @@ -227,11 +231,9 @@ require_env DOMAIN_USERS_SID require_env DOMAIN_ADMINS_SID export REALM WORKGROUP DOMAIN -export PUBLIC_GROUP_SID="${PUBLIC_GROUP_SID:-${DOMAIN_USERS_SID}}" export FSLOGIX_GROUP_SID="${FSLOGIX_GROUP_SID:-${DOMAIN_USERS_SID}}" export DOMAIN_USERS_GROUP="${DOMAIN_USERS_SID}" export DOMAIN_ADMINS_GROUP="${DOMAIN_ADMINS_SID}" -export PUBLIC_GROUP="${PUBLIC_GROUP_SID}" export FSLOGIX_GROUP="${FSLOGIX_GROUP_SID}" if [[ -n "${JOIN_USER:-}" ]]; then export JOIN_USER @@ -240,8 +242,8 @@ if [[ -n "${JOIN_PASSWORD:-}" ]]; then export JOIN_PASSWORD fi -mkdir -p /data/private /data/public /data/fslogix /data/groups /state /etc/samba/generated /var/log/samba -touch /etc/samba/generated/shares.conf /var/log/reconcile.log +mkdir -p /data/private /data/fslogix /data/groups/data /data/groups/archive /state /var/log/samba +touch /var/log/reconcile.log /var/log/backup.log append_winbind_to_nss require_vfs_modules @@ -263,6 +265,15 @@ write_runtime_env_file log 'Running startup reconciliation' python3 /app/reconcile_shares.py +if [[ -n "${BACKUP_DESTINATION:-}" ]]; then + log 'Running startup backup' + if ! python3 /app/backup_to_destination.py; then + log 'Startup backup failed; continuing service startup.' + fi +else + log 'BACKUP_DESTINATION is unset; startup backup skipped' +fi + install_cron_job log 'Starting cron daemon' diff --git a/app/reconcile_shares.py b/app/reconcile_shares.py index 9a76c38..faf206b 100755 --- a/app/reconcile_shares.py +++ b/app/reconcile_shares.py @@ -17,22 +17,18 @@ from typing import Dict, List, Optional, Tuple DB_PATH = "/state/shares.db" LOCK_PATH = "/state/reconcile.lock" -GROUP_ROOT = "/data/groups" +GROUP_ROOT = "/data/groups/data" +GROUP_ARCHIVE_ROOT = "/data/groups/archive" PRIVATE_ROOT = "/data/private" -PUBLIC_ROOT = "/data/public" FSLOGIX_ROOT = "/data/fslogix" -GENERATED_CONF = "/etc/samba/generated/shares.conf" -LDAP_FILTER = ( - "(&(objectClass=group)(|(sAMAccountName=FileShare_*)(sAMAccountName=FS_*)))" -) -GROUP_PREFIXES = ("FileShare_", "FS_") +LDAP_FILTER = "(&(objectClass=group)(sAMAccountName=FS_*))" +GROUP_PREFIXES = ("FS_",) USER_STATUS_FILTER = "(&(objectClass=user)(!(objectClass=computer))(sAMAccountName=*))" -GROUP_TITLE_ATTRS = ("displayname", "name", "cn") REQUIRED_ENV = ["REALM", "WORKGROUP", "DOMAIN"] ATTR_RE = re.compile(r"^([^:]+)(::?)\s*(.*)$") -SHARE_NAME_INVALID_RE = re.compile(r"[\\/:*?\"<>|;\[\],+=]") +GROUP_FOLDER_INVALID_RE = re.compile(r"[\\/:*?\"<>|;\[\],+=]") PRIVATE_SKIP_EXACT = { "krbtgt", "administrator", @@ -47,6 +43,7 @@ PRIVATE_SKIP_PREFIXES = ("msol_", "fileshare_service", "aad_") UAC_ACCOUNTDISABLE = 0x0002 UAC_LOCKOUT = 0x0010 AD_NEVER_EXPIRES_VALUES = {0, 9223372036854775807} +MAX_GROUP_FOLDER_NAME = 120 def now_utc() -> str: @@ -125,15 +122,6 @@ def derive_share_name(sam_account_name: str) -> Optional[str]: return None -def derive_group_title(entry: Dict[str, Tuple[str, bool]]) -> Optional[str]: - for attr in GROUP_TITLE_ATTRS: - if attr in entry: - value = entry[attr][0].strip() - if value: - return value - return None - - def parse_groups_from_ldap_output(output: str) -> List[Dict[str, str]]: entries = parse_ldap_entries(output) @@ -144,7 +132,7 @@ def parse_groups_from_ldap_output(output: str) -> List[Dict[str, str]]: sam_value, _ = entry["samaccountname"] sam = sam_value.strip() - share_name = derive_group_title(entry) or derive_share_name(sam) + share_name = derive_share_name(sam) if not share_name: continue @@ -166,20 +154,76 @@ def parse_groups_from_ldap_output(output: str) -> List[Dict[str, str]]: return list(deduped.values()) +def sanitize_group_folder_name(raw_name: str) -> str: + candidate = GROUP_FOLDER_INVALID_RE.sub("_", raw_name.strip()) + candidate = candidate.strip().strip(".") + candidate = re.sub(r"\s+", " ", candidate) + if not candidate: + return "" + return candidate[:MAX_GROUP_FOLDER_NAME] + + +def with_suffix_limited(base_name: str, suffix: str) -> str: + limit = MAX_GROUP_FOLDER_NAME - len(suffix) + if limit <= 0: + return suffix[-MAX_GROUP_FOLDER_NAME:] + trimmed = base_name[:limit].rstrip(" ._") + if not trimmed: + trimmed = "group" + return f"{trimmed}{suffix}" + + +def choose_group_folder_name( + preferred_name: str, + sam_account_name: str, + guid: str, + used_names: set, + existing_name: str = "", +) -> str: + base_name = sanitize_group_folder_name(preferred_name) + if not base_name: + base_name = sanitize_group_folder_name( + derive_share_name(sam_account_name) or "" + ) + if not base_name: + base_name = sanitize_group_folder_name(sam_account_name) + if not base_name: + base_name = f"group_{guid[:8]}" + + if existing_name: + existing_folded = existing_name.casefold() + if existing_folded not in used_names: + sanitized_existing = sanitize_group_folder_name(existing_name) + if sanitized_existing.casefold() == base_name.casefold(): + used_names.add(existing_folded) + return existing_name + + candidate = base_name + index = 0 + while candidate.casefold() in used_names: + index += 1 + suffix = f"_{guid[:8]}" if index == 1 else f"_{guid[:8]}_{index}" + candidate = with_suffix_limited(base_name, suffix) + + used_names.add(candidate.casefold()) + return candidate + + +def next_available_path(path: str) -> str: + if not os.path.exists(path): + return path + + index = 1 + while True: + candidate = f"{path}_{index}" + if not os.path.exists(candidate): + return candidate + index += 1 + + def fetch_groups_via_net_ads() -> List[Dict[str, str]]: result = run_command( - [ - "net", - "ads", - "search", - "-P", - LDAP_FILTER, - "objectGUID", - "sAMAccountName", - "displayName", - "name", - "cn", - ], + ["net", "ads", "search", "-P", LDAP_FILTER, "objectGUID", "sAMAccountName"], check=False, ) if result.returncode != 0: @@ -226,9 +270,6 @@ def fetch_groups_via_ldap_bind() -> List[Dict[str, str]]: LDAP_FILTER, "objectGUID", "sAMAccountName", - "displayName", - "name", - "cn", ] ) return parse_groups_from_ldap_output(result.stdout) @@ -334,143 +375,103 @@ def reconcile_db(conn: sqlite3.Connection, ad_groups: List[Dict[str, str]]) -> N timestamp = now_utc() seen = set() + os.makedirs(GROUP_ROOT, exist_ok=True) + os.makedirs(GROUP_ARCHIVE_ROOT, exist_ok=True) + + rows = conn.execute( + "SELECT objectGUID, samAccountName, shareName, path, isActive FROM shares" + ).fetchall() + existing_by_guid = {row["objectGUID"]: row for row in rows} + used_names = set() + for group in ad_groups: guid = group["objectGUID"] sam = group["samAccountName"] - share_name = group["shareName"] + row = existing_by_guid.get(guid) + existing_name = "" + if ( + row is not None + and row["isActive"] + and row["path"].startswith(f"{GROUP_ROOT}/") + ): + existing_name = os.path.basename(row["path"]) + + folder_name = choose_group_folder_name( + group["shareName"], sam, guid, used_names, existing_name + ) + path = os.path.join(GROUP_ROOT, folder_name) seen.add(guid) - row = conn.execute( - "SELECT objectGUID, path FROM shares WHERE objectGUID = ?", (guid,) - ).fetchone() - if row is None: - path = os.path.join(GROUP_ROOT, guid) ensure_group_path(path) conn.execute( """ INSERT INTO shares (objectGUID, samAccountName, shareName, path, createdAt, lastSeenAt, isActive) VALUES (?, ?, ?, ?, ?, ?, 1) """, - (guid, sam, share_name, path, timestamp, timestamp), + (guid, sam, folder_name, path, timestamp, timestamp), ) - log(f"Discovered new share group {sam} ({guid})") + log(f"Discovered new data folder group {sam} ({guid})") continue - path = row["path"] + existing_path = row["path"] + if existing_path != path: + if os.path.exists(existing_path): + final_path = next_available_path(path) + os.rename(existing_path, final_path) + path = final_path + else: + ensure_group_path(path) + ensure_group_path(path) conn.execute( """ UPDATE shares SET samAccountName = ?, shareName = ?, + path = ?, lastSeenAt = ?, isActive = 1 WHERE objectGUID = ? """, - (sam, share_name, timestamp, guid), + (sam, folder_name, path, timestamp, guid), ) - if seen: - placeholders = ",".join("?" for _ in seen) - conn.execute( - f"UPDATE shares SET isActive = 0, lastSeenAt = ? WHERE isActive = 1 AND objectGUID NOT IN ({placeholders})", - (timestamp, *seen), + active_rows = conn.execute( + "SELECT objectGUID, samAccountName, shareName, path FROM shares WHERE isActive = 1" + ).fetchall() + for row in active_rows: + guid = row["objectGUID"] + if guid in seen: + continue + + old_path = row["path"] + archive_name = choose_group_folder_name( + row["shareName"], + row["samAccountName"], + guid, + set(), ) - else: + archive_path = os.path.join(GROUP_ARCHIVE_ROOT, archive_name) + if os.path.exists(old_path): + final_archive_path = next_available_path(archive_path) + os.rename(old_path, final_archive_path) + archive_path = final_archive_path + conn.execute( - "UPDATE shares SET isActive = 0, lastSeenAt = ? WHERE isActive = 1", - (timestamp,), + """ + UPDATE shares + SET isActive = 0, + path = ?, + lastSeenAt = ? + WHERE objectGUID = ? + """, + (archive_path, timestamp, guid), ) conn.commit() -def qualify_group(group_name: str) -> str: - if "\\" in group_name: - return f'@"{group_name}"' - workgroup = os.getenv("WORKGROUP", "").strip() - if workgroup: - return f'@"{workgroup}\\{group_name}"' - return f'@"{group_name}"' - - -def is_valid_share_name(share_name: str) -> bool: - if not share_name or share_name.casefold() in {"global", "homes", "printers"}: - return False - if SHARE_NAME_INVALID_RE.search(share_name): - return False - return True - - -def render_dynamic_shares(conn: sqlite3.Connection) -> None: - rows = conn.execute( - """ - SELECT objectGUID, samAccountName, shareName, path - FROM shares - WHERE isActive = 1 - ORDER BY shareName COLLATE NOCASE - """ - ).fetchall() - - stanzas: List[str] = [ - "# This file is generated by /app/reconcile_shares.py.", - "# Manual changes will be overwritten.", - "", - ] - used_share_names = set() - - for row in rows: - share_name = row["shareName"].strip() - if not share_name: - continue - folded_name = share_name.casefold() - if folded_name in used_share_names: - log( - f"Skipping duplicate share name '{share_name}' for objectGUID {row['objectGUID']}" - ) - continue - - if not is_valid_share_name(share_name): - log( - f"Skipping invalid SMB share name '{share_name}' for objectGUID {row['objectGUID']}" - ) - continue - - used_share_names.add(folded_name) - valid_users = qualify_group(row["samAccountName"]) - stanzas.extend( - [ - f"[{share_name}]", - f"path = {row['path']}", - "read only = no", - "browseable = yes", - "guest ok = no", - "vfs objects = acl_xattr full_audit", - "full_audit:prefix = %T|%u|%I|%m|%S", - "full_audit:success = all", - "full_audit:failure = all", - "full_audit:syslog = false", - f"valid users = {valid_users}", - "create mask = 0660", - "directory mask = 2770", - "inherit permissions = yes", - "access based share enum = yes", - "", - ] - ) - - content = "\n".join(stanzas).rstrip() + "\n" - os.makedirs(os.path.dirname(GENERATED_CONF), exist_ok=True) - with tempfile.NamedTemporaryFile( - "w", encoding="utf-8", dir=os.path.dirname(GENERATED_CONF), delete=False - ) as tmp_file: - tmp_file.write(content) - temp_path = tmp_file.name - - os.replace(temp_path, GENERATED_CONF) - - def reload_samba() -> None: result = run_command(["smbcontrol", "all", "reload-config"], check=False) if result.returncode != 0: @@ -695,27 +696,6 @@ def should_skip_private_user(username: str) -> bool: return False -def sync_public_directory() -> None: - workgroup = os.environ["WORKGROUP"] - public_group = os.getenv("PUBLIC_GROUP", "") - public_group_sid = os.getenv("PUBLIC_GROUP_SID", "") - qualified_group = public_group - - os.makedirs(PUBLIC_ROOT, exist_ok=True) - gid = None - if qualified_group: - gid = resolve_group_gid_flexible(workgroup, qualified_group) - if gid is None and public_group_sid: - gid = resolve_gid_from_sid(public_group_sid) - - if gid is not None: - admin_gid = resolve_gid_from_sid(os.getenv("DOMAIN_ADMINS_SID", "")) - enforce_group_tree_permissions(PUBLIC_ROOT, gid, admin_gid) - else: - group_display = qualified_group or public_group_sid or "" - log(f"Unable to resolve GID for {group_display}; public ACLs unchanged") - - def sync_fslogix_directory() -> None: workgroup = os.environ["WORKGROUP"] fslogix_group = os.getenv("FSLOGIX_GROUP", "") @@ -820,6 +800,11 @@ def sync_dynamic_directory_permissions(conn: sqlite3.Connection) -> None: enforce_group_tree_permissions(path, gid, admin_gid) + os.makedirs(GROUP_ROOT, exist_ok=True) + os.chown(GROUP_ROOT, 0, 0) + run_command(["setfacl", "-b", GROUP_ROOT], check=False) + os.chmod(GROUP_ROOT, 0o555) + def with_lock() -> bool: os.makedirs(os.path.dirname(LOCK_PATH), exist_ok=True) @@ -834,18 +819,17 @@ def with_lock() -> bool: try: ensure_required_env() os.makedirs(GROUP_ROOT, exist_ok=True) + os.makedirs(GROUP_ARCHIVE_ROOT, exist_ok=True) conn = open_db() try: groups = fetch_fileshare_groups() - log(f"Discovered {len(groups)} dynamic share group(s) from AD") + log(f"Discovered {len(groups)} data folder group(s) from AD") reconcile_db(conn, groups) sync_dynamic_directory_permissions(conn) - render_dynamic_shares(conn) finally: conn.close() - sync_public_directory() sync_fslogix_directory() sync_private_directories() reload_samba() diff --git a/docker-compose.yml b/docker-compose.yml index 5d98dcf..c3c853a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,6 @@ services: - "139:139" volumes: - private_data:/data/private - - public_data:/data/public - fslogix_data:/data/fslogix - group_data:/data/groups - state_data:/state @@ -28,7 +27,6 @@ services: volumes: private_data: - public_data: fslogix_data: group_data: state_data: diff --git a/etc/samba/smb.conf b/etc/samba/smb.conf index 2a5a20b..b941d98 100644 --- a/etc/samba/smb.conf +++ b/etc/samba/smb.conf @@ -37,8 +37,6 @@ logging = file log level = 1 auth:5 passdb:5 winbind:3 - include = /etc/samba/generated/shares.conf - [Privat] path = /data/private read only = no @@ -55,8 +53,8 @@ ea support = yes nt acl support = no -[Geteilt] - path = /data/public +[Data] + path = /data/groups/data read only = no browseable = yes guest ok = no @@ -65,11 +63,11 @@ full_audit:success = all full_audit:failure = all full_audit:syslog = false - valid users = @"${PUBLIC_GROUP}" - force group = "${PUBLIC_GROUP}" + valid users = @"${DOMAIN_USERS_GROUP}" create mask = 0660 directory mask = 2770 inherit permissions = yes + hide unreadable = yes access based share enum = yes [FSLogix] diff --git a/setup b/setup index f5ffbcd..aa9c37e 100755 --- a/setup +++ b/setup @@ -83,13 +83,12 @@ write_env_file() { local admin_password="" local domain_users_sid="" local domain_admins_sid="" - local public_group_sid="" local fslogix_group_sid="" + local backup_destination="" local samba_hostname="adsambafsrv" local netbios_name="ADSAMBAFSRV" local service_password="" local service_account_sam="" - local public_group_prompt="" local fslogix_group_prompt="" local samba_hostname_input="" local netbios_name_input="" @@ -103,12 +102,6 @@ write_env_file() { prompt_value domain_users_sid "DOMAIN_USERS_SID (e.g. ...-513)" prompt_value domain_admins_sid "DOMAIN_ADMINS_SID (e.g. ...-512)" - public_group_prompt="PUBLIC_GROUP_SID (press Enter to reuse DOMAIN_USERS_SID)" - read -r -p "${public_group_prompt}: " public_group_sid - if [[ -z "$public_group_sid" ]]; then - public_group_sid="$domain_users_sid" - fi - fslogix_group_prompt="FSLOGIX_GROUP_SID (press Enter to reuse DOMAIN_USERS_SID)" read -r -p "${fslogix_group_prompt}: " fslogix_group_sid if [[ -z "$fslogix_group_sid" ]]; then @@ -130,6 +123,8 @@ write_env_file() { fi netbios_name="$sanitized_netbios_name" + read -r -p "BACKUP_DESTINATION (optional URL, press Enter to disable): " backup_destination + service_account_sam="$(sanitize_sam_account_name "$SERVICE_ACCOUNT_NAME")" if [[ "$service_account_sam" != "$SERVICE_ACCOUNT_NAME" ]]; then printf "Using sAMAccountName '%s' (AD limit is 20 chars; requested '%s').\n" "$service_account_sam" "$SERVICE_ACCOUNT_NAME" @@ -165,8 +160,8 @@ SERVICE_ACCOUNT_SAM=${service_account_sam} SERVICE_ACCOUNT_PASSWORD=${service_password} DOMAIN_USERS_SID=${domain_users_sid} DOMAIN_ADMINS_SID=${domain_admins_sid} -PUBLIC_GROUP_SID=${public_group_sid} FSLOGIX_GROUP_SID=${fslogix_group_sid} +BACKUP_DESTINATION=${backup_destination} SAMBA_HOSTNAME=${samba_hostname} NETBIOS_NAME=${netbios_name} EOF @@ -217,8 +212,8 @@ JOIN_USER=${service_account_sam} JOIN_PASSWORD=${service_password} DOMAIN_USERS_SID=${domain_users_sid} DOMAIN_ADMINS_SID=${domain_admins_sid} -PUBLIC_GROUP_SID=${public_group_sid} FSLOGIX_GROUP_SID=${fslogix_group_sid} +BACKUP_DESTINATION=${backup_destination} SAMBA_HOSTNAME=${samba_hostname} NETBIOS_NAME=${netbios_name} # Optional overrides: @@ -226,6 +221,10 @@ NETBIOS_NAME=${netbios_name} # LDAP_BASE_DN=DC=example,DC=com # PRIVATE_SKIP_USERS=svc_backup,svc_sql # PRIVATE_SKIP_PREFIXES=svc_,sql_ +# BACKUP_DESTINATION=rsync://user:pass@backup.example.com/samba-backups +# BACKUP_DESTINATION=smb://DOMAIN%5Cuser:pass@backup.example.com/Backups/samba +# BACKUP_DESTINATION=davfs://user:pass@webdav.example.com/remote.php/dav/files/backup +# BACKUP_DESTINATION=sftp://user:pass@sftp.example.com/exports/samba EOF chmod 600 "$ENV_FILE"