Source code for tamr_toolbox.filesystem.bash

"""Tasks related to managing unix-level operations like file and directory management"""

import logging
import os
import shutil
from datetime import datetime, timedelta

from typing import List, Union
from pathlib import Path

LOGGER = logging.getLogger(__name__)


def create_directories(
    directories: List[Union[Path, str]],
    *,
    require_absolute_path: bool = True,
    failure_if_exists: bool = False,
) -> None:
    """Creates each directory in a list of targets, including any missing parent directories

    A target that already exists on disk is not touched: it is either reported with a
    warning or, when `failure_if_exists` is set, treated as an error.

    Args:
        directories: List of directories to create.
        require_absolute_path: Boolean flag indicating whether to reject relative paths.
        failure_if_exists: Boolean flag indicating whether to raise when a target
            already exists instead of logging a warning.

    Returns:
        None

    Raises:
        ValueError: if `require_absolute_path` is set and a target is not an absolute path
        OSError: if `failure_if_exists` is set and a target already exists
    """
    for entry in directories:
        # Work with Path objects throughout so that is_absolute() is available
        directory = entry if isinstance(entry, Path) else Path(entry)

        if require_absolute_path and not directory.is_absolute():
            raise ValueError(f"Expected absolute path for creation but received {directory}")

        if not os.path.exists(directory):
            LOGGER.info(f"Creating directory {directory}")
            os.makedirs(directory)
            continue

        # The target is already present: fail or warn depending on the caller's choice
        message = f"Tried creating {directory} but directory already exists"
        if failure_if_exists:
            raise OSError(message)
        LOGGER.warning(message)
def remove_directories(
    directories: List[Union[Path, str]],
    *,
    require_absolute_path: bool = True,
    failure_if_not_exists: bool = False,
    allow_recursive_deletes: bool = False,
) -> None:
    """Removes each directory in a list of target directories if it exists

    By default a directory is removed with `os.rmdir`, which only succeeds on an empty
    directory. Set `allow_recursive_deletes` to remove a directory together with all of
    its contents.

    Args:
        directories: List of directories to remove
        require_absolute_path: Boolean flag indicating whether to require absolute paths.
        failure_if_not_exists: Boolean flag requiring failure if attempt to remove a directory
            that does not exist.
        allow_recursive_deletes: Boolean flag indicating whether to allow recursive deletion.

    Returns:
        None

    Raises:
        ValueError: if `require_absolute_path` is set and a target is not an absolute path
        OSError: if `failure_if_not_exists` is set and a target does not exist, or if the
            underlying removal fails (for example a non-empty directory without
            `allow_recursive_deletes`)
    """
    # Listing the requested targets is diagnostic detail, not a warning condition
    LOGGER.debug(f"Directories requested for removal: {directories}")

    for directory in directories:
        if not isinstance(directory, Path):
            directory = Path(directory)

        if require_absolute_path and not directory.is_absolute():
            raise ValueError(f"Expected absolute path for removal but received {directory}")

        if not os.path.exists(directory):
            message = f"Tried removing {directory} but directory does not exist"
            if failure_if_not_exists:
                raise OSError(message)
            LOGGER.warning(message)
            continue

        if allow_recursive_deletes:
            LOGGER.info(f"Removing directory {directory} recursively.")
            shutil.rmtree(directory)
        else:
            LOGGER.info(f"Removing directory {directory}")
            os.rmdir(directory)
def delete_old_files(
    data_path: Union[Path, str],
    *,
    num_days_to_keep: int,
    exclude_paths: Union[List[Union[Path, str]], None] = None,
) -> List[str]:
    """Deletes files under data_path that have not been modified for a number of days

    Walks data_path and all of its subdirectories, collects every file whose last
    modification time is older than `num_days_to_keep` days and deletes it. Files located
    directly inside a folder listed in `exclude_paths` are kept. Exclusion is not recursive:
    subdirectories of an excluded folder are still searched unless they are listed too.

    Args:
        data_path: path to the root folder from where to search for files
        num_days_to_keep: number of days to keep files for, 0 or greater
        exclude_paths: list of folders whose files must not be deleted. Each entry has to
            be spelled with the same root as data_path (the comparison is textual, after
            normalizing separators and trailing slashes)

    Returns:
        A list of the paths of the deleted files

    Raises:
        ValueError: if num_days_to_keep is less than 0
        FileNotFoundError: if data_path doesn't exist
    """
    if num_days_to_keep < 0:
        value_error_message = (
            "Argument for num_days_to_keep must be greater than or equal to 0. "
            f"Found {num_days_to_keep}"
        )
        LOGGER.error(value_error_message)
        raise ValueError(value_error_message)

    # Normalize so that "dir/" and "dir" (or a Path) all compare equal to what os.walk yields
    if exclude_paths is not None:
        excluded_directories = {os.path.normpath(str(p)) for p in exclude_paths}
    else:
        excluded_directories = set()

    if not os.path.exists(data_path):
        message = f"directory does not exist: {data_path}"
        LOGGER.error(message)
        raise FileNotFoundError(message)

    time_threshold = datetime.now() - timedelta(days=num_days_to_keep)

    # Collect first and delete afterwards so the tree is not modified while it is walked
    files_to_delete = []
    for directory_path, _, files in os.walk(data_path):
        if os.path.normpath(directory_path) in excluded_directories:
            continue
        for file in files:
            file_path = os.path.join(directory_path, file)
            if datetime.fromtimestamp(os.path.getmtime(file_path)) < time_threshold:
                files_to_delete.append(file_path)

    LOGGER.info(
        f"Found {len(files_to_delete)} files older than the retention policy "
        f"of {num_days_to_keep} days"
    )
    LOGGER.info("Removing files older than the retention policy")
    for file in files_to_delete:
        LOGGER.debug(f"Deleting {file}")
        os.remove(file)

    return files_to_delete