# Source code for tamr_toolbox.dataset._dataset

"""
Additional functions to manipulate the profile of the dataset.
"""

from tamr_unify_client.dataset.resource import Dataset, DatasetProfile
from requests import HTTPError
import logging

LOGGER = logging.getLogger(__name__)


def get_profile(dataset: Dataset, allow_create_or_refresh: bool = False) -> DatasetProfile:
    """
    Returns a dataset profile object.

    Optionally can create the profile if it is missing, or refresh it if it is out-of-date.

    Args:
        dataset: Tamr dataset object
        allow_create_or_refresh: optional bool to allow creation/refreshing of profile info

    Returns:
        DatasetProfile object. If the profile is out of date and allow_create_or_refresh
        is False, the stale profile is still returned and a warning is logged.

    Raises:
        RuntimeError: if profile has not been created and allow_create_or_refresh is False
        HTTPError: if retrieving the profile fails with any status other than 409 or 400
    """
    try:
        profile = dataset.profile()
    except HTTPError as e:
        # An HTTPError is not guaranteed to carry a response object; treat a missing
        # response as an unrecognized status so the original error propagates intact.
        status_code = getattr(e.response, "status_code", None)

        # profile not yet created or in inconsistent state: 409 or 400 returned respectively
        # other status codes: re-raise error
        if status_code not in (409, 400):
            raise

        if not allow_create_or_refresh:
            raise RuntimeError(
                f"Profile information for {dataset.name} does not exist. If you would like "
                f"to create one, set allow_create_or_refresh to True."
            ) from e

        LOGGER.info(f"creating profile information for dataset: {dataset.name}")
        dataset.create_profile()
        profile = dataset.profile()

    # A freshly created profile is checked as well, exactly like a pre-existing one.
    if not profile.is_up_to_date:
        if allow_create_or_refresh:
            LOGGER.info(f"refreshing profile information for dataset: {dataset.name}")
            profile.refresh()
            # retrieve again for up-to-date information
            profile = dataset.profile()
        else:
            # Return profile information with a staleness warning. Routed through the
            # module logger (not print) so it honors the caller's logging configuration.
            LOGGER.warning(
                f"Profile information for {dataset.name} is out-of-date "
                f"and allow_create_or_refresh is False. If you would like an up-to-date "
                f"profile, rerun with allow_create_or_refresh set to True."
            )

    return profile