Module data_request_api.content.dreq_content
Functions
def delete(version='all', keep_latest=False, **kwargs)
-
Expand source code
@append_kwargs_from_config
def delete(version="all", keep_latest=False, **kwargs):
    """Delete one or all cached versions with option to keep latest versions.

    Parameters
    ----------
    version : str, optional
        The version to delete. Can be 'all' or a specific version,
        eg. '1.0.0' (default is 'all').
    keep_latest : bool, optional
        If True, keep the latest stable, prerelease and "dev" versions.
        If False, delete all locally cached versions (default is False).
        Only honoured when ``version`` is 'all'.
    **kwargs
        export : {'raw', 'release'}, optional
            Export type. Defaults to 'release'.
        dryrun : bool, optional
            Whether to only list the files that would be removed
            instead of actually removing them. Defaults to False.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If 'export' is neither 'raw' nor 'release'.

    Warns
    -----
    UserWarning
        If 'keep_latest' option is active when 'version' is not 'all'.
    """
    logger = get_logger()

    # Resolve the export type once and fail early on an unknown value, so
    # that no file path is ever built from an undefined file name.
    export = kwargs.get("export", "release")
    if export == "raw":
        json_export = _json_raw
    elif export == "release":
        json_export = _json_release
    else:
        raise ValueError(
            f"Invalid value for 'export': {export!r}. Must be 'raw' or 'release'."
        )

    # Get locally cached versions
    local_versions = get_cached(**kwargs)

    if version == "all":
        if keep_latest:
            # Identify the latest (possibly prerelease) and latest stable versions.
            # Versions containing "a" or "b" are treated as alpha/beta prereleases.
            valid_versions = [v for v in local_versions if _version_pattern.match(v)]
            valid_sversions = [v for v in valid_versions if "a" not in v and "b" not in v]
            to_keep = {"dev"}
            if valid_versions:
                to_keep.add(max(valid_versions, key=_parse_version))
            if valid_sversions:
                to_keep.add(max(valid_sversions, key=_parse_version))
            local_versions = [v for v in local_versions if v not in to_keep]
    else:
        if keep_latest:
            warnings.warn("'keep_latest' option is ignored when 'version' is not 'all'.")
        local_versions = [version] if version in local_versions else []

    # Deletion
    if not local_versions:
        logger.info("No version(s) found to delete.")
        return
    logger.info("Deleting the following version(s):")
    logger.info(local_versions)

    # Delete (or, for a dry run, only report) the cached file of each version
    dryrun = bool(kwargs.get("dryrun", False))
    for v in local_versions:
        f = os.path.join(_dreq_res, v, json_export)
        if not os.path.isfile(f):
            continue
        if dryrun:
            logger.info(f"Dryrun: would delete '{f}'.")
        else:
            os.remove(f)
Delete one or all cached versions with option to keep latest versions.
Parameters
version
:str
, optional- The version to delete. Can be 'all' or a specific version, eg. '1.0.0' (default is 'all').
keep_latest
:bool
, optional- If True, keep the latest stable, prerelease and "dev" versions. If False, delete all locally cached versions (default is False).
**kwargs
- export : {'raw', 'release'}, optional Export type. Defaults to 'release'. dryrun : bool, optional Whether to only list the files that would be removed instead of actually removing them. Defaults to False.
Returns
None
Raises
ValueError
- If the known kwargs have an invalid value.
Warning
- If 'keep_latest' option is active when 'version' is not 'all'.
def get_cached(**kwargs)
-
Expand source code
@append_kwargs_from_config
def get_cached(**kwargs):
    """Get list of cached versions.

    A version counts as cached when its subdirectory of the resource
    directory contains the JSON file of the requested export type.

    Parameters
    ----------
    **kwargs
        export : {'raw', 'release'}, optional
            Export type. Defaults to "release".

    Returns
    -------
    list
        The list of cached versions (subdirectory names). Empty if the
        resource directory does not exist.

    Raises
    ------
    ValueError
        If 'export' is neither 'raw' nor 'release'.
    """
    if not os.path.isdir(_dreq_res):
        return []

    # Fall back to the documented default instead of leaving the file name
    # undefined when 'export' was not supplied.
    export = kwargs.get("export", "release")
    if export == "raw":
        json_export = _json_raw
    elif export == "release":
        json_export = _json_release
    else:
        raise ValueError(
            f"Invalid value for 'export': {export!r}. Must be 'raw' or 'release'."
        )

    # List all subdirectories in the dreq_res directory that include the
    # JSON file of the selected export type
    # - the subdirectory name is the tag name
    return [
        name
        for name in os.listdir(_dreq_res)
        if os.path.isfile(os.path.join(_dreq_res, name, json_export))
    ]
Get list of cached versions.
Parameters
**kwargs
- export : {'raw', 'release'}, optional Export type. Defaults to "release".
Returns
list
- The list of cached versions.
Raises
Warning
- If known kwargs have an invalid value.
def get_versions(target='tags', **kwargs)
-
Expand source code
@append_kwargs_from_config
def get_versions(target="tags", **kwargs):
    """Fetch list of tags or branches from the GitHub repository using the GitHub API.

    The result is kept in a module-level cache and only requested again
    when the cache is empty or older than one hour. In offline mode the
    list is built from the locally cached versions instead.

    Parameters
    ----------
    target : str, optional
        The target to send the request for, either 'tags' or 'branches'
        (default is 'tags'). Please note that the main development branch
        is excluded from the list of branches and is included in the list
        of tags (as 'dev').
    **kwargs
        offline : bool, optional
            Whether to disable online requests / retrievals.
            Defaults to False.
        check_api_version : bool, optional
            Whether to register a check of the API version that runs at
            interpreter exit (only when not offline, and only once per
            session). Treated as False when absent.

    Returns
    -------
    list
        A list of tags or branches.

    Raises
    ------
    ValueError
        If target is not 'tags' or 'branches'.
    """
    global versions
    global _versions_retrieved_last
    global _CHECK_API_VERSION

    if target not in ["tags", "branches"]:
        raise ValueError("target must be 'tags' or 'branches'.")

    offline = bool(kwargs.get("offline", False))

    if offline:
        # Entries that do not parse as a version yield (0, 0, 0, "", 0);
        # those are treated as branches, everything else (and "dev") as tags.
        lversions = get_cached(**kwargs)
        if target == "tags":
            versions[target] = [
                lv for lv in lversions if lv == "dev" or _parse_version(lv) != (0, 0, 0, "", 0)
            ]
        else:
            versions[target] = [
                lv for lv in lversions if lv != "dev" and _parse_version(lv) == (0, 0, 0, "", 0)
            ]
    else:
        # Retrieve the list of tags or branches from the GitHub API if nothing
        # is cached yet or the cached list is older than one hour.
        # (elapsed time = now - last retrieval; the reverse is never positive)
        if not versions[target] or time.time() - _versions_retrieved_last[target] > 60 * 60:
            versions[target] = _send_api_request(REPO_API_URL, REPO_PAGE_URL, target)
            # Update the last time the tags/branches were retrieved
            _versions_retrieved_last[target] = time.time()

    if target == "tags" and "dev" not in versions[target]:
        versions[target].append("dev")

    if kwargs.get("check_api_version") and not offline:
        # Warn user if the API version is not the latest one available on PyPI
        if _CHECK_API_VERSION:
            atexit.register(dreqcfg.check_api_version)
            # Set flag to prevent the same warning being shown more than once
            _CHECK_API_VERSION = False

    # List tags of dreq versions hosted on GitHub
    return versions[target]
Fetch the list of tags or branches from the GitHub repository using the GitHub API.
Args
target
:str
- The target to send the request for, either 'tags' or 'branches'. The default is 'tags'.
Parameters
target
:str
, optional- The target to send the request for, either 'tags' or 'branches' (default is 'tags'). Please note that the main development branch is excluded from the list of branches and is included in the list of tags.
**kwargs
- offline : bool, optional Whether to disable online requests / retrievals. Defaults to False.
Returns
list
- A list of tags or branches.
Raises
ValueError
- If target is not 'tags' or 'branches'.
def load(version='latest_stable', **kwargs)
-
Expand source code
@append_kwargs_from_config
def load(version="latest_stable", **kwargs):
    """Load the JSON file for the specified version.

    Parameters
    ----------
    version : str, optional
        The version to load. Can be 'latest', 'latest_stable', 'dev',
        or a specific version, eg. '1.0.0' (default is 'latest_stable').
    **kwargs
        export : {'raw', 'release'}, optional
            Export type. Defaults to 'release'.
        consolidate : bool, optional
            Whether to consolidate the data request dictionary
            after loading it. Experimental feature. Defaults to True.
        offline : bool, optional
            Whether to disable online requests / retrievals.
            Defaults to False.
        force_consolidate : bool, optional
            Whether to force consolidation of the data request dictionary
            for raw exports of versions "<v1.2", where consolidation
            is not supported. Defaults to False.

    Returns
    -------
    dict
        The content of the loaded JSON file (consolidated unless
        'consolidate' is False), or an empty dict if no version could
        be retrieved.

    Raises
    ------
    ValueError
        If ``version`` is 'all', or if consolidation is requested for a
        raw export of a version older than v1.2 without 'force_consolidate'.
    """
    _dreq_content_loaded["json_path"] = ""
    logger = get_logger()

    if version == "all":
        raise ValueError("Cannot load 'all' versions.")

    version_dict = retrieve(version, **kwargs)
    if not version_dict:
        logger.info(f"Version '{version}' could not be loaded.")
        return {}

    # Only a single version is requested, so the mapping holds one entry:
    # the resolved version (e.g. the tag behind 'latest') and its file path.
    resolved_version, json_path = next(iter(version_dict.items()))
    logger.info(f"Loading version '{resolved_version}'.")
    _dreq_content_loaded["json_path"] = json_path

    consolidate = kwargs.get("consolidate", True)

    if consolidate and kwargs.get("export") == "raw":
        # Check the resolved version rather than the requested alias:
        # aliases such as 'latest_stable' are not version strings and would
        # not compare meaningfully against v1.2.
        if resolved_version != "dev" and _parse_version(resolved_version) < _parse_version("v1.2"):
            if kwargs.get("force_consolidate"):
                logger.warning(
                    "Consolidation mapping is not supported for raw exports of versions < v1.2."
                    " Forcing it regardless ..."
                )
            else:
                consolidate_error = (
                    "Consolidation mapping is not supported for raw exports of versions < v1.2."
                    " Set 'export' to \"release\" (recommended), or set 'consolidate' to False"
                    " or set 'force_consolidate' to True to force consolidation regardless."
                )
                logger.error(consolidate_error)
                raise ValueError(consolidate_error)

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(json_path, encoding="utf-8") as f:
        content = json.load(f)

    if not consolidate:
        return content
    return ce.map_data(content, mapping_table, resolved_version, **kwargs)
Load the JSON file for the specified version.
Args
version
:str
- The version to load. Can be 'latest', 'latest_stable', 'dev', or a specific version, eg. '1.0.0'. The default is 'latest_stable'.
**kwargs export : {'raw', 'release'}, optional Export type. Defaults to 'release'. consolidate: bool, optional Whether to consolidate the data request dictionary after loading it. Experimental feature. Defaults to True. offline : bool, optional Whether to disable online requests / retrievals. Defaults to False. force_consolidate : bool, optional Whether to force consolidation of the data request dictionary for raw exports of versions "<v1.2", where consolidation is not supported. Defaults to False.
Returns
dict
- The content of the loaded JSON file.
def retrieve(version='latest_stable', **kwargs)
-
Expand source code
@append_kwargs_from_config
def retrieve(version="latest_stable", **kwargs):
    """Retrieve the JSON file for the specified version.

    Files are stored below the resource directory, one subdirectory per
    version. Versions that are not cached yet are downloaded; a cached
    'dev' version or branch is re-downloaded and replaced if its content
    changed. In offline mode only already cached files are returned.

    Parameters
    ----------
    version : str, optional
        The version to retrieve. Can be 'latest', 'latest_stable', 'dev',
        or 'all' or a specific version, eg. '1.0.0'.
        (default is 'latest_stable').
    **kwargs
        export : {'raw', 'release'}, optional
            Export type. Defaults to 'release'.
        offline : bool, optional
            Whether to disable online requests / retrievals.
            Defaults to False.

    Returns
    -------
    dict
        A mapping of each retrieved version to the path of its JSON file.

    Raises
    ------
    ValueError
        If the specified version is not found, if 'export' has an invalid
        value, or if none of the requested versions could be retrieved.

    Warns
    -----
    UserWarning
        If the specified version does not have the specified export type,
        or if a version could not be downloaded or (if applicable) updated.
    """
    logger = get_logger()
    offline = bool(kwargs.get("offline", False))

    if version == "latest":
        requested = [_get_latest_version(stable=False, **kwargs)]
    elif version == "latest_stable":
        requested = [_get_latest_version(stable=True, **kwargs)]
    elif version == "dev":
        requested = ["dev"]
    elif version == "all":
        # Copy, so that later refreshes of the shared version list cannot
        # interfere with the iteration below.
        requested = list(get_versions(**kwargs))
    else:
        if version not in get_versions(**kwargs) + get_versions(target="branches", **kwargs):
            if version not in get_cached(**kwargs):
                raise ValueError(f"Version '{version}' not found.")
        requested = [version]

    if requested == [None] or not requested:
        raise ValueError(f"Version '{version}' not found.")
    elif version in ["v1.0alpha"] and kwargs.get("export") == "raw":
        warnings.warn(f"For version '{version}' no raw export exists. Defaulting to release export.")

    json_paths = {}
    for ver in requested:
        # Define the path for storing the dreq.json in the installation directory
        # Store it as path_to_api/content/dreq_res/version/{_json_raw/release}
        retrieve_to_dir = os.path.join(_dreq_res, ver)

        # Decide whether to download release or raw json file
        if "export" in kwargs:
            if kwargs["export"] == "release" or ver == "v1.0alpha":
                json_export = _json_release
            elif kwargs["export"] == "raw":
                json_export = _json_raw
            else:
                raise ValueError(
                    f"Invalid value for 'export': {kwargs['export']!r}. Must be 'raw' or 'release'."
                )
        elif _version_pattern.match(ver):
            json_export = _json_release
        else:
            json_export = _json_raw

        json_path = os.path.join(retrieve_to_dir, json_export)

        if offline:
            # Offline: only report what is already cached
            if os.path.isfile(json_path):
                json_paths[ver] = json_path
            continue

        os.makedirs(retrieve_to_dir, exist_ok=True)

        # Download source - use the development branch for "dev"
        source_url = REPO_RAW_URL.format(
            version=(_dev_branch if ver == "dev" else ver),
            _json_export=json_export,
            _github_org=_github_org,
        )

        # If not already cached download with POOCH
        if not os.path.isfile(json_path):
            try:
                json_path = pooch.retrieve(
                    path=retrieve_to_dir,
                    url=source_url,
                    known_hash=None,
                    fname=json_export,
                )
            except Exception as e:
                # Best effort: skip this version but keep processing the others
                warnings.warn(f"Could not retrieve version '{ver}': {e}")
                continue
            logger.info(f"Retrieved version '{ver}'.")

        # or if the version is "dev" or a branch rather than a tag
        elif ver == "dev" or ver not in get_versions(**kwargs):
            # Download with pooch to temporary file and compare to cached version
            json_path_temp = json_path + ".tmp"
            try:
                # Delete temp file if it exists
                if os.path.exists(json_path_temp):
                    os.remove(json_path_temp)
                # Retrieve
                json_path_temp = pooch.retrieve(
                    path=retrieve_to_dir,
                    url=source_url,
                    known_hash=None,
                    fname=json_export + ".tmp",
                )
                # Compare files
                if not cmp(json_path, json_path_temp, shallow=False):
                    move(json_path_temp, json_path)
                    logger.info(f"Updated version '{ver}'.")
                else:
                    os.remove(json_path_temp)
            except Exception as e:
                # A failed update is not fatal: the cached copy is still used
                warnings.warn(f"Potential update for version '{ver}' failed: {e}")

        # Store the path to the dreq.json in the json_paths dictionary
        json_paths[ver] = json_path

    # Capture no correct export found for cached versions (offline mode)
    # or every download having failed (online mode)
    if not json_paths:
        if offline:
            raise ValueError(
                "The version(s) you requested are not cached. Please deactivate offline mode and try again."
            )
        raise ValueError("The version(s) you requested could not be retrieved.")

    return json_paths
Retrieve the JSON file for the specified version.
Parameters
version
:str
, optional- The version to retrieve. Can be 'latest', 'latest_stable', 'dev', or 'all' or a specific version, eg. '1.0.0'. (default is 'latest_stable').
**kwargs
- export : {'raw', 'release'}, optional Export type. Defaults to 'release'. offline : bool, optional Whether to disable online requests / retrievals. Defaults to False.
Returns
dict
- A mapping of each retrieved version to the path of its JSON file.
Raises
ValueError
- If the specified version is not found.
Warning
- If the specified version does not have the specified export type.
Warning
- If the known kwargs have an invalid value.
Warning
- If the specified version could not be downloaded or (if applicable) updated.