Source code for EXOSIMS.util.getExoplanetArchive

import requests
import pandas  # type: ignore
import numpy as np
import os
from io import BytesIO
import glob
import time
from EXOSIMS.util.get_dirs import get_downloads_dir
from typing import Optional, Dict, Any
from requests.exceptions import ReadTimeout



[docs]
def queryExoplanetArchive(
    querystring: str, filename: Optional[str] = None
) -> pandas.DataFrame:
    """
    Query the exoplanet archive, optionally save results to disk, and return the
    result as a pandas dataframe.

    Args:
        querystring (str):
            Exact query string to use. Do not include format (csv will be specified).
            See: https://exoplanetarchive.ipac.caltech.edu/docs/TAP/usingTAP.html
            for details. A valid string is: "select+*+from+pscomppars"
        filename (str):
            Full path to save file.  If None (default) results are not written to disk.
            Data will be written in pickle format.

    Returns:
        pandas.DataFrame:
            Result of query

    Raises:
        requests.exceptions.HTTPError:
            If the archive answers with an HTTP error status (4xx/5xx). Nothing
            is parsed or written to disk in that case.
    """

    query = (
        "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?"
        f"query={querystring}&format=csv"
    )

    r = requests.get(query)

    # Stop on an HTTP error before parsing: otherwise the body of an error
    # response would be fed to the CSV parser and, when ``filename`` is given,
    # pickled to disk where it could later be reloaded as if it were valid data.
    r.raise_for_status()

    data = pandas.read_csv(BytesIO(r.content))

    if filename is not None:
        data.to_pickle(filename)

    return data




[docs]
def cacheExoplanetArchiveQuery(
    basestr: str, querystring: str, forceNew: bool = False, **specs: Any
) -> pandas.DataFrame:
    """
    Return the most recent cached result for a query, running the query (and
    caching its result) when no cached copy is available or a refresh is forced.

    Cache files live in the directory returned by ``get_downloads_dir`` and are
    named ``<basestr>_<YYYYmmddHHMMSS>.pkl``.

    Args:
        basestr (str):
            Base of the cache filename.
        querystring (str):
            Exact query string to use. See ``queryExoplanetArchive`` for details
        forceNew (bool):
            Run a fresh query even if results exist on disk.
        **specs (any):
            Any additional keywords to pass to ``get_downloads_dir``

    Returns:
        pandas.DataFrame:
            Result of query
    """

    downloads_dir = get_downloads_dir(**specs)

    if not forceNew:
        pattern = os.path.join(downloads_dir, f"{basestr}_*.pkl")
        cached = glob.glob(pattern)
        if cached:
            # The timestamp suffix sorts lexicographically in time order, so
            # the largest path is the newest cached result.
            newest = max(cached)
            data = pandas.read_pickle(newest)
            print(f"Loaded data from {newest}")
            return data

    # Nothing usable on disk (or a refresh was requested): query and cache.
    stamp = time.strftime("%Y%m%d%H%M%S")
    target = os.path.join(downloads_dir, f"{basestr}_{stamp}.pkl")

    return queryExoplanetArchive(querystring, filename=target)




[docs]
def getExoplanetArchivePS(forceNew: bool = False, **specs: Any) -> pandas.DataFrame:
    """
    Get the contents of the Exoplanet Archive's Planetary Systems table and cache
    results.  If a previous query has been saved to disk, load that.

    Args:
        forceNew (bool):
            Run a fresh query even if results exist on disk. Defaults False.
        **specs (any):
            Any additional keywords to pass to ``cacheExoplanetArchiveQuery``

    Returns:
        pandas.DataFrame:
            Planetary Systems table
    """

    # ``**specs`` is annotated ``Any`` (the type of each keyword value), in line
    # with the other wrappers in this module that forward to the same function.
    basestr = "exoplanetArchivePS"
    querystring = r"select+*+from+ps"

    return cacheExoplanetArchiveQuery(basestr, querystring, forceNew=forceNew, **specs)




[docs]
def getExoplanetArchivePSCP(forceNew: bool = False, **specs: Any) -> pandas.DataFrame:
    """
    Fetch the Exoplanet Archive's Planetary Systems Composite Parameters table
    (``pscomppars``), reusing a result cached on disk when one exists.

    Args:
        forceNew (bool):
            Run a fresh query even if results exist on disk.
        **specs (any):
            Any additional keywords to pass to ``cacheExoplanetArchiveQuery``

    Returns:
        pandas.DataFrame:
            Planetary Systems composited parameters table
    """

    return cacheExoplanetArchiveQuery(
        "exoplanetArchivePSCP",
        r"select+*+from+pscomppars",
        forceNew=forceNew,
        **specs,
    )




[docs]
def getHWOStars(forceNew: bool = False, **specs: Any) -> pandas.DataFrame:
    """
    Fetch the ExEP HWO Star List (``di_stars_exep`` table), reusing a result
    cached on disk when one exists.

    Args:
        forceNew (bool):
            Run a fresh query even if results exist on disk.
        **specs (any):
            Any additional keywords to pass to ``cacheExoplanetArchiveQuery``

    Returns:
        pandas.DataFrame:
            ExEP HWO Star List table

    See: https://exoplanetarchive.ipac.caltech.edu/docs/2645_NASA_ExEP_Target_List_HWO_Documentation_2023.pdf  # noqa: E501
    """

    return cacheExoplanetArchiveQuery(
        "HWOStarList",
        r"select+*+from+di_stars_exep",
        forceNew=forceNew,
        **specs,
    )




[docs]
def getExoplanetArchiveAliases(name: str) -> Optional[Dict[str, Any]]:
    """Query the exoplanet archive's system alias service and return results

    See: https://exoplanetarchive.ipac.caltech.edu/docs/sysaliases.html

    Args:
        name (str):
            Target name to resolve. Pass the plain, un-encoded name (e.g.
            "BD+20 594"); it is URL-encoded here before being sent.

    Returns:
        dict:
            The ``system`` entry of the service's JSON response. An empty
            dictionary is returned when the reported lookup status is not "OK".

    .. note::

        This has a tendency to get stuck when run in a loop. This is set up to
        fail after 10 seconds and retry once with a 30 second timeout.

    """

    url = (
        "https://exoplanetarchive.ipac.caltech.edu/cgi-bin/Lookup/"
        "nph-aliaslookup.py"
    )
    # Let requests build the query string so reserved characters in the name
    # ('+', '&', '#', spaces, ...) are percent-encoded instead of being
    # reinterpreted by the server (a raw '+' would be decoded as a space).
    params = {"objname": name}

    try:
        r = requests.get(url, params=params, timeout=10)
    except ReadTimeout:
        # Single retry with a longer timeout; a second timeout propagates.
        r = requests.get(url, params=params, timeout=30)

    data = r.json()

    if data["manifest"]["lookup_status"] != "OK":
        return {}

    return data["system"]