Source code for pudl.glue.ferc714

"""Extract and transform glue tables between FERC Form 714's CSV and XBRL raw sources."""

import importlib.resources

import pandas as pd

import pudl.logging_helpers

# Module-level logger, named after this module.
logger = pudl.logging_helpers.get_logger(__name__)

RESP_ID_FERC_MAP_CSV = (
    importlib.resources.files("pudl.package_data.glue") / "respondent_id_ferc714.csv"
)
"""Path to the packaged CSV mapping FERC 714 respondent IDs across the CSV and XBRL sources."""
def get_respondent_map_ferc714() -> pd.DataFrame:
    """Load the manually maintained CSV-to-XBRL FERC 714 respondent ID map.

    Returns:
        The contents of the file at ``RESP_ID_FERC_MAP_CSV``, with each column
        converted to the best-fitting pandas dtype via ``convert_dtypes``.
    """
    raw_respondent_map = pd.read_csv(RESP_ID_FERC_MAP_CSV)
    return raw_respondent_map.convert_dtypes()
def glue() -> dict[str, pd.DataFrame]:
    """Make the FERC 714 glue tables out of stored CSVs of association tables.

    This function was mirrored off of ferc1_eia.glue, but is much more pared down.

    Every output table is a column subset of the respondent map returned by
    :func:`get_respondent_map_ferc714`. For each table, rows that repeat the
    table's key ID are dropped (the first occurrence is kept), and then rows
    whose key ID is null are dropped.

    Returns:
        A dictionary of dataframes keyed by table name, in this order:

        * ``core_pudl__assn_ferc714_pudl_respondents``: the unique, non-null
          ``respondent_id_ferc714`` values.
        * ``core_pudl__assn_ferc714_csv_pudl_respondents``:
          ``respondent_id_ferc714`` paired with each unique, non-null
          ``respondent_id_ferc714_csv``.
        * ``core_pudl__assn_ferc714_xbrl_pudl_respondents``:
          ``respondent_id_ferc714`` paired with each unique, non-null
          ``respondent_id_ferc714_xbrl``.
    """
    respondent_map = get_respondent_map_ferc714()

    # Output table name -> (columns to keep, ID column that must end up unique
    # and non-null). Dict insertion order fixes the order of the result.
    table_specs = {
        "core_pudl__assn_ferc714_pudl_respondents": (
            ["respondent_id_ferc714"],
            "respondent_id_ferc714",
        ),
        "core_pudl__assn_ferc714_csv_pudl_respondents": (
            ["respondent_id_ferc714", "respondent_id_ferc714_csv"],
            "respondent_id_ferc714_csv",
        ),
        "core_pudl__assn_ferc714_xbrl_pudl_respondents": (
            ["respondent_id_ferc714", "respondent_id_ferc714_xbrl"],
            "respondent_id_ferc714_xbrl",
        ),
    }

    return {
        table_name: (
            respondent_map.loc[:, columns]
            .drop_duplicates(key_column)
            .dropna(subset=[key_column])
        )
        for table_name, (columns, key_column) in table_specs.items()
    }