Source code for pudl.metadata.resources.ferc714

"""Table definitions for data coming from the FERC Form 714."""

from typing import Any

# Metadata for every FERC Form 714 derived table defined in this module.
#
# Each top-level key is a table (resource) name. Each value is a dict with:
#   * "description": text fragments (summary, source, details, usage warnings,
#     primary key notes) written in reStructuredText.
#   * "schema": the ordered list of field names, plus an optional
#     "primary_key" and optional "foreign_key_rules".
#   * "sources", "field_namespace", "etl_group": string tags for the table.
#   * "create_database_schema": optional boolean flag set on some tables.
#
# Adjacent string literals inside parentheses are concatenated by Python, so
# every chunk that is followed by more text must end with its own trailing
# space (or explicit "\n\n" paragraph break).
RESOURCE_METADATA: dict[str, dict[str, Any]] = {
    "core_ferc714__respondent_id": {
        "description": {
            "additional_summary_text": "Respondent identification.",
            "additional_source_text": "(Part I, Schedule 1)",
        },
        "schema": {
            "fields": [
                "respondent_id_ferc714",
                "respondent_id_ferc714_csv",
                "respondent_id_ferc714_xbrl",
                "respondent_name_ferc714",
                "eia_code",
            ],
            "primary_key": ["respondent_id_ferc714"],
        },
        "sources": ["ferc714"],
        "field_namespace": "ferc714",
        "etl_group": "ferc714",
    },
    "core_ferc714__hourly_planning_area_demand": {
        "description": {
            "additional_summary_text": "electricity demand by planning area.",
            "additional_source_text": "(Part III, Schedule 2a)",
            "usage_warnings": [
                {
                    "type": "custom",
                    "description": "The datetime_utc timestamps have been cleaned due to inconsistent datetime reporting. See below for additional details.",
                }
            ],
            "additional_details_text": (
                "This table includes data from the pre-2021 CSV raw source "
                "as well as the newer 2021 through present XBRL raw source.\n\n"
                "This table includes three respondent ID columns: one from the "
                "CSV raw source, one from the XBRL raw source and another that is PUDL-derived "
                "that links those two source ID's together. This table has filled in source IDs "
                "for all records so you can select the full timeseries for a given respondent from "
                "any of these three IDs.\n\nAn important "
                "caveat to note is that there was some cleaning done to the datetime_utc "
                "timestamps. The Form 714 includes sparse documentation for respondents "
                "for how to interpret timestamps - the form asks respondents to provide "
                "24 instances of hourly demand for each day. The form is labeled with hour "
                "1-24. There is no indication if hour 1 begins at midnight.\n\nThe XBRL data "
                "contained several formats of timestamps. Most records corresponding to hour "
                "1 of the Form have a timestamp with hour 1 as T1. About two thirds of the records "
                "in the hour 24 location of the form have a timestamp with an hour reported as "
                "T24 while the remaining third report this as T00 of the next day. T24 is not a "
                "valid format for the hour of a datetime, so we convert these T24 hours into "
                "T00 of the next day. A smaller subset of the respondents reports the 24th hour "
                "as the last second of the day - we also convert these records to the T00 of the "
                "next day."
            ),
        },
        "schema": {
            "fields": [
                "respondent_id_ferc714",
                "respondent_id_ferc714_csv",
                "respondent_id_ferc714_xbrl",
                "report_date",
                "datetime_utc",
                "timezone",
                "demand_mwh",
            ],
            "primary_key": ["respondent_id_ferc714", "datetime_utc"],
        },
        "sources": ["ferc714"],
        "field_namespace": "ferc714",
        "etl_group": "ferc714",
        "create_database_schema": False,
    },
    "out_ferc714__hourly_planning_area_demand": {
        "description": {
            "additional_summary_text": "electricity demand by planning area.",
            "additional_source_text": "(Part III, Schedule 2a)",
            # Usage warnings mix a bare string code with a custom dict entry.
            "usage_warnings": [
                "imputed_values",
                {
                    "type": "custom",
                    "description": (
                        "The datetime_utc timestamps have been cleaned due to "
                        "inconsistent datetime reporting. See :ref:`core_ferc714__hourly_planning_area_demand` for additional details."
                    ),
                },
            ],
            "additional_details_text": (
                "This table is based on :ref:`core_ferc714__hourly_planning_area_demand`, but adds "
                "imputed demand values where the original data was missing or anomalous. "
                "Codes explaining why values have been imputed can be found in the "
                ":ref:`core_pudl__codes_imputation_reasons` table."
            ),
        },
        "schema": {
            "fields": [
                "respondent_id_ferc714",
                "respondent_id_ferc714_csv",
                "respondent_id_ferc714_xbrl",
                "report_date",
                "datetime_utc",
                "timezone",
                "demand_reported_mwh",
                "demand_imputed_pudl_mwh",
                "demand_imputed_pudl_mwh_imputation_code",
            ],
            "primary_key": ["respondent_id_ferc714", "datetime_utc"],
        },
        "sources": ["ferc714"],
        "field_namespace": "ferc714",
        "etl_group": "ferc714",
        "create_database_schema": False,
    },
    "out_ferc714__respondents_with_fips": {
        "description": {
            "additional_summary_text": "Annual respondents with the county FIPS IDs for their service territories.",
            "additional_primary_key_text": (
                "Note that the state and county FIPS columns can be null. Otherwise "
                "the natural primary key would be: "
                "['respondent_id_ferc714', 'report_date', 'county_id_fips']. The state "
                "FIPS code is not part of the primary key because the first two digits "
                "of the county FIPS code indicate the state."
            ),
        },
        # No "primary_key" entry here; the description text above explains why.
        "schema": {
            "fields": [
                "eia_code",
                "respondent_type",
                "respondent_id_ferc714",
                "respondent_name_ferc714",
                "report_date",
                "balancing_authority_id_eia",
                "balancing_authority_code_eia",
                "balancing_authority_name_eia",
                "utility_id_eia",
                "utility_name_eia",
                "state",
                "county",
                "state_id_fips",
                "county_id_fips",
            ]
        },
        "sources": ["ferc714", "censusdp1tract", "eia861"],
        "field_namespace": "ferc714",
        "etl_group": "outputs",
    },
    "out_ferc714__georeferenced_respondents": {
        "description": {
            "additional_summary_text": "Annual summary of demand and other information about FERC-714 respondents.",
            "additional_details_text": (
                "This table differs from :ref:`out_ferc714__summarized_demand` in that it also includes a "
                "geometry column describing the respondent's service territory in each year. "
                "These service territories are based on the counties that the corresponding "
                "EIA-861 respondent reported serving in that year. "
                "There is sometimes ambiguity as to whether a FERC-714 respondent should be "
                "interpreted as an individual utility or a balancing authority. "
                # The column name must match the "respondent_type" schema field below.
                "The ``respondent_type`` column indicates which type of entity has been assumed "
                "in determining the service territory from EIA-861 data."
            ),
        },
        "schema": {
            "fields": [
                "report_date",
                "respondent_id_ferc714",
                "geometry",
                "eia_code",
                "respondent_type",
                "respondent_name_ferc714",
                "balancing_authority_id_eia",
                "balancing_authority_code_eia",
                "balancing_authority_name_eia",
                "utility_id_eia",
                "utility_name_eia",
                "population",
                "area_km2",
                "demand_annual_mwh",
            ],
            "primary_key": ["respondent_id_ferc714", "report_date"],
        },
        "sources": ["ferc714", "censusdp1tract", "eia861"],
        "field_namespace": "ferc714",
        "etl_group": "outputs",
        "create_database_schema": False,
    },
    "out_ferc714__summarized_demand": {
        "description": {
            "additional_summary_text": (
                "Summarized demand statistics and FERC-714 respondent attributes by respondent-year."
            )
        },
        "schema": {
            "fields": [
                "report_date",
                "respondent_id_ferc714",
                "demand_annual_mwh",
                "population",
                "area_km2",
                "population_density_km2",
                "demand_annual_per_capita_mwh",
                "demand_density_mwh_km2",
                "eia_code",
                "respondent_type",
                "respondent_name_ferc714",
                "balancing_authority_id_eia",
                "balancing_authority_code_eia",
                "balancing_authority_name_eia",
                "utility_id_eia",
                "utility_name_eia",
            ],
            "primary_key": ["respondent_id_ferc714", "report_date"],
        },
        "sources": ["ferc714"],
        "field_namespace": "ferc714",
        "etl_group": "outputs",
    },
    "core_ferc714__yearly_planning_area_demand_forecast": {
        "description": {
            "additional_summary_text": "10-year forecasted summer and winter peak demand and annual net energy "
            "per planning area.",
            "additional_source_text": "(Part III, Schedule 2b)",
            "usage_warnings": [
                {
                    "type": "custom",
                    "description": (
                        "This table "
                        "includes data from the pre-2021 CSV raw source as well as the newer 2021 "
                        "through present XBRL raw source. We created the respondent_id_ferc714 "
                        "field to blend disparate IDs from the CSV and XBRL data over time. See "
                        "the :ref:`core_ferc714__respondent_id` table for links to the original source IDs."
                    ),
                },
                {
                    "type": "custom",
                    "description": "There is a small handful of respondents (~11) that report more than 10 "
                    "years and an even smaller handful that report less than 10 (~9).",
                },
            ],
            "additional_details_text": (
                "This table contains forecasted net demand (MWh) as well as summer and winter "
                "peak demand (MW) for the next ten years after the report_year."
            ),
        },
        "schema": {
            "fields": [
                "respondent_id_ferc714",
                "report_year",
                "forecast_year",
                "summer_peak_demand_forecast_mw",
                "winter_peak_demand_forecast_mw",
                "net_demand_forecast_mwh",
            ],
            "primary_key": ["respondent_id_ferc714", "report_year", "forecast_year"],
        },
        "sources": ["ferc714"],
        "field_namespace": "ferc714",
        "etl_group": "ferc714",
        "create_database_schema": True,
    },
    "core_pudl__assn_ferc714_pudl_respondents": {
        "description": {
            "table_type_code": "entity",
            "additional_summary_text": "PUDL-derived FERC 714 respondent IDs.",
            # NOTE(review): the text below says "CSV" but the path ends in
            # ".xlsx" -- confirm which is correct before changing either.
            "additional_details_text": (
                "These IDs are used to connect "
                "older CSV data which uses different respondent IDs than the newer XBRL entity IDs. "
                "These IDs are manually assigned when new FERC 714 data is integrated, and any "
                "newly found utilities are added to "
                "the list with a new ID. "
                "This table is read in from a CSV stored in the PUDL "
                "repository: src/pudl/package_data/glue/respondent_id_ferc714.xlsx"
            ),
        },
        "schema": {
            "fields": ["respondent_id_ferc714"],
            "primary_key": ["respondent_id_ferc714"],
            "foreign_key_rules": {"fields": [["respondent_id_ferc714"]]},
        },
        "etl_group": "glue",
        "field_namespace": "pudl",
        "sources": ["pudl", "ferc714"],
    },
    "core_pudl__assn_ferc714_csv_pudl_respondents": {
        "description": {
            "additional_summary_text": "PUDL-assigned FERC714 respondent IDs and native FERC714 CSV respondent IDs originally reported as ``respondent_id``.",
        },
        "schema": {
            "fields": ["respondent_id_ferc714", "respondent_id_ferc714_csv"],
            "primary_key": ["respondent_id_ferc714", "respondent_id_ferc714_csv"],
        },
        "etl_group": "glue",
        "field_namespace": "pudl",
        "sources": ["pudl", "ferc714"],
    },
    "core_pudl__assn_ferc714_xbrl_pudl_respondents": {
        "description": {
            "additional_summary_text": "PUDL-assigned FERC714 respondent IDs and native FERC714 XBRL respondent IDs originally reported as ``entity_id``.",
        },
        "schema": {
            "fields": ["respondent_id_ferc714", "respondent_id_ferc714_xbrl"],
            "primary_key": ["respondent_id_ferc714", "respondent_id_ferc714_xbrl"],
        },
        "etl_group": "glue",
        "field_namespace": "pudl",
        "sources": ["pudl", "ferc714"],
    },
}
"""FERC Form 714 resource attributes by PUDL identifier (``resource.name``).

See :func:`pudl.metadata.helpers.build_foreign_keys` for the expected format of
``foreign_key_rules``.
"""