import logging
from marss2l.utils import setup_stream_logger, get_remote_filesystem, pathjoin
# Module-level logger, configured through the project's setup_stream_logger
# helper with the level set to logging.DEBUG.
logger = logging.getLogger(__name__)
setup_stream_logger(logger, level=logging.DEBUG)
IEA 2023 Methane Emissions data
CSV file downloaded from IEA data portal.
import pandas as pd

# Read the IEA comparison export and keep only the rows that name a country.
worlddata = pd.read_csv("IEA-MethaneEmissionsComparison-World.csv")
has_country = worlddata["country"].notna()
worlddata = worlddata[has_country].copy()
worlddata
Our test set is stratified into 11 geographical regions:
- Algeria, Egypt, Libya, Venezuela, Arabian peninsula, Iran, Iraq, Syria, United States of America, Uzbekistan & Kazakhstan, Turkmenistan, Offshore and Rest.
Set the MARS-S2L case study that the country belongs to.
from marss2l import loaders

# Tag every row with the case study returned by loaders._set_case_study for its country.
worlddata["case_study_marss2l"] = worlddata["country"].apply(loaders._set_case_study)
case_studies = worlddata["case_study_marss2l"].unique()

# For each case study, print the sorted list of countries mapped to it.
for ce in case_studies:
    in_case_study = worlddata["case_study_marss2l"] == ce
    countries = worlddata.loc[in_case_study, "country"].unique().tolist()
    # Keep string entries only before sorting.
    countries = sorted(name for name in countries if isinstance(name, str))
    print(f"{ce} -> {countries}")
print(case_studies)
Select data from the Oil and Gas sector
We selected data with type "Energy" and segment onshore or offshore oil or gas.
# Segments retained for the oil-and-gas selection.
segments = ["Onshore oil", "Offshore oil", "Onshore gas", "Offshore gas"]

# Keep rows whose type is "Energy" and whose segment is one of the above.
is_energy = worlddata["type"] == "Energy"
in_segments = worlddata["segment"].isin(segments)
worlddata_og_upstream_all = worlddata.loc[is_energy & in_segments].copy()

# Flag rows whose segment name contains "Offshore".
worlddata_og_upstream_all["offshore"] = worlddata_og_upstream_all["segment"].apply(
    lambda segment: "Offshore" in segment
)
worlddata_og_upstream_all["reason"].value_counts()
Compute the percentage of emissions in each of the 12 geographical regions
# Onshore emissions, summed per MARS-S2L case study.
onshore_rows = worlddata_og_upstream_all.loc[~worlddata_og_upstream_all["offshore"]]
worlddata_og_upstream_onshore = (
    onshore_rows.groupby("case_study_marss2l")[["emissions"]].sum().reset_index()
)

# Offshore emissions are pooled into a single "Offshore" row, regardless of country.
offshore_rows = worlddata_og_upstream_all.loc[worlddata_og_upstream_all["offshore"]]
emissions_offshore = offshore_rows[["emissions"]].sum().item()
emissions_offshore_df = pd.DataFrame(
    [{"case_study_marss2l": "Offshore", "emissions": emissions_offshore}]
)

# Stack both parts and express each row as a percentage of the combined total.
worlddata_og_upstream = pd.concat(
    [worlddata_og_upstream_onshore, emissions_offshore_df],
    ignore_index=True,
)
total_emissions = worlddata_og_upstream["emissions"].sum()
worlddata_og_upstream["percentage emissions"] = (
    worlddata_og_upstream["emissions"] / total_emissions * 100
)

# Rank from largest to smallest emissions.
worlddata_og_upstream = worlddata_og_upstream.sort_values(
    "emissions", ascending=False
).reset_index(drop=True)
worlddata_og_upstream
Best estimate: including offshore, excluding countries not in the case studies
# Sum the percentages of every row whose case study is not "Rest".
in_case_studies = worlddata_og_upstream["case_study_marss2l"] != "Rest"
worlddata_og_upstream.loc[in_case_studies, "percentage emissions"].sum()
More conservative estimate: excluding offshore and Venezuela (as the detection limit in Venezuela and offshore is much higher)
# Sum the percentages after leaving out the "Rest", "Offshore" and "Venezuela" rows.
left_out = ["Rest", "Offshore", "Venezuela"]
is_kept = ~worlddata_og_upstream["case_study_marss2l"].isin(left_out)
worlddata_og_upstream.loc[is_kept, "percentage emissions"].sum()
Percentage of emissions by country
# Onshore emissions, summed per country.
onshore_only = worlddata_og_upstream_all.loc[~worlddata_og_upstream_all["offshore"]]
worlddata_og_upstream_by_country_onshore = (
    onshore_only.groupby(["country"])[["emissions"]].sum().reset_index()
)

# Append the pooled offshore total (emissions_offshore, computed earlier) as an "Offshore" row.
emissions_offshore_df = pd.DataFrame([{"country": "Offshore", "emissions": emissions_offshore}])
worlddata_og_upstream_by_country = pd.concat(
    [worlddata_og_upstream_by_country_onshore, emissions_offshore_df],
    ignore_index=True,
)

# Attach the case study of each row, then express emissions as a percentage of the total.
worlddata_og_upstream_by_country["case_study_marss2l"] = (
    worlddata_og_upstream_by_country["country"].apply(loaders._set_case_study)
)
grand_total = worlddata_og_upstream_by_country["emissions"].sum()
worlddata_og_upstream_by_country["percentage emissions"] = (
    worlddata_og_upstream_by_country["emissions"] / grand_total * 100
)

# Rank from largest to smallest emissions.
worlddata_og_upstream_by_country = worlddata_og_upstream_by_country.sort_values(
    "emissions", ascending=False
).reset_index(drop=True)

# First 25 rows, followed by the share of total emissions they account for.
worlddata_og_upstream_by_country.head(25)
worlddata_og_upstream_by_country.head(25)["percentage emissions"].sum()
# Work on a copy of the first 25 rows, restricted to the columns shown in the table,
# so the source dataframe is left untouched.
table_columns = ["country", "case_study_marss2l", "percentage emissions"]
df_to_print = worlddata_og_upstream_by_country.head(25)[table_columns].copy()

# Show the "Rest" label as "-" in the case-study column.
df_to_print["case_study_marss2l"] = df_to_print["case_study_marss2l"].replace("Rest", "-")

# Print the LaTeX table without the index; floats use the "%.2f" format (two decimal places).
latex_table = df_to_print.to_latex(index=False, float_format="%.2f")
print(latex_table)
Countries with a significant percentage of O&G upstream emissions that are not covered in the MARS-S2L dataset
# First 30 rows (in the current sort order) whose case study is "Rest".
is_rest = worlddata_og_upstream_by_country["case_study_marss2l"] == "Rest"
worlddata_og_upstream_by_country.loc[is_rest].head(30)