from marss2l.utils import setup_stream_logger, get_remote_filesystem, pathjoin

logger = setup_stream_logger()

IEA 2023 Methane Emissions data

CSV file downloaded from IEA data portal.

import pandas as pd

worlddata = pd.read_csv("IEA-MethaneEmissionsComparison-World.csv")
worlddata =  worlddata[~worlddata.country.isna()].copy()
worlddata

	region	country	emissions	source	type	segment	reason	baseYear	emissionsRank	energyRank	notes
0	Africa	Algeria	261	IEA	Agriculture	Total	All	2019-2021	19.0	11.0	Average based on United Nations Framework Conv...
1	Africa	Algeria	0	IEA	Energy	Bioenergy	All	2023	19.0	11.0	Estimates from end-uses are for 2020 or 2021 (...
2	Africa	Algeria	45	IEA	Energy	Gas pipelines and LNG facilities	Fugitive	2023	19.0	11.0	NaN
3	Africa	Algeria	71	IEA	Energy	Gas pipelines and LNG facilities	Vented	2023	19.0	11.0	NaN
4	Africa	Algeria	191	IEA	Energy	Onshore gas	Fugitive	2023	19.0	11.0	NaN
...	...	...	...	...	...	...	...	...	...	...	...
1479	Russia & Caspian	Uzbekistan	20	IEA	Energy	Other from oil and gas	All	2023	33.0	25.0	Estimates from end-uses are for 2020 or 2021 (...
1480	Russia & Caspian	Uzbekistan	57	IEA	Energy	Satellite-detected large oil and gas emissions	All	2023	33.0	25.0	NaN
1481	Russia & Caspian	Uzbekistan	933	IEA	Energy	Total	All	2023	33.0	25.0	Estimates from end-uses are for 2020 or 2021 (...
1482	Russia & Caspian	Uzbekistan	6	IEA	Other	Total	All	2019-2021	33.0	25.0	Average based on United Nations Framework Conv...
1483	Russia & Caspian	Uzbekistan	281	IEA	Waste	Total	All	2019-2021	33.0	25.0	Average based on United Nations Framework Conv...

1484 rows × 11 columns

Our test set is stratified in 11 geographical regions:

Algeria, Egypt, Libya, Venezuela, ,Arabian peninsula, Iran, Iraq, Syria, United States of America, Uzbekistan & Kazakhstan, Turkmenistan, Offshore and Rest.

Set the MARS-S2L case study that the country belongs to.

from marss2l import loaders
worlddata["case_study_marss2l"] = worlddata.country.apply(loaders._set_case_study)

case_studies = worlddata.case_study_marss2l.unique()
for ce in case_studies:
    countries = worlddata.country[worlddata.case_study_marss2l == ce].unique().tolist()
    countries = sorted([c for c in countries if isinstance(c,str)])
    print(f"{ce} -&gt; {countries}")

Algeria -> ['Algeria']
Rest -> ['Angola', 'Argentina', 'Australia', 'Azerbaijan', 'Bangladesh', 'Benin', 'Bolivia', 'Botswana', 'Brazil', 'Brunei', 'Cameroon', 'Canada', 'Chad', 'China', 'Colombia', 'Congo', "Cote d'Ivoire", 'Cuba', 'Democratic Republic of Congo', 'Denmark', 'Ecuador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 'France', 'Gabon', 'Germany', 'Ghana', 'Guyana', 'India', 'Indonesia', 'Israel', 'Italy', 'Japan', 'Jordan', 'Kenya', 'Korea', 'Lebanon', 'Malaysia', 'Mexico', 'Mongolia', 'Morocco', 'Mozambique', 'Namibia', 'Netherlands', 'New Zealand', 'Niger', 'Nigeria', 'Norway', 'Other', 'Other EU17 countries', 'Other EU7 countries', 'Other countries in Europe', 'Other countries in Southeast Asia', 'Pakistan', 'Paraguay', 'Peru', 'Philippines', 'Poland', 'Romania', 'Russia', 'Senegal', 'Slovenia', 'South Africa', 'South Sudan', 'Sudan', 'Sweden', 'Tanzania', 'Thailand', 'Togo', 'Trinidad and Tobago', 'Tunisia', 'Ukraine', 'United Kingdom', 'Uruguay', 'Viet Nam']
Egypt -> ['Egypt']
Libya -> ['Libya']
Venezuela -> ['Venezuela']
Arabian peninsula -> ['Bahrain', 'Kuwait', 'Oman', 'Qatar', 'Saudi Arabia', 'United Arab Emirates', 'Yemen']
Iran (Islamic Republic of) -> ['Iran']
Iraq -> ['Iraq']
Syrian Arab Republic -> ['Syria']
United States of America -> ['United States']
Uzbekistan & Kazakhstan -> ['Kazakhstan', 'Uzbekistan']
Turkmenistan -> ['Turkmenistan']

print(case_studies)

['Algeria' 'Rest' 'Egypt' 'Libya' 'Venezuela' 'Arabian peninsula'
 'Iran (Islamic Republic of)' 'Iraq' 'Syrian Arab Republic'
 'United States of America' 'Uzbekistan & Kazakhstan' 'Turkmenistan']

Select data from the Oil and Gas sector

We selected data with type "Energy" and and segment onshore or offshore gas.

segments = ["Onshore oil", "Offshore oil", "Onshore gas", "Offshore gas"]
worlddata_og_upstream_all = worlddata[(worlddata.type=="Energy") &amp; worlddata.segment.isin(segments)].copy()
worlddata_og_upstream_all["offshore"] = worlddata_og_upstream_all.segment.apply(lambda x: "Offshore" in x)

worlddata_og_upstream_all.reason.value_counts()

reason
Fugitive    250
Vented      250
Flared      120
Name: count, dtype: int64

Compute percentage of emissions on each of the 12 geographical regions

worlddata_og_upstream_onshore = worlddata_og_upstream_all[~worlddata_og_upstream_all.offshore].groupby("case_study_marss2l")[["emissions"]].sum().reset_index()
# worlddata_og_upstream_onshore["percentage emissions"] = worlddata_og_upstream_onshore.emissions / worlddata_og_upstream_onshore.emissions.sum() * 100

emissions_offshore = worlddata_og_upstream_all[worlddata_og_upstream_all.offshore][["emissions"]].sum().item()
emissions_offshore_df = pd.DataFrame([{"case_study_marss2l": "Offshore", "emissions": emissions_offshore}])
worlddata_og_upstream = pd.concat([worlddata_og_upstream_onshore, emissions_offshore_df], 
                                  ignore_index=True)
worlddata_og_upstream["percentage emissions"] = worlddata_og_upstream.emissions / worlddata_og_upstream.emissions.sum() * 100
worlddata_og_upstream = worlddata_og_upstream.sort_values("emissions",ascending=False).reset_index(drop=True)
# worlddata_og_upstream = pd.concat([worlddata_og_upstream_onshore,],axis=0)
worlddata_og_upstream

	case_study_marss2l	emissions	percentage emissions
0	Rest	19319	29.445655
1	Offshore	15011	22.879483
2	United States of America	10133	15.444527
3	Arabian peninsula	4113	6.268957
4	Iran (Islamic Republic of)	3645	5.555640
5	Algeria	2572	3.920194
6	Venezuela	2542	3.874468
7	Iraq	2259	3.443125
8	Turkmenistan	1915	2.918807
9	Uzbekistan & Kazakhstan	1818	2.770961
10	Libya	1566	2.386868
11	Syrian Arab Republic	381	0.580713
12	Egypt	335	0.510601

Best estimate: including offshore excluding countries not in case-studies

worlddata_og_upstream.loc[worlddata_og_upstream.case_study_marss2l != "Rest", "percentage emissions"].sum()

70.55434467832157

More conservative estimate: excluding offshore and Venezuela (as detection limit in Venezuela and Offshore is much higher)

worlddata_og_upstream.loc[~worlddata_og_upstream.case_study_marss2l.isin(["Rest", "Offshore", "Venezuela"]), "percentage emissions"].sum()

43.8003932387325

Percentage of emissions by country

worlddata_og_upstream_by_country_onshore = worlddata_og_upstream_all[~worlddata_og_upstream_all.offshore].groupby(["country"])[["emissions"]].sum().reset_index()
# Concat offshore
emissions_offshore_df = pd.DataFrame([{"country": "Offshore", "emissions": emissions_offshore}])
worlddata_og_upstream_by_country = pd.concat([worlddata_og_upstream_by_country_onshore, emissions_offshore_df], 
                                             ignore_index=True)

worlddata_og_upstream_by_country["case_study_marss2l"] = worlddata_og_upstream_by_country.country.apply(loaders._set_case_study)
worlddata_og_upstream_by_country["percentage emissions"] = worlddata_og_upstream_by_country.emissions / worlddata_og_upstream_by_country.emissions.sum() * 100
worlddata_og_upstream_by_country = worlddata_og_upstream_by_country.sort_values("emissions",ascending=False).reset_index(drop=True)
worlddata_og_upstream_by_country.iloc[:25]

	country	emissions	case_study_marss2l	percentage emissions
0	Offshore	15011	Offshore	22.879483
1	United States	10133	United States of America	15.444527
2	Russia	9525	Rest	14.517825
3	Iran	3645	Iran (Islamic Republic of)	5.555640
4	Algeria	2572	Algeria	3.920194
5	Venezuela	2542	Venezuela	3.874468
6	Iraq	2259	Iraq	3.443125
7	Canada	2202	Rest	3.356247
8	China	1965	Rest	2.995016
9	Turkmenistan	1915	Turkmenistan	2.918807
10	Libya	1566	Libya	2.386868
11	Saudi Arabia	1338	Arabian peninsula	2.039354
12	Kazakhstan	1180	Uzbekistan & Kazakhstan	1.798534
13	Kuwait	961	Arabian peninsula	1.464738
14	Argentina	945	Rest	1.440351
15	United Arab Emirates	827	Arabian peninsula	1.260498
16	Nigeria	684	Rest	1.042540
17	Uzbekistan	638	Uzbekistan & Kazakhstan	0.972428
18	Oman	626	Arabian peninsula	0.954137
19	Indonesia	556	Rest	0.847445
20	Mexico	391	Rest	0.595955
21	Syria	381	Syrian Arab Republic	0.580713
22	Pakistan	367	Rest	0.559374
23	Egypt	335	Egypt	0.510601
24	Colombia	325	Rest	0.495359

worlddata_og_upstream_by_country.iloc[:25]["percentage emissions"].sum()

95.85422731637426

# Create a copy of the dataframe to avoid modifying the original
df_to_print = worlddata_og_upstream_by_country.iloc[:25][["country", "case_study_marss2l", "percentage emissions"]].copy()

# Replace "Rest" with "-" in the case_study_marss2l column
df_to_print["case_study_marss2l"] = df_to_print["case_study_marss2l"].replace("Rest", "-")

# Print LaTeX table with formatted percentage (1 decimal place)
print(df_to_print.to_latex(index=False, float_format="%.2f"))

\begin{tabular}{llr}
\toprule
country & case_study_marss2l & percentage emissions \\
\midrule
Offshore & Offshore & 22.88 \\
United States & United States of America & 15.44 \\
Russia & - & 14.52 \\
Iran & Iran (Islamic Republic of) & 5.56 \\
Algeria & Algeria & 3.92 \\
Venezuela & Venezuela & 3.87 \\
Iraq & Iraq & 3.44 \\
Canada & - & 3.36 \\
China & - & 3.00 \\
Turkmenistan & Turkmenistan & 2.92 \\
Libya & Libya & 2.39 \\
Saudi Arabia & Arabian peninsula & 2.04 \\
Kazakhstan & Uzbekistan & Kazakhstan & 1.80 \\
Kuwait & Arabian peninsula & 1.46 \\
Argentina & - & 1.44 \\
United Arab Emirates & Arabian peninsula & 1.26 \\
Nigeria & - & 1.04 \\
Uzbekistan & Uzbekistan & Kazakhstan & 0.97 \\
Oman & Arabian peninsula & 0.95 \\
Indonesia & - & 0.85 \\
Mexico & - & 0.60 \\
Syria & Syrian Arab Republic & 0.58 \\
Pakistan & - & 0.56 \\
Egypt & Egypt & 0.51 \\
Colombia & - & 0.50 \\
\bottomrule
\end{tabular}

Countries with significant percentage of O&G upstream emissions not covered in the MARS-S2L dataset

worlddata_og_upstream_by_country[worlddata_og_upstream_by_country.case_study_marss2l == "Rest"].iloc[:30]

	country	emissions	case_study_marss2l	percentage emissions
2	Russia	9525	Rest	14.517825
7	Canada	2202	Rest	3.356247
8	China	1965	Rest	2.995016
14	Argentina	945	Rest	1.440351
16	Nigeria	684	Rest	1.042540
19	Indonesia	556	Rest	0.847445
20	Mexico	391	Rest	0.595955
22	Pakistan	367	Rest	0.559374
24	Colombia	325	Rest	0.495359
25	India	311	Rest	0.474020
26	Ecuador	296	Rest	0.451158
27	Australia	139	Rest	0.211861
28	Romania	137	Rest	0.208813
30	Brazil	130	Rest	0.198144
31	Ukraine	125	Rest	0.190523
33	Peru	119	Rest	0.181378
34	Gabon	114	Rest	0.173757
35	Other	112	Rest	0.170708
37	Bolivia	104	Rest	0.158515
38	Bangladesh	102	Rest	0.155466
39	South Sudan	77	Rest	0.117362
40	Chad	58	Rest	0.088403
41	Other countries in Europe	57	Rest	0.086878
42	Sudan	56	Rest	0.085354
43	South Africa	46	Rest	0.070112
44	Angola	43	Rest	0.065540
45	Congo	41	Rest	0.062491
46	Thailand	40	Rest	0.060967
47	Tunisia	22	Rest	0.033532
48	Poland	22	Rest	0.033532