Skip to content

Figure number of images per country

Install marss2l package

pip install marss2l
%%time
import matplotlib
from marss2l.utils import setup_stream_logger
import logging
from huggingface_hub import hf_file_system
from marss2l.huggingface import REPO_ID

logger = logging.getLogger(__name__)
setup_stream_logger(logger)

matplotlib.rcParams['mathtext.fontset'] = 'stix'
matplotlib.rcParams['font.family'] = 'STIXGeneral'

C0 = "#648FFF"
C1 = "#785EF0"
C2 = "#DC267F"
C3 = "#FE6100"
C4 = "#FFB000" # #FFB000

import os

fs = hf_file_system.HfFileSystem()
os.makedirs("figures", exist_ok=True)
/home/gonzalo/mambaforge/envs/marss2ltacopy312/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

CPU times: user 2.1 s, sys: 1.94 s, total: 4.04 s
Wall time: 1.97 s

%%time 

from marss2l import loaders

LOCS_TRAINING_IN_DISTRIBUTION = loaders.LOCS_TRAINING_ABLATION
csv_path = f"datasets/{REPO_ID}/validated_images_all.csv" 
dataframe_data_traintest = loaders.read_csv(csv_path, 
                                            add_columns_for_analysis=True, 
                                            fs=fs, split="all", add_case_study=True, 
                                            add_loc_type=True)
dataframe_data_traintest["country"] = dataframe_data_traintest.apply(lambda row: "Offshore" if row.offshore else row.country, 
                                                                     axis=1)
CPU times: user 51.4 s, sys: 2 s, total: 53.4 s
Wall time: 1min 2s

# Count number of images with more than one plume
# from shapely import wkt
# import geopandas as gpd
# geometries = dataframe_data_traintest.plume.apply(wkt.loads)
# dataframe_data_traintest["nplumes"] = geometries.apply(lambda geom: 0 if geom.is_empty else len(geom.geoms))
# dataframe_data_traintest.nplumes.value_counts()
dataframe_data_traintest = dataframe_data_traintest[~dataframe_data_traintest.location_name.isin(loaders.LOCATIONS_CONTROL_RELEASES)].copy()
rename_splits = {"test_2023": "test", "train_2023": "train", "val_2023": "val"}
dataframe_data_traintest["split_name"] = dataframe_data_traintest["split_name"].apply(lambda x: rename_splits[x] if x in rename_splits else x)
nplumes_total = dataframe_data_traintest.loc[dataframe_data_traintest.split_name.isin(["train","val","test"]),"isplume"].sum()
nimages_total = dataframe_data_traintest.loc[dataframe_data_traintest.split_name.isin(["train","val","test"])].shape[0]
nlocs_total = dataframe_data_traintest.location_name.nunique()
print(f"In total {nimages_total} images containing {nplumes_total} emissions over {nlocs_total} distinct emitters")
In total 87887 images containing 5643 emissions over 1315 distinct emitters

images_by_yearmonth = dataframe_data_traintest.groupby(["country","year_month"])["isplume"].agg(["count","sum"]).rename({"count": "# images", "sum": "# plumes"}, axis=1)
images_by_yearmonth["# locs"] = dataframe_data_traintest.groupby(["country","year_month"])["location_name"].nunique()
images_by_yearmonth = images_by_yearmonth.reset_index()
images_by_yearmonth
country year_month # images # plumes # locs
0 Algeria 2013-03-01 1 0 1
1 Algeria 2013-04-01 1 0 1
2 Algeria 2013-06-01 2 0 1
3 Algeria 2013-08-01 1 1 1
4 Algeria 2013-09-01 1 1 1
... ... ... ... ... ...
1250 Yemen 2024-08-01 23 2 3
1251 Yemen 2024-09-01 31 10 3
1252 Yemen 2024-10-01 32 11 3
1253 Yemen 2024-11-01 28 16 3
1254 Yemen 2024-12-01 36 8 4

1255 rows × 5 columns

images_by_yearmonth_casestudy = dataframe_data_traintest.groupby(["case_study","year_month"])["isplume"].agg(["count","sum"]).rename({"count": "# images", "sum": "# plumes"}, axis=1)
images_by_yearmonth_casestudy["# locs"] = dataframe_data_traintest.groupby(["case_study","year_month"])["location_name"].nunique()
images_by_yearmonth_casestudy = images_by_yearmonth_casestudy.reset_index()
images_by_yearmonth_casestudy
case_study year_month # images # plumes # locs
0 Algeria 2013-03-01 1 0 1
1 Algeria 2013-04-01 1 0 1
2 Algeria 2013-06-01 2 0 1
3 Algeria 2013-08-01 1 1 1
4 Algeria 2013-09-01 1 1 1
... ... ... ... ... ...
869 Venezuela 2024-08-01 3 0 2
870 Venezuela 2024-09-01 2 0 2
871 Venezuela 2024-10-01 4 1 2
872 Venezuela 2024-11-01 14 1 4
873 Venezuela 2024-12-01 14 0 5

874 rows × 5 columns

Time series total number of images for Figure 1b

from marss2l.plot import images_by_month
from importlib import reload
import matplotlib.pyplot as plt
from datetime import datetime
reload(images_by_month)

fig, ax = plt.subplots(1, 1, figsize=(11, 2.75))
images_by_month.plot_images_by_month(dataframe_data_traintest, ax, datetime(2018, 1, 1),
                                    step_in_months=3,
                                    datetime_end=datetime(2025, 2, 1))

plt.tight_layout()
plt.savefig("figures/fig1_ts_images_detections.pdf")
No description has been provided for this image
import pandas as pd
pd.set_option('display.max_rows', 300)

Number of images in case studies

All data

case_studies_plot = loaders.ORDER_CASE_STUDIES[:6]

ncountries = len(case_studies_plot)
fig, ax = plt.subplots(ncountries,1, figsize=(11, ncountries*2.75), sharex=True, tight_layout=True)

for i, country in enumerate(case_studies_plot):
    df_plot = dataframe_data_traintest[dataframe_data_traintest.case_study == country]
    images_by_month.plot_images_by_month(df_plot, ax=ax[i],
                                         loc_legend="upper left", 
                                         plot_legend=  i == 0,
                                         datetime_start=datetime(2018,1,1),
                                         datetime_end=datetime(2025,1,1))
    ax[i].set_title(country)

plt.savefig("figures/fig1_ts_images_detections_by_country_1.pdf")
No description has been provided for this image
case_studies_plot = loaders.ORDER_CASE_STUDIES[6:]

ncountries = len(case_studies_plot)
fig, ax = plt.subplots(ncountries,1, figsize=(11, ncountries*2.75), sharex=True, tight_layout=True)

for i, country in enumerate(case_studies_plot):
    df_plot = dataframe_data_traintest[dataframe_data_traintest.case_study == country]
    images_by_month.plot_images_by_month(df_plot, ax=ax[i],
                                         loc_legend="upper left", 
                                         plot_legend= i == 0,
                                         datetime_start=datetime(2018,1,1),
                                         datetime_end=datetime(2025,1,1))
    ax[i].set_title(country)
plt.savefig("figures/fig1_ts_images_detections_by_country_2.pdf")
No description has been provided for this image

Distribution of test data by case study

from matplotlib import gridspec
import seaborn as sns

# Create a figure with a specified size
fig = plt.figure(figsize=(10,2.5*len(loaders.ORDER_CASE_STUDIES)), 
                 layout="constrained")

# Create a GridSpec with 2 rows and 2 columns, specifying the width ratios
gs = gridspec.GridSpec(len(loaders.ORDER_CASE_STUDIES), 2, 
                       width_ratios=[1, 4], figure=fig)

# Create subplots using the GridSpec and share the x-axis between specific subplots
ax1 = fig.add_subplot(gs[0, 0])  # Narrower subplot
ax2 = fig.add_subplot(gs[0, 1])  # Wider subplot


for i, country in enumerate(loaders.ORDER_CASE_STUDIES):
    df_plot_hist_fluxrate = dataframe_data_traintest[(dataframe_data_traintest.split_name == "test")  & (dataframe_data_traintest.case_study == country)]
    if i == 0:
        ax1iter = ax1
        ax2iter = ax2
    else:
        ax1iter = fig.add_subplot(gs[i, 0], sharex=ax1)  # Narrower subplot, sharing x-axis with ax1
        ax2iter = fig.add_subplot(gs[i, 1], sharex=ax2)  # Wider subplot, sharing x-axis with ax2

    sns.countplot(data=df_plot_hist_fluxrate[df_plot_hist_fluxrate.isplume], 
                  x="interval_ch4_fluxrate_str", ax=ax2iter,color=C1)
    sns.countplot(data=df_plot_hist_fluxrate, x="isplume", ax=ax1iter,color=C1)
    ax1iter.set_ylabel("# images")
    ax2iter.set_ylabel("# plumes")
    ax2iter.set_title(country)
    # if i != len(loaders.ORDER_CASE_STUDIES) -1:
        # ax2iter.xaxis.set_visible(False)
    #    ax1iter.xaxis.set_visible(False)
    # ax2iter.grid(axis="x")
    ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
    ax2iter.set_xlabel("")
    ax1iter.set_xlabel("")

plt.xticks(rotation=30)

ax1.xaxis.set_visible(False)
# ax2.xaxis.set_visible(False)

ax2iter.set_xlabel("Flux rate (t/h)")
ax1iter.set_xlabel("")
ax1iter.set_xticks([False, True],["no plume","plume"])
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)
/tmp/ipykernel_70658/1239688717.py:36: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax2iter.set_xticklabels(ax2iter.get_xticklabels(), rotation=30)

[<matplotlib.axis.XTick at 0x7fa24906d2b0>,
 <matplotlib.axis.XTick at 0x7fa24906d250>]
No description has been provided for this image