Code to reproduce the controlled release plots
This code loads the results from the controlled releases experiment that are stored in Hugging Face.
To eval the model in the controlled releases run:
# Run eval in the controlled releases MARS S2L model
python -m marss2l.eval_final \
--split control_releases_test \
--output_dir trained_models/MARSS2L_20250326 \
--device cpu \
--num_workers 16\
--suffix_output controlled_releases\
--csv_path /path/qto/localdir/MARS-S2L/validated_images_all.csv
Install marss2l package
pip install marss2l
import pandas as pd
import matplotlib
import os
from huggingface_hub import hf_file_system
from marss2l.huggingface import REPO_ID
fs = hf_file_system.HfFileSystem()
from marss2l.plot import C0, C1, C2, C3, C4
os.makedirs("figures", exist_ok=True)
releases_path = f"datasets/{REPO_ID}/data/control_releases/releases_cleaned_up_data.csv"
with fs.open(releases_path) as f:
releases_gt = pd.read_csv(f)
releases_gt_tile_indexed = releases_gt.set_index("tile")
releases_gt_tile_indexed
path_test_control_releases = f"datasets/{REPO_ID}/trained_models/MARSS2L_20250326/preds_control_releases_testth100.csv"
with fs.open(path_test_control_releases) as f:
model_output = pd.read_csv(f)
model_output["experiment"] = model_output.tile.apply(lambda x: releases_gt_tile_indexed.loc[x,"experiment"])
model_output["ch4_kgh_mean"] = model_output.tile.apply(lambda x: releases_gt_tile_indexed.loc[x,"ch4_kgh_mean"])
model_output["Satellite"] = model_output.tile.apply(lambda x: releases_gt_tile_indexed.loc[x,"Satellite"])
model_output["tile_date"] = model_output.tile.apply(lambda x: releases_gt_tile_indexed.loc[x,"tile_date"])
model_output.loc[pd.isna(model_output.Q), "Q"] = 0
model_output.loc[pd.isna(model_output.sigma_Q), "sigma_Q"] = 0
model_output = model_output.rename(columns={"Q": "pred_ch4_kgh_mean","sigma_Q": "pred_ch4_kgh_sigma"})
model_output["isplumepred"] = (model_output["scene_pred"] > 0.5).astype(int)
model_output[["scene_pred","target","ch4_kgh_mean","pred_ch4_kgh_mean","pred_ch4_kgh_sigma","TP","FP","TN","FN","experiment","tile_date","Satellite"]].sort_values("tile_date")
model_output["sensor"] = model_output.tile.apply(lambda x: x.split("_")[0])
Stats experiment for paragraph
model_output.groupby("experiment")["target"].agg(["count","sum"]).rename(columns={"count":"nimages", "sum":"nplumes"})
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
cm = confusion_matrix(model_output.ch4_kgh_mean > 0,
model_output.pred_ch4_kgh_mean > 0)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots(1,1,figsize=(5.75,3), tight_layout=True)
COLORS_SATELLITE = {"Sentinel-2": C0, "Landsat 8": C2}
ltys = []
for satellite in COLORS_SATELLITE:
dsherwin2023 = model_output[(model_output["Satellite"] == satellite) & (model_output["experiment"] == "Sherwin 2023")]
dsherwin2024 = model_output[(model_output["Satellite"] == satellite) & (model_output["experiment"] == "Sherwin 2024")]
lty = ax.scatter(x=dsherwin2023["ch4_kgh_mean"], y=dsherwin2023["scene_pred"], c=COLORS_SATELLITE[satellite],marker="o",alpha=.9)
lty2 = ax.scatter(x=dsherwin2024["ch4_kgh_mean"], y=dsherwin2024["scene_pred"], c=COLORS_SATELLITE[satellite], marker="x",alpha=.9)
ltys.append([lty, lty2])
ax.grid(axis="x")
ax.set_xlabel("Metered emission rate (t/h)")
ax.set_ylabel("predicted probability")
ticks = np.arange(0, 7100, 1000)
ax.set_xticks(ticks, (ticks/1000).astype(int))
legend1 = plt.legend(ltys[0],["Sherwin 2023", "Sherwin 2024"], loc="center right")
legend2 = plt.legend([l[0] for l in ltys], COLORS_SATELLITE.keys(), loc=4)
ax.add_artist(legend1)
plt.savefig("figures/scatterplot_controlled_releases.pdf")
# pyplot.gca().add_artist(legend1)
controlled_releases_positives = model_output[(model_output.pred_ch4_kgh_mean>0) & (model_output.ch4_kgh_mean>10)]
fig, ax = plt.subplots(1,1,figsize=(3,3), tight_layout=True)
for satame, data in controlled_releases_positives.reset_index().groupby("Satellite"):
# ax.scatter(insitu_survey_s2_l89_gt_indexed_pred_plume.ch4_kgh_mean, insitu_survey_s2_l89_gt_indexed_pred_plume.pred_ch4_kgh_mean, label=satame)
ax.errorbar(data.ch4_kgh_mean, data.pred_ch4_kgh_mean,
yerr=2*data.pred_ch4_kgh_sigma,fmt="o",label=satame,color=COLORS_SATELLITE[satame])
ax.legend()
ax.set_xlabel("Metered emission rate (t/h)")
ax.set_ylabel("Estimated emission rate (t/h)")
ticks = np.arange(0, 8100, 1000)
ax.set_xticks(ticks, (ticks/1000).astype(int))
ax.set_yticks(ticks, (ticks/1000).astype(int))
ax.plot([0,8000],[0,8000] ,c="black")
ax.set_xlim(0,8000)
ax.set_ylim(0,8000)
ax.grid()
plt.savefig("figures/quantified_controlled_releases.pdf")