Stats dataset toareflectances
Install marss2l package
pip install marss2l
import logging
from marss2l.utils import setup_stream_logger, pathjoin
from huggingface_hub import hf_file_system
from marss2l.huggingface import REPO_ID
fs = hf_file_system.HfFileSystem()
logger = logging.getLogger(__name__)
setup_stream_logger(logger, level=logging.DEBUG)
from marss2l import loaders
import os
os.makedirs("figures", exist_ok=True)
Read validated images
csv_path = f"datasets/{REPO_ID}/validated_images_all.csv"
dataframe_data_traintest = loaders.read_csv(csv_path,
add_columns_for_analysis=True,
add_case_study=True,
split="all",
fs=fs)
dataframe_data_traintest.shape
dataframe_data_traintest.columns
dataframe_test = dataframe_data_traintest[dataframe_data_traintest.split_name=="test_2023"].copy()
dataframe_test.shape
ids_test = set(dataframe_test.id_loc_image)
len(ids_test)
Load MARS-S2L stats
import pandas as pd
import uuid
import numpy as np
stats_path = f"datasets/{REPO_ID}/data/stats_dataset.csv"
with fs.open(stats_path) as fh:
dataframe_stats_original = pd.read_csv(fh)
dataframe_stats_original["id_loc_image"] = dataframe_stats_original["id_loc_image"].apply(uuid.UUID)
dataframe_stats_original["isplume"] = dataframe_stats_original.isplume.astype(bool)
dataframe_stats_original.shape
# Fill noplume stats with normal stats
from tqdm import tqdm
for cnoplume in tqdm(dataframe_stats_original.columns):
if "_noplume" in cnoplume:
cnormal = cnoplume.replace("_noplume","")
dataframe_stats_original[cnoplume] = dataframe_stats_original.apply(lambda row: getattr(row,cnoplume) if row.isplume else getattr(row,cnormal),axis=1)
dataframe_stats = pd.merge(dataframe_stats_original,
dataframe_data_traintest[["id_loc_image","offshore",'interval_ch4_fluxrate',"ch4_fluxrate", "ch4_fluxrate_std",
'interval_ch4_fluxrate_str','year', 'year_month',"split_name", "country", "case_study",
'satellite_constellation']],
on="id_loc_image")
dataframe_stats["offshorestr"] = dataframe_stats["offshore"].apply(lambda x: "YES" if x else "NO")
dataframe_stats["isplumestr"] = dataframe_stats.isplume.apply(lambda x: "YES" if x >.1 else "NO")
# Sanity check: isplume only has npixelsplume > 0
dataframe_stats[dataframe_stats.isplume & (dataframe_stats.npixelsplume == 0)]
dataframe_stats_test = dataframe_stats[dataframe_stats.id_loc_image.isin(ids_test)].copy()
dataframe_stats_test.shape
dataframe_stats_test_isplume = dataframe_stats_test[dataframe_stats_test.isplume.astype(bool)].copy()
dataframe_stats_test_isplume["snr"] = dataframe_stats_test_isplume["ch4_mean_plume"] / dataframe_stats_test_isplume["ch4_mean_noplume"]
dataframe_stats_test_isplume.shape
dataframe_stats["MBMP_mean"].min(), dataframe_stats["MBMP_mean"].max()
Load CloudSEN12 stats
csv_path_cloudsen12 = f"datasets/{REPO_ID}/cloudsen12_clear_images.csv"
with fs.open(csv_path_cloudsen12, "r") as fh:
cloudsen12 = pd.read_csv(fh)
stats_path_cloudsen12 = f"datasets/{REPO_ID}/cloudsen12_data/cloudsen12_stats_dataset.csv"
with fs.open(stats_path) as fh:
cloudsen12_stats = pd.read_csv(fh)
# cloudsen12["id_loc_image"] = cloudsen12["id_loc_image"].apply(uuid.UUID)
cloudsen12["isplume"] = cloudsen12.isplume.astype(bool)
cloudsen12.shape, cloudsen12_stats.shape
cloudsen12_stats = pd.merge(cloudsen12_stats,
cloudsen12[["id_loc_image","ch4_fluxrate", "ch4_fluxrate_std", "country"]],
on="id_loc_image")
cloudsen12_stats["isplumestr"] = cloudsen12_stats.isplume.apply(lambda x: "YES" if x >.1 else "NO")
cloudsen12_stats["ch4_fluxrate"] = 0
cloudsen12_stats["Q"] = 0
cloudsen12_stats["ch4_fluxrate_std"] = 0
cloudsen12_stats["case_study"] = "CloudSEN12"
cloudsen12_stats["MBMP_std_noplume"] = cloudsen12_stats["MBMP_std"]
cloudsen12_stats["B12_mean_noplume"] = cloudsen12_stats["B12_mean"]
cloudsen12_stats["ch4_std_noplume"] = cloudsen12_stats["ch4_std"]
cloudsen12_stats["ch4_mean_noplume"] = cloudsen12_stats["ch4_mean"]
print(list(cloudsen12_stats.columns))
dataframe_stats_test_withcloudsen12 = pd.concat([dataframe_stats_test, cloudsen12_stats], axis=0,
ignore_index=True)
dataframe_stats_test_withcloudsen12.shape
Stats by case study
import matplotlib.pyplot as plt
import seaborn as sns
from marss2l.plot import C0, C1, C2
import numpy as np
fig, ax = plt.subplots(1,3, figsize=(12,6),
sharey=True,tight_layout=True)
case_studies_plot = [c for c in loaders.ORDER_CASE_STUDIES if c != "Rest"]
sns.boxplot(data=dataframe_stats_test,
x="MBMP_std_noplume", hue="isplumestr", y="case_study", ax=ax[0],legend=False,
palette=[C0, C2], hue_order= ["YES", "NO"],
order=case_studies_plot)
# ax[0].set_xlim(0,0.027)
ax[0].set_xlim(0,0.08)
sns.boxplot(data=dataframe_stats_test,
x="B12_mean_noplume", hue="isplumestr", y="case_study", ax=ax[1],
palette=[C0, C2], hue_order= ["YES", "NO"],
order=case_studies_plot)
sns.move_legend(ax[1], "lower right" ,title="Image with emission")
ax[1].set_xlabel(r"ToA reflectance SWIR2 outside plume")
ax[0].set_xlabel(r"$\sigma$ MBMP outside plume")
ax[0].set_ylabel("")
ax[1].set_xlim(0,0.7)
sns.boxplot(data=dataframe_stats_test,
x="Q",y="case_study", ax=ax[2],
color=C0,
order=case_studies_plot)
for axs in ax:
axs.xaxis.grid(True)
ax[2].set_xticks(range(0, 50_000, 10_000), [f"{d/1000:.0f}" for d in range(0, 50_000, 10_000)])
ax[2].set_xlabel("Flux rate (t/h)")
ax[2].set_xlim(0,50_000)
fig.savefig("figures/stats_toa_mbmp.pdf")
Stats by case study with CloudSEN12
dataframe_stats_test_withcloudsen12.case_study.value_counts()
fig, ax = plt.subplots(1,3, figsize=(12,6),
sharey=True,tight_layout=True)
case_studies_plot = [c for c in loaders.ORDER_CASE_STUDIES if c != "Rest"] + ["CloudSEN12"]
sns.boxplot(data=dataframe_stats_test_withcloudsen12,
x="MBMP_std_noplume", hue="isplumestr", y="case_study", ax=ax[0],legend=False,
palette=[C0, C2], hue_order= ["YES", "NO"],
order=case_studies_plot)
# ax[0].set_xlim(0,0.027)
ax[0].set_xlim(0,0.08)
sns.boxplot(data=dataframe_stats_test_withcloudsen12,
x="B12_mean_noplume", hue="isplumestr", y="case_study", ax=ax[1],
palette=[C0, C2], hue_order= ["YES", "NO"],
order=case_studies_plot)
sns.move_legend(ax[1], "lower right" ,title="Image with emission")
ax[1].set_xlabel(r"ToA reflectance SWIR2 outside plume")
ax[0].set_xlabel(r"$\sigma$ MBMP outside plume")
ax[0].set_ylabel("")
ax[1].set_xlim(0,0.7)
sns.boxplot(data=dataframe_stats_test_withcloudsen12,
x="Q",y="case_study", ax=ax[2],
color=C0,
order=case_studies_plot)
for axs in ax:
axs.xaxis.grid(True)
ax[2].set_xticks(range(0, 50_000, 10_000), [f"{d/1000:.0f}" for d in range(0, 50_000, 10_000)])
ax[2].set_xlabel("Flux rate (t/h)")
ax[2].set_xlim(0,50_000)
fig.savefig("figures/stats_toa_mbmp_with_cloudsen12.pdf")
Stats $\Delta$XCH$_4$
fig, ax = plt.subplots(1,2, figsize=(12,6),
sharey=True,tight_layout=True)
case_studies_plot = [c for c in loaders.ORDER_CASE_STUDIES if c != "Rest"] + ["CloudSEN12"]
sns.boxplot(data=dataframe_stats_test_withcloudsen12,
x="ch4_mean_noplume", hue="isplumestr", y="case_study", ax=ax[0],legend=False,
palette=[C0, C2], hue_order= ["YES", "NO"],
order=case_studies_plot)
# ax[0].set_xlim(0,0.027)
ax[0].set_xlim(0,3000)
ticks = np.arange(0,1000,100).tolist() + np.arange(1000,3000,200).tolist()
ax[0].set_xticks(ticks)
ax[0].set_xticklabels(ticks, rotation=45)
ax[0].set_xlabel(r"$\Delta$XCH$_4$ outside plume (ppb)")
ax[0].set_ylabel("")
sns.boxplot(data=dataframe_stats_test_withcloudsen12,
x="Q",y="case_study", ax=ax[1],
color=C0,
order=case_studies_plot)
for axs in ax:
axs.xaxis.grid(True)
ax[1].set_xticks(range(0, 50_000, 10_000), [f"{d/1000:.0f}" for d in range(0, 50_000, 10_000)])
ax[1].set_xlabel("Flux rate (t/h)")
ax[1].set_xlim(0,50_000)
# fig.savefig("figures/stats_toa_dxch4_with_cloudsen12.pdf")
# fig, ax = plt.subplots(1,3, figsize=(12,6),
# sharey=True,tight_layout=True)
fig, ax = plt.subplots(1, 3, figsize=(12, 5), sharey=True, tight_layout=True,
gridspec_kw={'width_ratios': [2, 1, 1]})
case_studies_plot = [c for c in loaders.ORDER_CASE_STUDIES if c != "Rest"] + ["CloudSEN12"]
sns.boxplot(data=dataframe_stats_test_withcloudsen12,
x="ch4_mean_noplume", hue="isplumestr", y="case_study", ax=ax[0],legend=False,
palette=[C0, C2], hue_order= ["YES", "NO"],
order=case_studies_plot)
# ax[0].set_xlim(0,0.027)
ax[0].set_xlim(0,3000)
ticks = np.arange(0,1000,100).tolist() + np.arange(1000,3000,200).tolist()
ax[0].set_xticks(ticks)
ax[0].set_xticklabels(ticks, rotation=45)
ax[0].set_xlabel(r"$\Delta$XCH$_4$ outside plume (ppb)")
ax[0].set_ylabel("")
sns.boxplot(data=dataframe_stats_test_withcloudsen12,
x="B12_mean_noplume", hue="isplumestr", y="case_study", ax=ax[1],
palette=[C0, C2], hue_order= ["YES", "NO"],
order=case_studies_plot)
sns.move_legend(ax[1], "lower right" ,title="Image with emission")
ax[1].set_xlabel(r"ToA reflectance SWIR2 outside plume")
ax[1].set_xlim(0,0.7)
sns.boxplot(data=dataframe_stats_test_withcloudsen12,
x="Q",y="case_study", ax=ax[2],
color=C0,
order=case_studies_plot)
for axs in ax:
axs.xaxis.grid(True)
ax[2].set_xticks(range(0, 50_000, 10_000), [f"{d/1000:.0f}" for d in range(0, 50_000, 10_000)])
ax[2].set_xlabel("Flux rate (t/h)")
ax[2].set_xlim(0,50_000)
SNR by case study for plume images
fig, ax = plt.subplots(1,2, figsize=(12,6),
sharey=True,tight_layout=True)
case_studies_plot = [c for c in loaders.ORDER_CASE_STUDIES if c != "Rest"] + ["CloudSEN12"]
sns.boxplot(data=dataframe_stats_test_isplume,
x="snr",y="case_study", ax=ax[0],legend=False,
color=C0,
order=case_studies_plot)
ax[0].set_xlim(0,10)
# ax[0].set_xlim(0,3000)
# ticks = np.arange(0,1000,100).tolist() + np.arange(1000,3000,200).tolist()
ticks = np.arange(0,10,.5)
ax[0].set_xticks(ticks)
ax[0].set_xticklabels(ticks, rotation=45)
ax[0].set_xlabel(r"SNR $\Delta$XCH$_4$")
ax[0].set_ylabel("")
sns.boxplot(data=dataframe_stats_test_isplume,
x="Q",y="case_study", ax=ax[1],
color=C0,
order=case_studies_plot)
for axs in ax:
axs.xaxis.grid(True)
ax[1].set_xticks(range(0, 50_000, 10_000), [f"{d/1000:.0f}" for d in range(0, 50_000, 10_000)])
ax[1].set_xlabel("Flux rate (t/h)")
ax[1].set_xlim(0,50_000)
Plumes detected with low ToA reflectance
weird_onshore_plumes = dataframe_stats_test_isplume[~dataframe_stats_test_isplume.offshore & \
(dataframe_stats_test_isplume.B12_mean_noplume < 0.2)].copy()
weird_onshore_plumes[["location_name", "tile", "id_loc_image", "Q", "B12_mean_noplume"]].sort_values("Q", ascending=False)
Stats size of the plume
dataframe_stats_isplume = dataframe_stats[dataframe_stats.isplume].copy()
# & (dataframe_stats.npixelsplume == 0)]
dataframe_stats_isplume.shape
ax = sns.histplot(data=dataframe_stats_isplume, x="npixelsplume",
common_norm=False)
ax.set_xlabel("# pixels plume (10m resolution)")
import numpy as np
ax = sns.histplot(data=dataframe_stats_isplume[dataframe_stats_isplume.npixelsplume < 300],
x="npixelsplume",
bins = np.arange(0,301,50),
# hue_order=[False, True],
common_norm=False)
ax.set_xlabel("# pixels plume (10m resolution)")