Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ESDC revisions #29

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ESDC/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ inputs-preprocess/FLUXCOM/fluxcom-data-cube-8d-0.25deg.py # Resample to 0.25 deg
# GOSIF: Preprocessing
inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py # Convert .tif to .zarr
inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py # Concatenate .zarr files
inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py # Resample to 0.25 degrees
inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py # Add initial metadata
inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py # Resample to 0.25 degrees

# GOME-2 JJ Method: Preprocessing
inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py # Concatenate .nc files
Expand Down
4 changes: 3 additions & 1 deletion ESDC/inputs-collect/download-GOME2-SIF.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import requests
import numpy as np
import os

URL = "http://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/ECOCLIM/Downscaled-GOME2-SIF/v2.0/"
# pathOut = "/net/projects/deep_esdl/data/GOME2-SIF/data/"
Expand All @@ -18,4 +19,5 @@
file_to_download = f"GOME_{rm}_dcSIF_005deg_8day_{year}.nc"
print(f"Downloading {file_to_download}")
response = requests.get(URL + file_to_download)
open(pathOut + file_to_download, "wb").write(response.content)
file_path = os.path.join(pathOut, file_to_download)
open(file_path, "wb").write(response.content)
8 changes: 5 additions & 3 deletions ESDC/inputs-collect/download-GOSIF.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import requests
from urllib.request import Request, urlopen
import re
import os
from os.path import exists

URL = "http://data.globalecology.unh.edu/data/GOSIF_v2/8day/"
Expand All @@ -24,9 +25,10 @@
link = link.get('href')
if link.endswith(".tif.gz"):
file_to_download = link.split("/")[-1]
if not exists(pathOut + file_to_download):
file_path = os.path.join(pathOut, file_to_download)
if not exists(file_path):
print(f"Downloading {file_to_download}")
response = requests.get(URL + file_to_download)
open(pathOut + file_to_download, "wb").write(response.content)
open(file_path, "wb").write(response.content)
else:
pass
pass
5 changes: 2 additions & 3 deletions ESDC/inputs-collect/extract-gz-gosif.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import gzip
import shutil
import glob
import os

from tqdm import tqdm

pathOut = "~/data/SIF/GOSIF/source"
pathOut = os.path.expanduser(pathOut)

files = glob.glob(f"{pathOut}/*")
files.sort()

for file in tqdm(files):
with gzip.open(file, 'rb') as f_in:
with open(file.replace(".gz",""), 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
shutil.copyfileobj(f_in, f_out)
44 changes: 24 additions & 20 deletions ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,53 @@
from xcube.core.store import find_data_store_extensions
from xcube.core.store import get_data_store_params_schema
from xcube.core.store import new_data_store
import os

import shapely.geometry
from IPython.display import JSON
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import xarray as xr
from tqdm import tqdm
from xcube.core.store import new_data_store

pathOut = "~/data/CCI/aerosol/preprocess"
pathOut = os.path.expanduser(pathOut)

print("Reading")
store = new_data_store('cciodp')

dataset = store.open_data(
'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1',
'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1',
variable_names=['AOD550_mean'],
time_range=['2002-05-20','2012-04-08']
time_range=['2002-05-20', '2012-04-08']
)

dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]")
dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"),
np.timedelta64(8, "D")).astype("datetime64[ns]")

last_year = 2012
first_year = 2002

years = np.arange(first_year,last_year + 1)
years = np.arange(first_year, last_year + 1)


def resample_weekly(ds,year):
def resample_weekly(ds, year):
keep_attrs = ds.time.attrs
ds = ds.sel(time=slice(f"{year}-01-01",f"{year}-12-31")).resample(time="8D").mean()
ds['time'] = ds.time + np.timedelta64(4,"D")
ds = ds.sel(time=slice(f"{year}-01-01", f"{year}-12-31")).resample(
time="8D").mean()
ds['time'] = ds.time + np.timedelta64(4, "D")
ds.time.attrs = keep_attrs
if year==2002:
if year == 2002:
ds = ds.interp(coords=dict(time=dates_2002))
return ds


print("Resampling in time")
dataset_8d = [resample_weekly(dataset,year) for year in tqdm(years)]
dataset_8d = xr.concat(dataset_8d,dim="time")
dataset_8d = [resample_weekly(dataset, year) for year in tqdm(years)]
dataset_8d = xr.concat(dataset_8d, dim="time")

new_lats = np.load("lat.npy")
new_lons = np.load("lon.npy")

print("Resampling in space")
dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest")
dataset_8d = dataset_8d.chunk(dict(time=512,lat=128,lon=128))
dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons),
method="nearest")
dataset_8d = dataset_8d.chunk(dict(time=512, lat=128, lon=128))

print("Saving")
dataset_8d.to_zarr("~/data/cci-aod550-8d-0.083deg-512x128x128.zarr")
dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.083deg-512x128x128.zarr")
45 changes: 23 additions & 22 deletions ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
from xcube.core.store import find_data_store_extensions
from xcube.core.store import get_data_store_params_schema
from xcube.core.store import new_data_store
import os

import shapely.geometry
from IPython.display import JSON
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import xarray as xr
from tqdm import tqdm
from xcube.core.store import new_data_store

pathOut = "~/data/CCI/aerosol/preprocess"
pathOut = os.path.expanduser(pathOut)
Expand All @@ -19,37 +15,42 @@
store = new_data_store('cciodp')

dataset = store.open_data(
'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1',
'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1',
variable_names=['AOD550_mean'],
time_range=['2002-05-20','2012-04-08']
time_range=['2002-05-20', '2012-04-08']
)

dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]")
dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"),
np.timedelta64(8, "D")).astype("datetime64[ns]")

last_year = 2012
first_year = 2002

years = np.arange(first_year,last_year + 1)
years = np.arange(first_year, last_year + 1)


def resample_weekly(ds,year):
def resample_weekly(ds, year):
keep_attrs = ds.time.attrs
ds = ds.sel(time=slice(f"{year}-01-01",f"{year}-12-31")).resample(time="8D").mean()
ds['time'] = ds.time + np.timedelta64(4,"D")
ds = ds.sel(time=slice(f"{year}-01-01", f"{year}-12-31")).resample(
time="8D").mean()
ds['time'] = ds.time + np.timedelta64(4, "D")
ds.time.attrs = keep_attrs
if year==2002:
if year == 2002:
ds = ds.interp(coords=dict(time=dates_2002))
return ds


print("Resampling in time")
dataset_8d = [resample_weekly(dataset,year) for year in tqdm(years)]
dataset_8d = xr.concat(dataset_8d,dim="time")
dataset_8d = [resample_weekly(dataset, year) for year in tqdm(years)]
dataset_8d = xr.concat(dataset_8d, dim="time")

new_lats = np.arange(-89.875,90,0.25)
new_lons = np.arange(-179.875,180,0.25)
new_lats = np.arange(-89.875, 90, 0.25)
new_lons = np.arange(-179.875, 180, 0.25)

print("Resampling in space")
dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest")
dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128))
dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons),
method="nearest")
dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128))

print("Saving")
dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.25deg-256x128x128.zarr")
dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.25deg-256x128x128.zarr")
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
from tqdm import tqdm
import os
from datetime import datetime
import yaml
import rioxarray

import xarray as xr
import numpy as np
import yaml

pathIn = "~/data/CCI/aerosol/preprocess"
pathIn = os.path.expanduser(pathIn)

pathOut = "~/data/CCI/aerosol/output"
pathOut = os.path.expanduser(pathOut)

if not os.path.exists(pathOut):
os.makedirs(pathOut)

with open("cci-aod550-metadata-0.0833deg.yaml", "r") as stream:
with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.yaml",
"r") as stream:
try:
metadata = yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc)

datacube = xr.open_zarr("/home/davemlz/data/cci-aod550-8d-0.083deg-512x128x128.zarr")
datacube = xr.open_zarr(f"{pathIn}/cci-aod550-8d-0.083deg-512x128x128.zarr")

datacube = datacube.rio.write_crs(
"epsg:4326", grid_mapping_name="crs"
).reset_coords()
).reset_coords()
del datacube.crs.attrs["spatial_ref"]

datacube.attrs = metadata["global"]
Expand Down Expand Up @@ -44,4 +52,4 @@
sorted({**datacube.attrs, **additional_attrs}.items())
)

datacube.to_zarr("/home/davemlz/data/metadata/cci-aod550-8d-0.083deg-512x128x128.zarr")
datacube.to_zarr(f"{pathOut}/cci-aod550-8d-0.083deg-512x128x128.zarr")
13 changes: 6 additions & 7 deletions ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from tqdm import tqdm
import os
from datetime import datetime
import yaml
import rioxarray
import xarray as xr
import numpy as np

import xarray as xr
import yaml

pathIn = "~/data/CCI/aerosol/preprocess"
pathIn = os.path.expanduser(pathIn)
Expand All @@ -15,7 +13,8 @@
if not os.path.exists(pathOut):
os.makedirs(pathOut)

with open("cci-aod550-metadata.yaml", "r") as stream:
with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata.yaml",
"r") as stream:
try:
metadata = yaml.safe_load(stream)
except yaml.YAMLError as exc:
Expand All @@ -25,7 +24,7 @@

datacube = datacube.rio.write_crs(
"epsg:4326", grid_mapping_name="crs"
).reset_coords()
).reset_coords()
del datacube.crs.attrs["spatial_ref"]

datacube.attrs = metadata["global"]
Expand Down
47 changes: 27 additions & 20 deletions ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,49 @@
from xcube.core.store import find_data_store_extensions
from xcube.core.store import get_data_store_params_schema
from xcube.core.store import new_data_store
import os

import shapely.geometry
from IPython.display import JSON
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import xarray as xr
from xcube.core.store import new_data_store

pathOut = "~/data/CCI/cloud/preprocess"
pathOut = os.path.expanduser(pathOut)

if not os.path.exists(pathOut):
os.makedirs(pathOut)

print("Reading")
store = new_data_store('cciodp')

dataset = store.open_data(
'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1',
variable_names=['cot','cth','ctt'],
time_range=["2000-02-01","2014-12-31"]
'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1',
variable_names=['cot', 'cth', 'ctt'],
time_range=["2000-02-01", "2014-12-31"]
)

dataset = dataset.drop([x for x in list(dataset.variables) if x not in ['time','lat','lon','cot','cth','ctt']])
dataset = dataset.drop([x for x in list(dataset.variables) if
x not in ['time', 'lat', 'lon', 'cot', 'cth', 'ctt']])

dataset = dataset.chunk(dict(time=-1, lat=64, lon=64))

dataset = dataset.chunk(dict(time=-1,lat=64,lon=64))

def get_dates_8d(year):
return np.arange(np.datetime64(f"{year}-01-05"), np.datetime64(f"{year+1}-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]")
return np.arange(np.datetime64(f"{year}-01-05"),
np.datetime64(f"{year + 1}-01-01"),
np.timedelta64(8, "D")).astype("datetime64[ns]")


dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000,2015)])
dates = dates[(dates >= np.datetime64("2000-02-15")) & (dates <= np.datetime64("2014-12-16"))]
dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000, 2015)])
dates = dates[(dates >= np.datetime64("2000-02-15")) & (
dates <= np.datetime64("2014-12-16"))]

print("Resampling in time")
dataset_8d = dataset.interp(coords=dict(time=dates),method="nearest")
dataset_8d = dataset.interp(coords=dict(time=dates), method="nearest")

new_lats = np.load("lat.npy")
new_lons = np.load("lon.npy")

print("Resampling in space")
dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest")
dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128))
dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons),
method="nearest")
dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128))

print("Saving")
dataset_8d.to_zarr("/net/scratch/dmontero/CCI/cci-cloud-8d-0.083deg-256x128x128.zarr")
dataset_8d.to_zarr(f"{pathOut}/cci-cloud-8d-0.083deg-256x128x128.zarr")
Loading