diff --git a/python/examples/hrrr_hourly.ipynb b/python/examples/hrrr_hourly.ipynb
new file mode 100644
index 0000000..2e14a6b
--- /dev/null
+++ b/python/examples/hrrr_hourly.ipynb
@@ -0,0 +1,1936 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import fsspec\n",
+ "\n",
+ "fs_read = fsspec.filesystem('s3', anon=True, skip_instance_cache=True, use_ssl=False) # NOTE: SSL is disabled only as a temporary workaround for local certificate errors\n",
+ "fs_write = fsspec.filesystem('')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Read 49 HRRR files\n"
+ ]
+ }
+ ],
+ "source": [
+ "hrr_hourly_member_files = fs_read.glob('s3://noaa-hrrr-bdp-pds/hrrr.20230722/conus/hrrr.t18z.wrfsfcf*.grib2')\n",
+ "\n",
+ "files = sorted(['s3://'+f for f in hrr_hourly_member_files])\n",
+ "print(f'Read {len(files)} HRRR files')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re\n",
+ "import datetime\n",
+ "\n",
+ "def parse_model_run_datestamp_offset(key: str):\n",
+ " '''\n",
+ " Parse the valid forecast time key and the forecast hour offset from the key of a file in the HRRR S3 bucket, given the HRRR naming convention: \n",
+ " 's3://noaa-hrrr-bdp-pds/hrrr.20230722/conus/hrrr.t12z.wrfsfcf01.grib2' \n",
+ " where the model_date is 20230722, the model_hour is 12 and the offset is 1, this would result in a key of 20230722T13 (model run time plus offset hours)\n",
+ " '''\n",
+ " model_date, model_hour, offset = re.search(r'hrrr\\.(\\d{8})\\/conus\\/hrrr\\.t(\\d{2})z\\.wrfsfcf(\\d{2})', key).groups()\n",
+ " model_date = datetime.datetime.strptime(f'{model_date}T{model_hour}', '%Y%m%dT%H') + datetime.timedelta(hours=int(offset))\n",
+ " model_date_key = model_date.strftime('%Y%m%dT%H')\n",
+ " return model_date_key, int(offset)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import ujson\n",
+ "from gribberish.kerchunk import scan_gribberish\n",
+ "\n",
+ "so = {\"anon\": True, \"use_ssl\": False}\n",
+ "json_dir = 'hrrr_hourly/'\n",
+ "\n",
+ "def make_json_name(file_url, message_number): #create a unique name for each reference file\n",
+ " date, _ = parse_model_run_datestamp_offset(file_url)\n",
+ " name = file_url.split('/')[5].split('.')[0:3]\n",
+ " return f'{json_dir}{name[0]}_{date}_message{message_number}.json'\n",
+ "\n",
+ "def gen_json(file_url):\n",
+ " out = scan_gribberish(\n",
+ " file_url, \n",
+ " storage_options=so, \n",
+ " only_variables=['apcp', 'prate', 'cpofp', 'vis', 'tcdc', 'tmp', 'pres', 'ugrd', 'vgrd', 'dpt'], \n",
+ " skip=10,\n",
+ " filter_by_variable_attrs={\n",
+ " 'ugrd': {\n",
+ " 'statistical_process': '', \n",
+ " 'fixed_surface_value': '10',\n",
+ " }, \n",
+ " 'vgrd': {\n",
+ " 'statistical_process': '', \n",
+ " 'fixed_surface_value': '10',\n",
+ " }, \n",
+ " 'tmp': {\n",
+ " 'fixed_surface_type': 'ground or water surface',\n",
+ " }, \n",
+ " 'dpt': {\n",
+ " 'fixed_surface_type': 'specific height level above ground',\n",
+ " 'fixed_surface_value': '2'\n",
+ " },\n",
+ " 'tcdc': {\n",
+ " 'fixed_surface_value': 'entire atmosphere',\n",
+ " },\n",
+ " 'pres': {\n",
+ " 'fixed_surface_type': 'ground or water surface',\n",
+ " }\n",
+ " }\n",
+ " )\n",
+ " for i, message in enumerate(out):\n",
+ " out_file_name = make_json_name(file_url, i) # get name\n",
+ " with fs_write.open(out_file_name, \"w\") as f: \n",
+ " f.write(ujson.dumps(message)) # write to file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from dask.distributed import Client, progress\n",
+ "\n",
+ "client = Client(processes=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "87038a6acb304d06b21141e5ab81a955",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox()"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "futures = client.map(gen_json, files[1:], retries=1)\n",
+ "progress(futures)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "client.shutdown()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Found 479 reference files\n"
+ ]
+ }
+ ],
+ "source": [
+ "from kerchunk.combine import MultiZarrToZarr\n",
+ "\n",
+ "reference_jsons = sorted(fs_write.ls(json_dir)) #get list of file names\n",
+ "\n",
+ "print(f'Found {len(reference_jsons)} reference files')\n",
+ "\n",
+ "# combine individual references into single consolidated reference\n",
+ "mzz = MultiZarrToZarr(reference_jsons,\n",
+ " concat_dims = ['time'],\n",
+ " identical_dims=['x', 'y', 'latitude', 'longitude'])\n",
+ "\n",
+ "d = mzz.translate()\n",
+ "\n",
+ "with open(f'{json_dir}/hrrr_kerchunk.json', 'w') as f:\n",
+ " f.write(ujson.dumps(d))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
<xarray.Dataset>\n",
+ "Dimensions: (time: 48, y: 1059, x: 1799)\n",
+ "Coordinates:\n",
+ " latitude (y, x) float64 dask.array<chunksize=(1059, 1799), meta=np.ndarray>\n",
+ " longitude (y, x) float64 dask.array<chunksize=(1059, 1799), meta=np.ndarray>\n",
+ " * time (time) datetime64[s] 2023-07-22T19:00:00 ... 2023-07-24T18:00:00\n",
+ " * x (x) float64 -2.701e+06 -2.698e+06 ... 2.69e+06 2.693e+06\n",
+ " * y (y) float64 -1.581e+06 -1.578e+06 ... 1.59e+06 1.593e+06\n",
+ "Data variables:\n",
+ " apcp (time, y, x) float64 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>\n",
+ " cpofp (time, y, x) float64 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>\n",
+ " dpt (time, y, x) float64 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>\n",
+ " prate (time, y, x) float64 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>\n",
+ " pres (time, y, x) float64 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>\n",
+ " tmp (time, y, x) float64 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>\n",
+ " ugrd (time, y, x) float64 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>\n",
+ " vgrd (time, y, x) float64 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>\n",
+ " vis (time, y, x) float64 dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>\n",
+ "Attributes:\n",
+ " meta: Generated with gribberishpy
latitude
(y, x)
float64
dask.array<chunksize=(1059, 1799), meta=np.ndarray>
- long_name :
- latitude
- standard_name :
- latitude
- unit :
- degrees_north
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 14.54 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (1059, 1799) | \n",
+ " (1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 1 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
longitude
(y, x)
float64
dask.array<chunksize=(1059, 1799), meta=np.ndarray>
- long_name :
- longitude
- standard_name :
- longitude
- unit :
- degrees_east
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 14.54 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (1059, 1799) | \n",
+ " (1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 1 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
time
(time)
datetime64[s]
2023-07-22T19:00:00 ... 2023-07-...
- axis :
- T
- long_name :
- time
- standard_name :
- time
- unit :
- seconds since 1970-01-01 00:00:00
- _FillValue :
- 1970-01-01T00:00:00
array(['2023-07-22T19:00:00', '2023-07-22T20:00:00', '2023-07-22T21:00:00',\n",
+ " '2023-07-22T22:00:00', '2023-07-22T23:00:00', '2023-07-23T00:00:00',\n",
+ " '2023-07-23T01:00:00', '2023-07-23T02:00:00', '2023-07-23T03:00:00',\n",
+ " '2023-07-23T04:00:00', '2023-07-23T05:00:00', '2023-07-23T06:00:00',\n",
+ " '2023-07-23T07:00:00', '2023-07-23T08:00:00', '2023-07-23T09:00:00',\n",
+ " '2023-07-23T10:00:00', '2023-07-23T11:00:00', '2023-07-23T12:00:00',\n",
+ " '2023-07-23T13:00:00', '2023-07-23T14:00:00', '2023-07-23T15:00:00',\n",
+ " '2023-07-23T16:00:00', '2023-07-23T17:00:00', '2023-07-23T18:00:00',\n",
+ " '2023-07-23T19:00:00', '2023-07-23T20:00:00', '2023-07-23T21:00:00',\n",
+ " '2023-07-23T22:00:00', '2023-07-23T23:00:00', '2023-07-24T00:00:00',\n",
+ " '2023-07-24T01:00:00', '2023-07-24T02:00:00', '2023-07-24T03:00:00',\n",
+ " '2023-07-24T04:00:00', '2023-07-24T05:00:00', '2023-07-24T06:00:00',\n",
+ " '2023-07-24T07:00:00', '2023-07-24T08:00:00', '2023-07-24T09:00:00',\n",
+ " '2023-07-24T10:00:00', '2023-07-24T11:00:00', '2023-07-24T12:00:00',\n",
+ " '2023-07-24T13:00:00', '2023-07-24T14:00:00', '2023-07-24T15:00:00',\n",
+ " '2023-07-24T16:00:00', '2023-07-24T17:00:00', '2023-07-24T18:00:00'],\n",
+ " dtype='datetime64[s]')
x
(x)
float64
-2.701e+06 -2.698e+06 ... 2.693e+06
- axis :
- X
- long_name :
- x coordinate of projection
- standard_name :
- projection_x_coordinate
- unit :
- m
array([-2701000.130325, -2698000.130325, -2695000.130325, ..., 2686999.869675,\n",
+ " 2689999.869675, 2692999.869675])
y
(y)
float64
-1.581e+06 -1.578e+06 ... 1.593e+06
- axis :
- Y
- long_name :
- y coordinate of projection
- standard_name :
- projection_y_coordinate
- unit :
- m
array([-1580581.336877, -1577581.336877, -1574581.336877, ..., 1587418.663123,\n",
+ " 1590418.663123, 1593418.663123])
apcp
(time, y, x)
float64
dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>
- crs :
- +proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5
- fixed_surface_type :
- ground or water surface
- fixed_surface_value :
- 0
- forecast_date :
- 2023-07-22T18:00:00+00:00
- generating_process :
- forecast
- long_name :
- totalprecipitation
- proj_params :
- {'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
- reference_date :
- 2023-07-22T18:00:00+00:00
- standard_name :
- totalprecipitation
- statistical_process :
- accumulation
- time_interval_end :
- 2023-07-22T19:00:00+00:00
- unit :
- kgm-2
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 697.68 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (48, 1059, 1799) | \n",
+ " (1, 1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 48 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
cpofp
(time, y, x)
float64
dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>
- crs :
- +proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5
- fixed_surface_type :
- ground or water surface
- fixed_surface_value :
- 0
- forecast_date :
- 2023-07-22T19:00:00+00:00
- generating_process :
- forecast
- long_name :
- percentfrozenprecipitation
- proj_params :
- {'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
- reference_date :
- 2023-07-22T18:00:00+00:00
- standard_name :
- percentfrozenprecipitation
- statistical_process :
- time_interval_end :
- unit :
- %
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 697.68 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (48, 1059, 1799) | \n",
+ " (1, 1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 48 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
dpt
(time, y, x)
float64
dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>
- crs :
- +proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5
- fixed_surface_type :
- specific height level above ground
- fixed_surface_value :
- 2
- forecast_date :
- 2023-07-22T19:00:00+00:00
- generating_process :
- forecast
- long_name :
- dewpointtemperature
- proj_params :
- {'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
- reference_date :
- 2023-07-22T18:00:00+00:00
- standard_name :
- dewpointtemperature
- statistical_process :
- time_interval_end :
- unit :
- K
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 697.68 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (48, 1059, 1799) | \n",
+ " (1, 1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 48 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
prate
(time, y, x)
float64
dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>
- crs :
- +proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5
- fixed_surface_type :
- ground or water surface
- fixed_surface_value :
- 0
- forecast_date :
- 2023-07-22T19:00:00+00:00
- generating_process :
- forecast
- long_name :
- precipitationrate
- proj_params :
- {'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
- reference_date :
- 2023-07-22T18:00:00+00:00
- standard_name :
- precipitationrate
- statistical_process :
- time_interval_end :
- unit :
- kgm-2s-1
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 697.68 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (48, 1059, 1799) | \n",
+ " (1, 1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 48 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
pres
(time, y, x)
float64
dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>
- crs :
- +proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5
- fixed_surface_type :
- ground or water surface
- fixed_surface_value :
- 0
- forecast_date :
- 2023-07-22T19:00:00+00:00
- generating_process :
- forecast
- long_name :
- pressure
- proj_params :
- {'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
- reference_date :
- 2023-07-22T18:00:00+00:00
- standard_name :
- pressure
- statistical_process :
- time_interval_end :
- unit :
- pa
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 697.68 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (48, 1059, 1799) | \n",
+ " (1, 1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 48 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
tmp
(time, y, x)
float64
dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>
- crs :
- +proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5
- fixed_surface_type :
- ground or water surface
- fixed_surface_value :
- 0
- forecast_date :
- 2023-07-22T19:00:00+00:00
- generating_process :
- forecast
- long_name :
- temperature
- proj_params :
- {'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
- reference_date :
- 2023-07-22T18:00:00+00:00
- standard_name :
- temperature
- statistical_process :
- time_interval_end :
- unit :
- K
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 697.68 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (48, 1059, 1799) | \n",
+ " (1, 1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 48 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
ugrd
(time, y, x)
float64
dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>
- crs :
- +proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5
- fixed_surface_type :
- specific height level above ground
- fixed_surface_value :
- 10
- forecast_date :
- 2023-07-22T19:00:00+00:00
- generating_process :
- forecast
- long_name :
- ucomponentwindspeed
- proj_params :
- {'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
- reference_date :
- 2023-07-22T18:00:00+00:00
- standard_name :
- ucomponentwindspeed
- statistical_process :
- time_interval_end :
- unit :
- ms-1
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 697.68 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (48, 1059, 1799) | \n",
+ " (1, 1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 48 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
vgrd
(time, y, x)
float64
dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>
- crs :
- +proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5
- fixed_surface_type :
- specific height level above ground
- fixed_surface_value :
- 10
- forecast_date :
- 2023-07-22T19:00:00+00:00
- generating_process :
- forecast
- long_name :
- vcomponentwindspeed
- proj_params :
- {'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
- reference_date :
- 2023-07-22T18:00:00+00:00
- standard_name :
- vcomponentwindspeed
- statistical_process :
- time_interval_end :
- unit :
- ms-1
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 697.68 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (48, 1059, 1799) | \n",
+ " (1, 1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 48 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
vis
(time, y, x)
float64
dask.array<chunksize=(1, 1059, 1799), meta=np.ndarray>
- crs :
- +proj=lcc lon_0=262.5 lat_0=38.5 lat_1=38.5 lat_2=38.5
- fixed_surface_type :
- ground or water surface
- fixed_surface_value :
- 0
- forecast_date :
- 2023-07-22T19:00:00+00:00
- generating_process :
- forecast
- long_name :
- visibility
- proj_params :
- {'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
- reference_date :
- 2023-07-22T18:00:00+00:00
- standard_name :
- visibility
- statistical_process :
- time_interval_end :
- unit :
- m
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " Array | \n",
+ " Chunk | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes | \n",
+ " 697.68 MiB | \n",
+ " 14.54 MiB | \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape | \n",
+ " (48, 1059, 1799) | \n",
+ " (1, 1059, 1799) | \n",
+ " \n",
+ " \n",
+ " Dask graph | \n",
+ " 48 chunks in 2 graph layers | \n",
+ " \n",
+ " \n",
+ " Data type | \n",
+ " float64 numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
PandasIndex
PandasIndex(DatetimeIndex(['2023-07-22 19:00:00', '2023-07-22 20:00:00',\n",
+ " '2023-07-22 21:00:00', '2023-07-22 22:00:00',\n",
+ " '2023-07-22 23:00:00', '2023-07-23 00:00:00',\n",
+ " '2023-07-23 01:00:00', '2023-07-23 02:00:00',\n",
+ " '2023-07-23 03:00:00', '2023-07-23 04:00:00',\n",
+ " '2023-07-23 05:00:00', '2023-07-23 06:00:00',\n",
+ " '2023-07-23 07:00:00', '2023-07-23 08:00:00',\n",
+ " '2023-07-23 09:00:00', '2023-07-23 10:00:00',\n",
+ " '2023-07-23 11:00:00', '2023-07-23 12:00:00',\n",
+ " '2023-07-23 13:00:00', '2023-07-23 14:00:00',\n",
+ " '2023-07-23 15:00:00', '2023-07-23 16:00:00',\n",
+ " '2023-07-23 17:00:00', '2023-07-23 18:00:00',\n",
+ " '2023-07-23 19:00:00', '2023-07-23 20:00:00',\n",
+ " '2023-07-23 21:00:00', '2023-07-23 22:00:00',\n",
+ " '2023-07-23 23:00:00', '2023-07-24 00:00:00',\n",
+ " '2023-07-24 01:00:00', '2023-07-24 02:00:00',\n",
+ " '2023-07-24 03:00:00', '2023-07-24 04:00:00',\n",
+ " '2023-07-24 05:00:00', '2023-07-24 06:00:00',\n",
+ " '2023-07-24 07:00:00', '2023-07-24 08:00:00',\n",
+ " '2023-07-24 09:00:00', '2023-07-24 10:00:00',\n",
+ " '2023-07-24 11:00:00', '2023-07-24 12:00:00',\n",
+ " '2023-07-24 13:00:00', '2023-07-24 14:00:00',\n",
+ " '2023-07-24 15:00:00', '2023-07-24 16:00:00',\n",
+ " '2023-07-24 17:00:00', '2023-07-24 18:00:00'],\n",
+ " dtype='datetime64[ns]', name='time', freq=None))
PandasIndex
PandasIndex(Float64Index([-2701000.130325057, -2698000.130325057, -2695000.130325057,\n",
+ " -2692000.130325057, -2689000.130325057, -2686000.130325057,\n",
+ " -2683000.130325057, -2680000.130325057, -2677000.130325057,\n",
+ " -2674000.130325057,\n",
+ " ...\n",
+ " 2665999.869674943, 2668999.869674943, 2671999.869674943,\n",
+ " 2674999.869674943, 2677999.869674943, 2680999.869674943,\n",
+ " 2683999.869674943, 2686999.869674943, 2689999.869674943,\n",
+ " 2692999.869674943],\n",
+ " dtype='float64', name='x', length=1799))
PandasIndex
PandasIndex(Float64Index([-1580581.3368766531, -1577581.3368766531, -1574581.3368766531,\n",
+ " -1571581.3368766531, -1568581.3368766531, -1565581.3368766531,\n",
+ " -1562581.3368766531, -1559581.3368766531, -1556581.3368766531,\n",
+ " -1553581.3368766531,\n",
+ " ...\n",
+ " 1566418.6631233469, 1569418.6631233469, 1572418.6631233469,\n",
+ " 1575418.6631233469, 1578418.6631233469, 1581418.6631233469,\n",
+ " 1584418.6631233469, 1587418.6631233469, 1590418.6631233469,\n",
+ " 1593418.6631233469],\n",
+ " dtype='float64', name='y', length=1059))
- meta :
- Generated with gribberishpy
"
+ ],
+ "text/plain": [
+ "\n",
+ "Dimensions: (time: 48, y: 1059, x: 1799)\n",
+ "Coordinates:\n",
+ " latitude (y, x) float64 dask.array\n",
+ " longitude (y, x) float64 dask.array\n",
+ " * time (time) datetime64[s] 2023-07-22T19:00:00 ... 2023-07-24T18:00:00\n",
+ " * x (x) float64 -2.701e+06 -2.698e+06 ... 2.69e+06 2.693e+06\n",
+ " * y (y) float64 -1.581e+06 -1.578e+06 ... 1.59e+06 1.593e+06\n",
+ "Data variables:\n",
+ " apcp (time, y, x) float64 dask.array\n",
+ " cpofp (time, y, x) float64 dask.array\n",
+ " dpt (time, y, x) float64 dask.array\n",
+ " prate (time, y, x) float64 dask.array\n",
+ " pres (time, y, x) float64 dask.array\n",
+ " tmp (time, y, x) float64 dask.array\n",
+ " ugrd (time, y, x) float64 dask.array\n",
+ " vgrd (time, y, x) float64 dask.array\n",
+ " vis (time, y, x) float64 dask.array\n",
+ "Attributes:\n",
+ " meta: Generated with gribberishpy"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import xarray as xr\n",
+ "\n",
+ "# open dataset as zarr object using fsspec reference file system and xarray\n",
+ "fs = fsspec.filesystem(\"reference\", fo=f'./hrrr_hourly/hrrr_kerchunk.json', remote_protocol='s3', remote_options={'anon':True, 'use_ssl': False})\n",
+ "m = fs.get_mapper(\"\")\n",
+ "ds = xr.open_dataset(m, engine=\"zarr\", backend_kwargs=dict(consolidated=False), chunks={'time': 1})\n",
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pyproj\n",
+ "to_xy = pyproj.Transformer.from_crs('epsg:4326', ds.prate.crs, always_xy=True).transform\n",
+ "lat, lng = 41.42717976016072, -71.462122760827\n",
+ "x_sel, y_sel = to_xy(lng, lat)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "