Because the HRRR Zarr data is stored using Zarr groups, xarray isn't always the most convenient way to access it. Here, we look at how to use the zarr API to load a whole group, which lets us access multiple variables easily and efficiently.
%%time
import s3fs
import zarr
url = "s3://hrrrzarr/sfc/20210601/20210601_00z_anl.zarr"
fs = s3fs.S3FileSystem(anon=True)  # the bucket allows anonymous access
store = zarr.open(s3fs.S3Map(url, s3=fs))  # open the whole group at once
CPU times: user 24.1 ms, sys: 3.03 ms, total: 27.1 ms Wall time: 363 ms
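Since store is a Zarr group, we can explore its hierarchy before deciding what to read. A quick sketch (the exact listing depends on the archive's contents; each top-level entry, like 2m_above_ground, is a subgroup holding that level's variables):
list(store)                     # top-level subgroups, one per level or layer
list(store["2m_above_ground"])  # variables available at that level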
%%time
thousand_mb_tmp = store["1000mb/TMP/1000mb/TMP"]
surface_tmp = store["surface/TMP/surface/TMP"]
hcdc = store["high_cloud_layer/HCDC/high_cloud_layer/HCDC"]
rh = store["2m_above_ground/RH/2m_above_ground/RH"]
spfh = store["2m_above_ground/SPFH/2m_above_ground/SPFH"]
dpt = store["2m_above_ground/DPT/2m_above_ground/DPT"]
pot = store["2m_above_ground/POT/2m_above_ground/POT"]
tmp = store["2m_above_ground/TMP/2m_above_ground/TMP"]
CPU times: user 94.8 ms, sys: 7.29 ms, total: 102 ms Wall time: 1.18 s
As you can see, it takes about 1.5 seconds total to open the store and all 8 variables, roughly 200 ms per variable. This is pretty fast, but the data hasn't actually been downloaded or loaded into memory yet, so there may be about another second added to any calculation that actually accesses the data.
%%time
thousand_mb_tmp[...] # load into memory
CPU times: user 283 ms, sys: 18.8 ms, total: 302 ms Wall time: 767 ms
array([[290.5, 290.5, 290.5, ..., 299.5, 299.5, 299.5],
       [290.5, 290.5, 290.5, ..., 299.5, 299.5, 299.5],
       [290.5, 290.5, 290.5, ..., 299.5, 299.5, 299.5],
       ...,
       [286.5, 286.5, 286.5, ..., 286. , 286. , 285.8],
       [286.5, 286.5, 286.5, ..., 285.8, 285.8, 285.5],
       [286.5, 286.5, 286.5, ..., 285.5, 285.5, 285.5]], dtype=float16)
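Note that zarr only fetches the chunks a selection touches, so reading a small slice is much cheaper than materializing the whole array. A quick sketch; the slice bounds here are arbitrary:
thousand_mb_tmp[:100, :100].mean()  # fetches only the chunk(s) covering this corner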
If we load the chunk index, we can plot the variables by lat/lon.
import xarray as xr
chunk_index = xr.open_zarr(s3fs.S3Map("s3://hrrrzarr/grid/HRRR_chunk_index.zarr", s3=fs))
chunk_index
<xarray.Dataset>
Dimensions:     (x: 1799, y: 1059)
Coordinates:
  * x           (x) float64 -2.701e+06 -2.698e+06 ... 2.697e+06 2.7e+06
  * y           (y) float64 -1.581e+06 -1.578e+06 ... 1.584e+06 1.587e+06
Data variables:
    chunk_id    (x, y) object dask.array<chunksize=(450, 265), meta=np.ndarray>
    chunk_x     (x) int32 dask.array<chunksize=(1799,), meta=np.ndarray>
    chunk_y     (y) int32 dask.array<chunksize=(1059,), meta=np.ndarray>
    in_chunk_x  (x) int32 dask.array<chunksize=(1799,), meta=np.ndarray>
    in_chunk_y  (y) int32 dask.array<chunksize=(1059,), meta=np.ndarray>
    index_x     (x) int32 dask.array<chunksize=(1799,), meta=np.ndarray>
    index_y     (y) int32 dask.array<chunksize=(1059,), meta=np.ndarray>
    latitude    (y, x) float64 dask.array<chunksize=(133, 450), meta=np.ndarray>
    longitude   (y, x) float64 dask.array<chunksize=(133, 450), meta=np.ndarray>
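Because the chunk index carries full 2D latitude/longitude arrays, we can also do quick regional selections with a boolean mask. A sketch, assuming longitudes are stored in degrees east with negative values west of Greenwich; the bounding box is a rough one for Utah:
import numpy as np
lat = chunk_index.latitude.values
lon = chunk_index.longitude.values
utah = (lat > 37) & (lat < 42) & (lon > -114) & (lon < -109)
float(thousand_mb_tmp[...][utah].mean())  # mean 1000 mb TMP over the box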
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
ax = plt.axes(projection=ccrs.PlateCarree())
# the chunk index's 2D latitude/longitude arrays match the data grid's shape
ax.contourf(chunk_index.longitude, chunk_index.latitude, thousand_mb_tmp)
ax.coastlines()
plt.show()
plt.close()
ax = plt.axes(projection=ccrs.PlateCarree())
ax.contourf(chunk_index.longitude, chunk_index.latitude, rh)
ax.coastlines()
plt.show()
plt.close()
We can also access data at a single grid point. The chunk index's x and y coordinates are in the HRRR's native Lambert Conformal projection, so we first transform our longitude/latitude into that coordinate system.
projection = ccrs.LambertConformal(central_longitude=-97.5,
                                   central_latitude=38.5,
                                   standard_parallels=[38.5])

def get_nearest_point(projection, chunk_index, longitude, latitude):
    # transform lon/lat into the HRRR's Lambert Conformal x/y coordinates
    x, y = projection.transform_point(longitude, latitude, ccrs.PlateCarree())
    return chunk_index.sel(x=x, y=y, method="nearest")
nearest_point = get_nearest_point(projection, chunk_index, -111.8910, 40.7608)
first_index = int(nearest_point.index_y.values)   # arrays are indexed (y, x)
second_index = int(nearest_point.index_x.values)
pot[first_index, second_index]
313.5
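Wrapping those steps in a helper makes point lookups one-liners. This value_at_point function is just a convenience sketch, not part of the archive or any library:
def value_at_point(store, path, longitude, latitude):
    # reuse get_nearest_point to convert lon/lat into array indices
    point = get_nearest_point(projection, chunk_index, longitude, latitude)
    return store[path][int(point.index_y.values), int(point.index_x.values)]

value_at_point(store, "2m_above_ground/DPT/2m_above_ground/DPT", -111.8910, 40.7608)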
This makes it easy to reproduce the forecast plot from the other example, too:
url = "s3://hrrrzarr/sfc/20210108/20210108_00z_fcst.zarr"
store = zarr.open(s3fs.S3Map(url, s3=fs))
tmp_forecast = store["surface/TMP/surface/TMP"][:, first_index, second_index]  # time is the first axis in forecast files
tmp_forecast
array([273.8, 272.2, 271.2, 268.8, 267. , 267. , 267. , 267.2, 267.5,
       267. , 267.2, 266.5, 265.8, 265.2, 264.8, 267.5, 274. , 278.5,
       282. , 284.2, 284.8, 284. , 281.5, 278. , 274. , 272.5, 272.8,
       272.8, 272.8, 271. , 269. , 268. , 269. , 269.2, 269.2, 269.8,
       270.5, 271. , 271.5, 272.5, 272.8, 273.2, 273.8, 277. , 277.8,
       277.8, 276.8, 274.2], dtype=float16)
plt.title('HRRR Zarr Example TMP Forecast')
plt.xlabel('Forecast Hour')
plt.ylabel("TMP (K)")
plt.plot(tmp_forecast)
plt.show()
plt.close()
However, since each hour of analysis data lives in a completely separate Zarr store, gathering the analysis series to compare against is still slow and cumbersome. This approach is up to twice as fast as using xarray, but still much slower than reading a single chunk:
%%time
import datetime

start = datetime.datetime.strptime("20210108_00", '%Y%m%d_%H')
forecast_length = len(tmp_forecast)
tmp_analysis = []
for time_delta in range(forecast_length):
    # construct the aws path for this analysis hour
    analysis_time = start + datetime.timedelta(hours=time_delta)
    string_date = analysis_time.strftime('%Y%m%d')
    string_hour = analysis_time.strftime('%H')
    url = f"s3://hrrrzarr/sfc/{string_date}/{string_date}_{string_hour}z_anl.zarr"
    # open the zarr group
    store = zarr.open(s3fs.S3Map(url, s3=fs))
    # append the value at our grid point
    tmp_analysis.append(store["surface/TMP/surface/TMP"][first_index, second_index])
CPU times: user 1.94 s, sys: 78.5 ms, total: 2.02 s Wall time: 29.9 s
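Since the hourly stores are independent, one way to cut the wall time is to issue the reads concurrently. A minimal sketch using a thread pool; the fetch_hour helper and the worker count are illustrative choices, not part of the archive:
from concurrent.futures import ThreadPoolExecutor

def fetch_hour(analysis_time):
    # open one hour's store and pull the single value we need
    url = f"s3://hrrrzarr/sfc/{analysis_time:%Y%m%d}/{analysis_time:%Y%m%d_%H}z_anl.zarr"
    store = zarr.open(s3fs.S3Map(url, s3=fs))
    return store["surface/TMP/surface/TMP"][first_index, second_index]

times = [start + datetime.timedelta(hours=h) for h in range(forecast_length)]
with ThreadPoolExecutor(max_workers=8) as pool:
    tmp_analysis = list(pool.map(fetch_hour, times))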
plt.title('HRRR Zarr Example TMP Analysis vs. Forecast')
plt.xlabel('Hour')
plt.ylabel("TMP (K)")
plt.plot(tmp_analysis, color='blue', label='HRRR Analysis')
plt.plot(tmp_forecast, color='red', label='HRRR Forecast')
plt.legend()
plt.show()
plt.close()
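To quantify how closely the forecast tracked the analysis, we can compute simple error statistics from the two series. A minimal sketch, assuming both arrays cover the same 48 hours:
import numpy as np
# cast the float16 series up to float64 before differencing
diff = np.asarray(tmp_forecast, dtype=np.float64) - np.asarray(tmp_analysis, dtype=np.float64)
print(f"bias: {diff.mean():+.2f} K, RMSE: {np.sqrt((diff ** 2).mean()):.2f} K")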