This page provides basic information on browsing and accessing the HRRR Zarr archive.
You may access the AWS Explorer web page and browse through all available data. While direct download through the browser is supported for individual chunks, you'll need to write code to decompress the data.
[sfc|prs]/[YYYYMMDD]/[YYYYMMDD]_[00-23]z_[type].zarr/[level]/[var]/[level]/[var]/[chunk]
[YYYYMMDD] - represents the UTC date format (e.g. 20210106).
[00-23]z - The two digits are the model run hour/reference time in UTC
[type] - This is either anl (analysis) for a zarr array with just the F00 analysis data or fcst (forecast) for a zarr array with forecast data at all available nonzero lead times (e.g. F01 - F48)
[level] - This is the model level. More information in the link on the next line
[var] - This is the variable stored in that level. For a complete list of the variables and levels click here
[chunk] - The CONUS HRRR data is broken up into 96 chunks or sections. View the map for an idea of the size and location of the chunks. The chunk identifier format differs depending on whether you have an anl or a fcst file. For fcst files the format is 0.y.x and for anl files the format is y.x, where y and x are the corresponding chunk numbers.
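We recommend three main ways of accessing the data, each demonstrated below: reading an individual chunk directly (s3fs + numcodecs), opening arrays with the zarr library, and opening datasets with xarray.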
The following code snippets are provided for "quick start" purposes. See the rest of the code documentation for further explanation and usage.
import s3fs
import numcodecs as ncd
import numpy as np
# A single chunk object: surface file, 2020-08-01 00z analysis, 1000mb TMP,
# chunk "4.3" (the y.x identifier form used by anl files).
url = "s3://hrrrzarr/sfc/20200801/20200801_00z_anl.zarr/1000mb/TMP/1000mb/TMP/4.3"
# Anonymous (no credentials) S3 filesystem handle; retrieve_data() reads through it.
s3 = s3fs.S3FileSystem(anon=True)
def retrieve_data(s3_url):
    """Download one HRRR Zarr chunk from S3 and return it as a NumPy array.

    The object at ``s3_url`` is read through the module-level ``s3``
    filesystem, Blosc-decompressed with ``ncd``, and interpreted as
    little-endian floats laid out on a 150 x 150 grid-point tile.

    Args:
        s3_url: Full ``s3://`` URL of a single chunk object.

    Returns:
        A ``(150, 150)`` array when the chunk holds exactly one grid
        (analysis data), otherwise a ``(number_hours, 150, 150)`` array
        (forecast data, whose length in time varies).

    Raises:
        ValueError: If the decoded chunk is not a whole number of
            150 x 150 grids.
    """
    tile_shape = (150, 150)
    # These fields are stored as 32-bit floats; every other variable is 16-bit.
    # (Original note: pressures above 1000hPa use a larger data type.)
    float32_variables = (
        "surface/PRES",
        "mean_sea_level/MSLMA",
        "0C_isotherm/PRES",
    )

    # Hold the S3 handle only for the read; decoding needs just the bytes.
    with s3.open(s3_url, 'rb') as compressed_data:
        compressed_bytes = compressed_data.read()
    buffer = ncd.blosc.decompress(compressed_bytes)

    dtype = "<f4" if any(name in s3_url for name in float32_variables) else "<f2"
    chunk = np.frombuffer(buffer, dtype=dtype)

    gridpoints_in_chunk = tile_shape[0] * tile_shape[1]
    number_hours, leftover = divmod(len(chunk), gridpoints_in_chunk)
    if leftover:
        # Fail with a message that names the object instead of an opaque
        # reshape error; a wrong dtype guess is the most likely cause.
        raise ValueError(
            f"{s3_url}: decoded {len(chunk)} values as {dtype}, which is not "
            f"a multiple of {gridpoints_in_chunk} grid points"
        )

    if number_hours == 1:  # analysis data is 2d
        return chunk.reshape(tile_shape)
    # forecast data is 3d but of varying length in time
    return chunk.reshape((number_hours, *tile_shape))
# Fetch and decode the chunk addressed by `url`.
chunk_data = retrieve_data(url)
# Mean over every value in the returned array.
print(np.mean(chunk_data))
316.2
import s3fs
import zarr
# Open the root of the public bucket without credentials and view it as a
# Zarr hierarchy.
url = "s3://hrrrzarr"
fs = s3fs.S3FileSystem(anon=True)
bucket_mapping = s3fs.S3Map(url, s3=fs)
store = zarr.open(bucket_mapping)

# Members are addressed by their key path below the bucket root.
index = store["grid/HRRR_chunk_index.zarr"]
temperature = store["sfc/20200801/20200801_00z_anl.zarr/1000mb/TMP/1000mb/TMP"]
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
# Draw on a Mercator map. The grid is supplied as longitude/latitude, so the
# data coordinates are declared as PlateCarree and cartopy reprojects them.
longitudes = index["longitude"]
latitudes = index["latitude"]
data_crs = ccrs.PlateCarree()

ax = plt.axes(projection=ccrs.Mercator())
ax.contourf(longitudes, latitudes, temperature, transform=data_crs)
ax.coastlines()
plt.show()
import s3fs
import xarray as xr
import cartopy.crs as ccrs
import metpy
# The variable's Zarr group and its nested subgroup are opened together and
# combined into a single dataset.
group_url = 's3://hrrrzarr/sfc/20200801/20200801_00z_anl.zarr/surface/GUST'
subgroup_url = f"{group_url}/surface"

fs = s3fs.S3FileSystem(anon=True)
zarr_stores = [s3fs.S3Map(path, s3=fs) for path in (group_url, subgroup_url)]
ds = xr.open_mfdataset(zarr_stores, engine='zarr')

# Lambert conformal projection on a sphere (equal semi-major/semi-minor axes).
sphere = ccrs.Globe(semimajor_axis=6371229, semiminor_axis=6371229)
projection = ccrs.LambertConformal(
    central_longitude=262.5,
    central_latitude=38.5,
    standard_parallels=(38.5, 38.5),
    globe=sphere,
)

# Rename the projection coordinates to x/y, attach the CRS through the metpy
# accessor, then derive 2-D latitude/longitude coordinates from it.
ds = (
    ds.rename(projection_x_coordinate="x", projection_y_coordinate="y")
    .metpy.assign_crs(projection.to_cf())
    .metpy.assign_latitude_longitude()
)
ds
<xarray.Dataset> Dimensions: (x: 1799, y: 1059) Coordinates: * x (x) float64 -2.698e+06 -2.695e+06 ... 2.696e+06 * y (y) float64 -1.587e+06 -1.584e+06 ... 1.587e+06 metpy_crs object Projection: lambert_conformal_conic latitude (y, x) float64 21.14 21.15 21.15 ... 47.85 47.84 longitude (y, x) float64 -122.7 -122.7 ... -60.95 -60.92 Data variables: GUST (y, x) float16 dask.array<chunksize=(150, 150), meta=np.ndarray> forecast_period timedelta64[ns] ... forecast_reference_time datetime64[ns] ... height float64 ... pressure float64 ... time datetime64[ns] ...
array([-2697520.142522, -2694520.142522, -2691520.142522, ..., 2690479.857478, 2693479.857478, 2696479.857478])
array([-1587306.152557, -1584306.152557, -1581306.152557, ..., 1580693.847443, 1583693.847443, 1586693.847443])
array(<metpy.plots.mapping.CFProjection object at 0x10ae38730>, dtype=object)
array([[21.138123 , 21.14511004, 21.1520901 , ..., 21.1545089 , 21.14753125, 21.14054663], [21.16299459, 21.1699845 , 21.17696744, ..., 21.17938723, 21.1724067 , 21.16541921], [21.18786863, 21.19486142, 21.20184723, ..., 21.20426802, 21.19728462, 21.19029425], ..., [47.78955926, 47.799849 , 47.81012868, ..., 47.81369093, 47.80341474, 47.79312849], [47.81409316, 47.82438621, 47.8346692 , ..., 47.83823259, 47.8279531 , 47.81766354], [47.8386235 , 47.84891986, 47.85920615, ..., 47.86277069, 47.85248789, 47.84219502]])
array([[-122.719528 , -122.69286132, -122.6661903 , ..., -72.3430592 , -72.31638668, -72.28971849], [-122.72702499, -122.70035119, -122.67367305, ..., -72.33557892, -72.30889927, -72.28222397], [-122.73452632, -122.7078454 , -122.68116014, ..., -72.3280943 , -72.30140753, -72.2747251 ], ..., [-134.0648096 , -134.02828423, -133.99174671, ..., -61.02092594, -60.9843842 , -60.94785462], [-134.08013858, -134.04360126, -134.00705178, ..., -61.00562502, -60.96907132, -60.93252978], [-134.09547973, -134.05893046, -134.02236901, ..., -60.99031194, -60.95374627, -60.91719277]])
|
array(0, dtype='timedelta64[ns]')
array('2020-08-01T00:00:00.000000000', dtype='datetime64[ns]')
array(1000.)
array(25000.)
array('2020-08-01T00:00:00.000000000', dtype='datetime64[ns]')
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
# The axes use the same projection that was assigned to the dataset as its
# CRS, so x/y are passed as-is without a ``transform=`` argument.
ax = plt.axes(projection=projection)
ax.contourf(ds["x"], ds["y"], ds["GUST"])
ax.coastlines()
plt.show()