import s3fs
import numcodecs as ncd
import numpy as np

# Full S3 path to one chunk object (the trailing "4.3" component) under the
# 1000mb/TMP array of the 20200801_00z analysis store in the hrrrzarr bucket.
url = "s3://hrrrzarr/sfc/20200801/20200801_00z_anl.zarr/1000mb/TMP/1000mb/TMP/4.3"
# Module-level filesystem handle; retrieve_data() below reads through it.
# anon=True asks s3fs for anonymous (credential-free) access.
s3 = s3fs.S3FileSystem(anon=True)

def retrieve_data(s3_url):
    """Download one blosc-compressed chunk and return it as a NumPy array.

    The object at ``s3_url`` is read through the module-level ``s3``
    filesystem, decompressed with blosc, and interpreted as little-endian
    floats laid out on a 150 x 150 grid.

    Args:
        s3_url: Path of the chunk object. It is also searched for a few
            variable names to decide the element width.

    Returns:
        A ``(150, 150)`` array when the chunk holds exactly one grid
        (analysis data), otherwise a ``(hours, 150, 150)`` array (forecast
        data, whose length in time varies).
    """
    # Only the raw read needs the remote file handle.
    with s3.open(s3_url, 'rb') as remote_file:
        raw_bytes = ncd.blosc.decompress(remote_file.read())

    # Pressures above 1000hPa use a larger data type; every other variable
    # is decoded as 16-bit floats.
    wide_variables = ("surface/PRES", "mean_sea_level/MSLMA", "0C_isotherm/PRES")
    element_type = "<f4" if any(name in s3_url for name in wide_variables) else "<f2"

    values = np.frombuffer(raw_bytes, dtype=element_type)

    side = 150
    hours = len(values) // (side * side)

    # A single grid is returned 2-D; several grids get a leading time axis.
    shape = (side, side) if hours == 1 else (hours, side, side)
    return np.reshape(values, shape)

# Fetch the example chunk defined above and print the mean of its values.
chunk_data = retrieve_data(url)
print(np.mean(chunk_data))

316.2


import s3fs
import zarr

# Open the whole bucket as a zarr hierarchy, using anonymous access.
url = "s3://hrrrzarr"
fs = s3fs.S3FileSystem(anon=True)
store = zarr.open(s3fs.S3Map(url, s3=fs))


# Entries are looked up by their path relative to the bucket root.
temperature = store["sfc/20200801/20200801_00z_anl.zarr/1000mb/TMP/1000mb/TMP"]
# The chunk index is read below for its "longitude" and "latitude" entries.
index = store["grid/HRRR_chunk_index.zarr"]


import cartopy.crs as ccrs
import matplotlib.pyplot as plt

# Draw filled contours of the temperature field on a Mercator map.
ax = plt.axes(projection=ccrs.Mercator())
# transform=PlateCarree declares the x/y inputs to be longitude/latitude,
# so cartopy reprojects them onto the Mercator axes.
ax.contourf(index["longitude"], index["latitude"], temperature, transform=ccrs.PlateCarree())
ax.coastlines()
plt.show()


import s3fs
import xarray as xr
import cartopy.crs as ccrs
import metpy

# The GUST variable is opened from two zarr locations: the group and the
# "surface" subgroup nested inside it.
# NOTE(review): presumably the group carries the coordinate/metadata arrays
# and the subgroup the data array itself -- confirm against the archive layout.
group_url = 's3://hrrrzarr/sfc/20200801/20200801_00z_anl.zarr/surface/GUST'
subgroup_url = f"{group_url}/surface"

fs = s3fs.S3FileSystem(anon=True)    
# Both locations are opened together into a single dataset.
ds = xr.open_mfdataset([s3fs.S3Map(url, s3=fs) for url in [group_url, subgroup_url]], engine='zarr')

# Lambert conformal projection on a sphere: the globe's semi-major and
# semi-minor axes are both 6371229.
projection = ccrs.LambertConformal(central_longitude=262.5, 
                                   central_latitude=38.5, 
                                   standard_parallels=(38.5, 38.5),
                                    globe=ccrs.Globe(semimajor_axis=6371229,
                                                     semiminor_axis=6371229))

# Shorten the projection coordinate names to "x" / "y", attach the CRS via
# the MetPy accessor, then add the latitude/longitude coordinates.
ds = ds.rename(projection_x_coordinate="x", projection_y_coordinate="y")
ds = ds.metpy.assign_crs(projection.to_cf())
ds = ds.metpy.assign_latitude_longitude()    
# Bare expression: in a notebook this displays the dataset summary.
ds

<xarray.Dataset>
Dimensions:                  (x: 1799, y: 1059)
Coordinates:
  * x                        (x) float64 -2.698e+06 -2.695e+06 ... 2.696e+06
  * y                        (y) float64 -1.587e+06 -1.584e+06 ... 1.587e+06
    metpy_crs                object Projection: lambert_conformal_conic
    latitude                 (y, x) float64 21.14 21.15 21.15 ... 47.85 47.84
    longitude                (y, x) float64 -122.7 -122.7 ... -60.95 -60.92
Data variables:
    GUST                     (y, x) float16 dask.array<chunksize=(150, 150), meta=np.ndarray>
    forecast_period          timedelta64[ns] ...
    forecast_reference_time  datetime64[ns] ...
    height                   float64 ...
    pressure                 float64 ...
    time                     datetime64[ns] ...

array([-2697520.142522, -2694520.142522, -2691520.142522, ...,  2690479.857478,
        2693479.857478,  2696479.857478])

array([-1587306.152557, -1584306.152557, -1581306.152557, ...,  1580693.847443,
        1583693.847443,  1586693.847443])

array(<metpy.plots.mapping.CFProjection object at 0x10ae38730>,
      dtype=object)

array([[21.138123  , 21.14511004, 21.1520901 , ..., 21.1545089 ,
        21.14753125, 21.14054663],
       [21.16299459, 21.1699845 , 21.17696744, ..., 21.17938723,
        21.1724067 , 21.16541921],
       [21.18786863, 21.19486142, 21.20184723, ..., 21.20426802,
        21.19728462, 21.19029425],
       ...,
       [47.78955926, 47.799849  , 47.81012868, ..., 47.81369093,
        47.80341474, 47.79312849],
       [47.81409316, 47.82438621, 47.8346692 , ..., 47.83823259,
        47.8279531 , 47.81766354],
       [47.8386235 , 47.84891986, 47.85920615, ..., 47.86277069,
        47.85248789, 47.84219502]])

array([[-122.719528  , -122.69286132, -122.6661903 , ...,  -72.3430592 ,
         -72.31638668,  -72.28971849],
       [-122.72702499, -122.70035119, -122.67367305, ...,  -72.33557892,
         -72.30889927,  -72.28222397],
       [-122.73452632, -122.7078454 , -122.68116014, ...,  -72.3280943 ,
         -72.30140753,  -72.2747251 ],
       ...,
       [-134.0648096 , -134.02828423, -133.99174671, ...,  -61.02092594,
         -60.9843842 ,  -60.94785462],
       [-134.08013858, -134.04360126, -134.00705178, ...,  -61.00562502,
         -60.96907132,  -60.93252978],
       [-134.09547973, -134.05893046, -134.02236901, ...,  -60.99031194,
         -60.95374627,  -60.91719277]])

array(0, dtype='timedelta64[ns]')

array('2020-08-01T00:00:00.000000000', dtype='datetime64[ns]')

array(1000.)

array(25000.)


import cartopy.crs as ccrs
import matplotlib.pyplot as plt

# Plot GUST on axes built from the same `projection` object defined earlier.
ax = plt.axes(projection=projection)
# No transform is passed: ds.x / ds.y are handed to contourf as-is.
# NOTE(review): this assumes they are already in the axes' projection
# coordinates -- confirm.
ax.contourf(ds.x, ds.y, ds.GUST)
ax.coastlines()
plt.show()

Accessing and Reading Zarr

AWS Explorer

Path Format

Python Access

Option 1 - Chunk Access

Option 2 - Zarr library

Option 3 - xarray