You can run this notebook in Binder, in Colab, in Deepnote or in Kaggle.
[29]:
!pip install --quiet climetlab matplotlib
WARNING: You are using pip version 21.1.1; however, version 21.1.2 is available.
You should consider upgrading via the '/usr/local/opt/python@3.9/bin/python3.9 -m pip install --upgrade pip' command.
WeatherBench
This notebook is an attempt to reproduce the WeatherBench research (Rasp et al., 2020): https://arxiv.org/abs/2002.00469. The original quickstart notebook is available at: https://binder.pangeo.io/v2/gh/pangeo-data/WeatherBench/master?filepath=quickstart.ipynb
[30]:
import matplotlib.pyplot as plt
[31]:
import climetlab as cml
[28]:
# Load the WeatherBench dataset through climetlab.
# As the traceback recorded below shows, this goes through climetlab's "url"
# source with unpack=True: the archive is downloaded and unpacked through
# climetlab's cache, and only created when the cache path does not exist yet.
# The library passes verify=False, which is why urllib3 emits
# InsecureRequestWarning. The run recorded below was interrupted by hand
# (KeyboardInterrupt) while the download was still reading from the socket.
ds = cml.load_dataset("weather-bench")
/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py:981: InsecureRequestWarning: Unverified HTTPS request is being made to host 'dataserv.ub.tum.de'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
warnings.warn(
/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py:981: InsecureRequestWarning: Unverified HTTPS request is being made to host 'dataserv.ub.tum.de'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
warnings.warn(
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-28-c4fee7942768> in <module>
----> 1 ds = cml.load_dataset("weather-bench")
~/git/climetlab/climetlab/datasets/__init__.py in load_dataset(name, *args, **kwargs)
221 TERMS_OF_USE_SHOWN.add(name)
222
--> 223 ds = klass(*args, **kwargs).mutate()
224 if getattr(ds, "name", None) is None:
225 ds.name = name
~/git/climetlab/climetlab/datasets/weather_bench.py in __init__(self, parameter, resolution)
61 "/download?path=%2F{resolution}deg%2F{parameter}&files={parameter}_{resolution}deg.zip"
62 ).format(resolution=resolution, parameter=parameter)
---> 63 self.source = load_source("url", url, unpack=True, verify=False)
64
65
~/git/climetlab/climetlab/sources/__init__.py in load_source(name, *args, **kwargs)
129
130 def load_source(name: str, *args, **kwargs) -> Source:
--> 131 return source(name, *args, **kwargs).mutate()
132
133
~/git/climetlab/climetlab/sources/__init__.py in __call__(self, name, *args, **kwargs)
110 klass = find_plugin(os.path.dirname(__file__), name, loader)
111
--> 112 source = klass(*args, **kwargs)
113
114 if getattr(source, "name", None) is None:
~/git/climetlab/climetlab/sources/url.py in __init__(self, url, unpack, verify, watcher, force, **kwargs)
261 download(archive, url)
262 LOG.info("Unpacking...")
--> 263 shutil.unpack_archive(archive, target)
264 LOG.info("Done.")
265 os.unlink(archive)
~/git/climetlab/climetlab/sources/__init__.py in cache_file(self, create, args, hash_extra, extension, force)
58 if owner is None:
59 owner = self.__class__.__name__.lower()
---> 60 return cache_file(owner, create, args, hash_extra, extension, force)
61
62 @property
~/git/climetlab/climetlab/core/caching.py in cache_file(owner, create, args, hash_extra, extension, force)
467 if not os.path.exists(path):
468
--> 469 create(path + ".tmp", args)
470
471 # take care of race condition when two processes
~/git/climetlab/climetlab/sources/url.py in download_and_unpack(target, url)
254 def download(target, url):
255 o = urlparse(self.url)
--> 256 return downloader.download(url, target)
257
258 def download_and_unpack(target, url):
~/git/climetlab/climetlab/sources/url.py in download(target, url)
249
250
--> 251 if unpack is None:
252 unpack = extension in (".tar", ".tar.gz")
253
~/git/climetlab/climetlab/sources/url.py in download(self, url, target)
72
73 with open(download, mode) as f:
---> 74 total = self.transfer(f, pbar, self.owner.watcher)
75
76 pbar.close()
~/git/climetlab/climetlab/sources/url.py in transfer(self, f, pbar, watcher)
153
154 self.request = r
--> 155
156 return size
157
/usr/local/lib/python3.9/site-packages/requests/models.py in generate()
749 if hasattr(self.raw, 'stream'):
750 try:
--> 751 for chunk in self.raw.stream(chunk_size, decode_content=True):
752 yield chunk
753 except ProtocolError as e:
/usr/local/lib/python3.9/site-packages/urllib3/response.py in stream(self, amt, decode_content)
573 else:
574 while not is_fp_closed(self._fp):
--> 575 data = self.read(amt=amt, decode_content=decode_content)
576
577 if data:
/usr/local/lib/python3.9/site-packages/urllib3/response.py in read(self, amt, decode_content, cache_content)
516 else:
517 cache_content = False
--> 518 data = self._fp.read(amt) if not fp_closed else b""
519 if (
520 amt != 0 and not data
/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py in read(self, amt)
453 # Amount is given, implement using readinto
454 b = bytearray(amt)
--> 455 n = self.readinto(b)
456 return memoryview(b)[:n].tobytes()
457 else:
/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py in readinto(self, b)
497 # connection, and the user is reading more bytes than will be provided
498 # (for example, reading in 1k chunks)
--> 499 n = self.fp.readinto(b)
500 if not n and b:
501 # Ideally, we would raise IncompleteRead if the content-length
/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/socket.py in readinto(self, b)
702 while True:
703 try:
--> 704 return self._sock.recv_into(b)
705 except timeout:
706 self._timeout_occurred = True
/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py in recv_into(self, buffer, nbytes, flags)
1239 "non-zero flags not allowed in calls to recv_into() on %s" %
1240 self.__class__)
-> 1241 return self.read(nbytes, buffer)
1242 else:
1243 return super().recv_into(buffer, nbytes, flags)
/usr/local/Cellar/python@3.9/3.9.5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py in read(self, len, buffer)
1097 try:
1098 if buffer is not None:
-> 1099 return self._sslobj.read(len, buffer)
1100 else:
1101 return self._sslobj.read(len)
KeyboardInterrupt:
[12]:
# Display the dataset's metadata card (home page, documentation, citation, licence).
ds
[12]:
weather-bench
| Home page | https://raspstephan.github.io/blog/weatherbench/ |
| Documentation | - |
| Citation | @article{rasp2020weatherbench,
title={WeatherBench: A benchmark dataset for data-driven weather forecasting},
author={Rasp, Stephan and Dueben, Peter D and Scher, Sebastian and Weyn,
Jonathan A and Mouatadid, Soukayna and Thuerey, Nils},
journal={arXiv preprint arXiv:2002.00469},
year={2020}
}
|
| Licence | - |
[13]:
# Print the BibTeX entry as plain text so it can be copied verbatim.
print(ds.citation)
@article{rasp2020weatherbench,
title={WeatherBench: A benchmark dataset for data-driven weather forecasting},
author={Rasp, Stephan and Dueben, Peter D and Scher, Sebastian and Weyn,
Jonathan A and Mouatadid, Soukayna and Thuerey, Nils},
journal={arXiv preprint arXiv:2002.00469},
year={2020}
}
[14]:
# Expose the dataset as an xarray.Dataset. Per the repr below it holds one
# variable, `z` (geopotential at level 500), backed by a chunked dask array.
z500 = ds.to_xarray()
[15]:
# Inspect dimensions, coordinates, variables and attributes of the dataset.
z500
[15]:
<xarray.Dataset>
Dimensions: (lat: 32, lon: 64, time: 350640)
Coordinates:
level int32 500
* lon (lon) float64 0.0 5.625 11.25 16.88 ... 337.5 343.1 348.8 354.4
* lat (lat) float64 -87.19 -81.56 -75.94 -70.31 ... 75.94 81.56 87.19
* time (time) datetime64[ns] 1979-01-01 ... 2018-12-31T23:00:00
Data variables:
z (time, lat, lon) float32 dask.array<chunksize=(8760, 32, 64), meta=np.ndarray>
Attributes:
Conventions: CF-1.6
history: 2019-11-10 11:30:13 GMT by grib_to_netcdf-2.14.0: /opt/ecmw...xarray.Dataset
- lat: 32
- lon: 64
- time: 350640
- level()int32500
- units :
- millibars
- long_name :
- pressure_level
array(500, dtype=int32)
- lon(lon)float640.0 5.625 11.25 ... 348.8 354.4
array([ 0. , 5.625, 11.25 , 16.875, 22.5 , 28.125, 33.75 , 39.375, 45. , 50.625, 56.25 , 61.875, 67.5 , 73.125, 78.75 , 84.375, 90. , 95.625, 101.25 , 106.875, 112.5 , 118.125, 123.75 , 129.375, 135. , 140.625, 146.25 , 151.875, 157.5 , 163.125, 168.75 , 174.375, 180. , 185.625, 191.25 , 196.875, 202.5 , 208.125, 213.75 , 219.375, 225. , 230.625, 236.25 , 241.875, 247.5 , 253.125, 258.75 , 264.375, 270. , 275.625, 281.25 , 286.875, 292.5 , 298.125, 303.75 , 309.375, 315. , 320.625, 326.25 , 331.875, 337.5 , 343.125, 348.75 , 354.375]) - lat(lat)float64-87.19 -81.56 ... 81.56 87.19
array([-87.1875, -81.5625, -75.9375, -70.3125, -64.6875, -59.0625, -53.4375, -47.8125, -42.1875, -36.5625, -30.9375, -25.3125, -19.6875, -14.0625, -8.4375, -2.8125, 2.8125, 8.4375, 14.0625, 19.6875, 25.3125, 30.9375, 36.5625, 42.1875, 47.8125, 53.4375, 59.0625, 64.6875, 70.3125, 75.9375, 81.5625, 87.1875]) - time(time)datetime64[ns]1979-01-01 ... 2018-12-31T23:00:00
- long_name :
- time
array(['1979-01-01T00:00:00.000000000', '1979-01-01T01:00:00.000000000', '1979-01-01T02:00:00.000000000', ..., '2018-12-31T21:00:00.000000000', '2018-12-31T22:00:00.000000000', '2018-12-31T23:00:00.000000000'], dtype='datetime64[ns]')
- z(time, lat, lon)float32dask.array<chunksize=(8760, 32, 64), meta=np.ndarray>
- units :
- m**2 s**-2
- long_name :
- Geopotential
- standard_name :
- geopotential
Array Chunk Bytes 2.87 GB 71.96 MB Shape (350640, 32, 64) (8784, 32, 64) Count 120 Tasks 40 Chunks Type float32 numpy.ndarray
- Conventions :
- CF-1.6
- history :
- 2019-11-10 11:30:13 GMT by grib_to_netcdf-2.14.0: /opt/ecmwf/eccodes/bin/grib_to_netcdf -o /cache/data8/adaptor.mars.internal-1573376422.6709263-24770-9-f4666372-a7af-441c-8bd8-90971a244a59.nc /cache/tmp/f4666372-a7af-441c-8bd8-90971a244a59-adaptor.mars.internal-1573376422.6717255-24770-3-tmp.grib
[16]:
# Draw the dataset on a map with climetlab's plotting helper.
# NOTE(review): the dataset has 350640 time steps; which one plot_map draws
# when handed the whole dataset is not visible here -- confirm.
cml.plot_map(z500)
[17]:
# Quick-look plot of the first time step using xarray's built-in plotting.
# The trailing ';' suppresses the `<matplotlib.collections.QuadMesh at 0x...>`
# repr so that the figure is the cell's only output.
z500.z.isel(time=0).plot();
[18]:
# Same field (first time step of `z`), drawn on a map with climetlab.
cml.plot_map(z500.z.isel(time=0))
[19]:
# Time-mean over the year 2016 only (both slice bounds are "2016").
# `z` is dask-backed, so .load() is what triggers the computation and pulls
# the resulting (lat, lon) field into memory.
climatology = (
    z500.sel(time=slice("2016", "2016"))
    .mean("time")
    .load()
)
[20]:
# Quick-look plot of the 2016 mean geopotential field.
# The trailing ';' suppresses the `<matplotlib.collections.QuadMesh at 0x...>`
# repr so that the figure is the cell's only output.
climatology.z.plot();
[86]:
# Draw the 2016 mean field on a map with climetlab.
cml.plot_map(climatology.z)
[87]:
# Inspect the computed mean: a (lat: 32, lon: 64) float32 DataArray, now held in memory.
climatology.z
[87]:
<xarray.DataArray 'z' (lat: 32, lon: 64)>
array([[48765.18 , 48774.066, 48782.83 , ..., 48745.145, 48750.582,
48757.645],
[48755.164, 48798.348, 48845.02 , ..., 48690.23 , 48700.72 ,
48721.727],
[48890.742, 48941.703, 48997.42 , ..., 48815.234, 48822.87 ,
48850.105],
...,
[52378.613, 52398.484, 52420.074, ..., 52361.254, 52363.426,
52366.344],
[51937.207, 51943.78 , 51950.414, ..., 51906.508, 51919.223,
51928.72 ],
[51565.38 , 51571.426, 51578.15 , ..., 51543.97 , 51551.11 ,
51558.305]], dtype=float32)
Coordinates:
level int32 500
* lon (lon) float64 0.0 5.625 11.25 16.88 ... 337.5 343.1 348.8 354.4
* lat (lat) float64 -87.19 -81.56 -75.94 -70.31 ... 75.94 81.56 87.19xarray.DataArray
'z'
- lat: 32
- lon: 64
- 4.877e+04 4.877e+04 4.878e+04 ... 5.154e+04 5.155e+04 5.156e+04
array([[48765.18 , 48774.066, 48782.83 , ..., 48745.145, 48750.582, 48757.645], [48755.164, 48798.348, 48845.02 , ..., 48690.23 , 48700.72 , 48721.727], [48890.742, 48941.703, 48997.42 , ..., 48815.234, 48822.87 , 48850.105], ..., [52378.613, 52398.484, 52420.074, ..., 52361.254, 52363.426, 52366.344], [51937.207, 51943.78 , 51950.414, ..., 51906.508, 51919.223, 51928.72 ], [51565.38 , 51571.426, 51578.15 , ..., 51543.97 , 51551.11 , 51558.305]], dtype=float32) - level()int32500
- units :
- millibars
- long_name :
- pressure_level
array(500, dtype=int32)
- lon(lon)float640.0 5.625 11.25 ... 348.8 354.4
- standard_name :
- longitude
array([ 0. , 5.625, 11.25 , 16.875, 22.5 , 28.125, 33.75 , 39.375, 45. , 50.625, 56.25 , 61.875, 67.5 , 73.125, 78.75 , 84.375, 90. , 95.625, 101.25 , 106.875, 112.5 , 118.125, 123.75 , 129.375, 135. , 140.625, 146.25 , 151.875, 157.5 , 163.125, 168.75 , 174.375, 180. , 185.625, 191.25 , 196.875, 202.5 , 208.125, 213.75 , 219.375, 225. , 230.625, 236.25 , 241.875, 247.5 , 253.125, 258.75 , 264.375, 270. , 275.625, 281.25 , 286.875, 292.5 , 298.125, 303.75 , 309.375, 315. , 320.625, 326.25 , 331.875, 337.5 , 343.125, 348.75 , 354.375]) - lat(lat)float64-87.19 -81.56 ... 81.56 87.19
- standard_name :
- latitude
array([-87.1875, -81.5625, -75.9375, -70.3125, -64.6875, -59.0625, -53.4375, -47.8125, -42.1875, -36.5625, -30.9375, -25.3125, -19.6875, -14.0625, -8.4375, -2.8125, 2.8125, 8.4375, 14.0625, 19.6875, 25.3125, 30.9375, 36.5625, 42.1875, 47.8125, 53.4375, 59.0625, 64.6875, 70.3125, 75.9375, 81.5625, 87.1875])


