from numpy.core.numeric import True_
import pandas as pd
import numpy as np
import datetime as dt
import os
import re
import netCDF4 as nc
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeat
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib.projections import get_projection_class
from cartopy.io import shapereader
from scipy.interpolate import interp2d
from scipy.ndimage import zoom
from .subroutines import *
[docs]class ExposureMap:
""" A class for calculating maps based on user specifications
Each instance of this class contains information required to calculate and illustrate a map
of exposure information, be that simple averages or more advanced mathematical representations
of exposure risk. The class is designed to have three core functions run in sequence, with
room for flexibility should more advanced users desire it. First, the data is read and a pixel
histogram is calculated. This allows much more data to be stored in memory and is the basis
for performing this kind of data analysis on personal computers.
Parameters
----------
units : str
The units of the quantity to be mapped. Must be "SED", "J m-2" or "UVIh" for doses or "UVI",
"W m-2" or "mW m-2" for irradiances. Defaults to "SED".
exposure_schedule : array
A vector of values describing the relative exposure of each hour of the day. 0 indicates no
exposure, 1 indicates full exposure, and a fractional value such as 0.5 would indicate
exposure for a total of 30 minutes within the hour, or a 50% partial exposure for the full
hour, or anything equivalent. Values greater than 1 are allowed. Must have a length of 24 (for
each hour of the day) although a length of 1 is also allowed, in which case that number will
be immediately replicated across a 24-length vector. When not calculating doses, hours with
any non-zero entry in this vector are included, with the irradiance values being
multiplied by the corresponding non-zero value in the exposure schedule.
bin_width : float
The width of each bin in the pixel histogram. Value assumed to be in the same units as
defined by the units parameter. *Making bin_width excessively small can lead to high
memory usage,* consider the underlying accuracy of the source data and be sure not to
substantially exceed its precision with this parameter.
statistic : str
The statistical descriptor to be calculated from the pixel histograms to be later
represented on the rendered map. Must contain at least one of these keywords:
"mean", "median" or "med", "sd" or "std" or "stdev", "max" or "maximum", "min" or
"minimum".
*Planned:* the string can be a formula using any of the keywords above,
as well at "prct" or "percentile" preceeded by a number between 0 and 100, and
basic mathematical operators (+, -, *, /, **) and numeric factors.
date_selection : list of dates
The dates for which the irradiances are retrieved or the daily doses are calculated.
Defaults to None whereby the program selects all data within the src_directory that
matches the src_filename_format.
src_filename_format : str
Describes the filename of the netCDF files containing the data with 'yyyy' in place
of the year.
src_directory : str
The directory where the data is stored. Must end with a slash.
Example
-------
The code below shows a typical use case for the ExposureMap class. The long-term average daily doses
(i.e. the chronic doses) for typical school children are calculated across Switzerland asssuming certain
hours of exposure for journeying to and from school and having breaks for morning tea and lunch time. ::
import python_tamer as pt
import pandas as pd
import numpy as np
src_directory = 'C:/enter_your_src_directory_here'
ER = pt.ER_Vernez_2015("Forehead","Standing") # Long-term average ER for foreheads in standing posture
map = pt.ExposureMap(
src_directory=src_directory,
units = "J m-2",
exposure_schedule = np.array([0 ,0 ,0 ,0 ,0 ,0 ,
0 ,0 ,0.5,0 ,0.5,0 ,
0.5,0.5,0 ,0 ,0.5,0 ,
0 ,0 ,0 ,0 ,0 ,0 ])*ER,
bin_width = 25,
date_selection = pd.date_range(start="2005-01-01",end="2014-12-31"),
statistic = "mean",
map_options={"title": "Chronic daily UV dose for typical school children, 2005-2014",
"save": False})
map = map.collect_data().calculate_map()
map.plot_map()
"""
def __init__(self,units="SED",
exposure_schedule=1,
statistic="mean",
bin_width = None,
date_selection=None,
map_options=None,
src_filename_format='UVery.AS_ch02.lonlat_yyyy01010000.nc',
src_directory='C:/Data/UV/'):
# assigning options to fields in class with a few basic checks
self.units = units
self.exposure_schedule=np.array(exposure_schedule)
if len(np.atleast_1d(self.exposure_schedule)) == 1 :
self.exposure_schedule = np.repeat(self.exposure_schedule,24)
self.statistic = statistic
self.map_options = {
"title" : "Test map",
"save" : True,
"img_size" : [20,15],
"img_dpi" : 300,
"img_dir" : "",
"img_filename" : "timestamp",
"img_filetype" : "png",
"brdr_nation" : True,
"brdr_nation_rgba" : [0,0,0,0],
"brdr_state" : False,
"brdr_state_rgba" : [0,0,0,0.67],
"cmap" : "jet",
"cmap_limits" : None,
"cbar" : True,
"cbar_limits" : None
}
if map_options is not None :
self.map_options.update(map_options)
self.src_filename_format = src_filename_format
self.src_directory = src_directory
self.date_selection = date_selection
if bin_width is None :
self.bin_width = {
"SED" : 0.1,
"J m-2" : 10,
"UVI" : 0.1,
"W m-2" : 0.0025,
"mW m-2" : 2.5
}[self.units]
else :
self.bin_width = bin_width
[docs] def collect_data(self, src_directory=None,src_filename_format=None,
date_selection=None,units=None,exposure_schedule=None,bin_width=None) :
"""Loads and manipulates data into histograms for each pixel of the underlying data
In order to handle large amounts of data without exceeding memory limitations, files are
loaded one at a time and the time dimension is removed, either by calculating daily doses
or by simply taking the data as is. The resulting information is then stored not as a
list of specific values but rather binned into a histogram for each pixel. This process
is repeated for each file required by the user input, building up the pixel histograms
with more information that does not require additional memory.
Parameters
----------
src_filename_format : str
Describes the filename of the netCDF files containing the data with 'yyyy' in place
of the year.
src_directory : str
The directory where the data is stored. Must end with a slash.
date_selection : list of dates
The dates for which the irradiances are retrieved or the daily doses are calculated.
Defaults to None whereby the program selects all data within the src_directory that
matches the src_filename_format.
units : str
Name of units of desired output. This also indicates whether daily doses must be
calculated or not. Units of "SED", "J m-2", or "UVIh" will produce daily doses,
units of "UVI", "W m-2" or "mW m-2" will not.
exposure_schedule : array
A vector of values describing the relative exposure of each hour of the day. 0 indicates no
exposure, 1 indicates full exposure, and a fractional value such as 0.5 would indicate
exposure for a total of 30 minutes within the hour, or a 50% partial exposure for the full
hour, or anything equivalent. Values greater than 1 are allowed. Must have a length of 24 (for
each hour of the day) although a length of 1 is also allowed, in which case that number will
be immediately replicated across a 24-length vector. When not calculating doses, hours with
any non-zero entry in this vector are included, with the irradiance values being
multiplied by the corresponding non-zero value in the exposure schedule.
bin_width : float
The width of each bin in the pixel histogram. Value assumed to be in the same units as
defined by the units parameter. *Making bin_width excessively small can lead to high
memory usage,* consider the underlying accuracy of the source data and be sure not to
substantially exceed its precision with this parameter.
Returns
-------
python_tamer.ExposureMap
The input ExposureMap object is appended with new fields, `pix_hist` contains
the counts for the histogram, and `bin_edges`, `bin_centers`, and `num_bins`
all serve as metadata for the pixel histograms. `lat` and `lon` are also
added from the multi-file dataset to inform the pixel locations for map making
further down the typical pipeline.
Example
-------
The example code below shows how an ExposureMap class can be declared with the default parameters that
can then be later redefined by collect_data() and the other class functions. ::
import python_tamer as pt
import pandas as pd
import numpy as np
src_directory = 'C:/enter_your_src_directory_here'
ER = pt.ER_Vernez_2015("Forehead","Standing") # Long-term average ER for foreheads in standing posture
map = pt.ExposureMap()
map = map.collect_data(
src_directory=src_directory,
units = "J m-2",
exposure_schedule = np.array([0 ,0 ,0 ,0 ,0 ,0 ,
0 ,0 ,0.5,0 ,0.5,0 ,
0.5,0.5,0 ,0 ,0.5,0 ,
0 ,0 ,0 ,0 ,0 ,0 ])*ER,
bin_width = 25,
date_selection = pd.date_range(start="2005-01-01",end="2014-12-31")
)
map = map.calculate_map(statistic = "mean")
map.plot_map(map_options={"title": "Chronic daily UV dose for typical school children, 2005-2014",
"save": False})
"""
# TODO: There must be a better way to do this
if not (src_directory is None) :
self.src_directory = src_directory
if not (src_filename_format is None) :
self.src_filename_format = src_filename_format
if not (date_selection is None) :
self.date_selection = date_selection
if not (units is None) :
self.units = units
if not (exposure_schedule is None) :
self.exposure_schedule = exposure_schedule
if not (bin_width is None) :
self.bin_width = bin_width
# first we read the src_directory to check the total number of unique years available
data_dir_contents = os.listdir(self.src_directory)
# TODO: improve jankiness of this format-matching search for filenames
char_year = self.src_filename_format.find('yyyy')
dataset_years = [ x for x in data_dir_contents if re.findall(self.src_filename_format.replace("yyyy","[0-9]{4}"),x)]
dataset_years = [ int(x[char_year:char_year+4]) for x in dataset_years ]
# Now we can handle default options like "all"
if type(self.date_selection) == str and self.date_selection == "all" :
date_selection = pd.date_range(start=str(dataset_years[0])+"-01-01",
end=str(dataset_years[-1])+"-12-31")
else :
date_selection = self.date_selection # TODO: much more interpretation options here
#now we find unique years
list_of_years = sorted(set(date_selection.year))
for i in range(len(list_of_years)) :
year = list_of_years[i]
print("Processing year "+str(year)) #should use logging, don't yet know how
dataset=nc.Dataset(self.src_directory+self.src_filename_format.replace('yyyy',str(year)))
dataset.set_auto_mask(False) #to get normal arrays (faster than default masked arrays)
if dataset.dimensions['time'].size == 24 :
# needed if just a single day
time_subset = [True for i in range(dataset.dimensions['time'].size)]
else :
# Next we pull a subset from the netCDF file
# declare false array with same length of time dimension from netCDF
time_subset = [False for i in range(dataset.dimensions['time'].size)]
# reshape false array to have first dimension 24 (hours in day)
time_subset = assert_data_shape_24(time_subset)
# set the appropriate days as true
time_subset[:,date_selection[date_selection.year == year].dayofyear-1] = True
# flatten time_subset array back to one dimension
time_subset = time_subset.flatten(order='F')
# load subset of data
print(" Slicing netcdf data with time subset")
data = dataset['UV_AS'][time_subset,:,:] #work in UVI by default because it's easy to read
# TODO: check units of dataset files, CF conventions for UVI or W/m2
# now to calculate doses if requested
if self.units in ["SED","J m-2","UVIh"] :
# if calculating doses
print(' Calculating doses')
data = assert_data_shape_24(data)
data = np.sum(np.reshape(self.exposure_schedule,[24,1,1,1]) * data,axis=0)
elif (self.exposure_schedule != np.ones(24)).any() :
# assume elsewise calculating intensity (i.e. UV-index) then limit data selection according
# to schedule (remembering that default schedule is just ones)
print(' Slicing data with exposure schedule')
# reshape so first dimension is 24 hours
data = assert_data_shape_24(data)
# select only those hours with nonzero entry in exposure schedule
data = data[self.exposure_schedule != 0,:,:,:]
# select nonzero values from exposure schedule
exposure_schedule_nonzero = self.exposure_schedule[self.exposure_schedule != 0]
# if any nonzero entries aren't 1, multiply data accordingly
if (exposure_schedule_nonzero != 1).any() :
data *= np.reshape(exposure_schedule_nonzero,[len(exposure_schedule_nonzero),1,1,1])
# recombine first two dimensions (hour and day) back into time ready for histogram
data = assert_data_shape_24(data,reverse=True)
# now multiply data by conversion factor according to desired untis
# TODO: Should expand upon this in reference files
data *= {"SED":0.9, "J m-2":90, "UVIh":1, "UVI":1, "W m-2":0.025, "mW m-2":25}[self.units]
# if this is the first iteration, declare a hist
if i == 0 :
# seems like useful metadata to know bin n and edges
# TODO: reconsider where this belongs in the code (__init__?)
self.num_bins = int(np.nanmax(data) // self.bin_width ) + 2
self.bin_edges = (np.array(range(self.num_bins+1)) - 0.5) * self.bin_width
# this form allows for weird custom bin edges, but probably will never use that
self.bin_centers = self.bin_edges[:-1] + 0.5 * np.diff(self.bin_edges)
# TODO: think about possible cases where dimensions could differ
self.pix_hist=np.zeros([self.num_bins,
np.shape(data)[-2],np.shape(data)[-1]], dtype=np.int16)
# TODO: this should also be done by some initial dataset analysis, but that's a drastic
# design overhaul
self.lat = dataset['lat'][:]
self.lon = dataset['lon'][:]
else :
new_num_bins = int(np.nanmax(data) // self.bin_width) + 2 - self.num_bins
# check if new data requires extra bins in pix_hist
if new_num_bins > 0 :
# append zeros to pix hist to make room for larger values
self.pix_hist = np.concatenate((self.pix_hist,np.zeros(
[new_num_bins,np.shape(self.pix_hist)[-2],np.shape(self.pix_hist)[-1]],
dtype=np.int16)),axis=0)
# update bin information
self.num_bins = self.num_bins + new_num_bins
self.bin_edges = (np.array(range(self.num_bins+1)) - 0.5) * self.bin_width
self.bin_centers = self.bin_edges[:-1] + 0.5 * np.diff(self.bin_edges)
# TODO: Add check in case bins get "full" (i.e. approach int16 max value)
# now put data into hist using apply_along_axis to perform histogram for each pixel
print(" Calculating and adding to pixel histograms")
self.pix_hist[:,:,:] += np.apply_along_axis(lambda x:
np.histogram(x,bins=self.bin_edges)[0],0,data)
return self
[docs] def calculate_map(self,pix_hist=None,statistic=None,bin_centers=None) :
"""Calculates statistical descriptor values for pixel histograms to produce a map
This function interprets the statistic string, which can either be a simple command
such as "mean" or a more advanced formula of keywords. The corresponding function is
applied to each pixel of the pix_hist object within the ExposureMap class, essentially
removing the first dimension and resulting in straightforward map to be plotted.
Parameters
----------
pix_hist : array
A 3D array with the first dimension containing vectors of counts for histograms
and the next two dimensions serving as pixel coordinates. See
`ExposureMap.collect_data()` for more information.
statistic : str
The statistical descriptor to be calculated from the pixel histograms to be later
represented on the rendered map. Must contain at least one of these keywords:
"mean", "median" or "med", "sd" or "std" or "stdev", "max" or "maximum", "min" or
"minimum".
*Planned:* the string can be a formula using any of the keywords above,
as well at "prct" or "percentile" preceeded by a number between 0 and 100, and
basic mathematical operators (+, -, *, /, **) and numeric factors.
bin_centers : array
The central numeric values corresponding to the bins in pix_hist. The
`ExposureMap.collect_data` function typically calculates these values from the
given `bin_width` input.
Returns
-------
python_tamer.ExposureMap
The ExposureMap class object is appended with a map field containing a 2D array
Example
-------
In the example below, the user imports some pre-calculated pixel histograms, thereby
completing the ExposureMap workflow without using the `ExposureMap.ExposureMap.collect_data()`
function. This can be useful if the data collection is timely and the user wants to
produce multiple different maps. Note that the "custom data" used in this example is not
included in the python-TAMER package, this simply illustrates a unique use-case. ::
import python_tamer as pt
# load custom data from an external file (not included)
from custom_user_data import pix_hist, bin_centers, map_options
map = pt.ExposureMap(map_options = map_options)
map = map.calculate_map(
statistic = "median",
pix_hist = data,
bin_centers = bin_centers
).plot_map(save = False)
map.calculate_map(statistic = "max").plot_map(map_options={"save" = False})
map.calculate_map(statistic = "std").plot_map(map_options={"save" = False})
"""
if not (pix_hist is None) :
self.pix_hist = pix_hist
if not (statistic is None) :
self.statistic = statistic
if not (bin_centers is None) :
self.bin_centers = bin_centers
# Begin by defining the easy options that only require two inputs
basic_descriptor_functions = {
"mean": hist_mean,
"median": lambda x,y: hist_percentile(x,y,0.5),
"med": lambda x,y: hist_percentile(x,y,0.5),
"sd": hist_stdev,
"std": hist_stdev,
"stdev": hist_stdev,
"max": hist_max,
"maximum": hist_max,
"min": hist_min,
"minimum":hist_min
}
# we can check if the chosen statistic is basic or advanced
if self.statistic.lower() in basic_descriptor_functions.keys() :
# in this case, we can simply select the basic function from the dict...
descriptor_function = basic_descriptor_functions[self.statistic.lower()]
# ...and execute it across the map
self.map = np.apply_along_axis(lambda x: descriptor_function(x,self.bin_centers),0,self.pix_hist)
# TODO: a loose space could ruin this, need shunting yard algorithm of sorts
elif self.statistic.lower()[2:] == "prct" or self.statistic.lower()[2:] == "percentile" :
prct = int(self.statistic[0:1]) / 100
self.map = np.apply_along_axis(lambda x: hist_percentile(x,self.bin_centers,prct),0,self.pix_hist)
else :
# TODO: interpret self.statistic to build advanced functions (y i k e s)
print("WARNING: ExposureMap.statistic not recognised.")
return self
[docs] def plot_map(self,map_options=None) :
"""Renders and optionally saves a map of the ``map`` field in an ExposureMap object
This function caps off the typical workflow for the ExposureMap class by rendering the contents
of the map field. Many aesthetic factors are accounted for, contained within the
`ExposureMap.map_options` dictionary.
Parameters
----------
map_options : dict, optional
A collection of many typical options such as image and font sizes, colormaps, etc.
The full range of options is listed below with their default values.
"title" : "Test map"
The title to be rendered above the map. Can be left blank for no title. Can be
used to inform img_filename
"save" : True
Boolean to declare whether the map should be saved as an image file or not.
"img_size" : [20,15]
The size [width,height] of the image in cm.
"img_dpi" : 300
The dots per inch of the saved image.
"img_dir" : ""
The directory for the image to be saved in, leaving it blank should result
in images being saved in the working directory.
"img_filename" : "timestamp"
The image filename as a string. The default value of "timestamp" is a keyword
indicating that the function should generate a filename based on the time at
the moment of the calculation, specified with the format %Y%m%d_%H%M%S_%f
which includes millisecond precision.
"img_filetype" : "png"
The image filetype, must be acceptable to `matplotlib.pyplot.savefig()`.
"brdr_nation" : True
Boolean for drawing national borders on the map.
"brdr_nation_rgba" : [0,0,0,0]
The red, green, blue, and alpha values for the national borders.
"brdr_state" : False
Boolean for drawing state borders as defined by Natural Earth dataset.
"brdr_state_rgba" : [0,0,0,0.67]
The red, green, blue, and alpha values for the national borders.
"cmap" : "jet"
The name of the colourmap to be used when rendering the map.
"cmap_limits" : None
The numeric limits of the colourmap. Defaults to None, where the lower
and upper limits of the plotted data are used as the colourmap limits.
"cbar" : True
Boolean for rendering a colourbar.
"cbar_limits" : None
The numeric limits of the colourbar. Defaults to None, where the lower
and upper limits of the plotted data are used as the colourbar limits.
Returns
-------
python_tamer.ExposureMap
Returns the ExposureMap object that was input with an updated map_options
field (if the user has specificied any changes to the default map_options).
"""
if map_options is not None :
self.map_options.update(map_options)
# TODO: Add custom sizing and resolution specifications
fig = plt.figure(figsize=(self.map_options['img_size'][0]/2.54,
self.map_options['img_size'][1]/2.54))
# TODO: Accept custom projections
proj = ccrs.Mercator()
# TODO: Add support for multiple plots per figure (too complex? consider use cases)
ax = fig.add_subplot(1,1,1,projection = proj)
# TODO: Increase flexibility of borders consideration
if self.map_options['brdr_nation'] :
ax.add_feature(cfeat.BORDERS)
# TODO: Consider first-last versus min-max - how can we avoid accidentally flipping images
extents=[self.lon[0],self.lon[-1],self.lat[0],self.lat[-1]]
ax.set_extent(extents)
# Confusingly, this code correctly translate the lat/lon limits into the projected coordinates
extents_proj = proj.transform_points(ccrs.Geodetic(),np.array(extents[:2]),np.array(extents[2:]))
extents_proj = extents_proj[:,:2].flatten(order='F')
# TODO: Custom colormaps, interpolation, cropping
im = ax.imshow(self.map,extent=extents_proj,transform=proj,origin='lower',
cmap=self.map_options['cmap'],interpolation='bicubic')
# TODO: Add more advanced title interpretation (i.e. smart date placeholder)
if self.map_options['title'] is not None :
ax.set_title(self.map_options['title'])
# TODO: Add support for horizontal
if self.map_options['cbar'] :
cb = plt.colorbar(im, ax=ax, orientation='horizontal',pad=0.05,fraction=0.05)
cb.ax.set_xlabel(self.units)
# TODO: Add plot title, small textbox description, copyright from dataset, ticks and gridlines
if self.map_options['save'] :
# Generate timestamp filename if relying on default
if self.map_options['img_filename'] == "timestamp" :
img_filename=dt.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
plt.savefig(self.map_options['img_dir']+img_filename+"."+self.map_options['img_filetype'],
bbox_inches="tight",dpi=self.map_options['img_dpi'])
plt.show()
return self
[docs]class ExposureMapSequence :
""" Class for generating multiple Exposure Maps in a single operation
The ExposureMapSequence class is a framework for generating multiple maps following
a given sequence. The basic workflow begins by declaring an object of this class and
collecting the data from the source NetCDF files. The process is designed with
memory efficiency in mind, so data is loaded one year at a time and put into pixel
histograms akin to the ExposureMap class behaviour. However, in this class we allow for
multiple histograms to be stored within a single ExposureMapSequence object. Next,
the maps are calculated by the calculate_maps function. Multiple maps can be calculated for each histogram if the user
has specified multiple statistics that they want to calculate. Lastly, the maps are
rendered and saved by the save_maps function.
Parameters
----------
src_filename_format : str
Describes the filename of the netCDF files containing the data with 'yyyy' in place
of the year.
src_directory : str
The directory where the data is stored. Must end with a slash.
Example
-------
In this example, we produce a basic sequence of monthly average doses for 2020::
example = ExposureMapSequence()
example = example.collect_data('monthly',year_selection=[2020],units=["SED"])
example = example.calculate_maps(statistic='Mean')
example.save_maps(save=True,show=True)
In this example, we produce a basic sequence of annual average doses for each year
of the dataset::
example = ExposureMapSequence()
example = example.collect_data(['annual'],year_selection=[0],units=["SED"])
example = example.calculate_maps(statistic='Mean')
example.save_maps(save=True,show=True)
"""
def __init__(self,
src_filename_format='UVery.AS_ch02.lonlat_yyyy01010000.nc',
src_directory='C:/Data/UV/',
units=None,
bin_width=None,
map_options=None,
):
# start with data location to quickly get some metadata
self.src_filename_format = src_filename_format
self.src_directory = src_directory
# first we read the src_directory to check the total number of unique years available
data_dir_contents = os.listdir(self.src_directory)
# match filename format to find years
dataset_years = [ x for x in data_dir_contents
if re.findall(self.src_filename_format.replace("yyyy","[1-2][0-9]{3}"),x)]
char_year = self.src_filename_format.find('yyyy')
self.dataset_years = [ int(x[char_year:char_year+4]) for x in dataset_years ]
self.bin_width = bin_width
self.units = units
# declare an empty dictionary for map options
self.map_options={}
# if any input, update dictionary
if map_options is not None :
self.map_options = self.map_options.update(map_options)
[docs] def interpret_parameters(self) :
"""Interprets some parameter inputs and adjusts for consistency
This function will check that parameters are correctly entered and do some basic interpretation.
It checks the exposure_schedule, year_selection, units, and bin_width input. All input is converted
to lists as required.
"""
if hasattr(self,'exposure_schedule') and self.exposure_schedule is not None :
if isinstance(self.exposure_schedule,float) :
self.exposure_schedule = [np.repeat(self.exposure_schedule,24)]
elif isinstance(self.exposure_schedule,int) :
temp = self.exposure_schedule
self.exposure_schedule = [np.zeros(24)]
self.exposure_schedule[0][temp] = 1
elif isinstance(self.exposure_schedule,dict) :
temp = self.exposure_schedule
self.exposure_schedule = [np.zeros(24)]
for x in temp.items() :
self.exposure_schedule[0][int(x[0])] = x[1]
elif isinstance(self.exposure_schedule,np.ndarray) :
if len(np.shape(self.exposure_schedule)) == 1 and np.shape(self.exposure_schedule)[0] == 24 :
self.exposure_schedule = [self.exposure_schedule]
elif len(np.shape(self.exposure_schedule)) == 2 and np.shape(self.exposure_schedule)[1] == 24 :
# split an array of multiple schedules into a list of single schedule arrays
self.exposure_schedule = np.split(self.exposure_schedule,np.shape(self.exposure_schedule)[0])
else :
raise ValueError("Exposure schedule not a comprehensible numpy array, " +
"must be length 24 in first or second dimension")
elif isinstance(self.exposure_schedule,list) :
if len(self.exposure_schedule) == 24 and all(isinstance(x,(int,float)) for x in self.exposure_schedule) :
self.exposure_schedule = [np.array(self.exposure_schedule)]
for i in range(len(self.exposure_schedule)) :
if isinstance(self.exposure_schedule[i],float) :
self.exposure_schedule[i] = np.repeat(self.exposure_schedule[i],24)
elif isinstance(self.exposure_schedule[i],int) :
temp = self.exposure_schedule[i]
self.exposure_schedule[i] = np.zeros(24)
self.exposure_schedule[i][temp] = 1
elif isinstance(self.exposure_schedule[i],dict) :
temp = self.exposure_schedule[i]
self.exposure_schedule[i] = np.zeros(24)
for x in temp.items() :
self.exposure_schedule[i][int(x[0])] = x[1]
elif isinstance(self.exposure_schedule[i],np.ndarray) :
if not (len(np.shape(self.exposure_schedule[i])) == 1
and np.shape(self.exposure_schedule[i])[0] == 24 ):
raise ValueError("Exposure schedule list contains an incomprehensible entry, " +
"a numpy array that is not length 24")
elif isinstance(self.exposure_schedule[i],list) :
if len(self.exposure_schedule[i]) == 24 :
self.exposure_schedule[i] = np.array(self.exposure_schedule[i])
else :
raise ValueError("Exposure schedule list contains an incomprehensible entry, " +
"a list that is not length 24")
else :
raise TypeError("Exposure schedule list contains an incomprehensible entry")
else :
raise TypeError("Exposure schedule must be a list of length-24 numpy arrays or similar")
######################################################################################################
if hasattr(self,'year_selection') and self.year_selection is not None :
if isinstance(self.year_selection,int) :
if self.year_selection==0:
self.year_selection = [np.array([x]) for x in self.dataset_years]
else:
self.year_selection = [np.array([self.year_selection])]
elif isinstance(self.year_selection,np.ndarray) :
if len(np.shape(self.year_selection)) == 1 :
self.year_selection = [self.year_selection]
else :
raise ValueError("Year selection should be a list of numpy arrays, " +
"provided numpy array has incomprehensible shape")
elif isinstance(self.year_selection,list) :
if all([isinstance(x,int) for x in self.year_selection]) and all(x!=0 for x in self.year_selection) :
self.year_selection = [np.array(self.year_selection)]
else :
i=0
for k in range(len(self.year_selection)) :
if isinstance(self.year_selection[i],int) :
if self.year_selection[i] == 0 :
temp = self.year_selection[0:i] + [np.array([x]) for x in self.dataset_years]
if i != len(self.year_selection)-1 :
temp = temp + self.year_selection[i+1:]
self.year_selection = temp
i = i + len(self.dataset_years) - 1
else :
self.year_selection[i] = np.array([self.year_selection[i]])
elif isinstance(self.year_selection[i],list) :
self.year_selection[i] = np.array(self.year_selection[i])
elif not isinstance(self.year_selection[i],np.ndarray) :
raise TypeError("Year selection list must contain ints, lists, or numpy arrays")
i=i+1
else :
raise TypeError("Year selection must be an int, numpy array, or list of numpy arrays")
for i in range(len(self.year_selection)) :
if all(self.year_selection[i] == 0) :
self.year_selection[i] = np.array(self.dataset_years)
#####################################################################################################
if hasattr(self,'units') and self.units is not None :
if isinstance(self.units,str) :
self.units = [self.units]
elif isinstance(self.units,list) :
if not all(isinstance(x,str) for x in self.units) :
raise TypeError("Units input must be a list of strings")
else :
raise TypeError("Units input must be a list of strings")
for i in range(len(self.units)) :
if not isinstance(self.units[i],str) :
raise TypeError("Units input must be a list of strings")
if self.units[i] not in ["SED","UVIh","UVI","J m-2","W m-2","mW m-2"] :
raise ValueError("Units input must be list of accepted unit strings, " +
"those being SED, UVIh, J m-2, UVI, W m-2, or mW m-2")
if hasattr(self,'bin_width') :
if self.bin_width is None :
self.bin_width = []
for unit in self.units :
self.bin_width.append({
"SED" : 0.1,
"J m-2" : 10,
"UVI" : 0.1,
"W m-2" : 0.0025,
"mW m-2" : 2.5
}[unit])
elif isinstance(self.bin_width,(int,float)) :
self.bin_width = [self.bin_width]
return self
[docs] def collect_data(self,
day_selection,
exposure_schedule=[1.0],
year_selection=[0],
units=["SED"],
bin_width=None):
"""Loads data into multiple pixel histograms
This function loads all of the necessary data and compiles it into one or
multiple histograms. All parameters are designed to be interpreted as lists
of arrays or lists of strings, where each list entry corresponding to a
different histogram in the sequence. So to create a sequence of maps
corresponding to the months of the year, the day_selection input would be a
list of 12 arrays, the first containing numbers from 1 to 31, the second
containing numbers from 32 to 59, and so on.
The user specifies the day_selection and the year_selection as two separate
numerical inputs, rather than specifying dates. This make the interpretation
of the sequence simpler. However, to simplify the user experience, the
day_selection input can include keywords to be automatically interpreted as
days of the year.
Parameters
----------
day_selection : list, str, array
A list of arrays and/or strings. Keywords interpretable in such a list
include the (english) names of the 12 months (at least the first three
letters), the names of the four seasons (fall or autumn is accepted),
or the words "year" or "annual" to indicate the full year. These
keywords are replaced by the corresponding array of days in the year.
Note that the 29th of February is removed from consideration should it
arise. Note also that the seasons are the meteorological seasons, i.e.
the three month blocks JJA, SON, DJF, and MAM for summer, autumn,
winter, and spring respectively.
The user can alternatively enter a special string instead of a list.
The string "monthly" generates a list of 12 arrays according to the
months whereas the string "seasons" generates a list of 4 arrays
according to the four seasons.
exposure_schedule : list, float, int, dict, array
If the user enters a float, this float value will be repeated across a
length-24 array to make the exposure schedule. For example, entering
1.0 (not 1) will generate an array of 24 ones.
If the user enters an int, i, a length-24 array of zeroes will be
generated with the ith entry being set to 1. For example, entering 1
(not 1.0) will generate an array that reads [0,1,0,0...] (length 24).
If the user enters a dict, they can specify the values of a few
particular entries in a length-24 array where unspecified entries have
a value of zero. For example, entering {0:0.5, 2:0.8, 3:1} will
generate and array the reads [0.5, 0, 0.8, 1, 0...] (length 24).
If the user enters an array, it must be 1 dimensional with length 24
or 2 dimensional with the second dimension having length 24 (allowing
the user to specify multiple schedules).
If the user enters a list, each entry of that list is interpreted using
the rules listed above, with the caveat that arrays within a list cannot
be 2 dimensional.
year_selection : list, array, int
The years across which the data should be pulled. Input should be a list
of arrays of ints corresponding to years available in the dataset. Each
list entry corresponds to a pixel histogram. The user can enter 0 as a
shortcut for using all available years. For example, an input might be
[numpy.arange(2010,2020),[0],0]. The first list entry is an array of a
decade of years, straightforward enough. The second list entry is [0].
This is equivalent to writing numpy.arange(2004,2021) i.e. it produces
an array of the available years of the dataset. The last entry is 0,
this produces a sequence of individual years. So the input could be
equivalently written as [numpy.arange(2010,2020),numpy.arange(2004,2021),
numpy.array([2004]),numpy.array([2005]),numpy.array([2006])...] and so
on until 2020.
units : list, optional
The list of units for each pixel histogram. Acceptable strings are "SED",
"J m-2", "UVIh", "UVI", "W m-2", and "mW m-2". Defaults to SED.
bin_width : list, optional
The bin width for each histogram. By default, these values are defined
automatically according to the units input.
Returns
-------
ExposureMapSequence
The object has the hist_specs and hists fields added detailing the pixel
histograms.
Example
-------
In this example, we produce a basic sequence of monthly average doses for 2020::
example = ExposureMapSequence()
example = example.collect_data('monthly',year_selection=[2020],units=["SED"])
example = example.calculate_maps(statistic='Mean')
example.save_maps(save=True,show=True)
In this example, we produce a basic sequence of annual average doses for each year
of the dataset::
example = ExposureMapSequence()
example = example.collect_data(['annual'],year_selection=[0],units=["SED"])
example = example.calculate_maps(statistic='Mean')
example.save_maps(save=True,show=True)
"""
# this subroutine handles keyword inputs (monthly, seasonal, etc)
self.day_selection, self.day_input_flt, self.day_nonstring = str2daysofyear(day_selection)
self.exposure_schedule = exposure_schedule
self.year_selection = year_selection
if units is not None :
self.units = units
if bin_width is not None :
self.bin_width = bin_width
self = self.interpret_parameters()
############################################################################
lengths = {'day_selection' : len(self.day_selection),
'exposure_schedule' : len(self.exposure_schedule),
'year_selection' : len(self.year_selection),
'units' : len(self.units),
'bin_width' : len(self.bin_width)}
self.num_hists = max(lengths.items(), key=lambda x: x[1])[1]
assert all(x == self.num_hists or x == 1 for x in lengths.values()), (
"Inputs must be lists of length 1 or num_hists")
self.iterators = [x[0] for x in lengths.items() if x[1]==self.num_hists]
self.hist_specs = []
for i in range(self.num_hists) :
hist_spec = {
'day_selection' : self.day_selection[0],
'exposure_schedule' : self.exposure_schedule[0],
'year_selection' : self.year_selection[0],
'units' : self.units[0],
'bin_width' : self.bin_width[0]}
for x in self.iterators :
hist_spec[x] = self.__dict__[x][i]
self.hist_specs = self.hist_specs + [hist_spec]
# find unique years to be loaded (probably all years but have to check)
unique_years = set(self.year_selection[0])
if len(self.year_selection) > 1 :
for i in range(1,len(self.year_selection)) :
unique_years.update(self.year_selection[i])
unique_years = sorted(unique_years)
# declare empty hists
self.hists = [None for x in range(self.num_hists)]
for i in range(len(unique_years)) :
year = unique_years[i]
print("Processing year "+str(year)) #should use logging, don't yet know how
dataset=nc.Dataset(self.src_directory+self.src_filename_format.replace('yyyy',str(year)))
dataset.set_auto_mask(False) #to get normal arrays (faster than default masked arrays)
if i == 0 :
# TODO: this should also be done by some initial dataset analysis, but that's a drastic
# design overhaul
self.lat = dataset['lat'][:]
self.lon = dataset['lon'][:]
# now to determine the unique days for the specific year
unique_days = set()
for j in range(self.num_hists) :
if year in self.hist_specs[j]['year_selection'] :
unique_days.update(self.hist_specs[j]['day_selection'])
unique_days = sorted(unique_days)
# TODO: when metadata fixed, update this to actually interpret dates (cftime)
# reformat to index for netCDF
nc_day_sel = [False for i in range(365*24)]
# reshape false array to have first dimension 24 (hours in day)
nc_day_sel = assert_data_shape_24(nc_day_sel)
# set the appropriate days as true
nc_day_sel[:,np.array(unique_days)-1] = True
# correct for leap years (skip feb 29)
if year % 4 == 0 :
nc_day_sel = np.concatenate(
(nc_day_sel[:,0:59],np.full((24,1),False),nc_day_sel[:,59:]),axis=1)
# flatten time_subset array back to one dimension
nc_day_sel = nc_day_sel.flatten(order='F')
#load data
data_year = assert_data_shape_24(dataset['UV_AS'][nc_day_sel,:,:])
#sort data into histograms
for j in range(self.num_hists) :
if year in self.hist_specs[j]['year_selection'] :
sub_day_sel = [ True if x in self.hist_specs[j]['day_selection']
else False for x in unique_days ]
temp_data = data_year[:,sub_day_sel,:,:]
# Apply the exposure schedule, differently for doses vs intensity
if self.hist_specs[j]['units'] in ["SED","J m-2","UVIh"] :
# if calculating doses
print(' Calculating doses')
temp_data = np.sum(np.reshape(
self.hist_specs[j]['exposure_schedule'],[24,1,1,1]) * temp_data,axis=0)
# more complex when doing intensity
else :
# assume elsewise calculating intensity (i.e. UV-index) then limit data selection
# to schedule (remembering that default schedule is just ones)
print(' Slicing data with exposure schedule')
# select only those hours with nonzero entry in exposure schedule
temp_data = temp_data[self.hist_specs[j]['exposure_schedule'] != 0,:,:,:]
# select nonzero values from exposure schedule
exposure_schedule_nonzero = self.hist_specs[j]['exposure_schedule'][
self.hist_specs[j]['exposure_schedule'] != 0]
# if any nonzero entries aren't 1, multiply data accordingly
if (exposure_schedule_nonzero != 1).any() :
temp_data *= np.reshape(exposure_schedule_nonzero,[len(exposure_schedule_nonzero),1,1,1])
# recombine first two dimensions (hour and day) back into time ready for histogram
temp_data = assert_data_shape_24(temp_data,reverse=True)
# now multiply data by conversion factor according to desired untis
# TODO: Should expand upon this in reference files
temp_data *= {"SED":0.9, "J m-2":90, "UVIh":1, "UVI":1, "W m-2":0.025, "mW m-2":25}[self.hist_specs[j]['units']]
# if this is the first iteration, declare a hist
if 'num_bins' not in self.hist_specs[j] :
# seems like useful metadata to know bin n and edges
self.hist_specs[j]['num_bins'] = int(np.nanmax(temp_data) // self.hist_specs[j]['bin_width'] ) + 2
self.hist_specs[j]['bin_edges'] = (np.array(range(self.hist_specs[j]['num_bins']+1))
- 0.5) * self.hist_specs[j]['bin_width']
# this form allows for weird custom bin edges, but probably will never use that
self.hist_specs[j]['bin_centers'] = (self.hist_specs[j]['bin_edges'][:-1]
+ 0.5 * np.diff(self.hist_specs[j]['bin_edges']))
# TODO: think about possible cases where dimensions could differ
self.hists[j]=np.zeros([self.hist_specs[j]['num_bins'],
np.shape(temp_data)[-2],np.shape(temp_data)[-1]], dtype=np.int16)
else :
new_num_bins = int(np.nanmax(temp_data) // self.hist_specs[j]['bin_width']) + 2 - self.hist_specs[j]['num_bins']
# check if new data requires extra bins in pix_hist
if new_num_bins > 0 :
# append zeros to pix hist to make room for larger values
self.hists[j] = np.concatenate((self.hists[j],np.zeros(
[new_num_bins,np.shape(self.hists[j])[-2],np.shape(self.hists[j])[-1]],
dtype=np.int16)),axis=0)
# update bin information
self.hist_specs[j]['num_bins'] = self.hist_specs[j]['num_bins'] + new_num_bins
self.hist_specs[j]['bin_edges'] = (np.array(range(self.hist_specs[j]['num_bins']+1))
- 0.5) * self.hist_specs[j]['bin_width']
self.hist_specs[j]['bin_centers'] = (self.hist_specs[j]['bin_edges'][:-1]
+ 0.5 * np.diff(self.hist_specs[j]['bin_edges']))
# TODO: Add check in case bins get "full" (i.e. approach int16 max value)
# now put data into hist using apply_along_axis to perform histogram for each pixel
print(" Calculating and adding to pixel histograms")
self.hists[j][:,:,:] += np.apply_along_axis(lambda x:
np.histogram(x,bins=self.hist_specs[j]['bin_edges'])[0],0,temp_data)
return self
[docs] def calculate_maps(self,statistic=None,titles=None,filenames="auto") :
"""Calcualte the maps from the pixel histograms
This function calculates maps from the pixel histograms and generates
titles and filenames for each map. Note that the number of maps can
be greater than the number of pixel histograms if more than one
statistic is specified.
Parameters
----------
statistic : list, str
The statistical descriptor to be calculated from the pixel histograms to be later
represented on the rendered map. Must contain at least one of these keywords:
"mean", "median" or "med", "sd" or "std" or "stdev", "max" or "maximum", "min" or
"minimum". The keyword "prct" or "percentile" is also accepted so long as it is
preceded by a two-digit integer specifying the desired percentile from 01 to 99.
*Planned:* the string can be a formula using any of the keywords above, and
basic mathematical operators (+, -, *, /, **) and numeric factors.
titles : list, optional
If the user does not wish to use the automatically generated map titles,
they can enter them with this parameter. This must be a list of strings
with a length equal to the number of maps produced.
filenames : str, optional
Filenames are generated to match the titles by default, but the user can
alternatively enter them manually with this parameter.
Returns
-------
ExposureMapSequence
The object is appended with maps, map_specs, and num_maps fields.
Example
-------
In this example, we produce a basic sequence of annual average doses for each year
of the dataset::
example = ExposureMapSequence()
example = example.collect_data(['annual'],year_selection=[0],units=["SED"])
example = example.calculate_maps(statistic='Mean')
example.save_maps(save=True,show=True)
"""
if statistic is not None :
self.statistic = statistic
if isinstance(self.statistic,str) :
self.statistic = [self.statistic]
# declare array of nans to fill with maps
self.maps = np.full([self.num_hists * len(self.statistic)] +
list(np.shape(self.hists[0])[1:]),np.nan)
if titles is not None :
self.titles = titles
else :
self.titles = [str(x) for x in range(self.num_hists * len(self.statistic))]
if isinstance(filenames,str) and filenames == "auto" :
self.filenames = [str(x) for x in range(self.num_hists * len(self.statistic))]
else :
self.filenames = filenames
mapnum = 0
hist_inds = []
stat_inds = []
for i in range(len(self.statistic)) :
for j in range(self.num_hists) :
self.maps[mapnum,:,:] = calculate_map_from_hists(
self.hists[j],self.statistic[i],self.hist_specs[j]['bin_centers'])
if titles is None :
if filenames == "auto" :
self.titles[mapnum], self.filenames[mapnum] = gen_map_title(**{
**self.hist_specs[j],
'statistic':self.statistic[i]},filename=True)
else :
self.titles[mapnum] = gen_map_title(**{
**self.hist_specs[j],
'statistic':self.statistic[i]},filename=False)
hist_inds = hist_inds + [j]
stat_inds = stat_inds + [i]
mapnum += 1
self.num_maps = mapnum
self.map_specs = {'hist' : hist_inds, 'statistic' : stat_inds}
return self
[docs] def save_maps(self,map_options=None,save=None,show=True,match_cmap_limits=True,schedule_diagram=True) :
"""Renders and saves the pre-calculated maps stored in the object
With the maps calculated, this function renders the maps with broad flexibility on aesthetic
options. It is mostly a wrapper for the render_map function.
Parameters
----------
map_options : dict, optional
A dictionary containing options for the render map function.
save : bool, optional
Although technically contained within map_options, this option is here so users can
more easily say whether they want the images to be saved or not.
show : bool, optional
An option to show the maps in a python figure window or not.
match_cmap_limits : bool, optional
When producing multiple maps, it can sometimes be desirable for the colormap limits
to be consistent across the set of images. This boolean enables that.
schedule_diagram : bool, optional
If true, a circular diagram is rendered on the map illustrating the schedule that
generated the map.
"""
if map_options is not None :
self.map_options.update(map_options)
if save is not None and isinstance(save,bool) :
self.map_options['save'] = save
if match_cmap_limits :
self.map_options['cmap_limits'] = [np.nanmin(self.maps),np.nanmax(self.maps)]
if self.map_options['cmap_limits'][0] < 0.1 * self.map_options['cmap_limits'][1] :
self.map_options['cmap_limits'][0] = 0
for i in range(self.num_maps) :
opts = self.map_options
opts['title'] = self.titles[i]
if self.filenames is not None :
opts['img_filename'] = self.filenames[i]
if schedule_diagram :
opts['schedule'] = self.hist_specs[self.map_specs['hist'][i]]['exposure_schedule']
render_map(
self.maps[i,:,:],
lat=self.lat,
lon=self.lon,
cbar_label=self.hist_specs[self.map_specs['hist'][i]]['units'],
show=show,
**opts)
[docs]def render_map(map,
lat=None,
lon=None,
title=None,
save=True,
show=True,
schedule=None,
schedule_bbox=(-0.03,0,1,0.91),
img_filename=None,
img_dir="",
img_size=[20,15],
img_dpi=300,
img_filetype="png",
brdr_nation=True,
brdr_nation_rgba=[0,0,0,1],
brdr_state=True,
brdr_state_rgba=[0,0,0,0.75],
cmap="gist_ncar",
cmap_limits=None,
cbar=True,
cbar_limits=None,
cbar_label=None,
country_focus="CHE",
gridlines=True,
gridlines_dms=False,
mch_logo=True) :
"""Renders and saves maps
Renders and saves maps with a wide variety of aesthetic options.
Parameters
----------
map : array
The map to be rendered
lat : [type], optional
[description], by default None
lon : [type], optional
[description], by default None
title : [type], optional
[description], by default None
save : bool, optional
[description], by default True
show : bool, optional
[description], by default True
schedule : [type], optional
[description], by default None
schedule_bbox : tuple, optional
[description], by default (-0.03,0,1,0.91)
img_filename : [type], optional
[description], by default None
img_dir : str, optional
[description], by default ""
img_size : list, optional
[description], by default [20,15]
img_dpi : int, optional
[description], by default 300
img_filetype : str, optional
[description], by default "png"
brdr_nation : bool, optional
[description], by default True
brdr_nation_rgba : list, optional
[description], by default [0,0,0,1]
brdr_state : bool, optional
[description], by default True
brdr_state_rgba : list, optional
[description], by default [0,0,0,0.75]
cmap : str, optional
[description], by default "gist_ncar"
cmap_limits : [type], optional
[description], by default None
cbar : bool, optional
[description], by default True
cbar_limits : [type], optional
[description], by default None
cbar_label : [type], optional
[description], by default None
country_focus : str, optional
[description], by default "CHE"
gridlines : bool, optional
[description], by default True
gridlines_dms : bool, optional
[description], by default False
mch_logo : bool, optional
[description], by default True
"""
# TODO: Add custom sizing and resolution specifications
fig = plt.figure(figsize=(img_size[0]/2.54,img_size[1]/2.54))
# TODO: Accept custom projections
# proj = ccrs.Mercator()
proj = ccrs.Orthographic(central_longitude=(lon[0]+lon[-1])/2, central_latitude=(lat[0]+lat[-1])/2)
# TODO: Add support for multiple plots per figure (too complex? consider use cases)
ax = fig.add_subplot(1,1,1,projection = proj)
# TODO: Increase flexibility of borders consideration
if brdr_state :
state_brdrs = cfeat.NaturalEarthFeature(
category='cultural',
name='admin_1_states_provinces_lines',
scale='10m',
facecolor='none')
ax.add_feature(state_brdrs,linestyle="--",edgecolor=tuple(brdr_state_rgba),linewidth=0.5)
if brdr_nation :
ax.add_feature(cfeat.BORDERS,edgecolor=tuple(brdr_nation_rgba))
if country_focus is not None :
shpfilename = shapereader.natural_earth(resolution='10m',
category='cultural',name='admin_0_countries')
reader = shapereader.Reader(shpfilename)
countries = reader.records()
# this is a very janky search for Switzerland, but it's ultimately simpler than
# making geopandas a requirement for the library
for country in countries :
if country.attributes['ADM0_A3'] == country_focus :
break
assert country.attributes['ADM0_A3'] == country_focus, "country_focus input not recognised"
poly = country.geometry
msk_proj = proj.project_geometry (poly, ccrs.Geodetic()) # project geometry to the projection used by stamen
# plot the mask using semi-transparency (alpha=0.65) on the masked-out portion
ax.add_geometries( msk_proj, proj, facecolor='white', edgecolor='none', alpha=0.8)
# TODO: Consider first-last versus min-max - how can we avoid accidentally flipping images
extents=[lon[0],lon[-1],lat[0],lat[-1]]
ax.set_extent(extents,crs=ccrs.Geodetic())
# this code correctly translate the lat/lon limits into the projected coordinates
extents_proj = proj.transform_points(ccrs.Geodetic(),np.array(extents[:2]),np.array(extents[2:]))
extents_proj = extents_proj[:,:2].flatten(order='F')
if gridlines :
ax.gridlines(draw_labels=True, dms=gridlines_dms, x_inline=False, y_inline=False,linewidth=0.25,
ylocs=[46,46.5,47,47.5])
# TODO: Custom colormaps, interpolation, cropping
# Upscale matrix for better reprojection
# f = interp2d(lon, lat, map, kind='linear')
# latnew = np.linspace(lat[0], lat[-1], (len(lat)-1)*3+1)
# lonnew = np.linspace(lon[0], lon[-1], (len(lon)-1)*3+1)
# mapnew = f(lonnew, latnew)
# Upscale matrix for better reprojection
mapnew = zoom(map,3)
# show map with given cmap and set cmap limits
im = ax.imshow(mapnew,extent=extents,transform=ccrs.PlateCarree(),
origin='lower',cmap=cmap)
if cmap_limits is not None :
im.set_clim(cmap_limits[0],cmap_limits[1])
# colorbar
# TODO: Add support for horizontal vertical option
if cbar :
cb = plt.colorbar(im, ax=ax, orientation='horizontal',pad=0.05,fraction=0.05)
cb.ax.set_xlabel(cbar_label)
# show schedule diagram
if schedule is not None :
ax2 = inset_axes(ax, width="25%", height="25%", loc=2,
axes_class = get_projection_class('polar'),
bbox_to_anchor=tuple(schedule_bbox),
bbox_transform=ax.transAxes)
schedule_clock(ax2,schedule,title="Exposure schedule")
# TODO: Add more advanced title interpretation (i.e. smart date placeholder)
if title is not None :
ax.set_title(title)
if mch_logo :
ex = ax.get_extent()
mch_logo_img = plt.imread('python_tamer/mch_logo.png')
mch_logo_width = 0.15
mch_logo_pad = 0
# some maths to work out position, note image aspect ratio 5:1
mch_extents = [ex[1]-(ex[1]-ex[0])*mch_logo_width-(ex[1]-ex[0])*mch_logo_pad,
ex[1]-(ex[1]-ex[0])*mch_logo_pad,
ex[2]+(ex[3]-ex[2])*mch_logo_pad,
ex[2]+0.2*(ex[1]-ex[0])*mch_logo_width+(ex[3]-ex[2])*mch_logo_pad]
# zorder puts image on top (behind mask otherwise for some reason)
ax.imshow(mch_logo_img,extent=mch_extents,zorder=12)
# TODO: Add plot title, small textbox description, copyright from dataset, ticks and gridlines
if save :
# Generate timestamp filename if relying on default
if img_filename is None :
if title is not None :
img_filename = format_filename(title)
else :
img_filename=dt.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
elif img_filename == "timestamp" :
img_filename=dt.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
plt.savefig(img_dir+img_filename+"."+img_filetype,
bbox_inches="tight",dpi=img_dpi)
if show :
plt.show()
[docs]def schedule_clock(axes,schedule,title=None,title_size=9,center=0.25,rmax=1) :
"""Generates a clock-style representation of an exposure schedule
[extended_summary]
Parameters
----------
axes : matplotlib.axes.Axes
The polar axes upon which the clock will be plotted
schedule : list or numpy.ndarray
The exposure schedule - a length-24 vector of hourly exposure proportions
title : str, optional
Title of the exposure clock
title_size : int, optional
[description], by default 9
center : float, optional
[description], by default 0.25
rmax : int, optional
[description], by default 1
Returns
-------
[type]
[description]
"""
axes.bar(
np.arange(24)/24*2*np.pi,
schedule,
width=2*np.pi/24,
align='edge',
bottom=center)
axes.bar(0,0.25,width=2*np.pi,color='k')
# Set the circumference labels
axes.set_xticks(np.linspace(0, 2*np.pi, 8, endpoint=False))
axes.set_xticklabels(np.linspace(0, 24, 8, endpoint=False,dtype=int),fontsize=8)
axes.tick_params(axis='both',which='major', pad=-3)
axes.set_yticks([0.5+center])
axes.set_yticklabels(['0.5'],fontsize=5)
axes.grid(True,color='black',linewidth=0.25)
# Make the labels go clockwise
axes.set_theta_direction(-1)
# Place 0 at the top
axes.set_theta_offset(np.pi/2)
# format grid and fake ticks
for t in np.linspace(0, 2*np.pi, 24, endpoint=False):
axes.plot([t,t], np.array([0.95,1.1])*rmax+center, lw=0.5, color="k")
for t in np.linspace(0, 2*np.pi, 8, endpoint=False):
axes.plot([t,t], np.array([0.9,1.1])*rmax+center, color="k")
if title is not None :
axes.set_title(title,fontsize=title_size)
axes.set_rmax(rmax+center)
return axes
[docs]def gen_map_title(
statistic = None,
exposure_schedule=None,
hour=None,
units=None,
year_selection=None,
day_selection=None,
filename=False,
**kwargs) :
if units in ['SED','J m-2','UVIh'] :
if all(exposure_schedule == np.ones(24)) :
title = 'UV daily doses'
else :
title = 'UV doses'
elif units in ['W m-2','UVI','mW m-2'] :
if np.sum(exposure_schedule)==1 and all(x in [0,1] for x in exposure_schedule) :
#user chosen just one hour
hour=np.where(exposure_schedule)[0][0]
title = 'UV intensity between ' + str(hour) + 'h-' + str(hour+1) + 'h'
else :
title = 'UV intensity'
else :
raise ValueError('Units must be SED, J m-2, UVIh, UVI, W m-2, or mW m-2')
title = statistic + ' of ' + title
ayear = pd.date_range(start="2010-01-01",end="2010-12-31")
ds = {'year' : ayear.dayofyear.values.tolist()}
ds['winter (DJF)'] = [x for i in [12,1,2] for x in ayear[ayear.month == i].dayofyear.values.tolist()]
ds['spring (MAM)'] = [x for i in [3,4,5] for x in ayear[ayear.month == i].dayofyear.values.tolist()]
ds['summer (JJA)'] = [x for i in [6,7,8] for x in ayear[ayear.month == i].dayofyear.values.tolist()]
ds['autumn (SON)'] = [x for i in [9,10,11] for x in ayear[ayear.month == i].dayofyear.values.tolist()]
ds['January'] = ayear[ayear.month == 1].dayofyear.values.tolist()
ds["February"] = ayear[ayear.month == 2].dayofyear.values.tolist()
ds["March"] = ayear[ayear.month == 3].dayofyear.values.tolist()
ds["April"] = ayear[ayear.month == 4].dayofyear.values.tolist()
ds["May"] = ayear[ayear.month == 5].dayofyear.values.tolist()
ds["June"] = ayear[ayear.month == 6].dayofyear.values.tolist()
ds["July"] = ayear[ayear.month == 7].dayofyear.values.tolist()
ds["August"] = ayear[ayear.month == 8].dayofyear.values.tolist()
ds["September"] = ayear[ayear.month == 9].dayofyear.values.tolist()
ds["October"] = ayear[ayear.month == 10].dayofyear.values.tolist()
ds["November"] = ayear[ayear.month == 11].dayofyear.values.tolist()
ds["December"] = ayear[ayear.month == 12].dayofyear.values.tolist()
day_str = None
for item in ds.items() :
if set(day_selection) == set(item[1]) :
day_str = item[0]
break
if day_str == 'year' :
if len(year_selection) == 1 :
title = title + ' for the year of ' + str(year_selection[0])
elif all(np.diff(year_selection)==1) :
title = title + ' for the years ' + str(np.min(year_selection)) + '-' + str(np.max(year_selection))
else :
title = title + ' for the years: ' + np.array2string(year_selection,separator=', ')
elif day_str is not None :
title = title + ' for ' + day_str
if len(year_selection) == 1 :
title = title + ' ' + str(year_selection[0])
elif all(np.diff(year_selection)==1) :
title = title + ', ' + str(np.min(year_selection)) + '-' + str(np.max(year_selection))
else :
title = title + ' ' + np.array2string(year_selection,separator=', ')
else :
# TODO: potentially make this workable with "custom day selection" placeholder in title
raise ValueError("Day selection not recognised, auto-title cannot proceed")
if filename :
custom = False
filename = "UV." + units + '.' + statistic + '.'
if len(year_selection) == 1 :
filename = filename + str(year_selection[0]) + '.'
elif all(np.diff(year_selection)==1) :
filename = filename + str(np.min(year_selection)) + '-' + str(np.max(year_selection)) + '.'
else :
filename = filename + str(year_selection[0]) + '-custom' + '.'
custom = True
day_str_filenamer = {
"January" : "01",
"February" : "02",
"March" : "03",
"April" : "04",
"May" : "05",
"June" : "06",
"July" : "07",
"August" : "08",
"September" : "09",
"October" : "10",
"November" : "11",
"December" : "12",
"winter (DJF)" : "s-",
"spring (MAM)" : "s-",
"summer (JJA)" : "s-",
"autumn (SON)" : "s-",
"year" : "year"}
filename = filename + day_str_filenamer[day_str]
if hour is not None :
filename = filename + '.' + str(hour) + 'h'
if custom :
filename = filename + '.created_' + dt.datetime.now().strftime('%Y%m%d_%H%M%S')
filename = format_filename(filename)
return title, filename
else :
return title
[docs]def calculate_map_from_hists(pix_hist,statistic,bin_centers) :
# Begin by defining the easy options that only require two inputs
basic_descriptor_functions = {
"mean": hist_mean,
"median": lambda x,y: hist_percentile(x,y,0.5),
"med": lambda x,y: hist_percentile(x,y,0.5),
"sd": hist_stdev,
"std": hist_stdev,
"stdev": hist_stdev,
"max": hist_max,
"maximum": hist_max,
"min": hist_min,
"minimum":hist_min
}
# we can check if the chosen statistic is basic or advanced
if statistic.lower() in basic_descriptor_functions.keys() :
# in this case, we can simply select the basic function from the dict...
descriptor_function = basic_descriptor_functions[statistic.lower()]
# ...and execute it across the map
map = np.apply_along_axis(lambda x: descriptor_function(x,bin_centers),0,pix_hist)
# TODO: a loose space could ruin this, need shunting yard algorithm of sorts
elif statistic.lower()[3:] == "prct" or statistic.lower()[3:] == "percentile" :
prct = int(statistic[0:2]) / 100
map = np.apply_along_axis(lambda x: hist_percentile(x,bin_centers,prct),0,pix_hist)
else :
# TODO: interpret self.statistic to build advanced functions (y i k e s)
raise ValueError("Statistic string not recognised")
return map