Source code for python_tamer.ExposureMap

from numpy.core.numeric import True_
import pandas as pd
import numpy as np
import datetime as dt
import os
import re
import netCDF4 as nc
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeat
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib.projections import get_projection_class
from cartopy.io import shapereader
from scipy.interpolate import interp2d
from scipy.ndimage import zoom
from .subroutines import *


[docs]class ExposureMap:
    """ A class for calculating maps based on user specifications

    Each instance of this class contains information required to calculate and illustrate a map
    of exposure information, be that simple averages or more advanced mathematical representations
    of exposure risk. The class is designed to have three core functions run in sequence, with
    room for flexibility should more advanced users desire it. First, the data is read and a pixel
    histogram is calculated. This allows much more data to be stored in memory and is the basis
    for performing this kind of data analysis on personal computers.


    Parameters
    ----------
    
    units : str
        The units of the quantity to be mapped. Must be "SED", "J m-2" or "UVIh" for doses or "UVI",
        "W m-2" or "mW m-2" for irradiances. Defaults to "SED".

    exposure_schedule : array
        A vector of values describing the relative exposure of each hour of the day. 0 indicates no
        exposure, 1 indicates full exposure, and a fractional value such as 0.5 would indicate 
        exposure for a total of 30 minutes within the hour, or a 50% partial exposure for the full
        hour, or anything equivalent. Values greater than 1 are allowed. Must have a length of 24 (for 
        each hour of the day) although a length of 1 is also allowed, in which case that number will 
        be immediately replicated across a 24-length vector. When not calculating doses, hours with 
        any non-zero entry in this vector are included, with the irradiance values being 
        multiplied by the corresponding non-zero value in the exposure schedule.

    bin_width : float
        The width of each bin in the pixel histogram. Value assumed to be in the same units as 
        defined by the units parameter. *Making bin_width excessively small can lead to high
        memory usage,* consider the underlying accuracy of the source data and be sure not to
        substantially exceed its precision with this parameter.

    statistic : str
        The statistical descriptor to be calculated from the pixel histograms to be later 
        represented on the rendered map. Must contain at least one of these keywords:
        "mean", "median" or "med", "sd" or "std" or "stdev", "max" or "maximum", "min" or 
        "minimum". 

        *Planned:* the string can be a formula using any of the keywords above,
        as well at "prct" or "percentile" preceeded by a number between 0 and 100, and 
        basic mathematical operators (+, -, *, /, **) and numeric factors.

    date_selection : list of dates
        The dates for which the irradiances are retrieved or the daily doses are calculated. 
        Defaults to None whereby the program selects all data within the src_directory that
        matches the src_filename_format.

    src_filename_format : str
        Describes the filename of the netCDF files containing the data with 'yyyy' in place 
        of the year.
    
    src_directory : str
        The directory where the data is stored. Must end with a slash.


    Example
    -------

    The code below shows a typical use case for the ExposureMap class. The long-term average daily doses
    (i.e. the chronic doses) for typical school children are calculated across Switzerland asssuming certain
    hours of exposure for journeying to and from school and having breaks for morning tea and lunch time. ::

        import python_tamer as pt
        import pandas as pd
        import numpy as np
        src_directory = 'C:/enter_your_src_directory_here'
        ER = pt.ER_Vernez_2015("Forehead","Standing") # Long-term average ER for foreheads in standing posture
        map = pt.ExposureMap(
            src_directory=src_directory,
            units = "J m-2",
            exposure_schedule = np.array([0  ,0  ,0  ,0  ,0  ,0  ,
                                0  ,0  ,0.5,0  ,0.5,0  ,
                                0.5,0.5,0  ,0  ,0.5,0  ,
                                0  ,0  ,0  ,0  ,0  ,0  ])*ER,
            bin_width = 25,
            date_selection = pd.date_range(start="2005-01-01",end="2014-12-31"),
            statistic = "mean",
            map_options={"title": "Chronic daily UV dose for typical school children, 2005-2014",
                        "save": False})
        map = map.collect_data().calculate_map()
        map.plot_map()


    """

    def __init__(self,units="SED",
    exposure_schedule=1,
    statistic="mean",
    bin_width = None,
    date_selection=None,
    map_options=None,
    src_filename_format='UVery.AS_ch02.lonlat_yyyy01010000.nc',
    src_directory='C:/Data/UV/'):
        # assigning options to fields in class with a few basic checks
        self.units = units

        self.exposure_schedule=np.array(exposure_schedule)
        if len(np.atleast_1d(self.exposure_schedule)) == 1 :
            self.exposure_schedule = np.repeat(self.exposure_schedule,24)

        self.statistic = statistic

        self.map_options = {
            "title" : "Test map",
            "save" : True,
            "img_size" : [20,15],
            "img_dpi" : 300,
            "img_dir" : "",
            "img_filename" : "timestamp",
            "img_filetype" : "png",
            "brdr_nation" : True,
            "brdr_nation_rgba" : [0,0,0,0],
            "brdr_state" : False,
            "brdr_state_rgba" : [0,0,0,0.67],
            "cmap" : "jet",
            "cmap_limits" : None,
            "cbar" : True,
            "cbar_limits" : None
        }
        if map_options is not None :
            self.map_options.update(map_options)

        self.src_filename_format = src_filename_format
        self.src_directory = src_directory

        self.date_selection = date_selection

        if bin_width is None :
            self.bin_width = {
                "SED" : 0.1, 
                "J m-2" : 10, 
                "UVI" : 0.1, 
                "W m-2" : 0.0025, 
                "mW m-2" : 2.5
            }[self.units]
        else :
            self.bin_width = bin_width
        
    
[docs]    def collect_data(self, src_directory=None,src_filename_format=None,
    date_selection=None,units=None,exposure_schedule=None,bin_width=None) :
        """Loads and manipulates data into histograms for each pixel of the underlying data

        In order to handle large amounts of data without exceeding memory limitations, files are
        loaded one at a time and the time dimension is removed, either by calculating daily doses
        or by simply taking the data as is. The resulting information is then stored not as a 
        list of specific values but rather binned into a histogram for each pixel. This process
        is repeated for each file required by the user input, building up the pixel histograms
        with more information that does not require additional memory.


        Parameters
        ----------

        src_filename_format : str
            Describes the filename of the netCDF files containing the data with 'yyyy' in place 
            of the year.
        
        src_directory : str
            The directory where the data is stored. Must end with a slash.

        date_selection : list of dates
            The dates for which the irradiances are retrieved or the daily doses are calculated. 
            Defaults to None whereby the program selects all data within the src_directory that
            matches the src_filename_format.

        units : str
            Name of units of desired output. This also indicates whether daily doses must be 
            calculated or not. Units of "SED", "J m-2", or "UVIh" will produce daily doses,
            units of "UVI", "W m-2" or "mW m-2" will not. 

        exposure_schedule : array
            A vector of values describing the relative exposure of each hour of the day. 0 indicates no
            exposure, 1 indicates full exposure, and a fractional value such as 0.5 would indicate 
            exposure for a total of 30 minutes within the hour, or a 50% partial exposure for the full
            hour, or anything equivalent. Values greater than 1 are allowed. Must have a length of 24 (for 
            each hour of the day) although a length of 1 is also allowed, in which case that number will 
            be immediately replicated across a 24-length vector. When not calculating doses, hours with 
            any non-zero entry in this vector are included, with the irradiance values being 
            multiplied by the corresponding non-zero value in the exposure schedule.

        bin_width : float
            The width of each bin in the pixel histogram. Value assumed to be in the same units as 
            defined by the units parameter. *Making bin_width excessively small can lead to high
            memory usage,* consider the underlying accuracy of the source data and be sure not to
            substantially exceed its precision with this parameter.


        Returns
        -------

        python_tamer.ExposureMap
            The input ExposureMap object is appended with new fields, `pix_hist` contains
            the counts for the histogram, and `bin_edges`, `bin_centers`, and `num_bins`
            all serve as metadata for the pixel histograms. `lat` and `lon` are also 
            added from the multi-file dataset to inform the pixel locations for map making
            further down the typical pipeline.
        
        
        Example
        -------

        The example code below shows how an ExposureMap class can be declared with the default parameters that
        can then be later redefined by collect_data() and the other class functions. ::

            import python_tamer as pt
            import pandas as pd
            import numpy as np
            src_directory = 'C:/enter_your_src_directory_here'
            ER = pt.ER_Vernez_2015("Forehead","Standing") # Long-term average ER for foreheads in standing posture
            map = pt.ExposureMap()
            map = map.collect_data(
                src_directory=src_directory,
                units = "J m-2",
                exposure_schedule = np.array([0  ,0  ,0  ,0  ,0  ,0  ,
                                     0  ,0  ,0.5,0  ,0.5,0  ,
                                     0.5,0.5,0  ,0  ,0.5,0  ,
                                     0  ,0  ,0  ,0  ,0  ,0  ])*ER,
                bin_width = 25,
                date_selection = pd.date_range(start="2005-01-01",end="2014-12-31")
            )
            map = map.calculate_map(statistic = "mean")
            map.plot_map(map_options={"title": "Chronic daily UV dose for typical school children, 2005-2014",
                                      "save": False})


        """

        # TODO: There must be a better way to do this
        if not (src_directory is None) :
            self.src_directory = src_directory
        if not (src_filename_format is None) :
            self.src_filename_format = src_filename_format
        if not (date_selection is None) :
            self.date_selection = date_selection
        if not (units is None) :
            self.units = units
        if not (exposure_schedule is None) :
            self.exposure_schedule = exposure_schedule
        if not (bin_width is None) :
            self.bin_width = bin_width

        # first we read the src_directory to check the total number of unique years available
        data_dir_contents = os.listdir(self.src_directory)
        # TODO: improve jankiness of this format-matching search for filenames
        char_year = self.src_filename_format.find('yyyy')
        dataset_years = [ x for x in data_dir_contents if re.findall(self.src_filename_format.replace("yyyy","[0-9]{4}"),x)]
        dataset_years = [ int(x[char_year:char_year+4]) for x in dataset_years ]

        # Now we can handle default options like "all"
        if type(self.date_selection) == str and self.date_selection == "all" :
            date_selection = pd.date_range(start=str(dataset_years[0])+"-01-01",
                end=str(dataset_years[-1])+"-12-31")
        else :
            date_selection = self.date_selection # TODO: much more interpretation options here

        #now we find unique years 
        list_of_years = sorted(set(date_selection.year))

        for i in range(len(list_of_years)) :
            year = list_of_years[i]
            print("Processing year "+str(year)) #should use logging, don't yet know how
            dataset=nc.Dataset(self.src_directory+self.src_filename_format.replace('yyyy',str(year))) 
            dataset.set_auto_mask(False) #to get normal arrays (faster than default masked arrays)

            if dataset.dimensions['time'].size == 24 :
                # needed if just a single day
                time_subset = [True for i in range(dataset.dimensions['time'].size)]
            else :
                # Next we pull a subset from the netCDF file
                # declare false array with same length of time dimension from netCDF
                time_subset = [False for i in range(dataset.dimensions['time'].size)] 
                # reshape false array to have first dimension 24 (hours in day)
                time_subset = assert_data_shape_24(time_subset) 
                # set the appropriate days as true
                time_subset[:,date_selection[date_selection.year == year].dayofyear-1] = True 
                # flatten time_subset array back to one dimension
                time_subset = time_subset.flatten(order='F')

            # load subset of data
            print("   Slicing netcdf data with time subset")
            data = dataset['UV_AS'][time_subset,:,:] #work in UVI by default because it's easy to read
            # TODO: check units of dataset files, CF conventions for UVI or W/m2

            # now to calculate doses if requested
            if self.units in ["SED","J m-2","UVIh"] :
                # if calculating doses
                print('   Calculating doses')
                data = assert_data_shape_24(data)
                data = np.sum(np.reshape(self.exposure_schedule,[24,1,1,1]) * data,axis=0)

            elif (self.exposure_schedule != np.ones(24)).any() :
                # assume elsewise calculating intensity (i.e. UV-index) then limit data selection according
                # to schedule (remembering that default schedule is just ones)
                print('   Slicing data with exposure schedule')
                # reshape so first dimension is 24 hours
                data = assert_data_shape_24(data)
                # select only those hours with nonzero entry in exposure schedule
                data = data[self.exposure_schedule != 0,:,:,:]
                # select nonzero values from exposure schedule
                exposure_schedule_nonzero = self.exposure_schedule[self.exposure_schedule != 0]

                # if any nonzero entries aren't 1, multiply data accordingly
                if (exposure_schedule_nonzero != 1).any() :
                    data *= np.reshape(exposure_schedule_nonzero,[len(exposure_schedule_nonzero),1,1,1])

                # recombine first two dimensions (hour and day) back into time ready for histogram
                data = assert_data_shape_24(data,reverse=True) 

            # now multiply data by conversion factor according to desired untis
            # TODO: Should expand upon this in reference files
            data *= {"SED":0.9, "J m-2":90, "UVIh":1, "UVI":1, "W m-2":0.025, "mW m-2":25}[self.units]

            # if this is the first iteration, declare a hist
            if i == 0 :
                # seems like useful metadata to know bin n and edges
                # TODO: reconsider where this belongs in the code (__init__?)
                self.num_bins = int(np.nanmax(data) // self.bin_width ) + 2
                self.bin_edges = (np.array(range(self.num_bins+1)) - 0.5) * self.bin_width 
                # this form allows for weird custom bin edges, but probably will never use that
                self.bin_centers = self.bin_edges[:-1] + 0.5 * np.diff(self.bin_edges)

                # TODO: think about possible cases where dimensions could differ
                self.pix_hist=np.zeros([self.num_bins,
                    np.shape(data)[-2],np.shape(data)[-1]], dtype=np.int16)

                # TODO: this should also be done by some initial dataset analysis, but that's a drastic
                # design overhaul
                self.lat = dataset['lat'][:]
                self.lon = dataset['lon'][:]

            else :
                new_num_bins = int(np.nanmax(data) // self.bin_width) + 2 - self.num_bins
                # check if new data requires extra bins in pix_hist
                if new_num_bins > 0 :
                    # append zeros to pix hist to make room for larger values
                    self.pix_hist = np.concatenate((self.pix_hist,np.zeros(
                        [new_num_bins,np.shape(self.pix_hist)[-2],np.shape(self.pix_hist)[-1]],
                        dtype=np.int16)),axis=0)
                    # update bin information
                    self.num_bins = self.num_bins + new_num_bins
                    self.bin_edges = (np.array(range(self.num_bins+1)) - 0.5) * self.bin_width 
                    self.bin_centers = self.bin_edges[:-1] + 0.5 * np.diff(self.bin_edges)

            # TODO: Add check in case bins get "full" (i.e. approach int16 max value)
            # now put data into hist using apply_along_axis to perform histogram for each pixel
            print("   Calculating and adding to pixel histograms")
            self.pix_hist[:,:,:] += np.apply_along_axis(lambda x: 
                np.histogram(x,bins=self.bin_edges)[0],0,data)

        return self

[docs]    def calculate_map(self,pix_hist=None,statistic=None,bin_centers=None) :
        """Calculates statistical descriptor values for pixel histograms to produce a map

        This function interprets the statistic string, which can either be a simple command
        such as "mean" or a more advanced formula of keywords. The corresponding function is 
        applied to each pixel of the pix_hist object within the ExposureMap class, essentially
        removing the first dimension and resulting in straightforward map to be plotted.


        Parameters
        ----------

        pix_hist : array
            A 3D array with the first dimension containing vectors of counts for histograms
            and the next two dimensions serving as pixel coordinates. See 
            `ExposureMap.collect_data()` for more information.

        statistic : str
            The statistical descriptor to be calculated from the pixel histograms to be later 
            represented on the rendered map. Must contain at least one of these keywords:
            "mean", "median" or "med", "sd" or "std" or "stdev", "max" or "maximum", "min" or 
            "minimum". 

            *Planned:* the string can be a formula using any of the keywords above,
            as well at "prct" or "percentile" preceeded by a number between 0 and 100, and 
            basic mathematical operators (+, -, *, /, **) and numeric factors.
            
        bin_centers : array
            The central numeric values corresponding to the bins in pix_hist. The 
            `ExposureMap.collect_data` function typically calculates these values from the
            given `bin_width` input.


        Returns
        -------
        python_tamer.ExposureMap
            The ExposureMap class object is appended with a map field containing a 2D array


        Example
        -------

        In the example below, the user imports some pre-calculated pixel histograms, thereby
        completing the ExposureMap workflow without using the `ExposureMap.ExposureMap.collect_data()`
        function. This can be useful if the data collection is timely and the user wants to
        produce multiple different maps. Note that the "custom data" used in this example is not
        included in the python-TAMER package, this simply illustrates a unique use-case. ::

            import python_tamer as pt
            # load custom data from an external file (not included)
            from custom_user_data import pix_hist, bin_centers, map_options 
            map = pt.ExposureMap(map_options = map_options)
            map = map.calculate_map(
                statistic = "median", 
                pix_hist = data, 
                bin_centers = bin_centers
            ).plot_map(save = False)
            map.calculate_map(statistic = "max").plot_map(map_options={"save" = False})
            map.calculate_map(statistic = "std").plot_map(map_options={"save" = False})


        """

        if not (pix_hist is None) :
            self.pix_hist = pix_hist
        if not (statistic is None) :
            self.statistic = statistic
        if not (bin_centers is None) :
            self.bin_centers = bin_centers

        # Begin by defining the easy options that only require two inputs
        basic_descriptor_functions = {
            "mean": hist_mean,
            "median": lambda x,y: hist_percentile(x,y,0.5),
            "med": lambda x,y: hist_percentile(x,y,0.5),
            "sd": hist_stdev,
            "std": hist_stdev,
            "stdev": hist_stdev,
            "max": hist_max,
            "maximum": hist_max,
            "min": hist_min,
            "minimum":hist_min
        }
        # we can check if the chosen statistic is basic or advanced
        if self.statistic.lower() in basic_descriptor_functions.keys() :
            # in this case, we can simply select the basic function from the dict...
            descriptor_function = basic_descriptor_functions[self.statistic.lower()]
            # ...and execute it across the map
            self.map = np.apply_along_axis(lambda x: descriptor_function(x,self.bin_centers),0,self.pix_hist)
        # TODO: a loose space could ruin this, need shunting yard algorithm of sorts
        elif self.statistic.lower()[2:] == "prct" or self.statistic.lower()[2:] == "percentile" :
            prct = int(self.statistic[0:1]) / 100
            self.map = np.apply_along_axis(lambda x: hist_percentile(x,self.bin_centers,prct),0,self.pix_hist)
        else :
            # TODO: interpret self.statistic to build advanced functions (y i k e s)
            print("WARNING: ExposureMap.statistic not recognised.")
        

        return self

[docs]    def plot_map(self,map_options=None) :
        """Renders and optionally saves a map of the ``map`` field in an ExposureMap object

        This function caps off the typical workflow for the ExposureMap class by rendering the contents
        of the map field. Many aesthetic factors are accounted for, contained within the
        `ExposureMap.map_options` dictionary.


        Parameters
        ----------

        map_options : dict, optional
            A collection of many typical options such as image and font sizes, colormaps, etc.
            The full range of options is listed below with their default values.

            "title" : "Test map" 
            The title to be rendered above the map. Can be left blank for no title. Can be 
            used to inform img_filename

            "save" : True 
            Boolean to declare whether the map should be saved as an image file or not.
            
            "img_size" : [20,15] 
            The size [width,height] of the image in cm.

            "img_dpi" : 300 
            The dots per inch of the saved image.

            "img_dir" : "" 
            The directory for the image to be saved in, leaving it blank should result
            in images being saved in the working directory.

            "img_filename" : "timestamp" 
            The image filename as a string. The default value of "timestamp" is a keyword
            indicating that the function should generate a filename based on the time at
            the moment of the calculation, specified with the format %Y%m%d_%H%M%S_%f 
            which includes millisecond precision.

            "img_filetype" : "png" 
            The image filetype, must be acceptable to `matplotlib.pyplot.savefig()`.

            "brdr_nation" : True 
            Boolean for drawing national borders on the map.

            "brdr_nation_rgba" : [0,0,0,0] 
            The red, green, blue, and alpha values for the national borders.

            "brdr_state" : False 
            Boolean for drawing state borders as defined by Natural Earth dataset.

            "brdr_state_rgba" : [0,0,0,0.67] 
            The red, green, blue, and alpha values for the national borders.

            "cmap" : "jet" 
            The name of the colourmap to be used when rendering the map.

            "cmap_limits" : None 
            The numeric limits of the colourmap. Defaults to None, where the lower
            and upper limits of the plotted data are used as the colourmap limits.

            "cbar" : True 
            Boolean for rendering a colourbar.

            "cbar_limits" : None 
            The numeric limits of the colourbar. Defaults to None, where the lower
            and upper limits of the plotted data are used as the colourbar limits.
        

        Returns
        -------

        python_tamer.ExposureMap
            Returns the ExposureMap object that was input with an updated map_options
            field (if the user has specificied any changes to the default map_options).


        """

        if map_options is not None :
            self.map_options.update(map_options)

        # TODO: Add custom sizing and resolution specifications
        fig = plt.figure(figsize=(self.map_options['img_size'][0]/2.54,
            self.map_options['img_size'][1]/2.54))

        # TODO: Accept custom projections
        proj = ccrs.Mercator()

        # TODO: Add support for multiple plots per figure (too complex? consider use cases)
        ax = fig.add_subplot(1,1,1,projection = proj)

        # TODO: Increase flexibility of borders consideration
        if self.map_options['brdr_nation'] :
            ax.add_feature(cfeat.BORDERS)

        # TODO: Consider first-last versus min-max - how can we avoid accidentally flipping images
        extents=[self.lon[0],self.lon[-1],self.lat[0],self.lat[-1]]
        ax.set_extent(extents)

        # Confusingly, this code correctly translate the lat/lon limits into the projected coordinates
        extents_proj = proj.transform_points(ccrs.Geodetic(),np.array(extents[:2]),np.array(extents[2:]))
        extents_proj = extents_proj[:,:2].flatten(order='F')

        # TODO: Custom colormaps, interpolation, cropping
        im = ax.imshow(self.map,extent=extents_proj,transform=proj,origin='lower',
            cmap=self.map_options['cmap'],interpolation='bicubic')

        # TODO: Add more advanced title interpretation (i.e. smart date placeholder)
        if self.map_options['title'] is not None :
            ax.set_title(self.map_options['title'])

        # TODO: Add support for horizontal
        if self.map_options['cbar'] :
            cb = plt.colorbar(im, ax=ax, orientation='horizontal',pad=0.05,fraction=0.05)
            cb.ax.set_xlabel(self.units)

        # TODO: Add plot title, small textbox description, copyright from dataset, ticks and gridlines
        if self.map_options['save'] :
            # Generate timestamp filename if relying on default
            if self.map_options['img_filename'] == "timestamp" :
                img_filename=dt.datetime.now().strftime('%Y%m%d_%H%M%S_%f')

            plt.savefig(self.map_options['img_dir']+img_filename+"."+self.map_options['img_filetype'],
                bbox_inches="tight",dpi=self.map_options['img_dpi'])

        plt.show()

        return self












[docs]class ExposureMapSequence :
    """ Class for generating multiple Exposure Maps in a single operation

    The ExposureMapSequence class is a framework for generating multiple maps following
    a given sequence. The basic workflow begins by declaring an object of this class and
    collecting the data from the source NetCDF files. The process is designed with 
    memory efficiency in mind, so data is loaded one year at a time and put into pixel
    histograms akin to the ExposureMap class behaviour. However, in this class we allow for
    multiple histograms to be stored within a single ExposureMapSequence object. Next,
    the maps are calculated by the calculate_maps function. Multiple maps can be calculated for each histogram if the user
    has specified multiple statistics that they want to calculate. Lastly, the maps are
    rendered and saved by the save_maps function.


    Parameters
    ----------

    src_filename_format : str
        Describes the filename of the netCDF files containing the data with 'yyyy' in place 
        of the year.
    
    src_directory : str
        The directory where the data is stored. Must end with a slash.


    Example
    -------

    In this example, we produce a basic sequence of monthly average doses for 2020::

        example = ExposureMapSequence()
        example = example.collect_data('monthly',year_selection=[2020],units=["SED"])
        example = example.calculate_maps(statistic='Mean')
        example.save_maps(save=True,show=True)

    In this example, we produce a basic sequence of annual average doses for each year
    of the dataset::

        example = ExposureMapSequence()
        example = example.collect_data(['annual'],year_selection=[0],units=["SED"])
        example = example.calculate_maps(statistic='Mean')
        example.save_maps(save=True,show=True)        

    """

    def __init__(self,
    src_filename_format='UVery.AS_ch02.lonlat_yyyy01010000.nc',
    src_directory='C:/Data/UV/',
    units=None,
    bin_width=None,
    map_options=None,
    ):
        # start with data location to quickly get some metadata
        self.src_filename_format = src_filename_format
        self.src_directory = src_directory

        # first we read the src_directory to check the total number of unique years available
        data_dir_contents = os.listdir(self.src_directory)

        # match filename format to find years
        dataset_years = [ x for x in data_dir_contents 
            if re.findall(self.src_filename_format.replace("yyyy","[1-2][0-9]{3}"),x)]

        char_year = self.src_filename_format.find('yyyy')
        self.dataset_years = [ int(x[char_year:char_year+4]) for x in dataset_years ]

        self.bin_width = bin_width
        self.units = units

        # declare an empty dictionary for map options
        self.map_options={}
        # if any input, update dictionary
        if map_options is not None :
            self.map_options = self.map_options.update(map_options)


[docs]    def interpret_parameters(self) :
        """Interprets some parameter inputs and adjusts for consistency

        This function will check that parameters are correctly entered and do some basic interpretation.
        It checks the exposure_schedule, year_selection, units, and bin_width input. All input is converted
        to lists as required.
        """

        if hasattr(self,'exposure_schedule') and self.exposure_schedule is not None :
            if isinstance(self.exposure_schedule,float) :
                self.exposure_schedule = [np.repeat(self.exposure_schedule,24)]

            elif isinstance(self.exposure_schedule,int) :
                temp = self.exposure_schedule
                self.exposure_schedule = [np.zeros(24)]
                self.exposure_schedule[0][temp] = 1

            elif isinstance(self.exposure_schedule,dict) :
                temp = self.exposure_schedule
                self.exposure_schedule = [np.zeros(24)]
                for x in temp.items() :
                    self.exposure_schedule[0][int(x[0])] = x[1]                

            elif isinstance(self.exposure_schedule,np.ndarray) :
                if len(np.shape(self.exposure_schedule)) == 1 and np.shape(self.exposure_schedule)[0] == 24 :
                    self.exposure_schedule = [self.exposure_schedule]
                elif len(np.shape(self.exposure_schedule)) == 2 and np.shape(self.exposure_schedule)[1] == 24 :
                    # split an array of multiple schedules into a list of single schedule arrays
                    self.exposure_schedule = np.split(self.exposure_schedule,np.shape(self.exposure_schedule)[0])
                else :
                    raise ValueError("Exposure schedule not a comprehensible numpy array, " +
                                    "must be length 24 in first or second dimension")

            elif isinstance(self.exposure_schedule,list) :
                if len(self.exposure_schedule) == 24 and all(isinstance(x,(int,float)) for x in self.exposure_schedule) :
                    self.exposure_schedule = [np.array(self.exposure_schedule)]
                
                for i in range(len(self.exposure_schedule)) :
                    if isinstance(self.exposure_schedule[i],float) :
                        self.exposure_schedule[i] = np.repeat(self.exposure_schedule[i],24)

                    elif isinstance(self.exposure_schedule[i],int) :
                        temp = self.exposure_schedule[i]
                        self.exposure_schedule[i] = np.zeros(24)
                        self.exposure_schedule[i][temp] = 1

                    elif isinstance(self.exposure_schedule[i],dict) :
                        temp = self.exposure_schedule[i]
                        self.exposure_schedule[i] = np.zeros(24)
                        for x in temp.items() :
                            self.exposure_schedule[i][int(x[0])] = x[1]   

                    elif isinstance(self.exposure_schedule[i],np.ndarray) :
                        if not (len(np.shape(self.exposure_schedule[i])) == 1 
                                and np.shape(self.exposure_schedule[i])[0] == 24 ):
                            raise ValueError("Exposure schedule list contains an incomprehensible entry, " + 
                                            "a numpy array that is not length 24")
                    
                    elif isinstance(self.exposure_schedule[i],list) :
                        if len(self.exposure_schedule[i]) == 24 :
                            self.exposure_schedule[i] = np.array(self.exposure_schedule[i])
                        else :
                            raise ValueError("Exposure schedule list contains an incomprehensible entry, " + 
                                            "a list that is not length 24")
                    
                    else :
                        raise TypeError("Exposure schedule list contains an incomprehensible entry")

            else :
                raise TypeError("Exposure schedule must be a list of length-24 numpy arrays or similar")
        ######################################################################################################            
        if hasattr(self,'year_selection') and self.year_selection is not None :
            if isinstance(self.year_selection,int) :
                if self.year_selection==0:
                    self.year_selection = [np.array([x]) for x in self.dataset_years]
                else:
                    self.year_selection = [np.array([self.year_selection])]
            elif isinstance(self.year_selection,np.ndarray) :
                if len(np.shape(self.year_selection)) == 1 :
                    self.year_selection = [self.year_selection]
                else :
                    raise ValueError("Year selection should be a list of numpy arrays, " +
                                    "provided numpy array has incomprehensible shape")
            elif isinstance(self.year_selection,list) :
                if all([isinstance(x,int) for x in self.year_selection]) and all(x!=0 for x in self.year_selection) :
                    self.year_selection = [np.array(self.year_selection)]
                else :
                    i=0
                    for k in range(len(self.year_selection)) :
                        if isinstance(self.year_selection[i],int) :
                            if self.year_selection[i] == 0 :
                                temp = self.year_selection[0:i] + [np.array([x]) for x in self.dataset_years]
                                if i != len(self.year_selection)-1 : 
                                    temp = temp + self.year_selection[i+1:]
                                self.year_selection = temp
                                i = i + len(self.dataset_years) - 1
                            else :
                                self.year_selection[i] = np.array([self.year_selection[i]])
                        elif isinstance(self.year_selection[i],list) :
                            self.year_selection[i] = np.array(self.year_selection[i])
                        elif not isinstance(self.year_selection[i],np.ndarray) :
                            raise TypeError("Year selection list must contain ints, lists, or numpy arrays")
                        i=i+1
            else :
                raise TypeError("Year selection must be an int, numpy array, or list of numpy arrays")

            for i in range(len(self.year_selection)) :
                if all(self.year_selection[i] == 0) :
                    self.year_selection[i] = np.array(self.dataset_years)
        #####################################################################################################
        if hasattr(self,'units') and self.units is not None :
            if isinstance(self.units,str) :
                self.units = [self.units]
            elif isinstance(self.units,list) :
                if not all(isinstance(x,str) for x in self.units) :
                    raise TypeError("Units input must be a list of strings")
            else :
                raise TypeError("Units input must be a list of strings")

            for i in range(len(self.units)) :
                if not isinstance(self.units[i],str) :
                    raise TypeError("Units input must be a list of strings")
                if self.units[i] not in ["SED","UVIh","UVI","J m-2","W m-2","mW m-2"] :
                    raise ValueError("Units input must be list of accepted unit strings, " +
                                     "those being SED, UVIh, J m-2, UVI, W m-2, or mW m-2")


            if hasattr(self,'bin_width') :
                if self.bin_width is None :
                    self.bin_width = []
                    for unit in self.units :
                        self.bin_width.append({
                            "SED" : 0.1, 
                            "J m-2" : 10, 
                            "UVI" : 0.1, 
                            "W m-2" : 0.0025, 
                            "mW m-2" : 2.5
                            }[unit])
                elif isinstance(self.bin_width,(int,float)) :
                    self.bin_width = [self.bin_width]


        return self
        



[docs]    def collect_data(self,
    day_selection,
    exposure_schedule=[1.0],
    year_selection=[0],
    units=["SED"],
    bin_width=None):
        """Loads data into multiple pixel histograms

        This function loads all of the necessary data and compiles it into one or
        multiple histograms. All parameters are designed to be interpreted as lists
        of arrays or lists of strings, where each list entry corresponding to a 
        different histogram in the sequence. So to create a sequence of maps 
        corresponding to the months of the year, the day_selection input would be a
        list of 12 arrays, the first containing numbers from 1 to 31, the second
        containing numbers from 32 to 59, and so on.

        The user specifies the day_selection and the year_selection as two separate 
        numerical inputs, rather than specifying dates. This make the interpretation
        of the sequence simpler. However, to simplify the user experience, the 
        day_selection input can include keywords to be automatically interpreted as
        days of the year.


        Parameters
        ----------
        day_selection : list, str, array
            A list of arrays and/or strings. Keywords interpretable in such a list
            include the (english) names of the 12 months (at least the first three
            letters), the names of the four seasons (fall or autumn is accepted),
            or the words "year" or "annual" to indicate the full year. These 
            keywords are replaced by the corresponding array of days in the year.
            Note that the 29th of February is removed from consideration should it 
            arise. Note also that the seasons are the meteorological seasons, i.e.
            the three month blocks JJA, SON, DJF, and MAM for summer, autumn,
            winter, and spring respectively. 
            
            The user can alternatively enter a special string instead of a list. 
            The string "monthly" generates a list of 12 arrays according to the 
            months whereas the string "seasons" generates a list of 4 arrays 
            according to the four seasons. 

        exposure_schedule : list, float, int, dict, array
            If the user enters a float, this float value will be repeated across a
            length-24 array to make the exposure schedule. For example, entering
            1.0 (not 1) will generate an array of 24 ones.

            If the user enters an int, i, a length-24 array of zeroes will be 
            generated with the ith entry being set to 1. For example, entering 1 
            (not 1.0) will generate an array that reads [0,1,0,0...] (length 24).

            If the user enters a dict, they can specify the values of a few 
            particular entries in a length-24 array where unspecified entries have
            a value of zero. For example, entering {0:0.5, 2:0.8, 3:1} will 
            generate and array the reads [0.5, 0, 0.8, 1, 0...] (length 24).

            If the user enters an array, it must be 1 dimensional with length 24
            or 2 dimensional with the second dimension having length 24 (allowing
            the user to specify multiple schedules).

            If the user enters a list, each entry of that list is interpreted using
            the rules listed above, with the caveat that arrays within a list cannot
            be 2 dimensional.
            

        year_selection : list, array, int
            The years across which the data should be pulled. Input should be a list
            of arrays of ints corresponding to years available in the dataset. Each
            list entry corresponds to a pixel histogram. The user can enter 0 as a
            shortcut for using all available years. For example, an input might be
            [numpy.arange(2010,2020),[0],0]. The first list entry is an array of a
            decade of years, straightforward enough. The second list entry is [0]. 
            This is equivalent to writing numpy.arange(2004,2021) i.e. it produces 
            an array of the available years of the dataset. The last entry is 0,
            this produces a sequence of individual years. So the input could be
            equivalently written as [numpy.arange(2010,2020),numpy.arange(2004,2021),
            numpy.array([2004]),numpy.array([2005]),numpy.array([2006])...] and so
            on until 2020.

        units : list, optional
            The list of units for each pixel histogram. Acceptable strings are "SED",
            "J m-2", "UVIh", "UVI", "W m-2", and "mW m-2". Defaults to SED.

        bin_width : list, optional
            The bin width for each histogram. By default, these values are defined
            automatically according to the units input. 


        Returns
        -------
        ExposureMapSequence
            The object has the hist_specs and hists fields added detailing the pixel
            histograms.


        Example
        -------

        In this example, we produce a basic sequence of monthly average doses for 2020::

            example = ExposureMapSequence()
            example = example.collect_data('monthly',year_selection=[2020],units=["SED"])
            example = example.calculate_maps(statistic='Mean')
            example.save_maps(save=True,show=True)

        In this example, we produce a basic sequence of annual average doses for each year
        of the dataset::

            example = ExposureMapSequence()
            example = example.collect_data(['annual'],year_selection=[0],units=["SED"])
            example = example.calculate_maps(statistic='Mean')
            example.save_maps(save=True,show=True)   

        """

        # this subroutine handles keyword inputs (monthly, seasonal, etc)
        self.day_selection, self.day_input_flt, self.day_nonstring = str2daysofyear(day_selection)

        self.exposure_schedule = exposure_schedule

        self.year_selection = year_selection

        if units is not None :
            self.units = units
        
        if bin_width is not None :
            self.bin_width = bin_width

        self = self.interpret_parameters()

        ############################################################################

        lengths = {'day_selection' : len(self.day_selection),
                   'exposure_schedule' : len(self.exposure_schedule),
                   'year_selection' : len(self.year_selection),
                   'units' : len(self.units),
                   'bin_width' : len(self.bin_width)}

        self.num_hists = max(lengths.items(), key=lambda x: x[1])[1]
        assert all(x == self.num_hists or x == 1 for x in lengths.values()), (
            "Inputs must be lists of length 1 or num_hists")
        
        self.iterators = [x[0] for x in lengths.items() if x[1]==self.num_hists]


        self.hist_specs = []

        for i in range(self.num_hists) :
            hist_spec = {
                'day_selection' : self.day_selection[0],
                'exposure_schedule' : self.exposure_schedule[0],
                'year_selection' : self.year_selection[0],
                'units' : self.units[0],
                'bin_width' : self.bin_width[0]}
            for x in self.iterators :
                hist_spec[x] = self.__dict__[x][i]
            self.hist_specs = self.hist_specs + [hist_spec]
            
            
        # find unique years to be loaded (probably all years but have to check)
        unique_years = set(self.year_selection[0])
        if len(self.year_selection) > 1 :
            for i in range(1,len(self.year_selection)) :
                unique_years.update(self.year_selection[i])
        unique_years = sorted(unique_years)

        # declare empty hists
        self.hists = [None for x in range(self.num_hists)]

        for i in range(len(unique_years)) :
            year = unique_years[i]
            print("Processing year "+str(year)) #should use logging, don't yet know how
            dataset=nc.Dataset(self.src_directory+self.src_filename_format.replace('yyyy',str(year))) 
            dataset.set_auto_mask(False) #to get normal arrays (faster than default masked arrays)

            if i == 0 :
                # TODO: this should also be done by some initial dataset analysis, but that's a drastic
                # design overhaul
                self.lat = dataset['lat'][:]
                self.lon = dataset['lon'][:]

            # now to determine the unique days for the specific year
            unique_days = set()
            for j in range(self.num_hists) :
                if year in self.hist_specs[j]['year_selection'] :
                    unique_days.update(self.hist_specs[j]['day_selection'])
            unique_days = sorted(unique_days)

            # TODO: when metadata fixed, update this to actually interpret dates (cftime)
            # reformat to index for netCDF
            nc_day_sel = [False for i in range(365*24)] 
            # reshape false array to have first dimension 24 (hours in day)
            nc_day_sel = assert_data_shape_24(nc_day_sel) 
            # set the appropriate days as true
            nc_day_sel[:,np.array(unique_days)-1] = True 
            # correct for leap years (skip feb 29)
            if year % 4 == 0 :
                nc_day_sel = np.concatenate(
                    (nc_day_sel[:,0:59],np.full((24,1),False),nc_day_sel[:,59:]),axis=1)
            # flatten time_subset array back to one dimension
            nc_day_sel = nc_day_sel.flatten(order='F')

            #load data
            data_year = assert_data_shape_24(dataset['UV_AS'][nc_day_sel,:,:])

            #sort data into histograms
            for j in range(self.num_hists) :
                if year in self.hist_specs[j]['year_selection'] :
                    sub_day_sel = [ True if x in self.hist_specs[j]['day_selection'] 
                        else False for x in unique_days ]
                    temp_data = data_year[:,sub_day_sel,:,:]

                    # Apply the exposure schedule, differently for doses vs intensity
                    if self.hist_specs[j]['units'] in ["SED","J m-2","UVIh"] :
                        # if calculating doses
                        print('   Calculating doses')
                        temp_data = np.sum(np.reshape(
                            self.hist_specs[j]['exposure_schedule'],[24,1,1,1]) * temp_data,axis=0)
                    # more complex when doing intensity
                    else :
                        # assume elsewise calculating intensity (i.e. UV-index) then limit data selection
                        # to schedule (remembering that default schedule is just ones)
                        print('   Slicing data with exposure schedule')
                        # select only those hours with nonzero entry in exposure schedule
                        temp_data = temp_data[self.hist_specs[j]['exposure_schedule'] != 0,:,:,:]
                        # select nonzero values from exposure schedule
                        exposure_schedule_nonzero = self.hist_specs[j]['exposure_schedule'][
                            self.hist_specs[j]['exposure_schedule'] != 0]
                        # if any nonzero entries aren't 1, multiply data accordingly
                        if (exposure_schedule_nonzero != 1).any() :
                            temp_data *= np.reshape(exposure_schedule_nonzero,[len(exposure_schedule_nonzero),1,1,1])
                        # recombine first two dimensions (hour and day) back into time ready for histogram
                        temp_data = assert_data_shape_24(temp_data,reverse=True) 

                    # now multiply data by conversion factor according to desired untis
                    # TODO: Should expand upon this in reference files
                    temp_data *= {"SED":0.9, "J m-2":90, "UVIh":1, "UVI":1, "W m-2":0.025, "mW m-2":25}[self.hist_specs[j]['units']]

                    # if this is the first iteration, declare a hist
                    if 'num_bins' not in self.hist_specs[j] :
                        # seems like useful metadata to know bin n and edges
                        self.hist_specs[j]['num_bins'] = int(np.nanmax(temp_data) // self.hist_specs[j]['bin_width'] ) + 2
                        self.hist_specs[j]['bin_edges'] = (np.array(range(self.hist_specs[j]['num_bins']+1))
                            - 0.5) * self.hist_specs[j]['bin_width'] 
                        # this form allows for weird custom bin edges, but probably will never use that
                        self.hist_specs[j]['bin_centers'] = (self.hist_specs[j]['bin_edges'][:-1] 
                            + 0.5 * np.diff(self.hist_specs[j]['bin_edges']))

                        # TODO: think about possible cases where dimensions could differ
                        self.hists[j]=np.zeros([self.hist_specs[j]['num_bins'],
                            np.shape(temp_data)[-2],np.shape(temp_data)[-1]], dtype=np.int16)

                    else :
                        new_num_bins = int(np.nanmax(temp_data) // self.hist_specs[j]['bin_width']) + 2 - self.hist_specs[j]['num_bins']
                        # check if new data requires extra bins in pix_hist
                        if new_num_bins > 0 :
                            # append zeros to pix hist to make room for larger values
                            self.hists[j] = np.concatenate((self.hists[j],np.zeros(
                                [new_num_bins,np.shape(self.hists[j])[-2],np.shape(self.hists[j])[-1]],
                                dtype=np.int16)),axis=0)
                            # update bin information
                            self.hist_specs[j]['num_bins'] = self.hist_specs[j]['num_bins'] + new_num_bins
                            self.hist_specs[j]['bin_edges'] = (np.array(range(self.hist_specs[j]['num_bins']+1))
                                - 0.5) * self.hist_specs[j]['bin_width'] 
                            self.hist_specs[j]['bin_centers'] = (self.hist_specs[j]['bin_edges'][:-1] 
                                + 0.5 * np.diff(self.hist_specs[j]['bin_edges']))

                    # TODO: Add check in case bins get "full" (i.e. approach int16 max value)
                    # now put data into hist using apply_along_axis to perform histogram for each pixel
                    print("   Calculating and adding to pixel histograms")
                    self.hists[j][:,:,:] += np.apply_along_axis(lambda x: 
                        np.histogram(x,bins=self.hist_specs[j]['bin_edges'])[0],0,temp_data)

        return self                    






[docs]    def calculate_maps(self,statistic=None,titles=None,filenames="auto") :
        """Calcualte the maps from the pixel histograms 

        This function calculates maps from the pixel histograms and generates
        titles and filenames for each map. Note that the number of maps can
        be greater than the number of pixel histograms if more than one 
        statistic is specified.


        Parameters
        ----------
        statistic : list, str
            The statistical descriptor to be calculated from the pixel histograms to be later 
            represented on the rendered map. Must contain at least one of these keywords:
            "mean", "median" or "med", "sd" or "std" or "stdev", "max" or "maximum", "min" or 
            "minimum". The keyword "prct" or "percentile" is also accepted so long as it is
            preceded by a two-digit integer specifying the desired percentile from 01 to 99.

            *Planned:* the string can be a formula using any of the keywords above, and 
            basic mathematical operators (+, -, *, /, **) and numeric factors.            

        titles : list, optional
            If the user does not wish to use the automatically generated map titles,
            they can enter them with this parameter. This must be a list of strings
            with a length equal to the number of maps produced.

        filenames : str, optional
            Filenames are generated to match the titles by default, but the user can
            alternatively enter them manually with this parameter.


        Returns
        -------
        ExposureMapSequence
            The object is appended with maps, map_specs, and num_maps fields.


        Example
        -------

        In this example, we produce a basic sequence of annual average doses for each year
        of the dataset::

            example = ExposureMapSequence()
            example = example.collect_data(['annual'],year_selection=[0],units=["SED"])
            example = example.calculate_maps(statistic='Mean')
            example.save_maps(save=True,show=True)   

        """

        if statistic is not None :
            self.statistic = statistic

        if isinstance(self.statistic,str) :
            self.statistic = [self.statistic]
        
        # declare array of nans to fill with maps
        self.maps = np.full([self.num_hists * len(self.statistic)] + 
            list(np.shape(self.hists[0])[1:]),np.nan)

        if titles is not None :
            self.titles = titles
        else :
            self.titles = [str(x) for x in range(self.num_hists * len(self.statistic))]

        if isinstance(filenames,str) and filenames == "auto" :
            self.filenames = [str(x) for x in range(self.num_hists * len(self.statistic))]
        else :
            self.filenames = filenames


        mapnum = 0
        hist_inds = []
        stat_inds = []
        for i in range(len(self.statistic)) :
            for j in range(self.num_hists) :
                
                self.maps[mapnum,:,:] = calculate_map_from_hists(
                    self.hists[j],self.statistic[i],self.hist_specs[j]['bin_centers'])

                if titles is None :
                    if filenames == "auto" :
                        self.titles[mapnum], self.filenames[mapnum] = gen_map_title(**{
                            **self.hist_specs[j],
                            'statistic':self.statistic[i]},filename=True)
                    else :
                        self.titles[mapnum] = gen_map_title(**{
                            **self.hist_specs[j],
                            'statistic':self.statistic[i]},filename=False)

                hist_inds = hist_inds + [j]
                stat_inds = stat_inds + [i]

                mapnum += 1

        self.num_maps = mapnum

        self.map_specs = {'hist' : hist_inds, 'statistic' : stat_inds}

        return self




[docs]    def save_maps(self,map_options=None,save=None,show=True,match_cmap_limits=True,schedule_diagram=True) :
        """Renders and saves the pre-calculated maps stored in the object

        With the maps calculated, this function renders the maps with broad flexibility on aesthetic 
        options. It is mostly a wrapper for the render_map function.


        Parameters
        ----------
        map_options : dict, optional
            A dictionary containing options for the render map function.

        save : bool, optional
            Although technically contained within map_options, this option is here so users can
            more easily say whether they want the images to be saved or not.

        show : bool, optional
            An option to show the maps in a python figure window or not.

        match_cmap_limits : bool, optional
            When producing multiple maps, it can sometimes be desirable for the colormap limits
            to be consistent across the set of images. This boolean enables that.

        schedule_diagram : bool, optional
            If true, a circular diagram is rendered on the map illustrating the schedule that
            generated the map.

        """

        if map_options is not None :
            self.map_options.update(map_options)

        if save is not None and isinstance(save,bool) :
            self.map_options['save'] = save

        if match_cmap_limits :
            self.map_options['cmap_limits'] = [np.nanmin(self.maps),np.nanmax(self.maps)]
            if self.map_options['cmap_limits'][0] < 0.1 * self.map_options['cmap_limits'][1] :
                self.map_options['cmap_limits'][0] = 0

        for i in range(self.num_maps) :
            opts = self.map_options
            opts['title'] = self.titles[i]
            if self.filenames is not None :
                opts['img_filename'] = self.filenames[i]
            if schedule_diagram :
                opts['schedule'] = self.hist_specs[self.map_specs['hist'][i]]['exposure_schedule']
            render_map(
                self.maps[i,:,:],
                lat=self.lat,
                lon=self.lon,
                cbar_label=self.hist_specs[self.map_specs['hist'][i]]['units'],
                show=show,
                **opts)







[docs]def render_map(map,
lat=None,
lon=None,
title=None,
save=True,
show=True,
schedule=None,
schedule_bbox=(-0.03,0,1,0.91),
img_filename=None,
img_dir="",
img_size=[20,15],
img_dpi=300,
img_filetype="png",
brdr_nation=True,
brdr_nation_rgba=[0,0,0,1],
brdr_state=True,
brdr_state_rgba=[0,0,0,0.75],
cmap="gist_ncar",
cmap_limits=None,
cbar=True,
cbar_limits=None,
cbar_label=None,
country_focus="CHE",
gridlines=True,
gridlines_dms=False,
mch_logo=True) :
    """Renders and saves maps

    Renders and saves maps with a wide variety of aesthetic options.

    Parameters
    ----------
    map : array
        The map to be rendered
    lat : [type], optional
        [description], by default None
    lon : [type], optional
        [description], by default None
    title : [type], optional
        [description], by default None
    save : bool, optional
        [description], by default True
    show : bool, optional
        [description], by default True
    schedule : [type], optional
        [description], by default None
    schedule_bbox : tuple, optional
        [description], by default (-0.03,0,1,0.91)
    img_filename : [type], optional
        [description], by default None
    img_dir : str, optional
        [description], by default ""
    img_size : list, optional
        [description], by default [20,15]
    img_dpi : int, optional
        [description], by default 300
    img_filetype : str, optional
        [description], by default "png"
    brdr_nation : bool, optional
        [description], by default True
    brdr_nation_rgba : list, optional
        [description], by default [0,0,0,1]
    brdr_state : bool, optional
        [description], by default True
    brdr_state_rgba : list, optional
        [description], by default [0,0,0,0.75]
    cmap : str, optional
        [description], by default "gist_ncar"
    cmap_limits : [type], optional
        [description], by default None
    cbar : bool, optional
        [description], by default True
    cbar_limits : [type], optional
        [description], by default None
    cbar_label : [type], optional
        [description], by default None
    country_focus : str, optional
        [description], by default "CHE"
    gridlines : bool, optional
        [description], by default True
    gridlines_dms : bool, optional
        [description], by default False
    mch_logo : bool, optional
        [description], by default True
    """

    # TODO: Add custom sizing and resolution specifications
    fig = plt.figure(figsize=(img_size[0]/2.54,img_size[1]/2.54))

    # TODO: Accept custom projections
    # proj = ccrs.Mercator()
    proj = ccrs.Orthographic(central_longitude=(lon[0]+lon[-1])/2, central_latitude=(lat[0]+lat[-1])/2)

    # TODO: Add support for multiple plots per figure (too complex? consider use cases)
    ax = fig.add_subplot(1,1,1,projection = proj)

    # TODO: Increase flexibility of borders consideration
    if brdr_state :
        state_brdrs = cfeat.NaturalEarthFeature(
            category='cultural',
            name='admin_1_states_provinces_lines',
            scale='10m',
            facecolor='none')
        ax.add_feature(state_brdrs,linestyle="--",edgecolor=tuple(brdr_state_rgba),linewidth=0.5)
    if brdr_nation :
        ax.add_feature(cfeat.BORDERS,edgecolor=tuple(brdr_nation_rgba))

    if country_focus is not None :
        shpfilename = shapereader.natural_earth(resolution='10m',
            category='cultural',name='admin_0_countries')
        reader = shapereader.Reader(shpfilename)
        countries = reader.records()    
        # this is a very janky search for Switzerland, but it's ultimately simpler than
        # making geopandas a requirement for the library
        for country in countries :
            if country.attributes['ADM0_A3'] == country_focus :
                break
        assert country.attributes['ADM0_A3'] == country_focus, "country_focus input not recognised"
        poly = country.geometry

        msk_proj  = proj.project_geometry (poly, ccrs.Geodetic())  # project geometry to the projection used by stamen

        # plot the mask using semi-transparency (alpha=0.65) on the masked-out portion
        ax.add_geometries( msk_proj, proj, facecolor='white', edgecolor='none', alpha=0.8)

    # TODO: Consider first-last versus min-max - how can we avoid accidentally flipping images
    extents=[lon[0],lon[-1],lat[0],lat[-1]]
    ax.set_extent(extents,crs=ccrs.Geodetic())

    # this code correctly translate the lat/lon limits into the projected coordinates
    extents_proj = proj.transform_points(ccrs.Geodetic(),np.array(extents[:2]),np.array(extents[2:]))
    extents_proj = extents_proj[:,:2].flatten(order='F')

    if gridlines :
        ax.gridlines(draw_labels=True, dms=gridlines_dms, x_inline=False, y_inline=False,linewidth=0.25,
        ylocs=[46,46.5,47,47.5])

    # TODO: Custom colormaps, interpolation, cropping

    # Upscale matrix for better reprojection
    # f = interp2d(lon, lat, map, kind='linear')
    # latnew = np.linspace(lat[0], lat[-1], (len(lat)-1)*3+1)
    # lonnew = np.linspace(lon[0], lon[-1], (len(lon)-1)*3+1)
    # mapnew = f(lonnew, latnew)

    # Upscale matrix for better reprojection
    mapnew = zoom(map,3)

    # show map with given cmap and set cmap limits
    im = ax.imshow(mapnew,extent=extents,transform=ccrs.PlateCarree(),
        origin='lower',cmap=cmap)
    if cmap_limits is not None :
        im.set_clim(cmap_limits[0],cmap_limits[1])

    # colorbar
    # TODO: Add support for horizontal vertical option
    if cbar :
        cb = plt.colorbar(im, ax=ax, orientation='horizontal',pad=0.05,fraction=0.05)
        cb.ax.set_xlabel(cbar_label)

    # show schedule diagram
    if schedule is not None :
        ax2 = inset_axes(ax, width="25%", height="25%", loc=2,
            axes_class = get_projection_class('polar'),
            bbox_to_anchor=tuple(schedule_bbox),
            bbox_transform=ax.transAxes)
        schedule_clock(ax2,schedule,title="Exposure schedule")

    # TODO: Add more advanced title interpretation (i.e. smart date placeholder)
    if title is not None :
        ax.set_title(title)

    if mch_logo :
        ex = ax.get_extent()
        mch_logo_img = plt.imread('python_tamer/mch_logo.png')
        mch_logo_width = 0.15
        mch_logo_pad = 0
        # some maths to work out position, note image aspect ratio 5:1
        mch_extents = [ex[1]-(ex[1]-ex[0])*mch_logo_width-(ex[1]-ex[0])*mch_logo_pad,
            ex[1]-(ex[1]-ex[0])*mch_logo_pad,
            ex[2]+(ex[3]-ex[2])*mch_logo_pad,
            ex[2]+0.2*(ex[1]-ex[0])*mch_logo_width+(ex[3]-ex[2])*mch_logo_pad]
        # zorder puts image on top (behind mask otherwise for some reason)
        ax.imshow(mch_logo_img,extent=mch_extents,zorder=12)

    # TODO: Add plot title, small textbox description, copyright from dataset, ticks and gridlines
    if save :
        # Generate timestamp filename if relying on default
        if img_filename is None :
            if title is not None :
                img_filename = format_filename(title)
            else :
                img_filename=dt.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        elif img_filename == "timestamp" :
            img_filename=dt.datetime.now().strftime('%Y%m%d_%H%M%S_%f')

        plt.savefig(img_dir+img_filename+"."+img_filetype,
            bbox_inches="tight",dpi=img_dpi)

    if show :
        plt.show()


[docs]def schedule_clock(axes,schedule,title=None,title_size=9,center=0.25,rmax=1) :
    """Generates a clock-style representation of an exposure schedule

    [extended_summary]

    Parameters
    ----------
    axes : matplotlib.axes.Axes
        The polar axes upon which the clock will be plotted
    schedule : list or numpy.ndarray
        The exposure schedule - a length-24 vector of hourly exposure proportions
    title : str, optional
        Title of the exposure clock
    title_size : int, optional
        [description], by default 9
    center : float, optional
        [description], by default 0.25
    rmax : int, optional
        [description], by default 1

    Returns
    -------
    [type]
        [description]
    """
    axes.bar(
        np.arange(24)/24*2*np.pi, 
        schedule,
        width=2*np.pi/24,
        align='edge',
        bottom=center) 
    axes.bar(0,0.25,width=2*np.pi,color='k')      
    # Set the circumference labels
    axes.set_xticks(np.linspace(0, 2*np.pi, 8, endpoint=False))
    axes.set_xticklabels(np.linspace(0, 24, 8, endpoint=False,dtype=int),fontsize=8) 
    axes.tick_params(axis='both',which='major', pad=-3)
    axes.set_yticks([0.5+center])
    axes.set_yticklabels(['0.5'],fontsize=5)
    axes.grid(True,color='black',linewidth=0.25)
    # Make the labels go clockwise
    axes.set_theta_direction(-1)       
    # Place 0 at the top
    axes.set_theta_offset(np.pi/2)   
    # format grid and fake ticks
    for t  in np.linspace(0, 2*np.pi, 24, endpoint=False):
        axes.plot([t,t], np.array([0.95,1.1])*rmax+center, lw=0.5, color="k")
    for t  in np.linspace(0, 2*np.pi, 8, endpoint=False):
        axes.plot([t,t], np.array([0.9,1.1])*rmax+center, color="k")   
    if title is not None :
        axes.set_title(title,fontsize=title_size) 
    axes.set_rmax(rmax+center)

    return axes


[docs]def gen_map_title(
statistic = None,
exposure_schedule=None,
hour=None,
units=None,
year_selection=None,
day_selection=None,
filename=False,
**kwargs) :

    if units in ['SED','J m-2','UVIh'] :
        if all(exposure_schedule == np.ones(24)) :
            title = 'UV daily doses'
        else :
            title = 'UV doses'
    elif units in ['W m-2','UVI','mW m-2'] :
        if np.sum(exposure_schedule)==1 and all(x in [0,1] for x in exposure_schedule) :
            #user chosen just one hour
            hour=np.where(exposure_schedule)[0][0]
            title = 'UV intensity between ' + str(hour) + 'h-' + str(hour+1) + 'h'
        else :
            title = 'UV intensity'
    else :
        raise ValueError('Units must be SED, J m-2, UVIh, UVI, W m-2, or mW m-2')
    
    title = statistic + ' of ' + title

    ayear = pd.date_range(start="2010-01-01",end="2010-12-31")
    ds = {'year' : ayear.dayofyear.values.tolist()}
    ds['winter (DJF)'] = [x for i in [12,1,2] for x in ayear[ayear.month == i].dayofyear.values.tolist()]
    ds['spring (MAM)'] = [x for i in [3,4,5] for x in ayear[ayear.month == i].dayofyear.values.tolist()]
    ds['summer (JJA)'] = [x for i in [6,7,8] for x in ayear[ayear.month == i].dayofyear.values.tolist()]
    ds['autumn (SON)'] = [x for i in [9,10,11] for x in ayear[ayear.month == i].dayofyear.values.tolist()]
    ds['January'] = ayear[ayear.month == 1].dayofyear.values.tolist()
    ds["February"] = ayear[ayear.month == 2].dayofyear.values.tolist()
    ds["March"] = ayear[ayear.month == 3].dayofyear.values.tolist()
    ds["April"] = ayear[ayear.month == 4].dayofyear.values.tolist()
    ds["May"] = ayear[ayear.month == 5].dayofyear.values.tolist()
    ds["June"] = ayear[ayear.month == 6].dayofyear.values.tolist()
    ds["July"] = ayear[ayear.month == 7].dayofyear.values.tolist()
    ds["August"] = ayear[ayear.month == 8].dayofyear.values.tolist()
    ds["September"] = ayear[ayear.month == 9].dayofyear.values.tolist()
    ds["October"] = ayear[ayear.month == 10].dayofyear.values.tolist()
    ds["November"] = ayear[ayear.month == 11].dayofyear.values.tolist()
    ds["December"] = ayear[ayear.month == 12].dayofyear.values.tolist()

    day_str = None

    for item in ds.items() :
        if set(day_selection) == set(item[1]) :
            day_str = item[0]
            break

    
    if day_str == 'year' :
        if len(year_selection) == 1 :
            title = title + ' for the year of ' + str(year_selection[0])
        elif all(np.diff(year_selection)==1) :
            title = title + ' for the years ' + str(np.min(year_selection)) + '-' + str(np.max(year_selection))
        else :
            title = title + ' for the years: ' + np.array2string(year_selection,separator=', ')

    elif day_str is not None :
        title = title + ' for ' + day_str 
        if len(year_selection) == 1 :
            title = title + ' ' + str(year_selection[0])
        elif all(np.diff(year_selection)==1) :
            title = title + ', ' + str(np.min(year_selection)) + '-' + str(np.max(year_selection))
        else :
            title = title + ' ' + np.array2string(year_selection,separator=', ')

    else :
        # TODO: potentially make this workable with "custom day selection" placeholder in title
        raise ValueError("Day selection not recognised, auto-title cannot proceed")

    if filename :
        custom = False
        filename = "UV." + units + '.' + statistic + '.'
        if len(year_selection) == 1 :
            filename = filename + str(year_selection[0]) + '.'
        elif all(np.diff(year_selection)==1) :
            filename = filename + str(np.min(year_selection)) + '-' + str(np.max(year_selection)) + '.'
        else :
            filename = filename + str(year_selection[0]) + '-custom' + '.'
            custom = True
        day_str_filenamer = {
            "January"   : "01",
            "February"  : "02",
            "March"     : "03",
            "April"     : "04",
            "May"       : "05",
            "June"      : "06",
            "July"      : "07",
            "August"    : "08",
            "September" : "09",
            "October"   : "10",
            "November"  : "11",
            "December"  : "12",
            "winter (DJF)"  : "s-",
            "spring (MAM)"  : "s-",
            "summer (JJA)"  : "s-",
            "autumn (SON)"  : "s-",
            "year"      : "year"}
        filename = filename + day_str_filenamer[day_str] 
        if hour is not None :
            filename = filename + '.' + str(hour) + 'h'
        if custom :
            filename = filename + '.created_' + dt.datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = format_filename(filename)
        return title, filename
    else :
        return title
    





[docs]def calculate_map_from_hists(pix_hist,statistic,bin_centers) :

    # Begin by defining the easy options that only require two inputs
    basic_descriptor_functions = {
        "mean": hist_mean,
        "median": lambda x,y: hist_percentile(x,y,0.5),
        "med": lambda x,y: hist_percentile(x,y,0.5),
        "sd": hist_stdev,
        "std": hist_stdev,
        "stdev": hist_stdev,
        "max": hist_max,
        "maximum": hist_max,
        "min": hist_min,
        "minimum":hist_min
    }
    # we can check if the chosen statistic is basic or advanced
    if statistic.lower() in basic_descriptor_functions.keys() :
        # in this case, we can simply select the basic function from the dict...
        descriptor_function = basic_descriptor_functions[statistic.lower()]
        # ...and execute it across the map
        map = np.apply_along_axis(lambda x: descriptor_function(x,bin_centers),0,pix_hist)
    # TODO: a loose space could ruin this, need shunting yard algorithm of sorts
    elif statistic.lower()[3:] == "prct" or statistic.lower()[3:] == "percentile" :
        prct = int(statistic[0:2]) / 100
        map = np.apply_along_axis(lambda x: hist_percentile(x,bin_centers,prct),0,pix_hist)
    else :
        # TODO: interpret self.statistic to build advanced functions (y i k e s)
        raise ValueError("Statistic string not recognised")
    

    return map