Source code for python_tamer.SpecificDoses

import pandas as pd
import numpy as np
import netCDF4 as nc
from .subroutines import *


[docs]class SpecificDoses(pd.DataFrame): """A class for specific dose estimates akin to dosimetry measurements High resolution data allows for personal and ambient dose estimation without the need for direct measurement. This class is structured like a table with a set of functions to add columns ultimately leading to dose estimates. Each row of this table represents a specific exposure instance, i.e. an individual at a specific location for a specific date and time with a specific exposure ratio. See Harris et al. 2021 (https://doi.org/10.3390/atmos12020268) for more information on calculations appropriate for this class. Parameters ---------- src_filename_format : str Describes the filename of the netCDF files containing the UV data with 'yyyy' in place of the year. src_directory : str The directory where the data is stored. Must end with a slash. Notes ----- Presently, the class is inherited from a pandas.DataFrame which is somewhat restrictive and will likely be revised in a later update. For the time being, this means that the parameters cannot be set when initialising a `SpecificDoses` object, they must instead be adjusted after initialisation, like so:: ExistingExposureMapObject.src_directory = "/new/data/directory/" Example ------- In this example, we illustrate the process for calculating the doses in Harris et al. 2021 (https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the spreadsheet contains only local Swiss time and not UTC time. There are four important functions as part of this class, three for standardising and preparing the columns, and one for actually loading the data and performing the dose calculations. See below:: import python_tamer as pt import pandas as pd example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx', header=2,index_col=0,usecols="B:K")) example.src_directory = 'C:/enter_the_directory_of_your_dataset_here' example = example.standard_column_names() example = example.schedule_constant_exposure().ER_from_posture() example = example.calculate_specific_dose() """ # This property ensures that functions return the same subclass @property def _constructor(self): return SpecificDoses # This adds some useful metadata (self-explanatory) _metadata = ["src_filename_format","src_directory"] src_filename_format = 'UVery.AS_ch02.lonlat_yyyy01010000.nc' src_directory = 'C:/Data/UV/' # TODO: set up __init__ for these options # It feels like this should be declared with __init__ as well but idk
[docs] def standard_column_names(self) : """Limited function to standardise column names When loading tables to use as the basis for a SpecificDoses table, some columns may have slightly names to what is expected. This function standardises the names but is very limited in terms of what it can recognise. The user is encourages to ensure the columns are correctly labelled themselves and not to rely on this function. Returns ------- SpecificDoses The table has its column names modified. """ legend_dict_reverse = {'Point' : ['Lieu de mesure'], 'Date' : ['Date'], 'Time_start' : ['Heure début','Start_time','Start time'], 'Time_end' : ['Heure fin','End_time','End time'], 'Measured_dose' : ['Exposition [MED]','Exposure'], 'Anatomic_zone' : ['Zone anatomique','Body part','Anatomic zone'], 'Posture' : ['Body posture'], 'Latitude' : ['lat'], 'Longitude' : ['lon','lng']} legend_dict = {keys: old_keys for old_keys, old_values in legend_dict_reverse.items() for keys in old_values} self = self.rename(columns=legend_dict) return self
[docs] def schedule_constant_exposure(self) : """Generates exposure schedules given start and end times. This function generates exposure schedules based on simple continuous exposure, i.e. with a start time and an end time. The exposure schedule is a vector with length 24 with each entry representing the proportion of the corresponding hour of the day that the subject is exposed. Returns ------- python_tamer.SpecificDoses An exposure_schedule column is created and is appended to the input `SpecificDoses` object or, if that column already exists, it is overwritten. Notes ----- The input `SpecificDoses` object must contain the following columns: * ``Time_start`` * ``Time_end`` Example ------- In this example, we illustrate the process for calculating the doses in Harris et al. 2021 (https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the spreadsheet contains only local Swiss time and not UTC time. There are four important functions as part of this class, three for standardising and preparing the columns, and one for actually loading the data and performing the dose calculations. See below:: import python_tamer as pt import pandas as pd example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx', header=2,index_col=0,usecols="B:K")) example.src_directory = 'C:/enter_the_directory_of_your_dataset_here' example = example.standard_column_names() example = example.schedule_constant_exposure().ER_from_posture() example = example.calculate_specific_dose() """ def schedule_constant_exposure_iter(Start_time,End_time) : """Iterates through rows of a SpecificDoses table to generate schedules. This function is designed to be applied to each row in a datatable to generate an exposure schedule based on a start time and end time Parameters ---------- Start_time : datetime.time UTC time at which exposure period begins End_time : datetime.time UTC time at which exposure period end Returns ------- numpy.array 24 length vector of values between 0 and 1 indicating proportion of time exposed for that corresponding hour of the day. """ schedule = np.zeros(24) schedule[Start_time.hour:End_time.hour] = 1 # Modify start and end hours according to proportion of time exposed if Start_time.minute != 0 : schedule[Start_time.hour] = (1 - Start_time.minute/60) if End_time.minute != 0 : schedule[End_time.hour] = End_time.minute/60 return schedule # With that function defined, we need just one line to apply it to the whole table self["Schedule"] = self.apply(lambda x: schedule_constant_exposure_iter( x["Time_start"],x["Time_end"]),axis='columns') return self
[docs] def ER_from_posture(self, Vis_table_path=None, Vis_table=None) : """ER_from_posture calculates Exposure Ratios for a given anatomic zone, posture, and date. This function calculates ER as a percentage between 0 and 100 based on information from an input table. The input table must contain certain columns at a minimum. Those are: Date, Anatomic_zone, and Posture. This function contains hard-coded synonyms for certain anatomical zones, e.g. 'Forehead" maps to "Face'. See Vernez et al., Journal of Exposure Science and Environmental Epidemiology (2015) 25, 113–118 (https://doi.org/10.1038/jes.2014.6) for further details on the model used for the calculation. Parameters ---------- Vis_table_path : str, optional The full path to an alternative table for the Vis parameter. Must be a csv file. Defaults to None. Vis_table : str, optional An alternative table for the Vis parameter. Defaults to None. Returns ------- SpecificDoses Returns input table appended with ER column Notes ----- The SpecificDoses table used must contain columns for Date, Anatomic_zone, and Posture. The Date column should contain DateTime entries. The Anatonic_zone column should contain one string per row describing the exposed body part. The Posture column should contain one string per row describing one of six accepted postures. Example ------- In this example, we illustrate the process for calculating the doses in Harris et al. 2021 (https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the spreadsheet contains only local Swiss time and not UTC time. There are four important functions as part of this class, three for standardising and preparing the columns, and one for actually loading the data and performing the dose calculations. See below:: import python_tamer as pt import pandas as pd example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx', header=2,index_col=0,usecols="B:K")) example.src_directory = 'C:/enter_the_directory_of_your_dataset_here' example = example.standard_column_names() example = example.schedule_constant_exposure().ER_from_posture() example = example.calculate_specific_dose() """ # This chunk of code checks if the default Vis table should be used or if the user enters some alternative table. if Vis_table is None and Vis_table_path is None : Vis_table = pd.DataFrame.from_records( columns=['Seated','Kneeling','Standing erect arms down','Standing erect arms up','Standing bowing'], index=['Face','Skull','Forearm','Upper arm','Neck','Top of shoulders','Belly','Upper back','Hand','Shoulder','Upper leg','Lower leg','Lower back'], data=[[53.7,28.7,46.6,44.9,19.2], [56.2,66.6,61.1,58.4,67.5], [62.3,56.5,49.4,53.1,62.1], [51.7,60.5,45.9,65.3,61.6], [58.3,84.3,67.6,65.2,81.6], [35.9,50.3,48.6,45.7,85.3], [58.1,45.1,50.3,49.6,15.2], [35.9,50.3,48.6,45.7,85.3], [59.2,58.8,42.4,55,58.5], [68,62,63,67.1,64], [65.4,45.4,50.9,51,43.5], [32.8,63.4,49.7,50.3,50], [44.9,51.6,56.6,53.4,86.9]]) # The 'standing moving' posture must be dealt with somehow... # Vis_table['Standing moving']= (Vis_table['Standing erect arms down'] + Vis_table['Standing bowing']) / 2 # TODO: add interpeter or force users to conform? Vis_table['Standing moving']= Vis_table['Standing erect arms down'] elif Vis_table is None : Vis_table = pd.read_csv(Vis_table_path) # Below is a dictionary describing a range of synonyms for the anatomical zones defined in the Vis table. Anatomic_zone_synonyms_reverse = { 'Forearm' : ['wrist', 'Left extern radial', 'Right extern radial', 'Left wrist: radius head', 'Right wrist: radius head', 'Left wrist', 'Right wrist'], 'Face' : ['Forehead'], 'Upper back' : ['Right trapezoid', 'Left trapezoid', 'trapezius'], 'Belly' : ['Chest'], 'Shoulder' : ['Left deltoid', 'Right deltoid', 'Left shoulder', 'Right shoulder'], 'Upper arm' : ['Left elbow', 'Right elbow', 'Left biceps', 'Right biceps'], 'Upper leg' : ['Left thigh', 'Right thigh', 'Left knee', 'Right knee'], 'Lower back' : ['Low back'] } # The dictionary is reversed so that the multiple synonyms can be mapped to the few correct terms for the Vis table. Anatomic_zone_synonyms = {keys: old_keys for old_keys, old_values in Anatomic_zone_synonyms_reverse.items() for keys in old_values} self = self.replace({'Anatomic_zone' : Anatomic_zone_synonyms}) # With the correct anatomic zone names established, we can lookup the Vis values from the table # TODO: lookup is being depreciated, must replace with something new Vis = Vis_table.lookup(self['Anatomic_zone'],self['Posture']) # Next we must calculate the minimal Solar Zenith Angle for the given date mSZA = min_solar_zenith_angle(self.Date,self.Latitude) # With the Vis value and the SZA, we can calculate the ER according to the Vernez model self.loc[:,'ER'] = ER_Vernez_model_equation(Vis,mSZA) / 100 return self
[docs] def calculate_specific_dose(self) : """Calculates doses according to exposure schedule, ER, date, and location. This function takes the SpecificDoseEstimationTable and calculates the specific ambient and personal doses according to the exposure schedule and ER. There are a few key steps to this function. First it reads the Date column to determine which years of data must be loaded. It then iterates through each year, loading only the necessary dates. It applies the exposure schedule and the ER to calculate the ambient and personal doses. Returns ------- SpecificDoses The input table is appended with a Ambient_dose and Personal_dose column. Notes ----- The input SpecificDoses object must include Date, Schedule, ER, Latitude, and Longitude columns. Consult Harris et al. 2021 (https://doi.org/10.3390/atmos12020268) for more information on how this function can be used in the context of mimicking UV dosimetry measurements. Example ------- In this example, we illustrate the process for calculating the doses in Harris et al. 2021 (https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the spreadsheet contains only local Swiss time and not UTC time. There are four important functions as part of this class, three for standardising and preparing the columns, and one for actually loading the data and performing the dose calculations. See below:: import python_tamer as pt import pandas as pd example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx', header=2,index_col=0,usecols="B:K")) example.src_directory = 'C:/enter_the_directory_of_your_dataset_here' example = example.standard_column_names() example = example.schedule_constant_exposure().ER_from_posture() example = example.calculate_specific_dose() """ # First step is find unique years to avoid loading unnecessary data years = pd.DatetimeIndex(self.Date).year unique_years = sorted(set(years)) self['Ambient_dose'] = np.nan self['Personal_dose'] = np.nan for year in unique_years : # Load netCDF file print("Processing year "+str(year)) dataset=nc.Dataset(self.src_directory+self.src_filename_format.replace('yyyy',str(year))) dataset.set_auto_mask(False) # This is important for nans to import correctly # Make temporary table for yearly subset temp_table = self[years == year].copy() # find all unique days in year to be loaded unique_days,unique_days_idx = np.unique(pd.DatetimeIndex(temp_table.Date).dayofyear, return_inverse=True) temp_table['unique_days_idx'] = unique_days_idx #pd.DatetimeIndex(nc.num2date(dataset.variables["time"][:],dataset.variables["time"].units,only_use_cftime_datetimes=False)) if dataset.dimensions['time'].size == 24 : # needed if just a single day time_subset = [True for i in range(dataset.dimensions['time'].size)] else : # Next we pull a subset from the netCDF file # declare false array with same length of time dimension from netCDF time_subset = [False for i in range(dataset.dimensions['time'].size)] # reshape false array to have first dimension 24 (hours in day) time_subset = assert_data_shape_24(time_subset) # set the appropriate days as true time_subset[:,unique_days-1] = True # flatten time_subset array back to one dimension time_subset = time_subset.flatten(order='F') data = assert_data_shape_24(dataset['UV_AS'][time_subset,:,:]) # TODO: improve comprehension of raw data units rather than assuming # convert lat lon into pixel coordinates # TODO: consider is necessary to load entire maps for just a few required pixels lat = dataset['lat'][:] lon = dataset['lon'][:] temp_table['pixel_lat'] = temp_table.apply(lambda x: find_nearest(lat,x['Latitude']),axis='columns') temp_table['pixel_lon'] = temp_table.apply(lambda x: find_nearest(lon,x['Longitude']),axis='columns') # calculate doses temp_table['Ambient_dose'] = temp_table.apply(lambda x: np.sum(data[:,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] * x['Schedule']),axis='columns') temp_table['Personal_dose'] = temp_table.apply(lambda x: np.sum(data[:,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] * (x['Schedule'] * x['ER'])),axis='columns') # extra step necessary to ensure correct assignment self.loc[temp_table.index,'Ambient_dose'] = temp_table['Ambient_dose'].values self.loc[temp_table.index,'Personal_dose'] = temp_table['Personal_dose'].values # TODO: improve units options here self['Ambient_dose'] = self['Ambient_dose']/40*3600/100 # SED self['Personal_dose'] = self['Personal_dose']/40*3600/100 # SED return self
[docs] def analyse_variable(self, variable="UV_AS", statistic="Mean", src_filename_format=None, src_directory=None) : """Basic calculations for specific exposure instances This function is for calculating information other than ambient and personal doses that corresponds to specific exposure instances. Parameters ---------- variable : str, optional The name of the variable to be analysed. This informs what data should be pulled from the source netCDF files. This also informs the name of the column(s) that will be created by this function. Defaults to "UV_AS", i.e. the All-Sky UV data that is used in the calculate_specific_dose function. statistic : str or list, optional The statistic to be calculated, options include: mean, median, stdev, variance, min, max, weighted_mean, and sum. Not case sensitive. Can be a single string or a list of strings whereby multiple columns will be calculated. Defaults to "Mean". src_filename_format : str, optional Allows the user to select different source data. This may be useful in cases where the user wants to compare doses calculated with one dataset to (say) cloud cover from another dataset. Defaults to None, where the function uses the source files specified by the object's metadata. src_directory : str, optional Allows the user to select different source data. This may be useful in cases where the user wants to compare doses calculated with one dataset to (say) cloud cover from another dataset. Defaults to None, where the function uses the source files specified by the object's metadata. Returns ------- SpecificDoses The table is appended with new columns named [variable]_[statistic]. Example ------- In this example, we illustrate the process for calculating the doses in Harris et al. 2021 (https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the spreadsheet contains only local Swiss time and not UTC time. Additionally, to demonstrate the analyse_variable function, we also calculate the weighted mean CMF assuming it to be an additional variable in the source data files. See below:: import python_tamer as pt import pandas as pd example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx', header=2,index_col=0,usecols="B:K")) example.src_directory = 'C:/enter_the_directory_of_your_dataset_here' example = example.standard_column_names() example = example.schedule_constant_exposure().ER_from_posture() example = example.calculate_specific_dose() example = example.analyse_variable(variable="CMF",statistic="weighted_mean") """ # users have option to load different files, otherwise defaults to metadata if src_filename_format is None : src_filename_format = self.src_filename_format if src_directory is None : src_directory = self.src_directory # First step is find unique years to avoid loading unnecessary data years = pd.DatetimeIndex(self.Date).year unique_years = sorted(set(years)) if isinstance(statistic,str) : self[variable+"_"+statistic] = np.nan # convert to list to simplify code later statistic = [statistic] elif isinstance(statistic,list) : for x in statistic : self[variable+"_"+x]=np.nan else : raise TypeError("statistic input must be str or list of str") for year in unique_years : # Load netCDF file print("Processing year "+str(year)) dataset=nc.Dataset(src_directory+src_filename_format.replace('yyyy',str(year))) dataset.set_auto_mask(False) # This is important for nans to import correctly # Make temporary table for yearly subset temp_table = self[years == year].copy() # find all unique days in year to be loaded unique_days,unique_days_idx = np.unique(pd.DatetimeIndex(temp_table.Date).dayofyear, return_inverse=True) temp_table['unique_days_idx'] = unique_days_idx #pd.DatetimeIndex(nc.num2date(dataset.variables["time"][:],dataset.variables["time"].units,only_use_cftime_datetimes=False)) if dataset.dimensions['time'].size == 24 : # needed if just a single day time_subset = [True for i in range(dataset.dimensions['time'].size)] else : # Next we pull a subset from the netCDF file # declare false array with same length of time dimension from netCDF time_subset = [False for i in range(dataset.dimensions['time'].size)] # reshape false array to have first dimension 24 (hours in day) time_subset = assert_data_shape_24(time_subset) # set the appropriate days as true time_subset[:,unique_days-1] = True # flatten time_subset array back to one dimension time_subset = time_subset.flatten(order='F') data = assert_data_shape_24(dataset[variable][time_subset,:,:]) # TODO: improve comprehension of raw data units rather than assuming # convert lat lon into pixel coordinates # TODO: consider is necessary to load entire maps for just a few required pixels lat = dataset['lat'][:] lon = dataset['lon'][:] temp_table['pixel_lat'] = temp_table.apply(lambda x: find_nearest(lat,x['Latitude']),axis='columns') temp_table['pixel_lon'] = temp_table.apply(lambda x: find_nearest(lon,x['Longitude']),axis='columns') # calculate for stat in statistic : # mean if stat.lower() in ["mean",'average','avg'] : temp_table[variable+"_"+stat] = temp_table.apply(lambda x: np.mean(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns') # median elif stat.lower() in ["median","med"] : temp_table[variable+"_"+stat] = temp_table.apply(lambda x: np.median(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns') # stdev elif stat.lower() in ["std","sd","stdev"] : temp_table[variable+"_"+stat] = temp_table.apply(lambda x: np.std(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns') # variance elif stat.lower() in ["var","variance"] : temp_table[variable+"_"+stat] = temp_table.apply(lambda x: np.var(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns') # minimum elif stat.lower() in ["min",'minimum'] : temp_table[variable+"_"+stat] = temp_table.apply(lambda x: np.amin(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns') # maximum elif stat.lower() in ["max","maximum"] : temp_table[variable+"_"+stat] = temp_table.apply(lambda x: np.amax(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns') # weighted mean elif stat.lower() in ["weighted_mean","weighted_average","mean_weighted","average_weighted","avg_weighted"] : temp_table[variable+"_"+stat] = temp_table.apply(lambda x: np.average(data[:,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']],weights=x['Schedule']),axis='columns') # sum elif stat.lower() in ["sum","total"] : temp_table[variable+"_"+stat] = temp_table.apply(lambda x: np.sum(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns') # extra step necessary to ensure correct assignment self.loc[temp_table.index,variable+"_"+stat] = temp_table[variable+"_"+stat].values return self