import pandas as pd
import numpy as np
import netCDF4 as nc
from .subroutines import *
class SpecificDoses(pd.DataFrame):
    """A class for specific dose estimates akin to dosimetry measurements.

    High resolution data allows for personal and ambient dose estimation without the need for
    direct measurement. This class is structured like a table with a set of functions to add
    columns ultimately leading to dose estimates. Each row of this table represents a specific
    exposure instance, i.e. an individual at a specific location for a specific date and time
    with a specific exposure ratio. See Harris et al. 2021
    (https://doi.org/10.3390/atmos12020268) for more information on calculations appropriate
    for this class.

    Parameters
    ----------
    src_filename_format : str
        Describes the filename of the netCDF files containing the UV data with 'yyyy' in place
        of the year.
    src_directory : str
        The directory where the data is stored. Must end with a slash.

    Notes
    -----
    Presently, the class is inherited from a pandas.DataFrame which is somewhat restrictive
    and will likely be revised in a later update. For the time being, this means that the
    parameters cannot be set when initialising a `SpecificDoses` object; they must instead
    be adjusted after initialisation, like so::

        ExistingExposureMapObject.src_directory = "/new/data/directory/"

    Example
    -------
    In this example, we illustrate the process for calculating the doses in Harris et al. 2021
    (https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary
    data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the
    spreadsheet contains only local Swiss time and not UTC time. There are four important
    functions as part of this class, three for standardising and preparing the columns,
    and one for actually loading the data and performing the dose calculations. See below::

        import python_tamer as pt
        import pandas as pd
        example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx',
            header=2,index_col=0,usecols="B:K"))
        example.src_directory = 'C:/enter_the_directory_of_your_dataset_here'
        example = example.standard_column_names()
        example = example.schedule_constant_exposure().ER_from_posture()
        example = example.calculate_specific_dose()
    """

    # Ensures pandas operations (slicing, filtering, etc.) return this subclass
    # rather than a plain DataFrame (pandas subclassing convention).
    @property
    def _constructor(self):
        return SpecificDoses

    # Metadata attributes propagated through pandas operations.
    _metadata = ["src_filename_format","src_directory"]
    src_filename_format = 'UVery.AS_ch02.lonlat_yyyy01010000.nc'
    src_directory = 'C:/Data/UV/' # TODO: set up __init__ for these options
    # It feels like this should be declared with __init__ as well but idk
[docs] def standard_column_names(self) :
"""Limited function to standardise column names
When loading tables to use as the basis for a SpecificDoses table, some columns may have
slightly names to what is expected. This function standardises the names but is very
limited in terms of what it can recognise. The user is encourages to ensure the columns
are correctly labelled themselves and not to rely on this function.
Returns
-------
SpecificDoses
The table has its column names modified.
"""
legend_dict_reverse = {'Point' : ['Lieu de mesure'],
'Date' : ['Date'],
'Time_start' : ['Heure début','Start_time','Start time'],
'Time_end' : ['Heure fin','End_time','End time'],
'Measured_dose' : ['Exposition [MED]','Exposure'],
'Anatomic_zone' : ['Zone anatomique','Body part','Anatomic zone'],
'Posture' : ['Body posture'],
'Latitude' : ['lat'],
'Longitude' : ['lon','lng']}
legend_dict = {keys: old_keys for old_keys, old_values in legend_dict_reverse.items() for keys in old_values}
self = self.rename(columns=legend_dict)
return self
[docs] def schedule_constant_exposure(self) :
"""Generates exposure schedules given start and end times.
This function generates exposure schedules based on simple continuous exposure, i.e.
with a start time and an end time. The exposure schedule is a vector with length 24
with each entry representing the proportion of the corresponding hour of the day that
the subject is exposed.
Returns
-------
python_tamer.SpecificDoses
An exposure_schedule column is created and is appended to the input
`SpecificDoses` object or, if that column already exists, it is overwritten.
Notes
-----
The input `SpecificDoses` object must contain the following columns:
* ``Time_start``
* ``Time_end``
Example
-------
In this example, we illustrate the process for calculating the doses in Harris et al. 2021
(https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary
data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the
spreadsheet contains only local Swiss time and not UTC time. There are four important
functions as part of this class, three for standardising and preparing the columns,
and one for actually loading the data and performing the dose calculations. See below::
import python_tamer as pt
import pandas as pd
example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx',
header=2,index_col=0,usecols="B:K"))
example.src_directory = 'C:/enter_the_directory_of_your_dataset_here'
example = example.standard_column_names()
example = example.schedule_constant_exposure().ER_from_posture()
example = example.calculate_specific_dose()
"""
def schedule_constant_exposure_iter(Start_time,End_time) :
"""Iterates through rows of a SpecificDoses table to generate schedules.
This function is designed to be applied to each row in a datatable to generate an
exposure schedule based on a start time and end time
Parameters
----------
Start_time : datetime.time
UTC time at which exposure period begins
End_time : datetime.time
UTC time at which exposure period end
Returns
-------
numpy.array
24 length vector of values between 0 and 1 indicating proportion
of time exposed for that corresponding hour of the day.
"""
schedule = np.zeros(24)
schedule[Start_time.hour:End_time.hour] = 1
# Modify start and end hours according to proportion of time exposed
if Start_time.minute != 0 :
schedule[Start_time.hour] = (1 - Start_time.minute/60)
if End_time.minute != 0 :
schedule[End_time.hour] = End_time.minute/60
return schedule
# With that function defined, we need just one line to apply it to the whole table
self["Schedule"] = self.apply(lambda x: schedule_constant_exposure_iter(
x["Time_start"],x["Time_end"]),axis='columns')
return self
[docs] def ER_from_posture(self,
Vis_table_path=None,
Vis_table=None) :
"""ER_from_posture calculates Exposure Ratios for a given anatomic zone, posture, and date.
This function calculates ER as a percentage between 0 and 100 based on information from an input table.
The input table must contain certain columns at a minimum. Those are: Date, Anatomic_zone, and Posture.
This function contains hard-coded synonyms for certain anatomical zones, e.g. 'Forehead" maps to "Face'.
See Vernez et al., Journal of Exposure Science and Environmental Epidemiology (2015) 25, 113–118
(https://doi.org/10.1038/jes.2014.6) for further details on the model used for the calculation.
Parameters
----------
Vis_table_path : str, optional
The full path to an alternative table for the Vis parameter.
Must be a csv file. Defaults to None.
Vis_table : str, optional
An alternative table for the Vis parameter. Defaults to None.
Returns
-------
SpecificDoses
Returns input table appended with ER column
Notes
-----
The SpecificDoses table used must contain columns for Date, Anatomic_zone, and Posture.
The Date column should contain DateTime entries. The Anatonic_zone column should contain one string per
row describing the exposed body part. The Posture column should contain one string per row describing
one of six accepted postures.
Example
-------
In this example, we illustrate the process for calculating the doses in Harris et al. 2021
(https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary
data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the
spreadsheet contains only local Swiss time and not UTC time. There are four important
functions as part of this class, three for standardising and preparing the columns,
and one for actually loading the data and performing the dose calculations. See below::
import python_tamer as pt
import pandas as pd
example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx',
header=2,index_col=0,usecols="B:K"))
example.src_directory = 'C:/enter_the_directory_of_your_dataset_here'
example = example.standard_column_names()
example = example.schedule_constant_exposure().ER_from_posture()
example = example.calculate_specific_dose()
"""
# This chunk of code checks if the default Vis table should be used or if the user enters some alternative table.
if Vis_table is None and Vis_table_path is None :
Vis_table = pd.DataFrame.from_records(
columns=['Seated','Kneeling','Standing erect arms down','Standing erect arms up','Standing bowing'],
index=['Face','Skull','Forearm','Upper arm','Neck','Top of shoulders','Belly','Upper back','Hand','Shoulder','Upper leg','Lower leg','Lower back'],
data=[[53.7,28.7,46.6,44.9,19.2],
[56.2,66.6,61.1,58.4,67.5],
[62.3,56.5,49.4,53.1,62.1],
[51.7,60.5,45.9,65.3,61.6],
[58.3,84.3,67.6,65.2,81.6],
[35.9,50.3,48.6,45.7,85.3],
[58.1,45.1,50.3,49.6,15.2],
[35.9,50.3,48.6,45.7,85.3],
[59.2,58.8,42.4,55,58.5],
[68,62,63,67.1,64],
[65.4,45.4,50.9,51,43.5],
[32.8,63.4,49.7,50.3,50],
[44.9,51.6,56.6,53.4,86.9]])
# The 'standing moving' posture must be dealt with somehow...
# Vis_table['Standing moving']= (Vis_table['Standing erect arms down'] + Vis_table['Standing bowing']) / 2
# TODO: add interpeter or force users to conform?
Vis_table['Standing moving']= Vis_table['Standing erect arms down']
elif Vis_table is None :
Vis_table = pd.read_csv(Vis_table_path)
# Below is a dictionary describing a range of synonyms for the anatomical zones defined in the Vis table.
Anatomic_zone_synonyms_reverse = {
'Forearm' : ['wrist',
'Left extern radial',
'Right extern radial',
'Left wrist: radius head',
'Right wrist: radius head',
'Left wrist',
'Right wrist'],
'Face' : ['Forehead'],
'Upper back' : ['Right trapezoid',
'Left trapezoid',
'trapezius'],
'Belly' : ['Chest'],
'Shoulder' : ['Left deltoid',
'Right deltoid',
'Left shoulder',
'Right shoulder'],
'Upper arm' : ['Left elbow',
'Right elbow',
'Left biceps',
'Right biceps'],
'Upper leg' : ['Left thigh',
'Right thigh',
'Left knee',
'Right knee'],
'Lower back' : ['Low back']
}
# The dictionary is reversed so that the multiple synonyms can be mapped to the few correct terms for the Vis table.
Anatomic_zone_synonyms = {keys: old_keys for old_keys, old_values in Anatomic_zone_synonyms_reverse.items() for keys in old_values}
self = self.replace({'Anatomic_zone' : Anatomic_zone_synonyms})
# With the correct anatomic zone names established, we can lookup the Vis values from the table
# TODO: lookup is being depreciated, must replace with something new
Vis = Vis_table.lookup(self['Anatomic_zone'],self['Posture'])
# Next we must calculate the minimal Solar Zenith Angle for the given date
mSZA = min_solar_zenith_angle(self.Date,self.Latitude)
# With the Vis value and the SZA, we can calculate the ER according to the Vernez model
self.loc[:,'ER'] = ER_Vernez_model_equation(Vis,mSZA) / 100
return self
    def calculate_specific_dose(self) :
        """Calculates doses according to exposure schedule, ER, date, and location.

        This function takes the SpecificDoseEstimationTable and calculates the specific
        ambient and personal doses according to the exposure schedule and ER. There are
        a few key steps to this function. First it reads the Date column to determine
        which years of data must be loaded. It then iterates through each year, loading
        only the necessary dates. It applies the exposure schedule and the ER to
        calculate the ambient and personal doses.

        Returns
        -------
        SpecificDoses
            The input table is appended with an Ambient_dose and Personal_dose column.

        Notes
        -----
        The input SpecificDoses object must include Date, Schedule, ER, Latitude,
        and Longitude columns.

        Consult Harris et al. 2021 (https://doi.org/10.3390/atmos12020268) for more
        information on how this function can be used in the context of mimicking UV
        dosimetry measurements.

        Example
        -------
        In this example, we illustrate the process for calculating the doses in Harris et al. 2021
        (https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary
        data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the
        spreadsheet contains only local Swiss time and not UTC time. There are four important
        functions as part of this class, three for standardising and preparing the columns,
        and one for actually loading the data and performing the dose calculations. See below::

            import python_tamer as pt
            import pandas as pd
            example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx',
                header=2,index_col=0,usecols="B:K"))
            example.src_directory = 'C:/enter_the_directory_of_your_dataset_here'
            example = example.standard_column_names()
            example = example.schedule_constant_exposure().ER_from_posture()
            example = example.calculate_specific_dose()
        """
        # First step is find unique years to avoid loading unnecessary data
        years = pd.DatetimeIndex(self.Date).year
        unique_years = sorted(set(years))
        # Pre-allocate output columns so the per-year subsets can be written back below
        self['Ambient_dose'] = np.nan
        self['Personal_dose'] = np.nan
        for year in unique_years :
            # Load netCDF file for this year
            print("Processing year "+str(year))
            dataset=nc.Dataset(self.src_directory+self.src_filename_format.replace('yyyy',str(year)))
            dataset.set_auto_mask(False) # This is important for nans to import correctly
            # Make temporary table for yearly subset
            temp_table = self[years == year].copy()
            # find all unique days in year to be loaded; unique_days_idx maps each row
            # back to its position in the loaded (24, n_days, lat, lon) data array
            unique_days,unique_days_idx = np.unique(pd.DatetimeIndex(temp_table.Date).dayofyear,
                return_inverse=True)
            temp_table['unique_days_idx'] = unique_days_idx
            if dataset.dimensions['time'].size == 24 :
                # needed if the file contains just a single day
                time_subset = [True for i in range(dataset.dimensions['time'].size)]
            else :
                # Next we pull a subset from the netCDF file
                # declare false array with same length of time dimension from netCDF
                time_subset = [False for i in range(dataset.dimensions['time'].size)]
                # reshape false array to have first dimension 24 (hours in day)
                time_subset = assert_data_shape_24(time_subset)
                # set the appropriate days as true (dayofyear is 1-based, hence -1)
                time_subset[:,unique_days-1] = True
                # flatten time_subset array back to one dimension
                time_subset = time_subset.flatten(order='F')
            # NOTE(review): assumes the source variable is named 'UV_AS' and the time axis
            # is hourly — TODO: confirm for datasets other than the default source files
            data = assert_data_shape_24(dataset['UV_AS'][time_subset,:,:])
            # TODO: improve comprehension of raw data units rather than assuming
            # convert lat lon into pixel coordinates
            # TODO: consider if it is necessary to load entire maps for just a few required pixels
            lat = dataset['lat'][:]
            lon = dataset['lon'][:]
            temp_table['pixel_lat'] = temp_table.apply(lambda x:
                find_nearest(lat,x['Latitude']),axis='columns')
            temp_table['pixel_lon'] = temp_table.apply(lambda x:
                find_nearest(lon,x['Longitude']),axis='columns')
            # calculate doses: ambient weights hourly data by the schedule alone,
            # personal additionally weights by the exposure ratio
            temp_table['Ambient_dose'] = temp_table.apply(lambda x:
                np.sum(data[:,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] *
                x['Schedule']),axis='columns')
            temp_table['Personal_dose'] = temp_table.apply(lambda x:
                np.sum(data[:,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] *
                (x['Schedule'] * x['ER'])),axis='columns')
            # extra step necessary to ensure correct assignment
            self.loc[temp_table.index,'Ambient_dose'] = temp_table['Ambient_dose'].values
            self.loc[temp_table.index,'Personal_dose'] = temp_table['Personal_dose'].values
        # TODO: improve units options here
        # NOTE(review): conversion presumes hourly UV Index input (UVI = 40 mW/m^2):
        # /40 -> W/m^2, *3600 -> J/m^2 per hour, /100 -> SED — confirm source data units
        self['Ambient_dose'] = self['Ambient_dose']/40*3600/100 # SED
        self['Personal_dose'] = self['Personal_dose']/40*3600/100 # SED
        return self
    def analyse_variable(self,
                         variable="UV_AS",
                         statistic="Mean",
                         src_filename_format=None,
                         src_directory=None) :
        """Basic calculations for specific exposure instances.

        This function is for calculating information other than ambient and personal
        doses that corresponds to specific exposure instances.

        Parameters
        ----------
        variable : str, optional
            The name of the variable to be analysed. This informs what data should be
            pulled from the source netCDF files. This also informs the name of the column(s)
            that will be created by this function. Defaults to "UV_AS", i.e. the All-Sky
            UV data that is used in the calculate_specific_dose function.
        statistic : str or list, optional
            The statistic to be calculated, options include: mean, median, stdev, variance,
            min, max, weighted_mean, and sum. Not case sensitive. Can be a single string or
            a list of strings whereby multiple columns will be calculated. Defaults to "Mean".
        src_filename_format : str, optional
            Allows the user to select different source data. This may be useful in cases where
            the user wants to compare doses calculated with one dataset to (say) cloud cover
            from another dataset. Defaults to None, where the function uses the source files
            specified by the object's metadata.
        src_directory : str, optional
            Allows the user to select different source data. This may be useful in cases where
            the user wants to compare doses calculated with one dataset to (say) cloud cover
            from another dataset. Defaults to None, where the function uses the source files
            specified by the object's metadata.

        Returns
        -------
        SpecificDoses
            The table is appended with new columns named [variable]_[statistic].

        Example
        -------
        In this example, we illustrate the process for calculating the doses in Harris et al. 2021
        (https://doi.org/10.3390/atmos12020268) from the spreadsheet supplied as supplementary
        data (https://www.mdpi.com/2073-4433/12/2/268/s1). Note that results will differ as the
        spreadsheet contains only local Swiss time and not UTC time. Additionally, to demonstrate
        the analyse_variable function, we also calculate the weighted mean CMF assuming it to be
        an additional variable in the source data files. See below::

            import python_tamer as pt
            import pandas as pd
            example = pt.SpecificDoses(pd.read_excel(r'atmosphere-12-00268-s001.xlsx',
                header=2,index_col=0,usecols="B:K"))
            example.src_directory = 'C:/enter_the_directory_of_your_dataset_here'
            example = example.standard_column_names()
            example = example.schedule_constant_exposure().ER_from_posture()
            example = example.calculate_specific_dose()
            example = example.analyse_variable(variable="CMF",statistic="weighted_mean")
        """
        # users have the option to load different files, otherwise defaults to metadata
        if src_filename_format is None :
            src_filename_format = self.src_filename_format
        if src_directory is None :
            src_directory = self.src_directory
        # First step is find unique years to avoid loading unnecessary data
        years = pd.DatetimeIndex(self.Date).year
        unique_years = sorted(set(years))
        # Pre-allocate one output column per requested statistic
        if isinstance(statistic,str) :
            self[variable+"_"+statistic] = np.nan
            # convert to list to simplify code later
            statistic = [statistic]
        elif isinstance(statistic,list) :
            for x in statistic :
                self[variable+"_"+x]=np.nan
        else :
            raise TypeError("statistic input must be str or list of str")
        for year in unique_years :
            # Load netCDF file for this year
            print("Processing year "+str(year))
            dataset=nc.Dataset(src_directory+src_filename_format.replace('yyyy',str(year)))
            dataset.set_auto_mask(False) # This is important for nans to import correctly
            # Make temporary table for yearly subset
            temp_table = self[years == year].copy()
            # find all unique days in year to be loaded; unique_days_idx maps each row
            # back to its position in the loaded (24, n_days, lat, lon) data array
            unique_days,unique_days_idx = np.unique(pd.DatetimeIndex(temp_table.Date).dayofyear,
                return_inverse=True)
            temp_table['unique_days_idx'] = unique_days_idx
            if dataset.dimensions['time'].size == 24 :
                # needed if the file contains just a single day
                time_subset = [True for i in range(dataset.dimensions['time'].size)]
            else :
                # Next we pull a subset from the netCDF file
                # declare false array with same length of time dimension from netCDF
                time_subset = [False for i in range(dataset.dimensions['time'].size)]
                # reshape false array to have first dimension 24 (hours in day)
                time_subset = assert_data_shape_24(time_subset)
                # set the appropriate days as true (dayofyear is 1-based, hence -1)
                time_subset[:,unique_days-1] = True
                # flatten time_subset array back to one dimension
                time_subset = time_subset.flatten(order='F')
            data = assert_data_shape_24(dataset[variable][time_subset,:,:])
            # TODO: improve comprehension of raw data units rather than assuming
            # convert lat lon into pixel coordinates
            # TODO: consider if it is necessary to load entire maps for just a few required pixels
            lat = dataset['lat'][:]
            lon = dataset['lon'][:]
            temp_table['pixel_lat'] = temp_table.apply(lambda x:
                find_nearest(lat,x['Latitude']),axis='columns')
            temp_table['pixel_lon'] = temp_table.apply(lambda x:
                find_nearest(lon,x['Longitude']),axis='columns')
            # calculate each requested statistic over the hours where the schedule is
            # non-zero (except weighted_mean, which weights all 24 hours by the schedule)
            # NOTE(review): statistic strings not matching any branch below are silently
            # skipped, leaving the pre-allocated column as NaN — no error is raised
            for stat in statistic :
                # mean
                if stat.lower() in ["mean",'average','avg'] :
                    temp_table[variable+"_"+stat] = temp_table.apply(lambda x:
                        np.mean(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns')
                # median
                elif stat.lower() in ["median","med"] :
                    temp_table[variable+"_"+stat] = temp_table.apply(lambda x:
                        np.median(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns')
                # stdev
                elif stat.lower() in ["std","sd","stdev"] :
                    temp_table[variable+"_"+stat] = temp_table.apply(lambda x:
                        np.std(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns')
                # variance
                elif stat.lower() in ["var","variance"] :
                    temp_table[variable+"_"+stat] = temp_table.apply(lambda x:
                        np.var(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns')
                # minimum
                elif stat.lower() in ["min",'minimum'] :
                    temp_table[variable+"_"+stat] = temp_table.apply(lambda x:
                        np.amin(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns')
                # maximum
                elif stat.lower() in ["max","maximum"] :
                    temp_table[variable+"_"+stat] = temp_table.apply(lambda x:
                        np.amax(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns')
                # weighted mean
                elif stat.lower() in ["weighted_mean","weighted_average","mean_weighted","average_weighted","avg_weighted"] :
                    temp_table[variable+"_"+stat] = temp_table.apply(lambda x:
                        np.average(data[:,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']],weights=x['Schedule']),axis='columns')
                # sum
                elif stat.lower() in ["sum","total"] :
                    temp_table[variable+"_"+stat] = temp_table.apply(lambda x:
                        np.sum(data[x['Schedule']!=0,x['unique_days_idx'],x['pixel_lat'],x['pixel_lon']] ),axis='columns')
                # extra step necessary to ensure correct assignment
                self.loc[temp_table.index,variable+"_"+stat] = temp_table[variable+"_"+stat].values
        return self