Module nais_processor
Expand source code
import numpy as np
from matplotlib import colors
import matplotlib.pyplot as plt
from matplotlib.ticker import LogLocator
from datetime import date, datetime, timedelta
import matplotlib.dates as dts
import pandas as pd
import os
import locale
import warnings
import yaml
import re
import sys
from dateutil.parser import parse
from tinydb import TinyDB, Query
from tinydb.operations import add
import time
import json
import aerosol_functions as af
# pdoc override mapping: every helper named here is mapped to False,
# which pdoc interprets as "leave this name out of the generated docs".
__pdoc__ = dict.fromkeys(
    [
        'tubeloss',
        'average_mob',
        'average_dp',
        'find_diagnostic_names',
        'process_data',
        'correct_data',
        'clean_data',
    ],
    False,
)
# The final geometric mean diameters of diameter and mobility bins
dp_ion = np.array([7.86360416e-10, 9.08232168e-10, 1.04902018e-09, 1.21167006e-09,
1.39958930e-09, 1.61672083e-09, 1.86762862e-09, 2.15759741e-09,
2.49274932e-09, 2.88018000e-09, 3.32811839e-09, 3.84611427e-09,
4.44525917e-09, 5.13844742e-09, 5.94068566e-09, 6.86946146e-09,
7.94518431e-09, 9.19171623e-09, 1.06370142e-08, 1.23139134e-08,
1.42610904e-08, 1.65242568e-08, 1.91576555e-08, 2.22259544e-08,
2.58066722e-08, 2.99933244e-08, 3.48995548e-08, 4.06646353e-08])*1e9
dp_par = np.array([7.498942093324539870e-01,8.659643233600640144e-01,
9.999999999999980016e-01,1.154781984689456031e+00,1.333521432163321974e+00,
1.539926526059490097e+00,1.778279410038920094e+00,2.053525026457140079e+00,
2.371373705661659947e+00,2.738419634264360081e+00,3.162277660168379967e+00,
3.651741272548380213e+00,4.216965034285819591e+00,4.869675251658620141e+00,
5.623413251903479626e+00,6.493816315762099833e+00,7.498942093324560076e+00,
8.659643233600640144e+00,1.000000000000000000e+01,1.154781984689457985e+01,
1.333521432163323972e+01,1.539926526059490008e+01,1.778279410038922137e+01,
2.053525026457139901e+01,2.371373705661660125e+01,2.738419634264360170e+01,
3.162277660168379967e+01,3.651741272548380124e+01,4.216965034285819769e+01])
mob_ion = np.array([3.162277660168379937e-04,2.371373705661659990e-04,
1.778279410038920258e-04,1.333521432163320159e-04,1.000000000000000048e-04,
7.498942093324559917e-05,5.623413251903490022e-05,4.216965034285820205e-05,
3.162277660168380208e-05,2.371373705661660125e-05,1.778279410038919852e-05,
1.333521432163319990e-05,1.000000000000000082e-05,7.498942093324561442e-06,
5.623413251903490361e-06,4.216965034285830030e-06,3.162277660168380038e-06,
2.371373705661659871e-06,1.778279410038920148e-06,1.333521432163330027e-06,
1.000000000000000167e-06,7.498942093324570124e-07,5.623413251903499890e-07,
4.216965034285829924e-07,3.162277660168379721e-07,2.371373705661660136e-07,
1.778279410038920042e-07,1.333521432163329868e-07])*1e4
mob_ion_geomeans=np.array([2.73841963e-04, 2.05352503e-04, 1.53992653e-04, 1.15478198e-04,
8.65964323e-05, 6.49381632e-05, 4.86967525e-05, 3.65174127e-05,
2.73841963e-05, 2.05352503e-05, 1.53992653e-05, 1.15478198e-05,
8.65964323e-06, 6.49381632e-06, 4.86967525e-06, 3.65174127e-06,
2.73841963e-06, 2.05352503e-06, 1.53992653e-06, 1.15478198e-06,
8.65964323e-07, 6.49381632e-07, 4.86967525e-07, 3.65174127e-07,
2.73841963e-07, 2.05352503e-07, 1.53992653e-07])*1e4
dp_par_geomeans=np.array([0.80584219, 0.93057204, 1.07460783, 1.24093776, 1.43301257,
1.6548171 , 1.91095297, 2.20673407, 2.54829675, 2.94272718,
3.39820833, 3.92418976, 4.53158364, 5.23299115, 6.0429639 ,
6.97830585, 8.05842188, 9.30572041, 10.74607828, 12.40937761,
14.3301257 , 16.548171 , 19.10952975, 22.06734069, 25.48296748,
29.42727176, 33.98208329, 39.24189758])
# Log-width of each of the 28 ion mobility bins (all bins share the same width).
dlogmob_ion = np.full(28, 0.125)
dlogdp_ion = np.array([0.06257524, 0.0625811 , 0.06259375, 0.06260838, 0.06262533,
0.06264495, 0.06266769, 0.06269404, 0.06272461, 0.06276008,
0.06280128, 0.06284916, 0.06290487, 0.06296974, 0.06304539,
0.0631337 , 0.06323696, 0.06335788, 0.06349974, 0.0636665 ,
0.06386292, 0.06409481, 0.06436924, 0.06469482, 0.06508209,
0.06554394, 0.06609614, 0.06639699])
# Log-width of each of the 29 particle diameter bins (all bins share the same width).
dlogdp_par = np.full(29, 0.0625)
# Known raw-data file naming schemes (strftime patterns). Each entry lists
# three patterns: the ions file, the particles file and the accompanying
# log / records / diagnostics file.
filename_formats = [
    [
        "%Y-%m-%d.ions.nds",
        "%Y-%m-%d.particles.nds",
        "%Y-%m-%d.log",
    ],
    [
        "%Y%m%d-block-ions.spectra",
        "%Y%m%d-block-particles.spectra",
        "%Y%m%d-block.records",
    ],
    [
        "%Y%m%d-block-ions.spectra",
        "%Y%m%d-block-particles.spectra",
        "%Y%m%d-block.diagnostics",
    ],
]

# Candidate column names, in order of preference, used by
# find_diagnostic_names() to locate the diagnostic parameters.

# Instruments reporting the sample flow in a single column
possible_sampleflow_names1 = ["sampleflow", "Flowaer"]

# Instruments reporting the sample flow in two columns (one per polarity)
possible_sampleflow_names2 = [
    "pos_sampleflow.mean",
    "neg_sampleflow.mean",
    "pos_sampleflow",
    "neg_sampleflow",
]

possible_temperature_names = ["temperature.mean", "temperature", "temp"]

possible_pressure_names = ["baro.mean", "baro"]
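# Electrometer size ranges for the different inverters: each table maps an
# electrometer number (as a string key) to the [lower, upper] diameter limits
# that clean_data() compares against the size-bin diameters.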
ions_pos_v14_lrnd={"0": [7.16444775804687e-10, 1.0700473216535486e-09], "1": [8.766005865635541e-10, 1.2912139078236106e-09], "2":
[1.0233784015731513e-09, 1.494607599390042e-09], "3": [1.167004143869059e-09, 1.6953050539978397e-09], "4":
[1.3171140158277396e-09, 1.9129953633709412e-09], "5": [1.5010400712091295e-09, 2.196448726880819e-09], "6":
[1.7374102549917467e-09, 2.5397495919423145e-09], "7": [1.9987455846433743e-09, 2.909827835540643e-09], "8":
[2.308391221999045e-09, 3.399628318414748e-09], "9": [2.743654460662328e-09, 4.064377206025429e-09], "10":
[3.235105579106799e-09, 4.708980203333236e-09], "11": [3.657786198230896e-09, 5.320149444473465e-09], "12":
[4.40743242351629e-09, 7.235758783698336e-09], "13": [6.341141170615947e-09, 1.0173608443214825e-08], "14":
[8.61139257420043e-09, 1.2899482426374689e-08], "15": [1.0474248763637253e-08, 1.556121847426194e-08], "16":
[1.2937549036927316e-08, 2.07105715562843e-08], "17": [1.778277482687919e-08, 2.8143993934982054e-08], "18":
[2.3703396062208645e-08, 3.638374245002531e-08], "19": [2.9466514534877575e-08, 4.3834357529742775e-08], "20":
[3.3648711881521194e-08, 4.601489588650497e-08]}
particles_pos_v14_lrnd_elm25_chv={"0": [7.084021223198885e-10, 1.1041550022447838e-09], "1": [8.824680153980799e-10, 1.3381985992962885e-09], "2":
[1.027268571461551e-09, 1.5535807431315568e-09], "3": [1.1710814400196452e-09, 1.7778453394121642e-09], "4":
[1.3364125967253046e-09, 2.0521125317491067e-09], "5": [1.5165756393379476e-09, 2.364104261278049e-09], "6":
[1.7469782917199996e-09, 2.7691235104649e-09], "7": [2.0030670784526445e-09, 3.1804078983335615e-09], "8":
[2.3555174234652947e-09, 3.889745378517969e-09], "9": [2.8998890919509277e-09, 4.85643554134041e-09], "10":
[3.4535426802312536e-09, 5.802269084443331e-09], "11": [3.952828282143858e-09, 6.706155173416103e-09], "12":
[4.680496931685137e-09, 8.345157694215357e-09], "13": [5.908440036792274e-09, 1.0914143399425853e-08], "14":
[7.25270144756779e-09, 1.3424351306635164e-08], "15": [8.481012704476841e-09, 1.5854436158750896e-08], "16":
[9.92828580185976e-09, 1.8875874031548135e-08], "17": [1.2480795668183397e-08, 2.5489807672756865e-08], "18":
[1.618264753193867e-08, 3.382694961210274e-08], "19": [1.9259686684447952e-08, 4.1933601744775866e-08], "20":
[2.22037089383357e-08, 4.865271942977691e-08], "21": [2.4978007194778478e-08, 5.377912883862792e-08], "22":
[2.7446273522212072e-08, 5.731891713329477e-08], "23": [2.966486504673139e-08, 6.006253448103395e-08], "24":
[3.1538173585926114e-08, 6.213974119411533e-08]}
particles_neg_v14_lrnd={"0": [7.071203017729596e-10, 1.1005013232430319e-09], "1": [8.797742415579472e-10, 1.3341534055269684e-09], "2":
[1.0249360413125393e-09, 1.5500231709924995e-09], "3": [1.1663524905165027e-09, 1.76746761608056e-09], "4":
[1.3247911530621861e-09, 2.0313528986656205e-09], "5": [1.4993872332769961e-09, 2.3322201548214762e-09], "6":
[1.7211922995795243e-09, 2.7237415257553574e-09], "7": [1.9709779821521803e-09, 3.127660894213944e-09], "8":
[2.2719521313715783e-09, 3.683906828365737e-09], "9": [2.6910604870370505e-09, 4.432425549741674e-09], "10":
[3.1273717566860052e-09, 5.176267101656894e-09], "11": [3.5214138488359515e-09, 5.88784346872649e-09], "12":
[4.344519392306214e-09, 8.075601998567308e-09], "13": [6.055647568060633e-09, 1.15707771780943e-08], "14":
[7.891354399664643e-09, 1.4822317894774355e-08], "15": [9.488534907698969e-09, 1.7987926934224552e-08], "16":
[1.1972289469619363e-08, 2.4352144139698158e-08], "17": [1.6205665582181165e-08, 3.529177218334523e-08], "18":
[2.1726991804436577e-08, 4.890078120743212e-08], "19": [2.747153199034406e-08, 5.80468336800726e-08], "20":
[3.181625187151363e-08, 6.282803902888346e-08]}
ions_pos_v141_lrnd_elm25_chv={"0": [7.174408260504354e-10, 1.073624396936229e-09], "1": [8.795427161035404e-10, 1.2949773919916534e-09], "2":
[1.0258582665799353e-09, 1.4979919549108165e-09], "3": [1.1719327677994699e-09, 1.70372171508208e-09], "4":
[1.32799146465657e-09, 1.932594430907855e-09], "5": [1.520240256748169e-09, 2.2261429400706174e-09], "6":
[1.7628304747501982e-09, 2.581861661002808e-09], "7": [2.0323715276272585e-09, 2.9598100485518586e-09], "8":
[2.3816068293602794e-09, 3.5773872919384414e-09], "9": [2.962208652767496e-09, 4.432897017241381e-09], "10":
[3.5762132418655334e-09, 5.239831433099603e-09], "11": [4.133555700629352e-09, 6.039237962304223e-09], "12":
[4.882640365333869e-09, 7.457408037033863e-09], "13": [6.2783199783114545e-09, 9.575956296394927e-09], "14":
[7.904204442058769e-09, 1.1694806122760362e-08], "15": [9.352755529575812e-09, 1.3732975020325302e-08], "16":
[1.0981897607602298e-08, 1.6276182350230556e-08], "17": [1.3500089105006028e-08, 2.1603165023592403e-08], "18":
[1.8022095523973226e-08, 2.708852306358357e-08], "19": [2.156970936611481e-08, 3.1921441726778743e-08], "20":
[2.4519917336200788e-08, 3.609220688998671e-08], "21": [2.7168015322179712e-08, 3.993591879861599e-08], "22":
[2.9639336081649807e-08, 4.339405159440388e-08], "23": [3.19559928473482e-08, 4.538489884003738e-08], "24":
[3.362603005585794e-08, 4.572616914940904e-08]}
ions_pos_v141_hrnd_elm25_chv={"0": [7.177816842294488e-10, 1.072221766346416e-09], "1": [8.784324649240066e-10, 1.2952331371673022e-09], "2":
[1.0282272593935292e-09, 1.496448130128779e-09], "3": [1.1709728034437686e-09, 1.70109575761716e-09], "4":
[1.3300366642732846e-09, 1.9341044305153346e-09], "5": [1.521422514332051e-09, 2.220832669677381e-09], "6":
[1.7632530836302399e-09, 2.584244551737274e-09], "7": [2.0351677980258766e-09, 2.955213035032816e-09], "8":
[2.379947213073259e-09, 3.5807287457548474e-09], "9": [2.9614387929489157e-09, 4.428794241338281e-09], "10":
[3.5808241708120252e-09, 5.247041226910477e-09], "11": [4.136034551200579e-09, 6.026277543624166e-09], "12":
[4.890752691340854e-09, 7.457985468634934e-09], "13": [6.2730994769101586e-09, 9.589732077862633e-09], "14":
[7.893847904275436e-09, 1.1687225512360978e-08], "15": [9.345340174460752e-09, 1.3762043152233714e-08], "16":
[1.0978493586549243e-08, 1.6262760201045273e-08], "17": [1.3526934344462764e-08, 2.157926406305932e-08], "18":
[1.8045823361694697e-08, 2.711029182088656e-08], "19": [2.1550025677740064e-08, 3.1861088201059746e-08], "20":
[2.453350988841541e-08, 3.611626721404304e-08], "21": [2.7196984694271958e-08, 3.995278834492865e-08], "22":
[2.9641573529840143e-08, 4.3318065851782826e-08], "23": [3.187419245775203e-08, 4.5478238497900765e-08], "24":
[3.3521538858575206e-08, 4.6046488264314935e-08]}
particles_pos_v14_lrnd={"0": [7.071203017729596e-10, 1.1005013232430319e-09], "1": [8.797742415579472e-10, 1.3341534055269684e-09], "2":
[1.0249360413125393e-09, 1.5500231709924995e-09], "3": [1.1663524905165027e-09, 1.76746761608056e-09], "4":
[1.3247911530621861e-09, 2.0313528986656205e-09], "5": [1.4993872332769961e-09, 2.3322201548214762e-09], "6":
[1.7211922995795243e-09, 2.7237415257553574e-09], "7": [1.9709779821521803e-09, 3.127660894213944e-09], "8":
[2.2719521313715783e-09, 3.683906828365737e-09], "9": [2.6910604870370505e-09, 4.432425549741674e-09], "10":
[3.1273717566860052e-09, 5.176267101656894e-09], "11": [3.5214138488359515e-09, 5.88784346872649e-09], "12":
[4.344519392306214e-09, 8.075601998567308e-09], "13": [6.055647568060633e-09, 1.15707771780943e-08], "14":
[7.891354399664643e-09, 1.4822317894774355e-08], "15": [9.488534907698969e-09, 1.7987926934224552e-08], "16":
[1.1972289469619363e-08, 2.4352144139698158e-08], "17": [1.6205665582181165e-08, 3.529177218334523e-08], "18":
[2.1726991804436577e-08, 4.890078120743212e-08], "19": [2.747153199034406e-08, 5.80468336800726e-08], "20":
[3.181625187151363e-08, 6.282803902888346e-08]}
ions_neg_v141_lrnd_elm25_chv={"0": [7.162722829189713e-10, 1.069396256829605e-09], "1": [8.777038868584165e-10, 1.2946446240931807e-09], "2":
[1.02759999667648e-09, 1.5009614340173589e-09], "3": [1.1745671577354954e-09, 1.7069296262514892e-09], "4":
[1.330366866209285e-09, 1.936923662090671e-09], "5": [1.523569941517978e-09, 2.230584153641438e-09], "6":
[1.763456724402997e-09, 2.5797080071606806e-09], "7": [2.0282822638164162e-09, 2.9544966841050904e-09], "8":
[2.3814411392836232e-09, 3.585069910708154e-09], "9": [2.975412233075357e-09, 4.4586459608966074e-09], "10":
[3.5950536764791088e-09, 5.264604651129057e-09], "11": [4.1451727671809455e-09, 6.0533707850852946e-09], "12":
[4.908070335058664e-09, 7.554663305133556e-09], "13": [6.370251631603248e-09, 9.708647141925531e-09], "14":
[7.99768693004782e-09, 1.1807392092933995e-08], "15": [9.421664599359865e-09, 1.3820815597006762e-08], "16":
[1.1077372179456622e-08, 1.6484128380953205e-08], "17": [1.3672218205379754e-08, 2.166074698335169e-08], "18":
[1.8004147704745442e-08, 2.705025422013022e-08], "19": [2.1526595943708422e-08, 3.186462197910958e-08], "20":
[2.4479448411058165e-08, 3.602967602639687e-08], "21": [2.7167075804775312e-08, 3.995596608326538e-08], "22":
[2.9709429581149162e-08, 4.347660778516122e-08], "23": [3.2017621669510734e-08, 4.541693553734151e-08], "24":
[3.3681779306509456e-08, 4.5708942542429593e-08]}
ions_neg_v141_hrnd_elm25_chv={"0": [7.166088691273807e-10, 1.0680529042034586e-09], "1": [8.766077238618173e-10, 1.2948822433267513e-09], "2":
[1.0299408040688298e-09, 1.499348034087609e-09], "3": [1.1734785792847775e-09, 1.7044494278972901e-09], "4":
[1.3325133850369467e-09, 1.9382490836139435e-09], "5": [1.5246381007528337e-09, 2.225245739179207e-09], "6":
[1.7639085256336676e-09, 2.5820656631195802e-09], "7": [2.031125366865139e-09, 2.9500866762907164e-09], "8":
[2.3797421811002037e-09, 3.588570659248709e-09], "9": [2.974497863791539e-09, 4.454598568481367e-09], "10":
[3.600685696291343e-09, 5.270248740213011e-09], "11": [4.147441219601736e-09, 6.040242224249017e-09], "12":
[4.91677135667135e-09, 7.553001655458096e-09], "13": [6.368408114819548e-09, 9.720375884730652e-09], "14":
[7.983853657878074e-09, 1.1805425434050108e-08], "15": [9.415625209974426e-09, 1.385125858049306e-08], "16":
[1.1073898998765917e-08, 1.646956079843046e-08], "17": [1.3699649532490082e-08, 2.163235038140565e-08], "18":
[1.802716724985027e-08, 2.7073230406563897e-08], "19": [2.150965694231408e-08, 3.18014983817787e-08], "20":
[2.4492014342932346e-08, 3.605523207826252e-08], "21": [2.7196391232036758e-08, 3.9971877339499414e-08], "22":
[2.9708672750890287e-08, 4.340340668907393e-08], "23": [3.1934336215501455e-08, 4.5516606356023787e-08], "24":
[3.357606684507602e-08, 4.604620001912706e-08]}
particles_neg_v14_hrnd_elm25_chv={"0": [7.064042348613744e-10, 1.098448642629657e-09], "1": [8.804166704055082e-10, 1.3382314628239056e-09], "2":
[1.029929687471369e-09, 1.5587130355258417e-09], "3": [1.1715316139497021e-09, 1.7855361801274118e-09], "4":
[1.3375770105761571e-09, 2.0569498417960264e-09], "5": [1.5213418055613532e-09, 2.3650454557473994e-09], "6":
[1.7508747954828767e-09, 2.7596483400473384e-09], "7": [2.0016660559184965e-09, 3.1683106443504784e-09], "8":
[2.3537662142103147e-09, 3.892364103276864e-09], "9": [2.908132768324034e-09, 4.87724949455506e-09], "10":
[3.4647588825814346e-09, 5.819021297995083e-09], "11": [3.9538012917605546e-09, 6.718817658651353e-09], "12":
[4.699417872175317e-09, 8.46741114160911e-09], "13": [5.980472218281243e-09, 1.1075320194303405e-08], "14":
[7.32788314110871e-09, 1.356178222960962e-08], "15": [8.546658915188655e-09, 1.5975204027458133e-08], "16":
[1.003513710889909e-08, 1.9121314649296197e-08], "17": [1.2584497621235887e-08, 2.5586370462254533e-08], "18":
[1.6193366252358348e-08, 3.375225311508183e-08], "19": [1.9296366093983868e-08, 4.135787670654157e-08], "20":
[2.2205359456932222e-08, 4.775609758877974e-08], "21": [2.4949980719630763e-08, 5.296183331803598e-08], "22":
[2.74591102013903e-08, 5.6781631708117125e-08], "23": [2.9663083522354196e-08, 5.960548252960392e-08], "24":
[3.1563036449021557e-08, 6.184741780170188e-08]}
particles_pos_v14_hrnd_elm25_chv={"0": [7.078936573300082e-10, 1.1028124202267342e-09], "1": [8.820689074711288e-10, 1.338578480376556e-09], "2":
[1.0282838004812897e-09, 1.5555733231186909e-09], "3": [1.1690201376888472e-09, 1.78149953583638e-09], "4":
[1.3348577366613476e-09, 2.052450596702526e-09], "5": [1.5181851404773485e-09, 2.3598931859583523e-09], "6":
[1.7509905816449534e-09, 2.7622818843531693e-09], "7": [2.005225058134512e-09, 3.173616142257642e-09], "8":
[2.3525975248633675e-09, 3.883241010415373e-09], "9": [2.894779176315381e-09, 4.847916178622021e-09], "10":
[3.4477052157384657e-09, 5.792051680298515e-09], "11": [3.944104923721565e-09, 6.701786044525521e-09], "12":
[4.666951603466218e-09, 8.360587273279385e-09], "13": [5.8997089753584225e-09, 1.0919012148501147e-08], "14":
[7.252706673752877e-09, 1.342019953209076e-08], "15": [8.486965748259635e-09, 1.586474588608095e-08], "16":
[9.936765651010928e-09, 1.8874421710969515e-08], "17": [1.246739709406105e-08, 2.5483091135053706e-08], "18":
[1.6208024471095697e-08, 3.3797206216952925e-08], "19": [1.9338854709118882e-08, 4.146124436417098e-08], "20":
[2.2243072357824654e-08, 4.783315990811425e-08], "21": [2.4952099445624026e-08, 5.296484079233324e-08], "22":
[2.7389130112705036e-08, 5.667341170880334e-08], "23": [2.9602350590226694e-08, 5.953977095698898e-08], "24":
[3.149305285177872e-08, 6.176077534814207e-08]}
ions_neg_v14_lrnd={"0": [7.16444775804687e-10, 1.0700473216535486e-09], "1": [8.766005865635541e-10, 1.2912139078236106e-09], "2":
[1.0233784015731513e-09, 1.494607599390042e-09], "3": [1.167004143869059e-09, 1.6953050539978397e-09], "4":
[1.3171140158277396e-09, 1.9129953633709412e-09], "5": [1.5010400712091295e-09, 2.196448726880819e-09], "6":
[1.7374102549917467e-09, 2.5397495919423145e-09], "7": [1.9987455846433743e-09, 2.909827835540643e-09], "8":
[2.308391221999045e-09, 3.399628318414748e-09], "9": [2.743654460662328e-09, 4.064377206025429e-09], "10":
[3.235105579106799e-09, 4.708980203333236e-09], "11": [3.657786198230896e-09, 5.320149444473465e-09], "12":
[4.40743242351629e-09, 7.235758783698336e-09], "13": [6.341141170615947e-09, 1.0173608443214825e-08], "14":
[8.61139257420043e-09, 1.2899482426374689e-08], "15": [1.0474248763637253e-08, 1.556121847426194e-08], "16":
[1.2937549036927316e-08, 2.07105715562843e-08], "17": [1.778277482687919e-08, 2.8143993934982054e-08], "18":
[2.3703396062208645e-08, 3.638374245002531e-08], "19": [2.9466514534877575e-08, 4.3834357529742775e-08], "20":
[3.3648711881521194e-08, 4.601489588650497e-08]}
particles_neg_v14_lrnd_elm25_chv={"0": [7.069362615274498e-10, 1.0997213171324974e-09], "1": [8.808566990541904e-10, 1.3378548681525188e-09], "2":
[1.0289107593642929e-09, 1.5567373372470626e-09], "3": [1.1735882698862885e-09, 1.7819165288950059e-09], "4":
[1.33907446265167e-09, 2.0566964315737684e-09], "5": [1.5196769831618589e-09, 2.3693061857773854e-09], "6":
[1.746857574391727e-09, 2.766483303572841e-09], "7": [1.9994652400593872e-09, 3.175124915389092e-09], "8":
[2.356723607259348e-09, 3.898924433703989e-09], "9": [2.9132704671892387e-09, 4.885896237782877e-09], "10":
[3.4706415007361796e-09, 5.829183030947836e-09], "11": [3.962593522847839e-09, 6.722985797091222e-09], "12":
[4.7129989830246345e-09, 8.451324582668515e-09], "13": [5.988747492408655e-09, 1.1071809141751731e-08], "14":
[7.327476301230385e-09, 1.3565350436407152e-08], "15": [8.540412648877852e-09, 1.5964663591630477e-08], "16":
[1.0026869681645872e-08, 1.9124820128830518e-08], "17": [1.2598231836692178e-08, 2.5591353853168848e-08], "18":
[1.6168464020950793e-08, 3.3780310510756356e-08], "19": [1.9217207258451423e-08, 4.1823547718041306e-08], "20":
[2.2165154711387387e-08, 4.85721513696297e-08], "21": [2.4975870919608738e-08, 5.3775910543120685e-08], "22":
[2.7516844715923574e-08, 5.7423919806126056e-08], "23": [2.9725447211466474e-08, 6.012495231939605e-08], "24":
[3.160714147308623e-08, 6.222035590925216e-08]}
# Standard reference conditions; correct_data() uses them to scale the
# concentrations when the sea-level correction is requested.
temp_ref = 273.15 # K (0 degrees C)
pres_ref = 101325.0 # Pa (1 atm)
def make_config_template(fn):
    """
    Write an empty configuration file for the user to fill in

    The file lists every configuration key followed by a short
    comment describing the expected value.

    Parameters
    ----------
    fn : str
        full path to configuration file
        For example `/home/user/config.yml`
    """
    template_lines = (
        "location: # Name of the measurement site\n",
        "data_folder: # Full paths to raw data folders\n",
        "- # Data folder 1\n",
        "- # Data folder 2, and so on...\n",
        "processed_folder: # Full path to folder where procesed data is saved\n",
        "database_file: # Full path to database file (will be created on first run) \n",
        "start_date: # Format: yyyy-mm-dd\n",
        "end_date: # Format: yyyy-mm-dd or '' for current day\n",
        "apply_corrections: # true or false\n",
        "inlet_length: # length of inlet in meters\n",
        "sealevel_correction: # true or false\n",
        "apply_cleaning: # true or false\n",
        "remove_corona_ions: # true or false\n",
        "remove_noisy_electrometers: # true or false\n",
        "inverter_name: # hires_25, lores_25, lores_21 or '' (needed for noise removal, '' if noise not removed)\n",
        # The last entry deliberately has no trailing newline
        "allow_reprocess: # true or false",
    )
    with open(fn, "w") as f:
        f.write("".join(template_lines))
def tubeloss(dpp,pflow,plength,temp,press):
    """
    Fraction of particles penetrating the inlet tube

    The penetration is evaluated on a grid with one row per sample
    (temperature / pressure / flow value) and one column per diameter.

    Parameters
    ----------
    dpp : array-like
        Particle diameters (passed on to `af.particle_diffusivity`)
    pflow : array-like
        Flow rate through the tube, one value per sample
    plength : float
        Length of the tube
    temp : array-like
        Temperature, one value per sample
    press : array-like
        Pressure, one value per sample

    Returns
    -------
    numpy.ndarray
        Penetration efficiency; NaN wherever the deposition
        parameter is NaN

    Notes
    -----
    NOTE(review): the two-branch polynomial / exponential expression
    looks like the Gormley-Kennedy laminar-flow diffusion loss formula,
    and the caller passes SI units - confirm both.
    """
    # Broadcast the per-sample quantities against the diameters
    diam_grid, temp_grid = np.meshgrid(dpp, temp)
    press_grid = np.meshgrid(dpp, press)[1]
    flow_grid = np.meshgrid(dpp, pflow)[1]

    diffusivity = af.particle_diffusivity(diam_grid, temp_grid, press_grid)
    mu = np.pi*diffusivity*plength/flow_grid

    penetration = np.full(mu.shape, np.nan)

    # Small deposition parameter: series expansion
    small = mu < 0.02
    mu_small = mu[small]
    penetration[small] = 1. - 2.56*mu_small**(2./3.) + 1.2*mu_small+0.177*mu_small**(4./3.)

    # Otherwise: sum of exponentials
    large = mu >= 0.02
    mu_large = mu[large]
    penetration[large] = 0.819*np.exp(-3.657*mu_large) + 0.097*np.exp(-22.3*mu_large) + 0.032*np.exp(-57.0*mu_large)

    return penetration
def read_file(fn):
    """
    Read NAIS raw data file into a pandas.DataFrame

    Empty lines and lines starting with `#` are ignored. The first
    remaining line that contains the text "opmode" is taken as the
    header, and the character right before "opmode" is used as the
    column delimiter. Data lines whose number of fields differs from
    the header, or that repeat the header, are dropped.

    Parameters
    ----------
    fn : str
        Raw data filename with path

    Returns
    -------
    pandas.DataFrame or None
        Contents of the file indexed by the first column parsed as
        datetimes. The first three columns of the file are kept as
        text; every later column is float64 with unparseable values
        set to NaN. `None` if the file has no usable data lines.
    """
    header = None
    delimiter = None
    data_matrix = []

    with open(fn,'r') as f:
        lines = f.read().splitlines()

    for line in lines:
        # Skip empty lines and comments
        if (not line) or (line[0]=='#'):
            continue

        if header is None:
            # Everything before the header line is ignored
            if "opmode" in line:
                match = re.search('(.)opmode',line)
                # Without a character in front of "opmode" the delimiter
                # cannot be deduced, so keep looking for a proper header
                if match is None:
                    continue
                delimiter = match.group(1)
                header = line.split(delimiter)
            continue

        data_line = line.split(delimiter)
        # Keep complete rows only and drop repeated header lines
        if (len(data_line)==len(header)) and ("opmode" not in data_line):
            data_matrix.append(data_line)

    if not data_matrix:
        return None

    df = pd.DataFrame(columns = header, data = data_matrix)

    # Convert anything that can be converted to float and coerce the rest to
    # NaNs. The frame is rebuilt instead of assigning through .iloc because
    # such an assignment may be carried out in place and then leaves the
    # columns with object dtype.
    numeric = df.iloc[:,3:].apply(pd.to_numeric, errors='coerce').astype(float)
    df = pd.concat([df.iloc[:,:3], numeric], axis=1)

    # Establish begin_time (first column) as index
    df = df.set_index(df.columns[0])
    df.index = pd.to_datetime(df.index)

    return df
def average_mob(y,h):
    """
    Re-bin mobility-resolved data onto the fixed mobility grid

    Parameters
    ----------
    y : pandas.DataFrame
        Data with one column per inverter mobility bin
    h : numpy.ndarray
        Mobility of each column in `y`

    Returns
    -------
    pandas.DataFrame
        One column per output bin, labelled 0..len(mob_ion_geomeans).
        The edges in `mob_ion_geomeans` are walked in order: the
        first bin takes everything above the first edge, the following
        bins take the columns between consecutive edges (row-wise
        median), and the last bin takes everything at or below the
        last edge (row-wise mean).
    """
    edges = mob_ion_geomeans
    binned = pd.DataFrame([])

    for idx, lower_edge in enumerate(edges):
        in_bin = h>lower_edge
        if idx>0:
            # Bounded from above by the previous edge
            in_bin = in_bin & (h<=edges[idx-1])
        binned[idx] = y.iloc[:,in_bin].median(axis=1)

    # Whatever falls at or below the last edge goes to the final bin
    binned[len(edges)] = y.iloc[:,h<=edges[-1]].mean(axis=1)

    return binned
def average_dp(y,h):
    """
    Re-bin diameter-resolved data onto the fixed diameter grid

    Parameters
    ----------
    y : pandas.DataFrame
        Data with one column per inverter diameter bin
    h : numpy.ndarray
        Diameter of each column in `y`

    Returns
    -------
    pandas.DataFrame
        One column per output bin, labelled 0..len(dp_par_geomeans).
        The edges in `dp_par_geomeans` are walked in order: the first
        bin takes everything below the first edge, the following bins
        take the columns between consecutive edges (row-wise median),
        and the last bin takes everything at or above the last edge
        (row-wise mean).
    """
    edges = dp_par_geomeans
    binned = pd.DataFrame([])

    for idx, upper_edge in enumerate(edges):
        in_bin = h<upper_edge
        if idx>0:
            # Bounded from below by the previous edge
            in_bin = in_bin & (h>=edges[idx-1])
        binned[idx] = y.iloc[:,in_bin].median(axis=1)

    # Whatever falls at or above the last edge goes to the final bin
    binned[len(edges)] = y.iloc[:,h>=edges[-1]].mean(axis=1)

    return binned
def find_diagnostic_names(diag_params):
    """
    Pick the temperature, pressure and sample flow column names

    Parameters
    ----------
    diag_params : list of str
        Names of the available diagnostic parameters

    Returns
    -------
    tuple
        `(temperature_name, pressure_name, sampleflow_name)`. Each
        element is `None` when no known name is present. The sample
        flow is a single name when a single-sensor name is found;
        otherwise it is a list of two names when exactly two of the
        two-sensor names are present.
    """
    def first_present(candidates):
        # First candidate (in order of preference) that is available
        return next((name for name in candidates if name in diag_params), None)

    temperature_name = first_present(possible_temperature_names)
    pressure_name = first_present(possible_pressure_names)

    # try single flow sensor
    sampleflow_name = first_present(possible_sampleflow_names1)

    if sampleflow_name is None:
        # try two flow sensors
        paired = [name for name in possible_sampleflow_names2 if name in diag_params]
        if len(paired)==2:
            sampleflow_name = paired

    return temperature_name, pressure_name, sampleflow_name
def process_data(df,mode):
    """
    Turn a raw spectra table into negative and positive polarity data

    Parameters
    ----------
    df : pandas.DataFrame or None
        Raw spectra as returned by `read_file`
    mode : str
        "ions" or "particles"

    Returns
    -------
    tuple
        `(negdf, posdf)`: data frames indexed by "Time" whose columns
        are `dp_ion*1e-9` or `dp_par*1e-9`. An element is `None` when
        all of its values are NaN. `(None, None)` is returned when
        `df` is `None` or its index is not monotonically increasing.
    """
    if (df is None) or (not df.index.to_series().is_monotonic_increasing):
        return None, None

    def leading_numbers(labels):
        # First number that appears in each column label
        return np.array([float(re.findall(r"[-+]?\d*\.\d+|\d+",label)[0])
                         for label in labels])

    columns = df.columns

    # After the first two columns the table holds four equally wide blocks;
    # the first block is used for negative and the third for positive polarity
    n_bins = int((len(columns)-2)/4)
    bin_labels = columns[2:2+n_bins]
    neg_data = df.iloc[:,2:2+n_bins]
    pos_data = df.iloc[:,2+2*n_bins:2+3*n_bins]

    if mode=="ions":
        mobilities = leading_numbers(bin_labels)
        neg_data = average_mob(neg_data,mobilities)
        pos_data = average_mob(pos_data,mobilities)
        # Convert to number size distributions
        neg_data = neg_data * dlogmob_ion / dlogdp_ion
        pos_data = pos_data * dlogmob_ion / dlogdp_ion
        size_header = dp_ion*1e-9

    if mode=="particles":
        # NOTE(review): the factor 2.0 presumably converts radius to
        # diameter - confirm against the raw file format
        diameters = 2.0*leading_numbers(bin_labels)
        neg_data = average_dp(neg_data,diameters)
        pos_data = average_dp(pos_data,diameters)
        size_header = dp_par*1e-9

    results = []
    for values in (neg_data.values, pos_data.values):
        frame = pd.DataFrame(columns=size_header, index=df.index, data=values)
        frame.index.name = "Time"
        # A table without a single valid number is reported as missing
        results.append(None if frame.isna().all().all() else frame)

    negdf, posdf = results
    return negdf, posdf
def correct_data(
        df,
        rec,
        mode,
        do_sealevel_corr,
        pipe_length):
    """
    Apply the inlet loss, calibration and optional standard-condition
    corrections to a size distribution

    Parameters
    ----------
    df : pandas.DataFrame or None
        Size distribution from `process_data`
    rec : pandas.DataFrame or None
        Diagnostic records from `read_file` (needs an `opmode` column)
    mode : str
        "ions" or "particles"
    do_sealevel_corr : bool
        If true, scale the data to `temp_ref` and `pres_ref`
    pipe_length : float
        Length of the inlet, passed to `tubeloss`

    Returns
    -------
    pandas.DataFrame or None
        Corrected data. `None` if `df` or `rec` is missing, if the
        matching records are not in increasing time order, if the
        temperature, pressure or flow columns cannot be found, or if
        one of them contains only NaNs.

    Notes
    -----
    Temperature has 273.15 added and pressure is multiplied by 100,
    so the records are assumed to be in degrees C and hPa
    (NOTE(review): confirm). Values outside 223-353 K, 37000-121000 Pa
    or 48-60 l/min are treated as sensor errors and replaced by NaN,
    which makes the corrected data NaN for those rows.
    """
    if (rec is None) or (df is None):
        return None

    # Extract the records that match the mode
    if mode=="ions":
        df_rec = rec[rec.opmode=='ions']
    if mode=="particles":
        df_rec = rec[rec.opmode=='particles']

    # reindex(method="nearest") needs a monotonic index
    if not df_rec.index.to_series().is_monotonic_increasing:
        return None

    df_rec = df_rec.reindex(df.index,method="nearest")

    # Check that the relevant diagnostic data is found
    t_name,p_name,sf_name = find_diagnostic_names(list(df_rec))
    if (t_name is None) or (p_name is None) or (sf_name is None):
        return None

    # Temperature
    t_df = 273.15 + pd.DataFrame(df_rec[t_name].astype(float))

    # Pressure
    p_df = 100.0 * pd.DataFrame(df_rec[p_name].astype(float))

    # Sampleflow: two sensors are reported as a list of two column names
    # and their readings are summed (NaN unless both are available)
    if isinstance(sf_name,list):
        flow_df = pd.DataFrame(df_rec[sf_name].sum(axis=1,min_count=2).astype(float))
    else:
        flow_df = pd.DataFrame(df_rec[sf_name].astype(float))

    # If all parameters are NaN e.g. sensor is broken
    if (flow_df.isna().all().all() or
        p_df.isna().all().all() or
        t_df.isna().all().all()):
        return None

    # Test if the sampleflow is in cm3/s (old models) or
    # l/min and if necessary convert to l/min
    if (np.nanmedian(flow_df)>300):
        flow_df = (flow_df/1000.0) * 60.0

    # Sanity check the values: both bounds must hold. (Combining the bounds
    # with "or" would accept every non-NaN value and disable the check.)
    t_df = t_df.where(((t_df>=223.)&(t_df<=353.)),np.nan)
    p_df = p_df.where(((p_df>=37000.)&(p_df<=121000.)),np.nan)
    flow_df = flow_df.where(((flow_df>=48.)&(flow_df<=60.)),np.nan)

    # Correct the number concentrations to standard conditions
    if (do_sealevel_corr):
        stp_corr_df = (pres_ref*t_df.values)/(temp_ref*p_df.values)
        df = stp_corr_df * df

    # Diffusion loss correction (1.667e-5 converts l/min to m3/s)
    if mode=="ions":
        throughput = tubeloss(dp_ion*1e-9,flow_df.values*1.667e-5,pipe_length,t_df.values,p_df.values)
    if mode=="particles":
        throughput = tubeloss(dp_par*1e-9,flow_df.values*1.667e-5,pipe_length,t_df.values,p_df.values)

    df = df / throughput

    # Robert Wagner's calibration (only ions)
    if mode=="ions":
        roberts_corr = 0.713*dp_ion**0.120
        df = df / roberts_corr

    return df
def clean_data(
        df,
        rec,
        mode,
        pol,
        remove_corona_ions,
        remove_electrometer_noise,
        inverter_name): # Only needed if removing electrometer noise
    """
    Remove corona ions and/or noisy electrometer data

    Parameters
    ----------
    df : pandas.DataFrame or None
        Size distribution whose column labels are the bin diameters.
        NOTE: corona ion removal writes NaNs into this frame in place.
    rec : pandas.DataFrame or None
        Diagnostic records from `read_file` (needs an `opmode` column)
    mode : str
        "ions" or "particles"
    pol : str
        "neg" or "pos"
    remove_corona_ions : bool
        If true, set the bins at and below the detected corona ion
        limit to NaN
    remove_electrometer_noise : bool
        If true, set data affected by electrometer noise to NaN
    inverter_name : str
        "hires_25", "lores_25" or "lores_21"; with any other value the
        electrometer noise removal is skipped

    Returns
    -------
    pandas.DataFrame or None
        Cleaned data. `None` if `df` or `rec` is missing and, when
        noise removal is requested for a known inverter, if
        `mode`/`pol` is not a supported combination or the matching
        records are not in increasing time order.
    """
    if (df is None) or (rec is None):
        return None

    if remove_corona_ions:
        # Only consider likely limit range
        lower = 1.5e-9
        upper = 5.0e-9
        c = (lower <= df.columns.values) & (upper >= df.columns.values)
        df2 = df.loc[:, c]

        # Find maximum difference between size bin medians
        corona_lim = df2.columns.values[df2.median().diff().abs().argmax()]

        # Set values below corona ion limit to NaNs
        df.iloc[:,df.columns.values<=corona_lim]=np.nan

    if remove_electrometer_noise:

        # Without a known inverter the electrometer size ranges are unknown
        if inverter_name not in ("hires_25", "lores_25", "lores_21"):
            return df

        # Electrometer size ranges for every (inverter, mode, polarity)
        elm_tables = {
            ("hires_25","ions","neg"): ions_neg_v141_hrnd_elm25_chv,
            ("hires_25","ions","pos"): ions_pos_v141_hrnd_elm25_chv,
            ("hires_25","particles","neg"): particles_neg_v14_hrnd_elm25_chv,
            ("hires_25","particles","pos"): particles_pos_v14_hrnd_elm25_chv,
            ("lores_25","ions","neg"): ions_neg_v141_lrnd_elm25_chv,
            ("lores_25","ions","pos"): ions_pos_v141_lrnd_elm25_chv,
            ("lores_25","particles","neg"): particles_neg_v14_lrnd_elm25_chv,
            ("lores_25","particles","pos"): particles_pos_v14_lrnd_elm25_chv,
            ("lores_21","ions","neg"): ions_neg_v14_lrnd,
            ("lores_21","ions","pos"): ions_pos_v14_lrnd,
            ("lores_21","particles","neg"): particles_neg_v14_lrnd,
            ("lores_21","particles","pos"): particles_pos_v14_lrnd,
        }
        elm2dp = elm_tables.get((inverter_name,mode,pol))
        if elm2dp is None:
            # Unsupported mode or polarity
            return None

        # Extract the records that match the mode
        df_rec = rec[rec.opmode==mode]

        # reindex(method="nearest") needs a monotonic index
        if not df_rec.index.to_series().is_monotonic_increasing:
            return None

        df_rec = df_rec.reindex(df.index,method="nearest")

        elm2dp = {int(k):v for k,v in elm2dp.items()}
        number_of_elms = len(elm2dp)

        # Rolling time windows, as a number of rows. The time resolution is
        # taken from the first two rows of the data.
        reso_in_seconds = (df.index[1]-df.index[0]).seconds
        small_window = int((10.*60.)/(reso_in_seconds)) # 10 minutes
        medium_window = int((4.*60.*60.)/(reso_in_seconds)) # 4 hours
        large_window = int((12.*60.*60.)/(reso_in_seconds)) # 12 hours

        # NOISE LEVEL FROM THE RECORDS
        # The polarity was validated above, so it is either "neg" or "pos".
        # (Two independent "if" statements followed by "else: return None"
        # would discard all negative polarity data.)
        if pol == "neg":
            df_std = df_rec.iloc[:,2+2*number_of_elms:2+3*number_of_elms].copy()
        else:
            df_std = df_rec.iloc[:,2+3*number_of_elms:2+4*number_of_elms].copy()

        # Set index to electrometer number
        elm_header = np.arange(0,number_of_elms).astype(int)
        df_std.columns = elm_header

        # Calculate noise level at each diameter as the mean over the
        # electrometers whose size range covers that diameter
        df_std2 = df.copy()
        for d in df.columns.values:
            elms = [elm for elm in df_std.columns.values
                    if (d >= elm2dp[elm][0]) and (d <= elm2dp[elm][1])]
            df_std2[d] = df_std[elms].mean(axis=1).values

        # Apply medium window to get rid of small fluctuations in electrometer noise
        df_std2 = df_std2.rolling(medium_window, min_periods=int((medium_window+1.)/2.), center=True).median()

        # Get the median noise
        median_std2 = np.nanmedian(df_std2)

        # Then find where the noise is more than N times median
        N = 500
        df_std3 = df_std2.where((df_std2>N*median_std2), np.nan)

        # NOISE LEVEL FROM THE INVERTED DATA

        # Calculate standard deviation in 10 min segments
        df2 = df.rolling(small_window, min_periods=int((small_window+1.)/2.), center=True).std()

        # In a bigger window (12 hours) calculate the 75th quantile of the standard deviations
        # (semi)continuous noise causes higher values compared to normal and rare sudden changes in conc
        df2 = df2.rolling(large_window, min_periods=int((large_window+1.)/2.), center=True).quantile(0.75)

        # find where the noise is more than M times the median
        M = 7
        threshold = M*np.nanmedian(df2)
        df3 = df2.where(df2 > threshold, np.nan)

        # Keep only the data points that neither of the two noise checks
        # flagged; everything else becomes NaN.
        # NOTE(review): this removes a point when EITHER check flags it,
        # whereas the original comment spoke of removing data where the two
        # noise estimates "agree" - confirm which one is intended.
        df = df[df3.isna() & df_std3.isna()]

    return df
def nais_processor(config_file):
    """ Processes NAIS data

    Reads the YAML configuration, registers newly found raw data files in
    the TinyDB database and processes the selected days. For each day the
    raw files are read with ``read_file``, passed through ``process_data``
    and, depending on the configuration, ``correct_data`` and
    ``clean_data``. Every resulting data matrix that is not ``None`` is
    written as CSV into the processed folder and its path is stored in the
    database entry of that day.

    Parameters
    ----------
    config_file : str
        full path to configuration file

    """
    with open(config_file, 'r') as stream:
        config = yaml.safe_load(stream)

    load_path = config['data_folder']
    save_path = config['processed_folder']
    start_date = config['start_date']
    database = config['database_file']
    location = config['location']
    end_date = config['end_date']
    allow_reprocess = config["allow_reprocess"]
    pipelength = config['inlet_length']
    sealevel_correction = config['sealevel_correction']
    apply_corrections = config['apply_corrections']
    apply_cleaning = config["apply_cleaning"]
    remove_noisy_electrometers = config["remove_noisy_electrometers"]
    remove_corona_ions = config["remove_corona_ions"]
    inverter_name = config["inverter_name"]

    # Validate the configuration before touching the database
    assert isinstance(start_date, date)
    assert (end_date == '' or isinstance(end_date, date))
    assert os.path.exists(save_path)
    assert all([os.path.exists(x) for x in load_path])
    assert isinstance(allow_reprocess, bool)
    assert isinstance(remove_corona_ions, bool)
    assert isinstance(remove_noisy_electrometers, bool)
    assert isinstance(sealevel_correction, bool)
    assert isinstance(apply_cleaning, bool)
    assert isinstance(apply_corrections, bool)
    assert inverter_name in ("hires_25", "lores_25", "lores_21", '')
    # bool is a subclass of int, so it has to be excluded explicitly
    assert (isinstance(pipelength, (float, int))
            and not isinstance(pipelength, bool))

    end_date = date.today() if end_date == '' else end_date

    # Open the database exactly once
    db = TinyDB(database)
    check = Query()

    start_date_str = pd.to_datetime(start_date).strftime("%Y%m%d")
    end_date_str = pd.to_datetime(end_date).strftime("%Y%m%d")

    def find_raw_files(day):
        """Return a database entry for `day`, or None if no usable files exist."""
        for folder in load_path:
            for name_format in filename_formats:
                ion_fn = os.path.join(folder, day.strftime(name_format[0]))
                particle_fn = os.path.join(folder, day.strftime(name_format[1]))
                diagnostic_fn = os.path.join(folder, day.strftime(name_format[2]))
                has_ions = os.path.exists(ion_fn)
                has_particles = os.path.exists(particle_fn)
                # A day is usable when it has diagnostics and at least
                # one of the ion/particle files
                if (has_ions or has_particles) and os.path.exists(diagnostic_fn):
                    entry = {"timestamp": day.strftime("%Y%m%d"),
                             "diagnostics": diagnostic_fn}
                    if has_ions:
                        entry["ions"] = ion_fn
                    if has_particles:
                        entry["particles"] = particle_fn
                    return entry
        return None

    def process_and_save(entry, records, raw_key, data_type, corona,
                         suffix, neg_key, pos_key):
        """Process one raw file (both polarities) and store the results."""
        raw = read_file(entry[raw_key])
        neg_datamatrix, pos_datamatrix = process_data(raw, data_type)

        if apply_corrections:
            neg_datamatrix = correct_data(
                neg_datamatrix, records, data_type,
                sealevel_correction, pipelength)
            pos_datamatrix = correct_data(
                pos_datamatrix, records, data_type,
                sealevel_correction, pipelength)

        if apply_cleaning:
            neg_datamatrix = clean_data(
                neg_datamatrix, records, data_type, "neg",
                corona, remove_noisy_electrometers, inverter_name)
            pos_datamatrix = clean_data(
                pos_datamatrix, records, data_type, "pos",
                corona, remove_noisy_electrometers, inverter_name)

        outputs = ((neg_datamatrix, "NAISn", neg_key),
                   (pos_datamatrix, "NAISp", pos_key))
        for datamatrix, prefix, db_key in outputs:
            if datamatrix is not None:
                out_fn = os.path.join(
                    save_path, prefix + entry["timestamp"] + suffix)
                datamatrix.to_csv(out_fn)
                db.update({db_key: out_fn},
                          check.timestamp == entry["timestamp"])

    # list existing dates based on if diagnostic file was found
    existing_dates = {
        doc["timestamp"] for doc in db.search(check.diagnostics.exists())}

    if not existing_dates:
        print("building database...")
        list_of_datetimes = pd.date_range(
            start=start_date_str, end=end_date_str)
    else:
        # Only days after the newest registered day can be new
        list_of_datetimes = pd.date_range(
            start=max(existing_dates), end=end_date_str)

    # Add unprocessed datafiles to the database
    for day in list_of_datetimes:
        if day.strftime("%Y%m%d") in existing_dates:
            continue
        new_entry = find_raw_files(day)
        if new_entry is not None:
            db.insert(new_entry)

    # From the database find the last day with processed data
    processed_days = db.search(
        check.processed_neg_ion_file.exists() |
        check.processed_pos_ion_file.exists() |
        check.processed_neg_particle_file.exists() |
        check.processed_pos_particle_file.exists())

    if processed_days:
        # yyyymmdd strings sort chronologically
        last_day = max(doc["timestamp"] for doc in processed_days)
    else:
        last_day = None

    in_date_range = ((check.timestamp >= start_date_str) &
                     (check.timestamp <= end_date_str))
    has_raw_data = (check.diagnostics.exists() &
                    (check.ions.exists() | check.particles.exists()))

    if allow_reprocess:
        selection = has_raw_data & in_date_range
    else:
        # The last processed day is done again together with all days
        # that have no processed files yet
        selection = (
            ((check.timestamp == last_day) & in_date_range) |
            (has_raw_data &
             ~check.processed_neg_ion_file.exists() &
             ~check.processed_pos_ion_file.exists() &
             ~check.processed_neg_particle_file.exists() &
             ~check.processed_pos_particle_file.exists() &
             in_date_range))

    for x in db.search(selection):
        print("processing %s (%s)" % (x["timestamp"], location))

        records = read_file(x["diagnostics"])

        # ions (corona ion removal is never applied to ion data)
        if "ions" in x:
            process_and_save(
                x, records, "ions", "ions", False, "nds.sum",
                "processed_neg_ion_file", "processed_pos_ion_file")

        # particles
        if "particles" in x:
            process_and_save(
                x, records, "particles", "particles", remove_corona_ions,
                "np.sum",
                "processed_neg_particle_file", "processed_pos_particle_file")

    print("Done!")
def combine_databases(database_list, combined_database):
    """Combine JSON databases

    If the measurement setup changes one may have to use multiple configuration files
    which results in multiple databases. With this function you can combine the databases
    into a single database after processing.

    Entries are copied from the ``"_default"`` table of each input database
    in the given order and renumbered consecutively starting from 0.

    Parameters
    ----------
    database_list : list of str
        List of full paths to databases that should be combined
        First database should have the earliest data, second database
        the second earliest and so on
    combined_database : str
        full path to combined database

    """
    combined = {}
    next_id = 0
    for database in database_list:
        # Close each input file as soon as it has been parsed
        with open(database) as fid:
            database_json = json.load(fid)
        for entry in database_json["_default"].values():
            combined[next_id] = entry
            next_id += 1

    with open(combined_database, "w") as f:
        json.dump({"_default": combined}, f)
def combine_spectra(
        database_file,
        begin_time,
        end_time,
        spectrum_type="negion",
        reso=60):
    """
    Combine processed particle or ion data from some time range

    Parameters
    ----------
    database_file : str
        full path to database_file
    begin_time : str
        time zone aware iso formatted time string
        For example `"2013-01-02 15:00:00+02:00"`
    end_time : str
        time zone aware iso formatted time string
        For example `"2013-01-03 17:00:00+02:00"`
    spectrum_type : str
        negative ions `negion` (default)
        positive ions `posion`
        negative particles `negpar`
        positive particles `pospar`
    reso : int
        desired resolution given in minutes

    Returns
    -------
    pandas.DataFrame
        Combined aerosol number size distribution in the given
        time interval

    """
    db = TinyDB(database_file)
    check = Query()

    # Database timestamps are yyyymmdd strings
    begin_date = pd.to_datetime(begin_time).strftime("%Y%m%d")
    end_date = pd.to_datetime(end_time).strftime("%Y%m%d")

    assert spectrum_type in ["posion", "pospar", "negpar", "negion"],\
        "%s is not valid 'spectrum_type'" % spectrum_type

    # Database field holding the processed file of each spectrum type
    field_by_type = {
        "negpar": "processed_neg_particle_file",
        "pospar": "processed_pos_particle_file",
        "negion": "processed_neg_ion_file",
    }
    # Anything else is treated as positive ions
    db_entry = field_by_type.get(spectrum_type, "processed_pos_ion_file")

    matches = db.search(
        (getattr(check, db_entry).exists()) &
        (check.timestamp >= begin_date) &
        (check.timestamp <= end_date))

    filenames = [doc[db_entry] for doc in matches]
    return af.stack_data(filenames, begin_time, end_time, reso)
Functions
def combine_databases(database_list, combined_database)
-
Combine JSON databases
If the measurement setup changes one may have to use multiple configuration files which results in multiple databases. With this function you can combine the databases into a single database after processing.
Parameters
database_list
:list of str
-
List of full paths to databases that should be combined
First database should have the earliest data, second database the second earliest and so on
combined_database
:str
- full path to combined database
Expand source code
def combine_databases(database_list, combined_database):
    """Combine JSON databases

    If the measurement setup changes one may have to use multiple configuration files
    which results in multiple databases. With this function you can combine the databases
    into a single database after processing.

    Entries are copied from the ``"_default"`` table of each input database
    in the given order and renumbered consecutively starting from 0.

    Parameters
    ----------
    database_list : list of str
        List of full paths to databases that should be combined
        First database should have the earliest data, second database
        the second earliest and so on
    combined_database : str
        full path to combined database

    """
    combined = {}
    next_id = 0
    for database in database_list:
        # Close each input file as soon as it has been parsed
        with open(database) as fid:
            database_json = json.load(fid)
        for entry in database_json["_default"].values():
            combined[next_id] = entry
            next_id += 1

    with open(combined_database, "w") as f:
        json.dump({"_default": combined}, f)
def combine_spectra(database_file, begin_time, end_time, spectrum_type='negion', reso=60)
-
Combine processed particle or ion data from some time range
Parameters
database_file
:str
- full path to database_file
begin_time
:str
-
time zone aware iso formatted time string
For example
"2013-01-02 15:00:00+02:00"
end_time
:str
-
time zone aware iso formatted time string
For example
"2013-01-03 17:00:00+02:00"
spectrum_type
:str
-
negative ions: negion (default)
positive ions: posion
negative particles: negpar
positive particles: pospar
reso
:int
- desired resolution given in minutes
Returns
pandas.DataFrame
- Combined aerosol number size distribution in the given time interval
Expand source code
def combine_spectra(
        database_file,
        begin_time,
        end_time,
        spectrum_type="negion",
        reso=60):
    """
    Combine processed particle or ion data from some time range

    Parameters
    ----------
    database_file : str
        full path to database_file
    begin_time : str
        time zone aware iso formatted time string
        For example `"2013-01-02 15:00:00+02:00"`
    end_time : str
        time zone aware iso formatted time string
        For example `"2013-01-03 17:00:00+02:00"`
    spectrum_type : str
        negative ions `negion` (default)
        positive ions `posion`
        negative particles `negpar`
        positive particles `pospar`
    reso : int
        desired resolution given in minutes

    Returns
    -------
    pandas.DataFrame
        Combined aerosol number size distribution in the given
        time interval

    """
    db = TinyDB(database_file)
    check = Query()

    # Database timestamps are yyyymmdd strings
    begin_date = pd.to_datetime(begin_time).strftime("%Y%m%d")
    end_date = pd.to_datetime(end_time).strftime("%Y%m%d")

    assert spectrum_type in ["posion", "pospar", "negpar", "negion"],\
        "%s is not valid 'spectrum_type'" % spectrum_type

    # Database field holding the processed file of each spectrum type
    field_by_type = {
        "negpar": "processed_neg_particle_file",
        "pospar": "processed_pos_particle_file",
        "negion": "processed_neg_ion_file",
    }
    # Anything else is treated as positive ions
    db_entry = field_by_type.get(spectrum_type, "processed_pos_ion_file")

    matches = db.search(
        (getattr(check, db_entry).exists()) &
        (check.timestamp >= begin_date) &
        (check.timestamp <= end_date))

    filenames = [doc[db_entry] for doc in matches]
    return af.stack_data(filenames, begin_time, end_time, reso)
def make_config_template(fn)
-
Make a configuration file template
Parameters
fn
:str
-
full path to configuration file
For example
/home/user/config.yml
Expand source code
def make_config_template(fn):
    """
    Make a configuration file template

    Parameters
    ----------
    fn : str
        full path to configuration file
        For example `/home/user/config.yml`

    """
    # One entry per configuration key; the last line has no trailing newline
    template_lines = (
        "location: # Name of the measurement site\n",
        "data_folder: # Full paths to raw data folders\n",
        "- # Data folder 1\n",
        "- # Data folder 2, and so on...\n",
        "processed_folder: # Full path to folder where procesed data is saved\n",
        "database_file: # Full path to database file (will be created on first run) \n",
        "start_date: # Format: yyyy-mm-dd\n",
        "end_date: # Format: yyyy-mm-dd or '' for current day\n",
        "apply_corrections: # true or false\n",
        "inlet_length: # length of inlet in meters\n",
        "sealevel_correction: # true or false\n",
        "apply_cleaning: # true or false\n",
        "remove_corona_ions: # true or false\n",
        "remove_noisy_electrometers: # true or false\n",
        "inverter_name: # hires_25, lores_25, lores_21 or '' (needed for noise removal, '' if noise not removed)\n",
        "allow_reprocess: # true or false",
    )
    with open(fn, "w") as template:
        template.writelines(template_lines)
def nais_processor(config_file)
-
Processes NAIS data
Parameters
config_file
:str
- full path to configuration file
Expand source code
def nais_processor(config_file):
    """ Processes NAIS data

    Reads the YAML configuration, registers newly found raw data files in
    the TinyDB database and processes the selected days. For each day the
    raw files are read with ``read_file``, passed through ``process_data``
    and, depending on the configuration, ``correct_data`` and
    ``clean_data``. Every resulting data matrix that is not ``None`` is
    written as CSV into the processed folder and its path is stored in the
    database entry of that day.

    Parameters
    ----------
    config_file : str
        full path to configuration file

    """
    with open(config_file, 'r') as stream:
        config = yaml.safe_load(stream)

    load_path = config['data_folder']
    save_path = config['processed_folder']
    start_date = config['start_date']
    database = config['database_file']
    location = config['location']
    end_date = config['end_date']
    allow_reprocess = config["allow_reprocess"]
    pipelength = config['inlet_length']
    sealevel_correction = config['sealevel_correction']
    apply_corrections = config['apply_corrections']
    apply_cleaning = config["apply_cleaning"]
    remove_noisy_electrometers = config["remove_noisy_electrometers"]
    remove_corona_ions = config["remove_corona_ions"]
    inverter_name = config["inverter_name"]

    # Validate the configuration before touching the database
    assert isinstance(start_date, date)
    assert (end_date == '' or isinstance(end_date, date))
    assert os.path.exists(save_path)
    assert all([os.path.exists(x) for x in load_path])
    assert isinstance(allow_reprocess, bool)
    assert isinstance(remove_corona_ions, bool)
    assert isinstance(remove_noisy_electrometers, bool)
    assert isinstance(sealevel_correction, bool)
    assert isinstance(apply_cleaning, bool)
    assert isinstance(apply_corrections, bool)
    assert inverter_name in ("hires_25", "lores_25", "lores_21", '')
    # bool is a subclass of int, so it has to be excluded explicitly
    assert (isinstance(pipelength, (float, int))
            and not isinstance(pipelength, bool))

    end_date = date.today() if end_date == '' else end_date

    # Open the database exactly once
    db = TinyDB(database)
    check = Query()

    start_date_str = pd.to_datetime(start_date).strftime("%Y%m%d")
    end_date_str = pd.to_datetime(end_date).strftime("%Y%m%d")

    def find_raw_files(day):
        """Return a database entry for `day`, or None if no usable files exist."""
        for folder in load_path:
            for name_format in filename_formats:
                ion_fn = os.path.join(folder, day.strftime(name_format[0]))
                particle_fn = os.path.join(folder, day.strftime(name_format[1]))
                diagnostic_fn = os.path.join(folder, day.strftime(name_format[2]))
                has_ions = os.path.exists(ion_fn)
                has_particles = os.path.exists(particle_fn)
                # A day is usable when it has diagnostics and at least
                # one of the ion/particle files
                if (has_ions or has_particles) and os.path.exists(diagnostic_fn):
                    entry = {"timestamp": day.strftime("%Y%m%d"),
                             "diagnostics": diagnostic_fn}
                    if has_ions:
                        entry["ions"] = ion_fn
                    if has_particles:
                        entry["particles"] = particle_fn
                    return entry
        return None

    def process_and_save(entry, records, raw_key, data_type, corona,
                         suffix, neg_key, pos_key):
        """Process one raw file (both polarities) and store the results."""
        raw = read_file(entry[raw_key])
        neg_datamatrix, pos_datamatrix = process_data(raw, data_type)

        if apply_corrections:
            neg_datamatrix = correct_data(
                neg_datamatrix, records, data_type,
                sealevel_correction, pipelength)
            pos_datamatrix = correct_data(
                pos_datamatrix, records, data_type,
                sealevel_correction, pipelength)

        if apply_cleaning:
            neg_datamatrix = clean_data(
                neg_datamatrix, records, data_type, "neg",
                corona, remove_noisy_electrometers, inverter_name)
            pos_datamatrix = clean_data(
                pos_datamatrix, records, data_type, "pos",
                corona, remove_noisy_electrometers, inverter_name)

        outputs = ((neg_datamatrix, "NAISn", neg_key),
                   (pos_datamatrix, "NAISp", pos_key))
        for datamatrix, prefix, db_key in outputs:
            if datamatrix is not None:
                out_fn = os.path.join(
                    save_path, prefix + entry["timestamp"] + suffix)
                datamatrix.to_csv(out_fn)
                db.update({db_key: out_fn},
                          check.timestamp == entry["timestamp"])

    # list existing dates based on if diagnostic file was found
    existing_dates = {
        doc["timestamp"] for doc in db.search(check.diagnostics.exists())}

    if not existing_dates:
        print("building database...")
        list_of_datetimes = pd.date_range(
            start=start_date_str, end=end_date_str)
    else:
        # Only days after the newest registered day can be new
        list_of_datetimes = pd.date_range(
            start=max(existing_dates), end=end_date_str)

    # Add unprocessed datafiles to the database
    for day in list_of_datetimes:
        if day.strftime("%Y%m%d") in existing_dates:
            continue
        new_entry = find_raw_files(day)
        if new_entry is not None:
            db.insert(new_entry)

    # From the database find the last day with processed data
    processed_days = db.search(
        check.processed_neg_ion_file.exists() |
        check.processed_pos_ion_file.exists() |
        check.processed_neg_particle_file.exists() |
        check.processed_pos_particle_file.exists())

    if processed_days:
        # yyyymmdd strings sort chronologically
        last_day = max(doc["timestamp"] for doc in processed_days)
    else:
        last_day = None

    in_date_range = ((check.timestamp >= start_date_str) &
                     (check.timestamp <= end_date_str))
    has_raw_data = (check.diagnostics.exists() &
                    (check.ions.exists() | check.particles.exists()))

    if allow_reprocess:
        selection = has_raw_data & in_date_range
    else:
        # The last processed day is done again together with all days
        # that have no processed files yet
        selection = (
            ((check.timestamp == last_day) & in_date_range) |
            (has_raw_data &
             ~check.processed_neg_ion_file.exists() &
             ~check.processed_pos_ion_file.exists() &
             ~check.processed_neg_particle_file.exists() &
             ~check.processed_pos_particle_file.exists() &
             in_date_range))

    for x in db.search(selection):
        print("processing %s (%s)" % (x["timestamp"], location))

        records = read_file(x["diagnostics"])

        # ions (corona ion removal is never applied to ion data)
        if "ions" in x:
            process_and_save(
                x, records, "ions", "ions", False, "nds.sum",
                "processed_neg_ion_file", "processed_pos_ion_file")

        # particles
        if "particles" in x:
            process_and_save(
                x, records, "particles", "particles", remove_corona_ions,
                "np.sum",
                "processed_neg_particle_file", "processed_pos_particle_file")

    print("Done!")
def read_file(fn)
-
Read NAIS raw data file into a pandas.DataFrame
Parameters
fn
:str
- Raw data filename with path
Returns
pandas.DataFrame
- Contents of the file
Expand source code
def read_file(fn):
    """
    Read NAIS raw data file into a pandas.DataFrame

    Empty lines and lines starting with ``#`` are skipped. The first
    remaining line containing ``opmode`` preceded by some character is the
    header; that character is used as the column delimiter. Data lines whose
    column count differs from the header, and repeated header lines, are
    dropped.

    Parameters
    ----------
    fn : str
        Raw data filename with path

    Returns
    -------
    pandas.DataFrame
        Contents of the file, indexed by the first column parsed as
        datetimes. Columns from the fourth one onwards are float, with
        non-numeric values coerced to NaN. `None` is returned when the
        file contains no data lines.

    """
    header = None
    delimiter = None
    data_matrix = []

    with open(fn, 'r') as f:
        for line in f.read().splitlines():
            # Skip empty and comments
            if len(line) == 0 or line[0] == '#':
                continue

            # Everything before the header is ignored
            if header is None:
                # The character right before "opmode" is the delimiter; a
                # line that starts with "opmode" gives no delimiter and is
                # therefore not accepted as a header
                match = re.search('(.)opmode', line)
                if match is not None:
                    delimiter = match.group(1)
                    header = line.split(delimiter)
                continue

            data_line = line.split(delimiter)
            if len(data_line) == len(header) and "opmode" not in data_line:
                data_matrix.append(data_line)

    if not data_matrix:
        return None

    df = pd.DataFrame(columns=header, data=data_matrix)

    # Convert anything that can be converted to float and the rest is coerced to NaNs.
    # The frame is rebuilt instead of assigned through .iloc because in-place
    # assignment can leave the converted columns with object dtype.
    numeric = df.iloc[:, 3:].apply(pd.to_numeric, errors='coerce').astype(float)
    df = pd.concat([df.iloc[:, :3], numeric], axis=1)

    # Establish begin_time (first column) as index
    df = df.set_index(df.columns[0])
    df.index = pd.to_datetime(df.index)

    return df