diff --git a/activitysynth/notebooks/TOD_School_Category_Estimation.ipynb b/activitysynth/notebooks/TOD_School_Category_Estimation.ipynb new file mode 100644 index 0000000..2f2fab8 --- /dev/null +++ b/activitysynth/notebooks/TOD_School_Category_Estimation.ipynb @@ -0,0 +1,2158 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "from collections import OrderedDict\n", + "from urbansim_templates import modelmanager as mm\n", + "from urbansim_templates.models import LargeMultinomialLogitStep\n", + "from urbansim_templates.models import SmallMultinomialLogitStep\n", + "import orca\n", + "import os; os.chdir('../')\n", + "import warnings; warnings.simplefilter('ignore')\n", + "\n", + "import pandas as pd\n", + "# import pandana as pdna\n", + "import time\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "from functools import reduce\n", + "\n", + "import scipy.stats as st\n", + "from scipy.stats import skewnorm\n", + "\n", + "# import matplotlib\n", + "# matplotlib.style.use('ggplot')\n", + "\n", + "%matplotlib inline\n", + "\n", + "pd.options.display.max_columns = 80" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "trips = pd.read_csv('/home/emma/ual_model_workspace/spring-2019-models/notebooks-emma/HStrips_031219.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0HHPERHHPERTRIPoriginorigin_dwellorigin_STorigin_ETtrip_ETTTMODE
00103205331.032053e+09home13.75000017.7500007.5000007.550.050000shared
11103205331.032053e+09school8.6166677.55000016.16666717.751.583333shared
22103205341.032053e+09home13.75000017.7500007.5000007.550.050000shared
33103205341.032053e+09school8.6166677.55000016.16666717.751.583333shared
44103205351.032054e+09home14.83333316.6666677.5000007.550.050000shared
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 HHPER HHPERTRIP origin origin_dwell origin_ST \\\n", + "0 0 10320533 1.032053e+09 home 13.750000 17.750000 \n", + "1 1 10320533 1.032053e+09 school 8.616667 7.550000 \n", + "2 2 10320534 1.032053e+09 home 13.750000 17.750000 \n", + "3 3 10320534 1.032053e+09 school 8.616667 7.550000 \n", + "4 4 10320535 1.032054e+09 home 14.833333 16.666667 \n", + "\n", + " origin_ET trip_ET TT MODE \n", + "0 7.500000 7.55 0.050000 shared \n", + "1 16.166667 17.75 1.583333 shared \n", + "2 7.500000 7.55 0.050000 shared \n", + "3 16.166667 17.75 1.583333 shared \n", + "4 7.500000 7.55 0.050000 shared " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trips.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare TOD and Dwell columns" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Select people who make both a home-school and a school-home trip: exactly two\n", + "# rows, one with origin 'home' and one with origin 'school'. The origins are\n", + "# checked (not just the row count) because the cells below pair the two rows of\n", + "# each person and treat the first as home-school and the second as school-home.\n", + "tripsII = trips.groupby('HHPER').filter(\n", + "    lambda x: len(x) == 2 and set(x['origin']) == {'home', 'school'})" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Make sure the home-school trip row of each person is listed first\n", + "# ('home' sorts before 'school' in the origin column).\n", + "tripsIII = tripsII.sort_values(['HHPER','origin']).reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Move school-home trip info up into the home-school trip row of each person:\n", + "# shift(-1) within a person copies the values of that person's next row.\n", + "\n", + "tripsIII['school_dwell'] = tripsIII.groupby('HHPER', group_keys=False).origin_dwell.shift(-1)\n", + "tripsIII['school_ST'] = tripsIII.groupby('HHPER', group_keys=False).origin_ST.shift(-1)\n", + "tripsIII['SH_trip_ST'] = tripsIII.groupby('HHPER', group_keys=False).origin_ET.shift(-1)\n", + "tripsIII['SH_trip_ET'] = tripsIII.groupby('HHPER', group_keys=False).trip_ET.shift(-1)\n", + "tripsIII['SH_TT'] = tripsIII.groupby('HHPER', 
group_keys=False).TT.shift(-1)\n", + "tripsIII['SH_mode'] = tripsIII.groupby('HHPER', group_keys=False).MODE.shift(-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Keep only the first (home-school) row of each person.\n", + "# GroupBy.first() is deliberately not used: it returns the first non-null value\n", + "# of every column, so a missing value in the home-school row would silently be\n", + "# filled in from the school-home row.\n", + "tripsIII = tripsIII.drop_duplicates(subset='HHPER', keep='first').reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Columns that came from the home-school row get home_/HS_ names.\n", + "tripsIII = tripsIII.rename(columns = {'origin_dwell':'home_dwell','origin_ST':'home_ST','origin_ET':'HS_trip_ST',\n", + "                                      'trip_ET':'HS_trip_ET','TT':'HS_TT','MODE':'HS_mode','TOD':'HS_TOD'})" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# Category of the home-school trip end time, coded 0-4 after the final '- 1':\n", + "#   0: [3, 7.75)   1: [7.75, 8.5]   2: (8.5, 9.5]   3: (9.5, 15]   4: above 15, or [0, 3)\n", + "# Missing or negative end times fall in no bin and end up coded -1.\n", + "# Plain comparisons are used instead of Series.between(inclusive=True/False)\n", + "# because recent pandas releases no longer accept a boolean 'inclusive'.\n", + "hs_end = tripsIII.HS_trip_ET\n", + "\n", + "tripsIII['HS_TOD1'] = (\n", + "    ((hs_end >= 3) & (hs_end < 7.75))*1 +\n", + "    ((hs_end >= 7.75) & (hs_end <= 8.5))*2 +\n", + "    ((hs_end > 8.5) & (hs_end <= 9.5))*3 +\n", + "    ((hs_end > 9.5) & (hs_end <= 15.0))*4 +\n", + "    (hs_end > 15.0)*5 +\n", + "    ((hs_end >= 0) & (hs_end < 3))*5\n", + ")\n", + "\n", + "tripsIII['HS_TOD1'] = tripsIII['HS_TOD1'] - 1" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Category of the dwell time at school, coded 1-5:\n", + "#   1: [0, 3.5)   2: [3.5, 6)   3: [6, 8]   4: (8, 10]   5: above 10\n", + "# Missing or negative dwell times fall in no bin and are coded 0.\n", + "dwell = tripsIII.school_dwell\n", + "\n", + "tripsIII['Sdwell'] = (\n", + "    ((dwell >= 0) & (dwell < 3.5))*1 +\n", + "    ((dwell >= 3.5) & (dwell < 6))*2 +\n", + "    ((dwell >= 6) & (dwell <= 8))*3 +\n", + "    ((dwell > 8) & (dwell <= 10))*4 +\n", + "    (dwell > 10)*5\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + 
"tripsIII['Sdwell'] = pd.to_numeric(tripsIII['Sdwell'])\n", + "tripsIII['HS_TOD1'] = pd.to_numeric(tripsIII['HS_TOD1'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add the demographic variables" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SAMPNPERNORELATGENDAGEAGEBHISPRACE1RACE2RACE3RACE4O_RACENTVTYCNTRYLICUSERTRANSTPTYP1TPTYP2TPTYP3TPTYP4TPTYP5TPTYP6TPTYP7O_TPTYPCLIP1CLIP2CLIP3COMPMETPASSTLFLEXEMPLYWKSTATO_WKSTATJOBSWLOCWNAMEWCITYWSTAT...HVLOGPTRIPSTOLLFTOLLR1TOLLR2TOLLR3TOLLR4TOLLR5TOLLR6TOLLR7TOLLR8TOLLR9TOLLR10TOLLB1TOLLB2TOLLB3TOLLB4TOLLB5TOLLB6TOLLB7TOLLB8TOLLB9TOLLB10HOVLNOGOWHYNOGOWHY_OInCompleteMoto_tripWCTFIPWTRACTSCTFIPSTRACTWPrimaryCityWSTFIPW2PrimaryCityW2STFIPSPrimaryCitySSTFIPPERWGTEXPPERWGT
0103198511174NaN21.0NaNNaNNaNNaN1NaN1.01.02.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.02.02.01.0NaNNaNNaNNaNNaNNaN...1.02.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.0NaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.05208617.647568
1103198522273NaN21.0NaNNaNNaNNaN1NaN1.01.02.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.02.02.01.0NaNNaNNaNNaNNaNNaN...1.02.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.0NaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.05208617.647568
2103203611146NaN21.0NaNNaNNaNNaN1NaN1.01.02.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.02.01.0NaNNaN1.01.0HIDDENSAN DIEGOCA...NaN5.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaN1.073.017032.0NaNNaNSAN DIEGO6.0NaNNaNNaNNaN1.223974414.701494
3103203622247NaN21.097.0NaNNaNMULTI-RACIAL1NaN1.01.02.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.02.02.03.0NaNNaNNaNNaNNaNNaN...NaN18.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.863473292.558373
4103203633115NaN21.097.0NaNNaNMULTI-RACIAL1NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaN4.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaN1.0NaNNaN73.017030.0NaNNaNNaNNaNSAN DIEGO6.00.941412318.965100
\n", + "

5 rows × 148 columns

\n", + "
" + ], + "text/plain": [ + " SAMPN PERNO RELAT GEND AGE AGEB HISP RACE1 RACE2 RACE3 RACE4 \\\n", + "0 1031985 1 1 1 74 NaN 2 1.0 NaN NaN NaN \n", + "1 1031985 2 2 2 73 NaN 2 1.0 NaN NaN NaN \n", + "2 1032036 1 1 1 46 NaN 2 1.0 NaN NaN NaN \n", + "3 1032036 2 2 2 47 NaN 2 1.0 97.0 NaN NaN \n", + "4 1032036 3 3 1 15 NaN 2 1.0 97.0 NaN NaN \n", + "\n", + " O_RACE NTVTY CNTRY LIC USER TRANS TPTYP1 TPTYP2 TPTYP3 \\\n", + "0 NaN 1 NaN 1.0 1.0 2.0 NaN NaN NaN \n", + "1 NaN 1 NaN 1.0 1.0 2.0 NaN NaN NaN \n", + "2 NaN 1 NaN 1.0 1.0 2.0 NaN NaN NaN \n", + "3 MULTI-RACIAL 1 NaN 1.0 1.0 2.0 NaN NaN NaN \n", + "4 MULTI-RACIAL 1 NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " TPTYP4 TPTYP5 TPTYP6 TPTYP7 O_TPTYP CLIP1 CLIP2 CLIP3 COMP MET \\\n", + "0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " PASSTL FLEX EMPLY WKSTAT O_WKSTAT JOBS WLOC WNAME WCITY WSTAT \\\n", + "0 2.0 2.0 2.0 1.0 NaN NaN NaN NaN NaN NaN \n", + "1 2.0 2.0 2.0 1.0 NaN NaN NaN NaN NaN NaN \n", + "2 2.0 2.0 1.0 NaN NaN 1.0 1.0 HIDDEN SAN DIEGO CA \n", + "3 2.0 2.0 2.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " ... HVLOG PTRIPS TOLLF TOLLR1 TOLLR2 TOLLR3 TOLLR4 TOLLR5 TOLLR6 \\\n", + "0 ... 1.0 2.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "1 ... 1.0 2.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "2 ... NaN 5.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "3 ... NaN 18.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "4 ... 
NaN 4.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "\n", + " TOLLR7 TOLLR8 TOLLR9 TOLLR10 TOLLB1 TOLLB2 TOLLB3 TOLLB4 TOLLB5 \\\n", + "0 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " TOLLB6 TOLLB7 TOLLB8 TOLLB9 TOLLB10 HOVL NOGOWHY NOGOWHY_O InComplete \\\n", + "0 NaN NaN NaN NaN NaN 2.0 NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN 2.0 NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN 1.0 NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN 1.0 NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN 1.0 NaN NaN NaN \n", + "\n", + " Moto_trip WCTFIP WTRACT SCTFIP STRACT WPrimaryCity WSTFIP \\\n", + "0 1.0 NaN NaN NaN NaN NaN NaN \n", + "1 1.0 NaN NaN NaN NaN NaN NaN \n", + "2 1.0 73.0 17032.0 NaN NaN SAN DIEGO 6.0 \n", + "3 1.0 NaN NaN NaN NaN NaN NaN \n", + "4 1.0 NaN NaN 73.0 17030.0 NaN NaN \n", + "\n", + " W2PrimaryCity W2STFIP SPrimaryCity SSTFIP PERWGT EXPPERWGT \n", + "0 NaN NaN NaN NaN 0.052086 17.647568 \n", + "1 NaN NaN NaN NaN 0.052086 17.647568 \n", + "2 NaN NaN NaN NaN 1.223974 414.701494 \n", + "3 NaN NaN NaN NaN 0.863473 292.558373 \n", + "4 NaN NaN SAN DIEGO 6.0 0.941412 318.965100 \n", + "\n", + "[5 rows x 148 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "person = pd.read_csv('/home/data/CHTS_csv_format/data/Deliv_PER.csv')\n", + "\n", + "person.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "person = person[['SAMPN','PERNO','GEND','AGE','HISP','RACE1','RACE2','RACE3','RACE4',\n", + " 'HOURS','EDUCA']]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SAMPNPERNOGENDAGEHISPRACE1RACE2RACE3RACE4O_RACENTVTYLICJOBSHOURSEDUCAWSCHEDDISABINDUSOCCUPHHPERfemalewhiteblacknativeasianPIimmigrantnolic
01031985117401.0NaNNaNNaNNaN11.0NaNNaN6NaN0NaNNaN1031985101000000.0
11031985227301.0NaNNaNNaNNaN11.0NaNNaN6NaN0NaNNaN1031985211000000.0
21032036114601.0NaNNaNNaNNaN11.01.040.062.0054.015.01032036101000000.0
31032036224701.097.0NaNNaNMULTI-RACIAL11.0NaNNaN6NaN0NaNNaN1032036211000000.0
41032036311501.097.0NaNNaNMULTI-RACIAL1NaNNaNNaN1NaN0NaNNaN103203630100000NaN
\n", + "
" + ], + "text/plain": [ + " SAMPN PERNO GEND AGE HISP RACE1 RACE2 RACE3 RACE4 O_RACE \\\n", + "0 1031985 1 1 74 0 1.0 NaN NaN NaN NaN \n", + "1 1031985 2 2 73 0 1.0 NaN NaN NaN NaN \n", + "2 1032036 1 1 46 0 1.0 NaN NaN NaN NaN \n", + "3 1032036 2 2 47 0 1.0 97.0 NaN NaN MULTI-RACIAL \n", + "4 1032036 3 1 15 0 1.0 97.0 NaN NaN MULTI-RACIAL \n", + "\n", + " NTVTY LIC JOBS HOURS EDUCA WSCHED DISAB INDUS OCCUP HHPER \\\n", + "0 1 1.0 NaN NaN 6 NaN 0 NaN NaN 10319851 \n", + "1 1 1.0 NaN NaN 6 NaN 0 NaN NaN 10319852 \n", + "2 1 1.0 1.0 40.0 6 2.0 0 54.0 15.0 10320361 \n", + "3 1 1.0 NaN NaN 6 NaN 0 NaN NaN 10320362 \n", + "4 1 NaN NaN NaN 1 NaN 0 NaN NaN 10320363 \n", + "\n", + " female white black native asian PI immigrant nolic \n", + "0 0 1 0 0 0 0 0 0.0 \n", + "1 1 1 0 0 0 0 0 0.0 \n", + "2 0 1 0 0 0 0 0 0.0 \n", + "3 1 1 0 0 0 0 0 0.0 \n", + "4 0 1 0 0 0 0 0 NaN " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "person['HHPER'] = person['SAMPN'].map(str) + person['PERNO'].map(str)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SAMPNHHVEHHHBICOWNINCOMHHSIZrent
01031985221320
11032036141750
21032053222261
31032425231720
41032558002111
\n", + "
" + ], + "text/plain": [ + " SAMPN HHVEH HHBIC OWN INCOM HHSIZ rent\n", + "0 1031985 2 2 1 3 2 0\n", + "1 1032036 1 4 1 7 5 0\n", + "2 1032053 2 2 2 2 6 1\n", + "3 1032425 2 3 1 7 2 0\n", + "4 1032558 0 0 2 1 1 1" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hh_df = pd.read_csv('/home/data/CHTS_csv_format/data/Deliv_HH.csv')[\n", + " ['SAMPN','HHVEH','OWN','INCOM','HHSIZ']]\n", + "\n", + "hh_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SAMPNPERNOGENDAGEHISPRACE1RACE2RACE3RACE4O_RACENTVTYLICJOBSHOURSEDUCAWSCHEDDISABINDUSOCCUPHHPERfemalewhiteblacknativeasianPIimmigrantnolicHHVEHHHBICOWNINCOMHHSIZrent
01031985117401.0NaNNaNNaNNaN11.0NaNNaN6NaN0NaNNaN1031985101000000.0221320
11031985227301.0NaNNaNNaNNaN11.0NaNNaN6NaN0NaNNaN1031985211000000.0221320
21032036114601.0NaNNaNNaNNaN11.01.040.062.0054.015.01032036101000000.0141750
31032036224701.097.0NaNNaNMULTI-RACIAL11.0NaNNaN6NaN0NaNNaN1032036211000000.0141750
41032036311501.097.0NaNNaNMULTI-RACIAL1NaNNaNNaN1NaN0NaNNaN103203630100000NaN141750
\n", + "
" + ], + "text/plain": [ + " SAMPN PERNO GEND AGE HISP RACE1 RACE2 RACE3 RACE4 O_RACE \\\n", + "0 1031985 1 1 74 0 1.0 NaN NaN NaN NaN \n", + "1 1031985 2 2 73 0 1.0 NaN NaN NaN NaN \n", + "2 1032036 1 1 46 0 1.0 NaN NaN NaN NaN \n", + "3 1032036 2 2 47 0 1.0 97.0 NaN NaN MULTI-RACIAL \n", + "4 1032036 3 1 15 0 1.0 97.0 NaN NaN MULTI-RACIAL \n", + "\n", + " NTVTY LIC JOBS HOURS EDUCA WSCHED DISAB INDUS OCCUP HHPER \\\n", + "0 1 1.0 NaN NaN 6 NaN 0 NaN NaN 10319851 \n", + "1 1 1.0 NaN NaN 6 NaN 0 NaN NaN 10319852 \n", + "2 1 1.0 1.0 40.0 6 2.0 0 54.0 15.0 10320361 \n", + "3 1 1.0 NaN NaN 6 NaN 0 NaN NaN 10320362 \n", + "4 1 NaN NaN NaN 1 NaN 0 NaN NaN 10320363 \n", + "\n", + " female white black native asian PI immigrant nolic HHVEH HHBIC \\\n", + "0 0 1 0 0 0 0 0 0.0 2 2 \n", + "1 1 1 0 0 0 0 0 0.0 2 2 \n", + "2 0 1 0 0 0 0 0 0.0 1 4 \n", + "3 1 1 0 0 0 0 0 0.0 1 4 \n", + "4 0 1 0 0 0 0 0 NaN 1 4 \n", + "\n", + " OWN INCOM HHSIZ rent \n", + "0 1 3 2 0 \n", + "1 1 3 2 0 \n", + "2 1 7 5 0 \n", + "3 1 7 5 0 \n", + "4 1 7 5 0 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "demo = person.merge(hh_df,on = 'SAMPN',how = 'left')\n", + "\n", + "demo.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get rid of null values" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# Codes that stand for a missing answer in each column; they are recoded to NaN.\n", + "na_dict = {\n", + "    'GEND':[9],\n", + "    'AGE':[998,999],\n", + "    'HOURS':[998,999],\n", + "    'EDUCA':[8,9],\n", + "    'HHVEH':[98,99],\n", + "    'OWN':[7,8,9],\n", + "    'INCOM':[98,99],\n", + "    'HHSIZ':[98,99]\n", + "}\n", + "\n", + "for col, codes in na_dict.items():\n", + "    demo[col] = demo[col].replace(codes, np.nan)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop people who are missing any variable used to build the model dummies.\n", + "# HOURS is deliberately NOT required: it is NaN for people without a job (see\n", + "# the 15-year-old in demo.head() above), so requiring it would remove most of\n", + "# the school-trip sample, and it is not used by the models in this notebook.\n", + "required_cols = ['GEND', 'AGE', 'EDUCA','HHVEH','OWN','INCOM','HHSIZ']\n", + "demo = demo.dropna(subset = required_cols)" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9857\n", + "9857\n", + "8979\n" + ] + } + ], + "source": [ + "tripsIII['HHPER'] = tripsIII['HHPER'].map(str)\n", + "\n", + "trips1 = pd.merge(tripsIII, demo, on='HHPER')\n", + "\n", + "print (len(tripsIII.index))\n", + "print (len(trips1.index))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare data for use in MNL estimation (make dummy columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "race_cols = ['RACE1', 'RACE2', 'RACE3', 'RACE4']\n", + "\n", + "def any_race_in(codes):\n", + "    # True for each row of trips1 where any of RACE1-RACE4 equals one of `codes`.\n", + "    return trips1[race_cols].isin(codes).any(axis=1)\n", + "\n", + "# minority: HISP == 1, or any race code other than 1 among 2, 3, 4, 5, 97\n", + "trips1['minority'] = np.where(trips1['HISP'].isin([1.0]) | any_race_in([2.0, 3.0, 4.0, 5.0, 97.0]),1,0)\n", + "\n", + "trips1['black'] = np.where(any_race_in([2.0]),1,0)\n", + "trips1['native'] = np.where(any_race_in([3.0]),1,0)\n", + "trips1['asian'] = np.where(any_race_in([4.0]),1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['hh_inc_less35k'] = 
np.where(trips1['INCOM'].isin([1.0,2.0,3.0]),1,0)\n", + "trips1['hh_inc_less50k'] = np.where(trips1['INCOM'].isin([1.0,2.0,3.0,4.0]),1,0)\n", + "trips1['hh_inc_150kplus'] = np.where(trips1['INCOM'].isin([8.0,9.0,10.0]),1,0)\n", + "trips1['hh_inc_150kless250k'] = np.where(trips1['INCOM'].isin([8.0,9.0]),1,0)\n", + "trips1['hh_inc_250kplus'] = np.where(trips1['INCOM'].isin([10.0]),1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['lessGED'] = np.where(trips1['EDUCA'].isin([1.0]),1,0)\n", + "trips1['GEDsomeBach'] = np.where(trips1['EDUCA'].isin([2.0,3.0]),1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "# Age-group dummies; every range includes its lower bound and excludes its upper\n", + "# bound. Ages 5-11 get no dummy, and age_27plus stops below 100 as before.\n", + "# Plain comparisons replace Series.between(inclusive=False), whose boolean\n", + "# 'inclusive' argument is no longer accepted by recent pandas releases.\n", + "age = trips1.AGE\n", + "\n", + "trips1['age_less5'] = np.where((age >= 0) & (age < 5),1,0)\n", + "trips1['age_12less16'] = np.where((age >= 12) & (age < 16),1,0)\n", + "trips1['age_16less19'] = np.where((age >= 16) & (age < 19),1,0)\n", + "trips1['age_19less27'] = np.where((age >= 19) & (age < 27),1,0)\n", + "trips1['age_27plus'] = np.where((age >= 27) & (age < 100),1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['female'] = trips1['GEND'] - 1\n", + "\n", + "trips1['tenure_2'] = trips1['OWN'] - 1\n", + "\n", + "trips1['noveh'] = np.where(trips1.HHVEH.isin([0.0]),1,0)\n", + "\n", + "# NOTE(review): households larger than 8 are not flagged, as in the original\n", + "# between(4, 8) test - confirm that this upper bound is intended.\n", + "trips1['hh_size_4plusper'] = np.where((trips1.HHSIZ >= 4) & (trips1.HHSIZ <= 8),1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['TOD_3to745'] = np.where(trips1['HS_TOD1'].isin([0]),1,0)\n", + "trips1['TOD_830to930'] = np.where(trips1['HS_TOD1'].isin([2]),1,0)\n", + "trips1['TOD_930to1500'] = 
np.where(trips1['HS_TOD1'].isin([3]),1,0)\n", + "trips1['TOD_1500up'] = np.where(trips1['HS_TOD1'].isin([4]),1,0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Estimate the model for Home-to-School Trip End Times" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": {}, + "outputs": [], + "source": [ + "# Register the estimation data as the orca table 'tripsA' used by the model step.\n", + "@orca.table(cache=True)\n", + "def tripsA():\n", + "    return trips1" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": {}, + "outputs": [], + "source": [ + "m = SmallMultinomialLogitStep()\n", + "m.name = 'STOD_choice'\n", + "m.tables = ['tripsA']\n", + "m.choice_column = 'HS_TOD1'\n", + "\n", + "# Each entry maps a column of tripsA to the HS_TOD1 alternatives (0-4) that get\n", + "# their own coefficient for it. The names must match the dummy columns created\n", + "# above (age_*, hh_inc_*, hh_size_*); the earlier un-prefixed names ('less5',\n", + "# 'less35k', '4plusper', ...) are not columns of trips1.\n", + "# NOTE(review): the saved output of the fit cell below still shows the old\n", + "# un-prefixed coefficient labels - re-run the notebook to refresh it.\n", + "m.model_expression = OrderedDict([\n", + "    ('intercept', [1,2,3,4]),\n", + "\n", + "    # age group (no term for ages 5-11: presumably the reference group)\n", + "    ('age_less5',[0,2,3,4]),\n", + "    ('age_12less16',[0]),\n", + "    ('age_16less19',[0,3,4]),\n", + "    ('age_19less27',[0,2,3,4]),\n", + "    ('age_27plus',[2,3,4]),\n", + "\n", + "    ('female',[0]),\n", + "\n", + "    ('black',[3]),\n", + "    ('native',[2,3]),\n", + "    ('asian',[0,2,3]),\n", + "\n", + "    ('hh_inc_less35k',[2]),\n", + "    ('hh_inc_150kless250k',[0]),\n", + "    ('hh_inc_250kplus',[0]),\n", + "\n", + "    ('lessGED',[0,2,3,4]),\n", + "\n", + "    ('noveh',[3]),\n", + "\n", + "    ('hh_size_4plusper',[3])\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Log-likelihood at zero: -14,451.1430\n", + "Initial Log-likelihood: -14,451.1430\n", + "Estimation Time for Point Estimation: 0.81 seconds.\n", + "Final log-likelihood: -9,385.2482\n", + " Multinomial Logit Model Regression Results \n", + "===================================================================================\n", + "Dep. Variable: _chosen No. 
Observations: 8,979\n", + "Model: Multinomial Logit Model Df Residuals: 8,944\n", + "Method: MLE Df Model: 35\n", + "Date: Mon, 25 Mar 2019 Pseudo R-squ.: 0.351\n", + "Time: 16:32:43 Pseudo R-bar-squ.: 0.348\n", + "AIC: 18,840.496 Log-Likelihood: -9,385.248\n", + "BIC: 19,089.089 LL-Null: -14,451.143\n", + "==================================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "----------------------------------------------------------------------------------\n", + "intercept_1 1.4351 0.174 8.231 0.000 1.093 1.777\n", + "intercept_2 0.6556 0.236 2.775 0.006 0.193 1.119\n", + "intercept_3 0.3501 0.268 1.306 0.191 -0.175 0.875\n", + "intercept_4 -2.4639 0.570 -4.323 0.000 -3.581 -1.347\n", + "less5_0 0.5197 0.125 4.163 0.000 0.275 0.764\n", + "less5_2 1.5944 0.115 13.857 0.000 1.369 1.820\n", + "less5_3 2.3554 0.177 13.275 0.000 2.008 2.703\n", + "less5_4 1.7310 0.820 2.112 0.035 0.125 3.337\n", + "12less16_0 0.8536 0.059 14.521 0.000 0.738 0.969\n", + "16less19_0 1.1646 0.069 16.760 0.000 1.028 1.301\n", + "16less19_3 1.2429 0.169 7.353 0.000 0.912 1.574\n", + "16less19_4 2.0394 0.522 3.910 0.000 1.017 3.062\n", + "19less27_0 0.9699 0.212 4.571 0.000 0.554 1.386\n", + "19less27_2 1.0356 0.240 4.311 0.000 0.565 1.506\n", + "19less27_3 2.0031 0.259 7.719 0.000 1.494 2.512\n", + "19less27_4 3.6530 0.575 6.354 0.000 2.526 4.780\n", + "27plus_2 1.1888 0.236 5.033 0.000 0.726 1.652\n", + "27plus_3 1.8772 0.264 7.101 0.000 1.359 2.395\n", + "27plus_4 4.7118 0.563 8.376 0.000 3.609 5.814\n", + "female_0 0.1071 0.049 2.182 0.029 0.011 0.203\n", + "black_3 0.3948 0.188 2.100 0.036 0.026 0.763\n", + "native_2 -0.2925 0.134 -2.177 0.029 -0.556 -0.029\n", + "native_3 -0.4258 0.182 -2.336 0.019 -0.783 -0.069\n", + "asian_0 -0.2358 0.089 -2.655 0.008 -0.410 -0.062\n", + "asian_2 0.2696 0.105 2.560 0.010 0.063 0.476\n", + "asian_3 0.2889 0.146 1.974 0.048 0.002 0.576\n", + "less35k_2 -0.2257 0.086 -2.633 0.008 -0.394 
# Estimate the home-to-school trip end-time step configured in the cell above.
m.fit()

# Rename the fitted step; this is the name it is saved and registered under.
m.name = 'school_TOD'

# Load the model steps already saved in the configs directory
# (prints one "Registering model step ..." line per saved step).
mm.initialize()

# Tag the step and save/register it with the model manager
# (the recorded output shows a YAML file and a pickled model object being written).
m.tags = ['school_TOD','emma','test']
mm.register(m)


# ## Estimate the model for dwell time at school

# Second orca table over the same trips frame, used by the dwell-time step.
@orca.table(cache=True)
def tripsB():
    return trips1

# NOTE: `m` is rebound here, so the school_TOD step object is no longer
# reachable through `m` after this cell.
m = SmallMultinomialLogitStep()
m.name = 'Sdwell_choice'
m.tables = ['tripsB']
m.choice_column = 'Sdwell'
# Each entry maps a variable to the Sdwell alternative codes that receive a
# coefficient (reported as '<variable>_<alternative>' in the fit summary).
m.model_expression = OrderedDict([
    ('intercept', [1,3,4,5]),
    
    ('TOD_3to745',[1,2,4,5]),
    ('TOD_830to930',[1,2]),
    ('TOD_930to1500',[1,2]),
    ('TOD_1500up',[1,2]),

    ('less5',[1,2,4,5]),
#     ('5less12',[1,3,4,5]),
    ('12less16',[1,2]),
    ('16less19',[1,4]),
    ('19less27',[1,2]),
    ('27plus',[1,2]),
    
    ('female',[4]),
    
    ('minority',[1]),

    ('less50k',[2,4]),
    ('150kplus',[2,4,5]),
    
    ('lessGED',[4,5]),
    # Nested list: the fit summary reports a single 'GEDsomeBach_[1, 2]'
    # coefficient for alternatives 1 and 2 together.
    ('GEDsomeBach',[[1,2]]),

    ('4plusper',[4])
    
])

# Estimate the dwell-time step.
m.fit()

# Rename before saving, mirroring the school_TOD step above.
m.name = 'school_dwell'

mm.initialize()

m.tags = ['school_dwell','emma','test']
mm.register(m)


# # Validate models
# NOTE(review): read top to bottom, `m` here is the school_dwell step only; the
# school_TOD step is not validated by these cells — confirm that is intended.

# Validation process
from scripts import validate

validate.tp_rates(m)

predicted_choices = validate.get_predicted_choices(m)
pd.crosstab(m.choices.rename('observed'), predicted_choices, margins=True) # unnormalized

validate.model_crosstab(m)

# Heat-map view of the crosstab computed by the previous cell.
import seaborn as sns; sns.heatmap(validate.model_crosstab(m))
def _fit_best_distribution(data, candidates, fallback, bins=200, ax=None):
    """Fit every candidate distribution to ``data`` and return the closest one.

    "Closest" means the lowest sum of squared errors (SSE) between the fitted
    PDF, evaluated at the histogram bin mid-points, and the density histogram
    of ``data``.

    Parameters
    ----------
    data : array-like
        Observations to model.
    candidates : iterable of scipy.stats continuous distributions
        Distributions to try, in order.
    fallback : scipy.stats continuous distribution
        Distribution reported when no candidate yields a positive, finite SSE.
    bins : int or sequence, optional
        Passed to ``np.histogram``.
    ax : matplotlib Axes or None, optional
        When given, each successfully fitted PDF is drawn on it.

    Returns
    -------
    (str, tuple)
        Name of the winning distribution and its fitted parameters
        ``(*shape, loc, scale)``.  If nothing could be fitted the result is
        ``(fallback.name, (0.0, 1.0))``.
        NOTE(review): that placeholder carries no shape parameters, so it is
        only directly usable when ``fallback`` has none — callers should check.
    """
    # Density histogram of the observations; PDFs are compared at bin centres.
    density, edges = np.histogram(data, bins=bins, density=True)
    centers = (edges[:-1] + edges[1:]) / 2.0

    best_distribution = fallback
    best_params = (0.0, 1.0)
    best_sse = np.inf

    for distribution in candidates:
        try:
            # Fitting ill-suited distributions is noisy; silence it locally.
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore')

                params = distribution.fit(data)

                # scipy returns (*shape, loc, scale).
                shape_args = params[:-2]
                loc = params[-2]
                scale = params[-1]

                pdf = distribution.pdf(centers, *shape_args, loc=loc, scale=scale)
                sse = np.sum(np.power(density - pdf, 2.0))

                # Plotting is cosmetic: a failure here must not discard the fit.
                try:
                    if ax:
                        pd.Series(pdf, centers).plot(
                            ax=ax, label=distribution.name, legend=True)
                except Exception:
                    pass

                # NaN or zero SSE never wins (both comparisons must hold).
                if best_sse > sse > 0:
                    best_distribution = distribution
                    best_params = params
                    best_sse = sse

        except Exception:
            # Deliberate best-effort search: a candidate that cannot be fitted
            # or evaluated on this data is skipped, not fatal.
            continue

    return (best_distribution.name, best_params)


# Create models from data
def best_fit_distribution1(data, bins=200, ax=None):
    """Model data by finding best fit distribution to data (candidate group 1).

    Returns ``(distribution_name, fitted_params)``; falls back to ``norm``.
    """
    # Candidate list is built at call time so that a name missing from the
    # installed SciPy only affects the wrapper that uses it.
    distributions = [
        st.norm, st.skewnorm,
        st.alpha, st.anglit, st.argus, st.betaprime, st.burr, st.burr12, st.cauchy,
        st.chi, st.chi2,
        st.cosine,
        st.erlang,
        st.exponnorm,
        st.exponweib, st.exponpow, st.f, st.fisk,
    ]
    return _fit_best_distribution(data, distributions, st.norm, bins=bins, ax=ax)


def make_pdf(dist, params, size=10000):
    """Generate distributions' Probability Distribution Functions.

    Parameters
    ----------
    dist : scipy.stats continuous distribution
        Distribution object (not its name).
    params : sequence
        Fitted parameters laid out as ``(*shape, loc, scale)``.
    size : int, optional
        Number of evenly spaced evaluation points.

    Returns
    -------
    pandas.Series
        PDF values indexed by the x positions they were evaluated at.
    """
    # Separate parts of parameters
    arg = params[:-2]
    loc = params[-2]
    scale = params[-1]

    # Get sane start and end points of distribution.
    # NOTE(review): distributions with shape parameters span the 0.1%-99.9%
    # quantiles while shape-free ones span 1%-99% — confirm the asymmetry is
    # intended.
    if arg:
        start = dist.ppf(0.001, *arg, loc=loc, scale=scale)
        end = dist.ppf(0.999, *arg, loc=loc, scale=scale)
    else:
        start = dist.ppf(0.01, loc=loc, scale=scale)
        end = dist.ppf(0.99, loc=loc, scale=scale)

    # Build PDF and turn into pandas Series
    x = np.linspace(start, end, size)
    y = dist.pdf(x, *arg, loc=loc, scale=scale)
    return pd.Series(y, x)


def best_fit_distribution2(data, bins=200, ax=None):
    """Model data by finding best fit distribution to data (candidate group 2).

    Returns ``(distribution_name, fitted_params)``; falls back to ``foldnorm``.
    """
    # NOTE(review): newer SciPy releases renamed some distributions (st.gilbrat
    # here, st.trapz in group 6) — confirm against the pinned SciPy version.
    distributions = [
        st.gausshyper,
        st.foldnorm, st.weibull_min, st.weibull_max, st.genlogistic,
        st.gennorm,
        st.genextreme, st.gamma, st.gengamma, st.gilbrat, st.gumbel_r,
        st.gumbel_l, st.hypsecant, st.invgamma, st.invgauss,
    ]
    return _fit_best_distribution(data, distributions, st.foldnorm, bins=bins, ax=ax)


# Create models from data
def best_fit_distribution3(data, bins=200, ax=None):
    """Model data by finding best fit distribution to data (candidate group 3).

    Returns ``(distribution_name, fitted_params)``; falls back to ``johnsonsu``.
    """
    distributions = [
        st.johnsonsb, st.johnsonsu, st.ksone, st.logistic, st.loggamma, st.lognorm,
        st.maxwell, st.mielke, st.nakagami, st.ncx2, st.ncf,
    ]
    return _fit_best_distribution(data, distributions, st.johnsonsu, bins=bins, ax=ax)


# Create models from data
def best_fit_distribution4(data, bins=200, ax=None):
    """Model data by finding best fit distribution to data (candidate group 4).

    Returns ``(distribution_name, fitted_params)``; falls back to ``t``.
    """
    distributions = [
        st.nct, st.pearson3, st.powerlognorm, st.powernorm,
        st.rayleigh, st.rice, st.recipinvgauss, st.t,
        st.vonmises, st.vonmises_line, st.wald, st.weibull_min, st.weibull_max,
    ]
    return _fit_best_distribution(data, distributions, st.t, bins=bins, ax=ax)


# Create models from data
def best_fit_distribution5(data, bins=200, ax=None):
    """Model data by finding best fit distribution to data (candidate group 5).

    Returns ``(distribution_name, fitted_params)``; falls back to ``foldcauchy``.
    """
    distributions = [
        st.gompertz,
        st.arcsine, st.beta, st.bradford, st.dgamma, st.dweibull, st.expon,
        st.fatiguelife, st.foldcauchy,
        st.genpareto, st.genexpon, st.genhalflogistic, st.halfcauchy, st.halflogistic,
    ]
    return _fit_best_distribution(data, distributions, st.foldcauchy, bins=bins, ax=ax)


# Create models from data
def best_fit_distribution6(data, bins=200, ax=None):
    """Model data by finding best fit distribution to data (candidate group 6).

    Returns ``(distribution_name, fitted_params)``; falls back to ``loglaplace``.
    """
    distributions = [
        st.semicircular,
        st.halfnorm, st.halfgennorm, st.kappa3, st.laplace, st.levy, st.levy_l,
        st.loglaplace,
        st.lomax, st.pareto, st.powerlaw, st.rdist, st.kappa4, st.invweibull,

        st.reciprocal, st.trapz, st.triang,
        st.truncexpon, st.truncnorm, st.tukeylambda, st.wrapcauchy,

        # st.levy_stable,
        # st.crystalball, st.kstwobign
    ]
    return _fit_best_distribution(data, distributions, st.loglaplace, bins=bins, ax=ax)


# # Estimate distributions for actual home-school trip end times

# NOTE(review): the original cell comment read "For HW" although it sits under
# the home-school section — confirm which trip purpose this shortlist targets.
def best_fit_distribution7(data, bins=200, ax=None):
    """Model data by finding best fit distribution to data (hand-picked shortlist).

    Returns ``(distribution_name, fitted_params)``; falls back to ``norm``,
    which is not itself among the candidates.
    """
    distributions = [
        # st.t,
        # st.tukeylambda,
        # st.cauchy,
        # st.foldcauchy,
        st.johnsonsu,
        st.gennorm,
        st.dweibull, st.dgamma,
    ]
    return _fit_best_distribution(data, distributions, st.norm, bins=bins, ax=ax)
' +\n", + " 'Firefox 4 and 5 are also supported but you ' +\n", + " 'have to enable WebSockets in about:config.');\n", + " };\n", + "}\n", + "\n", + "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", + " this.id = figure_id;\n", + "\n", + " this.ws = websocket;\n", + "\n", + " this.supports_binary = (this.ws.binaryType != undefined);\n", + "\n", + " if (!this.supports_binary) {\n", + " var warnings = document.getElementById(\"mpl-warnings\");\n", + " if (warnings) {\n", + " warnings.style.display = 'block';\n", + " warnings.textContent = (\n", + " \"This browser does not support binary websocket messages. \" +\n", + " \"Performance may be slow.\");\n", + " }\n", + " }\n", + "\n", + " this.imageObj = new Image();\n", + "\n", + " this.context = undefined;\n", + " this.message = undefined;\n", + " this.canvas = undefined;\n", + " this.rubberband_canvas = undefined;\n", + " this.rubberband_context = undefined;\n", + " this.format_dropdown = undefined;\n", + "\n", + " this.image_mode = 'full';\n", + "\n", + " this.root = $('
');\n", + " this._root_extra_style(this.root)\n", + " this.root.attr('style', 'display: inline-block');\n", + "\n", + " $(parent_element).append(this.root);\n", + "\n", + " this._init_header(this);\n", + " this._init_canvas(this);\n", + " this._init_toolbar(this);\n", + "\n", + " var fig = this;\n", + "\n", + " this.waiting = false;\n", + "\n", + " this.ws.onopen = function () {\n", + " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", + " fig.send_message(\"send_image_mode\", {});\n", + " if (mpl.ratio != 1) {\n", + " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", + " }\n", + " fig.send_message(\"refresh\", {});\n", + " }\n", + "\n", + " this.imageObj.onload = function() {\n", + " if (fig.image_mode == 'full') {\n", + " // Full images could contain transparency (where diff images\n", + " // almost always do), so we need to clear the canvas so that\n", + " // there is no ghosting.\n", + " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", + " }\n", + " fig.context.drawImage(fig.imageObj, 0, 0);\n", + " };\n", + "\n", + " this.imageObj.onunload = function() {\n", + " fig.ws.close();\n", + " }\n", + "\n", + " this.ws.onmessage = this._make_on_message_function(this);\n", + "\n", + " this.ondownload = ondownload;\n", + "}\n", + "\n", + "mpl.figure.prototype._init_header = function() {\n", + " var titlebar = $(\n", + " '
');\n", + " var titletext = $(\n", + " '
');\n", + " titlebar.append(titletext)\n", + " this.root.append(titlebar);\n", + " this.header = titletext[0];\n", + "}\n", + "\n", + "\n", + "\n", + "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", + "\n", + "}\n", + "\n", + "\n", + "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", + "\n", + "}\n", + "\n", + "mpl.figure.prototype._init_canvas = function() {\n", + " var fig = this;\n", + "\n", + " var canvas_div = $('
');\n", + "\n", + " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", + "\n", + " function canvas_keyboard_event(event) {\n", + " return fig.key_event(event, event['data']);\n", + " }\n", + "\n", + " canvas_div.keydown('key_press', canvas_keyboard_event);\n", + " canvas_div.keyup('key_release', canvas_keyboard_event);\n", + " this.canvas_div = canvas_div\n", + " this._canvas_extra_style(canvas_div)\n", + " this.root.append(canvas_div);\n", + "\n", + " var canvas = $('');\n", + " canvas.addClass('mpl-canvas');\n", + " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", + "\n", + " this.canvas = canvas[0];\n", + " this.context = canvas[0].getContext(\"2d\");\n", + "\n", + " var backingStore = this.context.backingStorePixelRatio ||\n", + "\tthis.context.webkitBackingStorePixelRatio ||\n", + "\tthis.context.mozBackingStorePixelRatio ||\n", + "\tthis.context.msBackingStorePixelRatio ||\n", + "\tthis.context.oBackingStorePixelRatio ||\n", + "\tthis.context.backingStorePixelRatio || 1;\n", + "\n", + " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", + "\n", + " var rubberband = $('');\n", + " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", + "\n", + " var pass_mouse_events = true;\n", + "\n", + " canvas_div.resizable({\n", + " start: function(event, ui) {\n", + " pass_mouse_events = false;\n", + " },\n", + " resize: function(event, ui) {\n", + " fig.request_resize(ui.size.width, ui.size.height);\n", + " },\n", + " stop: function(event, ui) {\n", + " pass_mouse_events = true;\n", + " fig.request_resize(ui.size.width, ui.size.height);\n", + " },\n", + " });\n", + "\n", + " function mouse_event_fn(event) {\n", + " if (pass_mouse_events)\n", + " return fig.mouse_event(event, event['data']);\n", + " }\n", + "\n", + " rubberband.mousedown('button_press', mouse_event_fn);\n", + " rubberband.mouseup('button_release', mouse_event_fn);\n", + " // Throttle sequential mouse events to 1 every 
20ms.\n", + " rubberband.mousemove('motion_notify', mouse_event_fn);\n", + "\n", + " rubberband.mouseenter('figure_enter', mouse_event_fn);\n", + " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", + "\n", + " canvas_div.on(\"wheel\", function (event) {\n", + " event = event.originalEvent;\n", + " event['data'] = 'scroll'\n", + " if (event.deltaY < 0) {\n", + " event.step = 1;\n", + " } else {\n", + " event.step = -1;\n", + " }\n", + " mouse_event_fn(event);\n", + " });\n", + "\n", + " canvas_div.append(canvas);\n", + " canvas_div.append(rubberband);\n", + "\n", + " this.rubberband = rubberband;\n", + " this.rubberband_canvas = rubberband[0];\n", + " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", + " this.rubberband_context.strokeStyle = \"#000000\";\n", + "\n", + " this._resize_canvas = function(width, height) {\n", + " // Keep the size of the canvas, canvas container, and rubber band\n", + " // canvas in synch.\n", + " canvas_div.css('width', width)\n", + " canvas_div.css('height', height)\n", + "\n", + " canvas.attr('width', width * mpl.ratio);\n", + " canvas.attr('height', height * mpl.ratio);\n", + " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", + "\n", + " rubberband.attr('width', width);\n", + " rubberband.attr('height', height);\n", + " }\n", + "\n", + " // Set the figure to an initial 600x600px, this will subsequently be updated\n", + " // upon first draw.\n", + " this._resize_canvas(600, 600);\n", + "\n", + " // Disable right mouse context menu.\n", + " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", + " return false;\n", + " });\n", + "\n", + " function set_focus () {\n", + " canvas.focus();\n", + " canvas_div.focus();\n", + " }\n", + "\n", + " window.setTimeout(set_focus, 100);\n", + "}\n", + "\n", + "mpl.figure.prototype._init_toolbar = function() {\n", + " var fig = this;\n", + "\n", + " var nav_element = $('
')\n", + " nav_element.attr('style', 'width: 100%');\n", + " this.root.append(nav_element);\n", + "\n", + " // Define a callback function for later on.\n", + " function toolbar_event(event) {\n", + " return fig.toolbar_button_onclick(event['data']);\n", + " }\n", + " function toolbar_mouse_event(event) {\n", + " return fig.toolbar_button_onmouseover(event['data']);\n", + " }\n", + "\n", + " for(var toolbar_ind in mpl.toolbar_items) {\n", + " var name = mpl.toolbar_items[toolbar_ind][0];\n", + " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", + " var image = mpl.toolbar_items[toolbar_ind][2];\n", + " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", + "\n", + " if (!name) {\n", + " // put a spacer in here.\n", + " continue;\n", + " }\n", + " var button = $('');\n", + " button.click(method_name, toolbar_event);\n", + " button.mouseover(tooltip, toolbar_mouse_event);\n", + " nav_element.append(button);\n", + " }\n", + "\n", + " // Add the status bar.\n", + " var status_bar = $('');\n", + " nav_element.append(status_bar);\n", + " this.message = status_bar[0];\n", + "\n", + " // Add the close button to the window.\n", + " var buttongrp = $('
');\n", + " var button = $('');\n", + " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", + " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", + " buttongrp.append(button);\n", + " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", + " titlebar.prepend(buttongrp);\n", + "}\n", + "\n", + "mpl.figure.prototype._root_extra_style = function(el){\n", + " var fig = this\n", + " el.on(\"remove\", function(){\n", + "\tfig.close_ws(fig, {});\n", + " });\n", + "}\n", + "\n", + "mpl.figure.prototype._canvas_extra_style = function(el){\n", + " // this is important to make the div 'focusable\n", + " el.attr('tabindex', 0)\n", + " // reach out to IPython and tell the keyboard manager to turn it's self\n", + " // off when our div gets focus\n", + "\n", + " // location in version 3\n", + " if (IPython.notebook.keyboard_manager) {\n", + " IPython.notebook.keyboard_manager.register_events(el);\n", + " }\n", + " else {\n", + " // location in version 2\n", + " IPython.keyboard_manager.register_events(el);\n", + " }\n", + "\n", + "}\n", + "\n", + "mpl.figure.prototype._key_event_extra = function(event, name) {\n", + " var manager = IPython.notebook.keyboard_manager;\n", + " if (!manager)\n", + " manager = IPython.keyboard_manager;\n", + "\n", + " // Check for shift+enter\n", + " if (event.shiftKey && event.which == 13) {\n", + " this.canvas_div.blur();\n", + " event.shiftKey = false;\n", + " // Send a \"J\" for go to next cell\n", + " event.which = 74;\n", + " event.keyCode = 74;\n", + " manager.command_mode();\n", + " manager.handle_keydown(event);\n", + " }\n", + "}\n", + "\n", + "mpl.figure.prototype.handle_save = function(fig, msg) {\n", + " fig.ondownload(fig, null);\n", + "}\n", + "\n", + "\n", + "mpl.find_output_cell = function(html_output) {\n", + " // Return the cell and output element which can be found *uniquely* in the notebook.\n", + " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", + " // 
IPython event is triggered only after the cells have been serialised, which for\n", + " // our purposes (turning an active figure into a static one), is too late.\n", + " var cells = IPython.notebook.get_cells();\n", + " var ncells = cells.length;\n", + " for (var i=0; i= 3 moved mimebundle to data attribute of output\n", + " data = data.data;\n", + " }\n", + " if (data['text/html'] == html_output) {\n", + " return [cell, data, j];\n", + " }\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "// Register the function which deals with the matplotlib target/channel.\n", + "// The kernel may be null if the page has been refreshed.\n", + "if (IPython.notebook.kernel != null) {\n", + " IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n", + "}\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "johnsonsu(a=-0.53, b=0.71, loc=7.73, scale=0.29)\n" + ] + } + ], + "source": [ + "# # Load data\n", + "data = HS\n", + "\n", + "# Plot for comparison\n", + "plt.figure(figsize=(12,8))\n", + "ax = data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True\n", + " #, color=plt.rcParams['axes.color_cycle'][1]\n", + " )\n", + "# Save plot limits\n", + "dataYLim = ax.get_ylim()\n", + "\n", + "# Find best fit distribution\n", + "best_fit_name, best_fir_paramms = best_fit_distribution7(data, 200, ax)\n", + "best_dist = getattr(st, best_fit_name)\n", + "\n", + "# Update plots\n", + "ax.set_ylim(dataYLim)\n", + "ax.set_title(u'Trips to Work\\n All Best Fitted Distributions')\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "# Make PDF\n", + "pdf = make_pdf(best_dist, best_fir_paramms)\n", + "\n", + "# Display\n", + "plt.figure(figsize=(12,8))\n", + "ax = 
# Candidate-distribution search used for the dwell-time fit.
def best_fit_distribution8(data, bins=200, ax=None, distributions=None):
    """Fit several scipy distributions to ``data`` and return the best one.

    Each candidate is fitted with its own ``fit`` method, its PDF is evaluated
    at the centres of a density-normalised histogram of ``data``, and the
    candidate with the smallest sum of squared errors (SSE) against the
    histogram heights wins.

    Parameters
    ----------
    data : array-like
        One-dimensional sample to model.
    bins : int, optional
        Number of histogram bins used to score the fits (default 200).
    ax : matplotlib axes, optional
        If given, every successfully fitted PDF is drawn on it.
    distributions : iterable of scipy.stats distributions, optional
        Candidates to try. Defaults to ``foldcauchy``, ``loglaplace`` and
        ``johnsonsu``, the set this function originally hard-coded.

    Returns
    -------
    (str, tuple)
        Name of the best distribution and its fitted parameters
        (shape parameters first, then ``loc`` and ``scale``). If no candidate
        produces a finite, positive SSE the fallback ``('norm', (0.0, 1.0))``
        is returned.
    """
    # Histogram heights, plus bin centres (midpoints of consecutive edges).
    density, edges = np.histogram(data, bins=bins, density=True)
    centers = (edges[:-1] + edges[1:]) / 2.0

    if distributions is None:
        distributions = [st.foldcauchy, st.loglaplace, st.johnsonsu]

    # Fallback result, used when every candidate fails.
    best_distribution = st.norm
    best_params = (0.0, 1.0)
    best_sse = np.inf

    for distribution in distributions:
        try:
            # Fitting ill-suited distributions is noisy; silence the warnings.
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore')

                params = distribution.fit(data)

                # scipy convention: (*shape_args, loc, scale).
                shape_args = params[:-2]
                loc = params[-2]
                scale = params[-1]

                pdf = distribution.pdf(centers, *shape_args, loc=loc, scale=scale)
                sse = np.sum(np.power(density - pdf, 2.0))

                # Plotting is cosmetic: a failure here must not discard the fit.
                if ax is not None:
                    try:
                        pd.Series(pdf, centers).plot(
                            ax=ax, label=distribution.name, legend=True)
                    except Exception:
                        pass

                # NaN SSE fails both comparisons and is therefore skipped.
                if best_sse > sse > 0:
                    best_distribution = distribution
                    best_params = params
                    best_sse = sse

        except Exception:
            # Deliberate best-effort: a candidate that cannot be fitted or
            # evaluated is simply skipped.
            continue

    return (best_distribution.name, best_params)
' +\n", + " 'Firefox 4 and 5 are also supported but you ' +\n", + " 'have to enable WebSockets in about:config.');\n", + " };\n", + "}\n", + "\n", + "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", + " this.id = figure_id;\n", + "\n", + " this.ws = websocket;\n", + "\n", + " this.supports_binary = (this.ws.binaryType != undefined);\n", + "\n", + " if (!this.supports_binary) {\n", + " var warnings = document.getElementById(\"mpl-warnings\");\n", + " if (warnings) {\n", + " warnings.style.display = 'block';\n", + " warnings.textContent = (\n", + " \"This browser does not support binary websocket messages. \" +\n", + " \"Performance may be slow.\");\n", + " }\n", + " }\n", + "\n", + " this.imageObj = new Image();\n", + "\n", + " this.context = undefined;\n", + " this.message = undefined;\n", + " this.canvas = undefined;\n", + " this.rubberband_canvas = undefined;\n", + " this.rubberband_context = undefined;\n", + " this.format_dropdown = undefined;\n", + "\n", + " this.image_mode = 'full';\n", + "\n", + " this.root = $('
');\n", + " this._root_extra_style(this.root)\n", + " this.root.attr('style', 'display: inline-block');\n", + "\n", + " $(parent_element).append(this.root);\n", + "\n", + " this._init_header(this);\n", + " this._init_canvas(this);\n", + " this._init_toolbar(this);\n", + "\n", + " var fig = this;\n", + "\n", + " this.waiting = false;\n", + "\n", + " this.ws.onopen = function () {\n", + " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", + " fig.send_message(\"send_image_mode\", {});\n", + " if (mpl.ratio != 1) {\n", + " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", + " }\n", + " fig.send_message(\"refresh\", {});\n", + " }\n", + "\n", + " this.imageObj.onload = function() {\n", + " if (fig.image_mode == 'full') {\n", + " // Full images could contain transparency (where diff images\n", + " // almost always do), so we need to clear the canvas so that\n", + " // there is no ghosting.\n", + " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", + " }\n", + " fig.context.drawImage(fig.imageObj, 0, 0);\n", + " };\n", + "\n", + " this.imageObj.onunload = function() {\n", + " fig.ws.close();\n", + " }\n", + "\n", + " this.ws.onmessage = this._make_on_message_function(this);\n", + "\n", + " this.ondownload = ondownload;\n", + "}\n", + "\n", + "mpl.figure.prototype._init_header = function() {\n", + " var titlebar = $(\n", + " '
');\n", + " var titletext = $(\n", + " '
');\n", + " titlebar.append(titletext)\n", + " this.root.append(titlebar);\n", + " this.header = titletext[0];\n", + "}\n", + "\n", + "\n", + "\n", + "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", + "\n", + "}\n", + "\n", + "\n", + "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", + "\n", + "}\n", + "\n", + "mpl.figure.prototype._init_canvas = function() {\n", + " var fig = this;\n", + "\n", + " var canvas_div = $('
');\n", + "\n", + " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", + "\n", + " function canvas_keyboard_event(event) {\n", + " return fig.key_event(event, event['data']);\n", + " }\n", + "\n", + " canvas_div.keydown('key_press', canvas_keyboard_event);\n", + " canvas_div.keyup('key_release', canvas_keyboard_event);\n", + " this.canvas_div = canvas_div\n", + " this._canvas_extra_style(canvas_div)\n", + " this.root.append(canvas_div);\n", + "\n", + " var canvas = $('');\n", + " canvas.addClass('mpl-canvas');\n", + " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", + "\n", + " this.canvas = canvas[0];\n", + " this.context = canvas[0].getContext(\"2d\");\n", + "\n", + " var backingStore = this.context.backingStorePixelRatio ||\n", + "\tthis.context.webkitBackingStorePixelRatio ||\n", + "\tthis.context.mozBackingStorePixelRatio ||\n", + "\tthis.context.msBackingStorePixelRatio ||\n", + "\tthis.context.oBackingStorePixelRatio ||\n", + "\tthis.context.backingStorePixelRatio || 1;\n", + "\n", + " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", + "\n", + " var rubberband = $('');\n", + " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", + "\n", + " var pass_mouse_events = true;\n", + "\n", + " canvas_div.resizable({\n", + " start: function(event, ui) {\n", + " pass_mouse_events = false;\n", + " },\n", + " resize: function(event, ui) {\n", + " fig.request_resize(ui.size.width, ui.size.height);\n", + " },\n", + " stop: function(event, ui) {\n", + " pass_mouse_events = true;\n", + " fig.request_resize(ui.size.width, ui.size.height);\n", + " },\n", + " });\n", + "\n", + " function mouse_event_fn(event) {\n", + " if (pass_mouse_events)\n", + " return fig.mouse_event(event, event['data']);\n", + " }\n", + "\n", + " rubberband.mousedown('button_press', mouse_event_fn);\n", + " rubberband.mouseup('button_release', mouse_event_fn);\n", + " // Throttle sequential mouse events to 1 every 
20ms.\n", + " rubberband.mousemove('motion_notify', mouse_event_fn);\n", + "\n", + " rubberband.mouseenter('figure_enter', mouse_event_fn);\n", + " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", + "\n", + " canvas_div.on(\"wheel\", function (event) {\n", + " event = event.originalEvent;\n", + " event['data'] = 'scroll'\n", + " if (event.deltaY < 0) {\n", + " event.step = 1;\n", + " } else {\n", + " event.step = -1;\n", + " }\n", + " mouse_event_fn(event);\n", + " });\n", + "\n", + " canvas_div.append(canvas);\n", + " canvas_div.append(rubberband);\n", + "\n", + " this.rubberband = rubberband;\n", + " this.rubberband_canvas = rubberband[0];\n", + " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", + " this.rubberband_context.strokeStyle = \"#000000\";\n", + "\n", + " this._resize_canvas = function(width, height) {\n", + " // Keep the size of the canvas, canvas container, and rubber band\n", + " // canvas in synch.\n", + " canvas_div.css('width', width)\n", + " canvas_div.css('height', height)\n", + "\n", + " canvas.attr('width', width * mpl.ratio);\n", + " canvas.attr('height', height * mpl.ratio);\n", + " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", + "\n", + " rubberband.attr('width', width);\n", + " rubberband.attr('height', height);\n", + " }\n", + "\n", + " // Set the figure to an initial 600x600px, this will subsequently be updated\n", + " // upon first draw.\n", + " this._resize_canvas(600, 600);\n", + "\n", + " // Disable right mouse context menu.\n", + " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", + " return false;\n", + " });\n", + "\n", + " function set_focus () {\n", + " canvas.focus();\n", + " canvas_div.focus();\n", + " }\n", + "\n", + " window.setTimeout(set_focus, 100);\n", + "}\n", + "\n", + "mpl.figure.prototype._init_toolbar = function() {\n", + " var fig = this;\n", + "\n", + " var nav_element = $('
')\n", + " nav_element.attr('style', 'width: 100%');\n", + " this.root.append(nav_element);\n", + "\n", + " // Define a callback function for later on.\n", + " function toolbar_event(event) {\n", + " return fig.toolbar_button_onclick(event['data']);\n", + " }\n", + " function toolbar_mouse_event(event) {\n", + " return fig.toolbar_button_onmouseover(event['data']);\n", + " }\n", + "\n", + " for(var toolbar_ind in mpl.toolbar_items) {\n", + " var name = mpl.toolbar_items[toolbar_ind][0];\n", + " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", + " var image = mpl.toolbar_items[toolbar_ind][2];\n", + " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", + "\n", + " if (!name) {\n", + " // put a spacer in here.\n", + " continue;\n", + " }\n", + " var button = $('');\n", + " button.click(method_name, toolbar_event);\n", + " button.mouseover(tooltip, toolbar_mouse_event);\n", + " nav_element.append(button);\n", + " }\n", + "\n", + " // Add the status bar.\n", + " var status_bar = $('');\n", + " nav_element.append(status_bar);\n", + " this.message = status_bar[0];\n", + "\n", + " // Add the close button to the window.\n", + " var buttongrp = $('
');\n", + " var button = $('');\n", + " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", + " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", + " buttongrp.append(button);\n", + " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", + " titlebar.prepend(buttongrp);\n", + "}\n", + "\n", + "mpl.figure.prototype._root_extra_style = function(el){\n", + " var fig = this\n", + " el.on(\"remove\", function(){\n", + "\tfig.close_ws(fig, {});\n", + " });\n", + "}\n", + "\n", + "mpl.figure.prototype._canvas_extra_style = function(el){\n", + " // this is important to make the div 'focusable\n", + " el.attr('tabindex', 0)\n", + " // reach out to IPython and tell the keyboard manager to turn it's self\n", + " // off when our div gets focus\n", + "\n", + " // location in version 3\n", + " if (IPython.notebook.keyboard_manager) {\n", + " IPython.notebook.keyboard_manager.register_events(el);\n", + " }\n", + " else {\n", + " // location in version 2\n", + " IPython.keyboard_manager.register_events(el);\n", + " }\n", + "\n", + "}\n", + "\n", + "mpl.figure.prototype._key_event_extra = function(event, name) {\n", + " var manager = IPython.notebook.keyboard_manager;\n", + " if (!manager)\n", + " manager = IPython.keyboard_manager;\n", + "\n", + " // Check for shift+enter\n", + " if (event.shiftKey && event.which == 13) {\n", + " this.canvas_div.blur();\n", + " event.shiftKey = false;\n", + " // Send a \"J\" for go to next cell\n", + " event.which = 74;\n", + " event.keyCode = 74;\n", + " manager.command_mode();\n", + " manager.handle_keydown(event);\n", + " }\n", + "}\n", + "\n", + "mpl.figure.prototype.handle_save = function(fig, msg) {\n", + " fig.ondownload(fig, null);\n", + "}\n", + "\n", + "\n", + "mpl.find_output_cell = function(html_output) {\n", + " // Return the cell and output element which can be found *uniquely* in the notebook.\n", + " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", + " // 
# Observed school dwell times (``dwell_exact`` is trips1.school_dwell).
data = dwell_exact

# Figure 1: histogram of the data with every candidate fit overlaid.
plt.figure(figsize=(12,8))
ax = data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True)
# Remember the histogram's y-limits so the overlaid PDFs cannot rescale the axis.
dataYLim = ax.get_ylim()

# Fit the candidates; passing ``ax`` lets the helper draw each fitted PDF on figure 1.
best_fit_name, best_fit_params = best_fit_distribution8(data, 200, ax)
best_dist = getattr(st, best_fit_name)

# Restore the limits and label figure 1.
ax.set_ylim(dataYLim)
ax.set_title(u'School dwell time\n All Fitted Distributions')
ax.set_xlabel(u'Dwell time')
# The histogram is drawn with density=True, so the y-axis is a density.
ax.set_ylabel('Density')

# NOTE(review): make_pdf is defined earlier in the notebook; it presumably
# returns a pandas Series of PDF values, since the result is plotted with
# ``.plot`` -- confirm.
pdf = make_pdf(best_dist, best_fit_params)

# Figure 2: best-fit PDF on top of the data histogram.
plt.figure(figsize=(12,8))
ax = pdf.plot(lw=2, label='PDF', legend=True)
data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True, ax=ax)

# Build a readable "name(shape=..., loc=..., scale=...)" label for the title.
param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']
param_str = ', '.join(['{}={:0.2f}'.format(k,v) for k,v in zip(param_names, best_fit_params)])
dist_str = '{}({})'.format(best_fit_name, param_str)

ax.set_title(u'School dwell time with best-fit distribution \n' + dist_str)
ax.set_xlabel(u'Dwell time')
ax.set_ylabel('Density')

print (dist_str)
scipy.stats as st\n", + "from scipy.stats import skewnorm\n", + "\n", + "# import matplotlib\n", + "# matplotlib.style.use('ggplot')\n", + "\n", + "%matplotlib inline\n", + "\n", + "pd.options.display.max_columns = 80" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0HHPERHHPERTRIPoriginorigin_dwellorigin_STorigin_ETtrip_ETTTMODE
00103519811.035198e+09home14.00000017.3333337.3333337.7000000.366667drive_alone
11103519811.035198e+09work9.3833337.70000017.08333317.3333330.250000drive_alone
22103519821.035198e+09home10.41666719.5833336.0000006.2500000.250000drive_alone
33103519821.035198e+09work10.2500006.25000016.50000019.5833333.083333drive_alone
44103527421.035274e+09home13.58333319.1666678.7500009.1666670.416667drive_alone
\n", + "
# NOTE(review): absolute, user-specific path -- the notebook only runs on the
# machine that has this file.
trips = pd.read_csv('/home/emma/ual_model_workspace/fall-2018-models/notebooks-emma/HWtrips_031418.csv')

trips.head()

# Keep only persons (HHPER) that have exactly two trip rows.
# NOTE(review): presumably one home->work and one work->home row; the filter
# itself only checks the row count -- confirm.
def _has_two_trips(person_trips):
    return len(person_trips) == 2

tripsII = trips.groupby('HHPER').filter(_has_two_trips)

# Within each person, sort by origin so the 'home' row precedes the 'work' row.
tripsIII = tripsII.sort_values(['HHPER','origin']).reset_index()

# Copy each person's second-row (work-origin) values up into the first row
# under new column names: (new column, source column).
_WORK_ROW_COLUMNS = [
    ('work_dwell', 'origin_dwell'),
    ('work_ST', 'origin_ST'),
    ('WH_trip_ST', 'origin_ET'),
    ('WH_trip_ET', 'trip_ET'),
    ('WH_TT', 'TT'),
    ('WH_mode', 'MODE'),
]
for _new_col, _src_col in _WORK_ROW_COLUMNS:
    tripsIII[_new_col] = tripsIII.groupby('HHPER', group_keys=False)[_src_col].shift(-1)

# Collapse to one row per person.
# NOTE(review): GroupBy.first() takes the first non-null value per column,
# not strictly the first row -- confirm that is intended.
tripsIII = tripsIII.groupby('HHPER').first().reset_index()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HHPERindexUnnamed: 0HHPERTRIPoriginhome_dwellhome_STHW_trip_STHW_trip_ETHW_TTHW_modework_dwellwork_STWH_trip_STWH_trip_ETWH_TTWH_mode
010351981001.035198e+09home14.00000017.3333337.3333337.7000000.366667drive_alone9.3833337.70000017.08333317.3333330.250000drive_alone
110351982221.035198e+09home10.41666719.5833336.0000006.2500000.250000drive_alone10.2500006.25000016.50000019.5833333.083333drive_alone
210352742441.035274e+09home13.58333319.1666678.7500009.1666670.416667drive_alone7.5833339.16666716.75000019.1666672.416667drive_alone
310353643661.035364e+09home11.33333319.5833336.9166677.4166670.500000drive_alone8.6333337.41666716.05000019.5833333.533333drive_alone
410372952881.037295e+09home17.16666721.83333315.00000015.4166670.416667drive_alone6.00000015.41666721.41666721.8333330.416667drive_alone
\n", + "
# Relabel the remaining origin/trip columns as the home -> work leg.
tripsIII = tripsIII.rename(columns={
    'origin_dwell': 'home_dwell',
    'origin_ST': 'home_ST',
    'origin_ET': 'HW_trip_ST',
    'trip_ET': 'HW_trip_ET',
    'TT': 'HW_TT',
    'MODE': 'HW_mode',
})

tripsIII.head()

# Time-of-day category from the end time of the home -> work trip.
# Explicit half-open comparisons (lo <= x < hi) replace
# ``between(lo, hi, inclusive=False) | (x == lo)``: same result, but it does
# not depend on the boolean ``inclusive`` argument that newer pandas rejects.
# NOTE(review): values look like decimal hours of the day -- confirm.
hw_trip_end = tripsIII['HW_trip_ET']
tod_conditions = [
    (hw_trip_end >= 3) & (hw_trip_end < 6),
    (hw_trip_end >= 6) & (hw_trip_end < 9),
    (hw_trip_end >= 9) & (hw_trip_end < 15.5),
    (hw_trip_end >= 15.5) & (hw_trip_end < 18.5),
    (hw_trip_end >= 18.5) | ((hw_trip_end >= 0) & (hw_trip_end < 3)),
]
# Codes 1..5 shifted to 0..4; rows matching no interval (NaN or negative)
# get 0 - 1 = -1, exactly as the original sum-of-masks expression did.
tripsIII['TOD'] = np.select(tod_conditions, [1, 2, 3, 4, 5], default=0) - 1

tripsIII['TOD'] = pd.to_numeric(tripsIII['TOD'])

# Work dwell-time category, 1..5 (0 when no interval matches).
# NOTE(review): unlike TOD this is not shifted to start at 0 -- confirm that
# the difference is intended.
work_dwell = tripsIII['work_dwell']
dwell_conditions = [
    (work_dwell >= 0) & (work_dwell < 4.5),
    (work_dwell >= 4.5) & (work_dwell < 7.75),
    (work_dwell >= 7.75) & (work_dwell < 9.0),
    (work_dwell >= 9.0) & (work_dwell < 10.5),
    (work_dwell >= 10.5),
]
tripsIII['dwell_work'] = np.select(dwell_conditions, [1, 2, 3, 4, 5], default=0)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SAMPNPERNORELATGENDAGEAGEBHISPRACE1RACE2RACE3RACE4O_RACENTVTYCNTRYLICUSERTRANSTPTYP1TPTYP2TPTYP3TPTYP4TPTYP5TPTYP6TPTYP7O_TPTYPCLIP1CLIP2CLIP3COMPMETPASSTLFLEXEMPLYWKSTATO_WKSTATJOBSWLOCWNAMEWCITYWSTAT...HVLOGPTRIPSTOLLFTOLLR1TOLLR2TOLLR3TOLLR4TOLLR5TOLLR6TOLLR7TOLLR8TOLLR9TOLLR10TOLLB1TOLLB2TOLLB3TOLLB4TOLLB5TOLLB6TOLLB7TOLLB8TOLLB9TOLLB10HOVLNOGOWHYNOGOWHY_OInCompleteMoto_tripWCTFIPWTRACTSCTFIPSTRACTWPrimaryCityWSTFIPW2PrimaryCityW2STFIPSPrimaryCitySSTFIPPERWGTEXPPERWGT
0103198511174NaN21.0NaNNaNNaNNaN1NaN1.01.02.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.02.02.01.0NaNNaNNaNNaNNaNNaN...1.02.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.0NaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.05208617.647568
1103198522273NaN21.0NaNNaNNaNNaN1NaN1.01.02.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.02.02.01.0NaNNaNNaNNaNNaNNaN...1.02.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.0NaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.05208617.647568
2103203611146NaN21.0NaNNaNNaNNaN1NaN1.01.02.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.02.01.0NaNNaN1.01.0HIDDENSAN DIEGOCA...NaN5.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaN1.073.017032.0NaNNaNSAN DIEGO6.0NaNNaNNaNNaN1.223974414.701494
3103203622247NaN21.097.0NaNNaNMULTI-RACIAL1NaN1.01.02.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.02.02.03.0NaNNaNNaNNaNNaNNaN...NaN18.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.863473292.558373
4103203633115NaN21.097.0NaNNaNMULTI-RACIAL1NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaN4.03.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.0NaNNaNNaN1.0NaNNaN73.017030.0NaNNaNNaNNaNSAN DIEGO6.00.941412318.965100
\n", + "

5 rows × 148 columns

\n", + "
" + ], + "text/plain": [ + " SAMPN PERNO RELAT GEND AGE AGEB HISP RACE1 RACE2 RACE3 RACE4 \\\n", + "0 1031985 1 1 1 74 NaN 2 1.0 NaN NaN NaN \n", + "1 1031985 2 2 2 73 NaN 2 1.0 NaN NaN NaN \n", + "2 1032036 1 1 1 46 NaN 2 1.0 NaN NaN NaN \n", + "3 1032036 2 2 2 47 NaN 2 1.0 97.0 NaN NaN \n", + "4 1032036 3 3 1 15 NaN 2 1.0 97.0 NaN NaN \n", + "\n", + " O_RACE NTVTY CNTRY LIC USER TRANS TPTYP1 TPTYP2 TPTYP3 \\\n", + "0 NaN 1 NaN 1.0 1.0 2.0 NaN NaN NaN \n", + "1 NaN 1 NaN 1.0 1.0 2.0 NaN NaN NaN \n", + "2 NaN 1 NaN 1.0 1.0 2.0 NaN NaN NaN \n", + "3 MULTI-RACIAL 1 NaN 1.0 1.0 2.0 NaN NaN NaN \n", + "4 MULTI-RACIAL 1 NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " TPTYP4 TPTYP5 TPTYP6 TPTYP7 O_TPTYP CLIP1 CLIP2 CLIP3 COMP MET \\\n", + "0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " PASSTL FLEX EMPLY WKSTAT O_WKSTAT JOBS WLOC WNAME WCITY WSTAT \\\n", + "0 2.0 2.0 2.0 1.0 NaN NaN NaN NaN NaN NaN \n", + "1 2.0 2.0 2.0 1.0 NaN NaN NaN NaN NaN NaN \n", + "2 2.0 2.0 1.0 NaN NaN 1.0 1.0 HIDDEN SAN DIEGO CA \n", + "3 2.0 2.0 2.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " ... HVLOG PTRIPS TOLLF TOLLR1 TOLLR2 TOLLR3 TOLLR4 TOLLR5 TOLLR6 \\\n", + "0 ... 1.0 2.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "1 ... 1.0 2.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "2 ... NaN 5.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "3 ... NaN 18.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "4 ... 
NaN 4.0 3.0 NaN NaN NaN NaN NaN NaN \n", + "\n", + " TOLLR7 TOLLR8 TOLLR9 TOLLR10 TOLLB1 TOLLB2 TOLLB3 TOLLB4 TOLLB5 \\\n", + "0 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " TOLLB6 TOLLB7 TOLLB8 TOLLB9 TOLLB10 HOVL NOGOWHY NOGOWHY_O InComplete \\\n", + "0 NaN NaN NaN NaN NaN 2.0 NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN 2.0 NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN 1.0 NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN 1.0 NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN 1.0 NaN NaN NaN \n", + "\n", + " Moto_trip WCTFIP WTRACT SCTFIP STRACT WPrimaryCity WSTFIP \\\n", + "0 1.0 NaN NaN NaN NaN NaN NaN \n", + "1 1.0 NaN NaN NaN NaN NaN NaN \n", + "2 1.0 73.0 17032.0 NaN NaN SAN DIEGO 6.0 \n", + "3 1.0 NaN NaN NaN NaN NaN NaN \n", + "4 1.0 NaN NaN 73.0 17030.0 NaN NaN \n", + "\n", + " W2PrimaryCity W2STFIP SPrimaryCity SSTFIP PERWGT EXPPERWGT \n", + "0 NaN NaN NaN NaN 0.052086 17.647568 \n", + "1 NaN NaN NaN NaN 0.052086 17.647568 \n", + "2 NaN NaN NaN NaN 1.223974 414.701494 \n", + "3 NaN NaN NaN NaN 0.863473 292.558373 \n", + "4 NaN NaN SAN DIEGO 6.0 0.941412 318.965100 \n", + "\n", + "[5 rows x 148 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "person = pd.read_csv('/home/data/CHTS_csv_format/data/Deliv_PER.csv')\n", + "\n", + "person.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "person = person[['SAMPN','PERNO','GEND','AGE','HISP','RACE1','RACE2','RACE3','RACE4',\n", + " 'HOURS','EDUCA','INDUS']]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "person['HHPER'] = person['SAMPN'].map(str) + person['PERNO'].map(str)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SAMPNHHVEHOWNINCOMHHSIZ
010319852132
110320361175
210320532226
310324252172
410325580211
\n", + "
" + ], + "text/plain": [ + " SAMPN HHVEH OWN INCOM HHSIZ\n", + "0 1031985 2 1 3 2\n", + "1 1032036 1 1 7 5\n", + "2 1032053 2 2 2 6\n", + "3 1032425 2 1 7 2\n", + "4 1032558 0 2 1 1" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hh_df = pd.read_csv('/home/data/CHTS_csv_format/data/Deliv_HH.csv')[\n", + " ['SAMPN','HHVEH',\n", + " 'OWN','INCOM','HHSIZ']]\n", + "\n", + "hh_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SAMPNPERNOGENDAGEHISPRACE1RACE2RACE3RACE4HOURSEDUCAINDUSHHPERHHVEHOWNINCOMHHSIZ
01031985117421.0NaNNaNNaNNaN6NaN103198512132
11031985227321.0NaNNaNNaNNaN6NaN103198522132
21032036114621.0NaNNaNNaN40.0654.0103203611175
31032036224721.097.0NaNNaNNaN6NaN103203621175
41032036311521.097.0NaNNaNNaN1NaN103203631175
\n", + "
" + ], + "text/plain": [ + " SAMPN PERNO GEND AGE HISP RACE1 RACE2 RACE3 RACE4 HOURS EDUCA \\\n", + "0 1031985 1 1 74 2 1.0 NaN NaN NaN NaN 6 \n", + "1 1031985 2 2 73 2 1.0 NaN NaN NaN NaN 6 \n", + "2 1032036 1 1 46 2 1.0 NaN NaN NaN 40.0 6 \n", + "3 1032036 2 2 47 2 1.0 97.0 NaN NaN NaN 6 \n", + "4 1032036 3 1 15 2 1.0 97.0 NaN NaN NaN 1 \n", + "\n", + " INDUS HHPER HHVEH OWN INCOM HHSIZ \n", + "0 NaN 10319851 2 1 3 2 \n", + "1 NaN 10319852 2 1 3 2 \n", + "2 54.0 10320361 1 1 7 5 \n", + "3 NaN 10320362 1 1 7 5 \n", + "4 NaN 10320363 1 1 7 5 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "demo = person.merge(hh_df,on = 'SAMPN',how = 'left')\n", + "\n", + "demo.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get rid of null values" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "na_dict = {\n", + " 'GEND':[9],\n", + " 'AGE':[998,999],\n", + " 'HOURS':[998,999],\n", + " 'EDUCA':[8,9],\n", + " 'HHVEH':[98,99],\n", + " 'OWN':[7,8,9],\n", + " 'INCOM':[98,99],\n", + " 'HHSIZ':[98,99],\n", + " 'INDUS':[98,99],\n", + "}\n", + "\n", + "for col in na_dict:\n", + " for vals in na_dict[col]:\n", + " demo[col] = demo[col].replace(vals,np.nan)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "demo = demo.dropna(subset = ['GEND', 'AGE', 'HOURS', 'EDUCA','HHVEH','OWN','INCOM','HHSIZ','INDUS'])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21285\n", + "17943\n" + ] + } + ], + "source": [ + "tripsIII['HHPER'] = tripsIII['HHPER'].map(str)\n", + "\n", + "trips1 = pd.merge(tripsIII, demo, on='HHPER')\n", + "\n", + "print (len(tripsIII.index))\n", + "print (len(trips1.index))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": 
[ + "## Prepare data for use in MNL estimation (make dummy columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['minority'] = np.where((trips1['HISP'].isin([1.0]) |\n", + " trips1['RACE1'].isin([2.0]) | trips1['RACE2'].isin([2.0]) | trips1['RACE3'].isin([2.0]) | trips1['RACE4'].isin([2.0]) |\n", + " trips1['RACE1'].isin([3.0]) | trips1['RACE2'].isin([3.0]) | trips1['RACE3'].isin([3.0]) | trips1['RACE4'].isin([3.0]) |\n", + " trips1['RACE1'].isin([4.0]) | trips1['RACE2'].isin([4.0]) | trips1['RACE3'].isin([4.0]) | trips1['RACE4'].isin([4.0]) |\n", + " trips1['RACE1'].isin([5.0]) | trips1['RACE2'].isin([5.0]) | trips1['RACE3'].isin([5.0]) | trips1['RACE4'].isin([5.0]) |\n", + " trips1['RACE1'].isin([97.0]) | trips1['RACE2'].isin([97.0]) | trips1['RACE3'].isin([97.0]) | trips1['RACE4'].isin([97.0])),1,0)\n", + "\n", + "trips1['HISP'] = np.where(trips1['HISP'].isin([1.0]),1,0)\n", + "trips1['black'] = np.where((trips1['RACE1'].isin([2.0]) | trips1['RACE2'].isin([2.0]) | trips1['RACE3'].isin([2.0]) | trips1['RACE4'].isin([2.0])),1,0)\n", + "trips1['native'] = np.where((trips1['RACE1'].isin([3.0]) | trips1['RACE2'].isin([3.0]) | trips1['RACE3'].isin([3.0]) | trips1['RACE4'].isin([3.0])),1,0)\n", + "trips1['asian'] = np.where((trips1['RACE1'].isin([4.0]) | trips1['RACE2'].isin([4.0]) | trips1['RACE3'].isin([4.0]) | trips1['RACE4'].isin([4.0])),1,0)\n", + "trips1['PI'] = np.where((trips1['RACE1'].isin([5.0]) | trips1['RACE2'].isin([5.0]) | trips1['RACE3'].isin([5.0]) | trips1['RACE4'].isin([5.0])),1,0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['hh_inc_less75k'] = np.where(trips1['INCOM'].isin([1.0,2.0,3.0,4.0,5.0]),1,0)\n", + "trips1['hh_inc_75kless100k'] = np.where(trips1['INCOM'].isin([6.0]),1,0)\n", + "trips1['hh_inc_150kplus'] = np.where(trips1['INCOM'].isin([8.0,9.0,10.0]),1,0)\n", + 
"trips1['hh_inc_150kless250k'] = np.where(trips1['INCOM'].isin([8.0,9.0]),1,0)\n", + "trips1['hh_inc_250kplus'] = np.where(trips1['INCOM'].isin([10.0]),1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['lessGED'] = np.where(trips1['EDUCA'].isin([1.0]),1,0)\n", + "trips1['GED'] = np.where(trips1['EDUCA'].isin([2.0]),1,0)\n", + "trips1['someBach'] = np.where(trips1['EDUCA'].isin([3.0]),1,0)\n", + "trips1['Assoc'] = np.where(trips1['EDUCA'].isin([4.0]),1,0)\n", + "trips1['Bach'] = np.where(trips1['EDUCA'].isin([5.0]),1,0)\n", + "\n", + "trips1['lessGED_GED'] = np.where(trips1['EDUCA'].isin([1.0,2.0]),1,0)\n", + "\n", + "trips1['no_higher_ed'] = (trips1['EDUCA'] < 5).astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['age_16less25'] = np.where(((trips1.AGE.between(16,25,inclusive = False)) | (trips1.AGE==16)),1,0)\n", + "trips1['age_25less40'] = np.where(((trips1.AGE.between(25,40,inclusive = False)) | (trips1.AGE==25)),1,0)\n", + "trips1['age_40less50'] = np.where(((trips1.AGE.between(40,50,inclusive = False)) | (trips1.AGE==40)),1,0)\n", + "trips1['age_50less60'] = np.where(((trips1.AGE.between(50,60,inclusive = False)) | (trips1.AGE==50)),1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['female'] = trips1['GEND'] - 1\n", + "\n", + "trips1['tenure_2'] = trips1['OWN'] - 1\n", + "\n", + "trips1['noveh'] = np.where(trips1.HHVEH.isin([0.0]),1,0)\n", + "\n", + "trips1['hh_size_1per'] = np.where(trips1.HHSIZ.isin([1.0]),1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['sector_constr'] = np.where(trips1['INDUS'].isin([23]),1,0)\n", + "trips1['sector_mfg'] = np.where(trips1['INDUS'].isin([31]),1,0)\n", + "trips1['sector_retail'] = 
np.where(trips1['INDUS'].isin([44,45]),1,0)\n", + "trips1['sector_transport'] = np.where(trips1['INDUS'].isin([48]),1,0)\n", + "trips1['info'] = np.where(trips1['INDUS'].isin([51]),1,0)\n", + "trips1['finance'] = np.where(trips1['INDUS'].isin([52]),1,0)\n", + "trips1['scitech'] = np.where(trips1['INDUS'].isin([54]),1,0)\n", + "trips1['sector_edu_serv'] = np.where(trips1['INDUS'].isin([61]),1,0)\n", + "trips1['sector_healthcare'] = np.where(trips1['INDUS'].isin([62]),1,0)\n", + "trips1['sector_oth_serv'] = np.where(trips1['INDUS'].isin([81]),1,0)\n", + "trips1['sector_gov'] = np.where(trips1['INDUS'].isin([92]),1,0)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "trips1['TOD_3to6'] = np.where(trips1['TOD'].isin([0]),1,0)\n", + "# trips1['TOD_6to9'] = np.where(trips1['TOD'].isin([1]),1,0)\n", + "trips1['TOD_9to1530'] = np.where(trips1['TOD'].isin([2]),1,0)\n", + "trips1['TOD_1530to1830'] = np.where(trips1['TOD'].isin([3]),1,0)\n", + "trips1['TOD_1830up'] = np.where(trips1['TOD'].isin([4]),1,0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Estimate the model for dwell time at work" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "@orca.table(cache=True)\n", + "def tripsA():\n", + " return trips1" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "m = SmallMultinomialLogitStep()\n", + "m.name = 'dwell_work'\n", + "m.tables = ['tripsA']\n", + "m.choice_column = 'dwell_work'\n", + "m.model_expression = OrderedDict([\n", + " ('intercept', [1,2,3,5]), \n", + " \n", + " ('TOD_3to6',[2,5]),\n", + "# ('TOD_6to9'),\n", + " ('TOD_9to1530',[1,2,4,5]),\n", + " ('TOD_1530to1830',[1,2,4]),\n", + " ('TOD_1830up',[1,4]),\n", + " \n", + " ('sector_mfg',[1,2]),\n", + " ('sector_retail',[1,5]),\n", + " ('sector_transport',[4,5]),\n", + " ('info',[1]),\n", + " 
('finance',[1,4,5]),\n", + " ('scitech',[1,2]),\n", + " ('sector_edu_serv',[2,4,5]),\n", + " ('sector_healthcare',[1,2,4,5]),\n", + " ('sector_gov',[1,2]),\n", + " \n", + " ('age_16less25',[1,2]),\n", + " ('age_25less40',[1]),\n", + " ('age_40less50',[1]),\n", + " ('age_50less60',[1]),\n", + " \n", + " ('female',[[1,2],5]),\n", + " \n", + " ('minority',[1,2]),\n", + " \n", + " ('hh_inc_less75k',[1,4,5]), \n", + " ('hh_inc_75kless100k',[5]),\n", + "# ('100kless150k')\n", + " ('hh_inc_150kplus',[1,2,4]),\n", + " \n", + " ('lessGED_GED',[1]),\n", + " ('Assoc',[1,4]),\n", + " \n", + " ('HOURS',[1,2,4,5]),\n", + " \n", + " ('noveh',[4]),\n", + " \n", + " ('hh_size_1per',[4]),\n", + " \n", + " ('tenure_2',[4]),\n", + " \n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Log-likelihood at zero: -28,878.1445\n", + "Initial Log-likelihood: -28,878.1445\n", + "Estimation Time for Point Estimation: 9.52 seconds.\n", + "Final log-likelihood: -24,197.0957\n", + " Multinomial Logit Model Regression Results \n", + "===================================================================================\n", + "Dep. Variable: _chosen No. 
Observations: 17,943\n", + "Model: Multinomial Logit Model Df Residuals: 17,881\n", + "Method: MLE Df Model: 62\n", + "Date: Sat, 23 Mar 2019 Pseudo R-squ.: 0.162\n", + "Time: 15:41:41 Pseudo R-bar-squ.: 0.160\n", + "AIC: 48,518.191 Log-Likelihood: -24,197.096\n", + "BIC: 49,001.479 LL-Null: -28,878.144\n", + "========================================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "----------------------------------------------------------------------------------------\n", + "intercept_1 0.8927 0.125 7.132 0.000 0.647 1.138\n", + "intercept_2 0.7383 0.102 7.233 0.000 0.538 0.938\n", + "intercept_3 0.3676 0.086 4.260 0.000 0.198 0.537\n", + "intercept_5 -2.3202 0.129 -17.950 0.000 -2.574 -2.067\n", + "TOD_3to6_2 -0.4000 0.118 -3.380 0.001 -0.632 -0.168\n", + "TOD_3to6_5 1.0418 0.071 14.626 0.000 0.902 1.181\n", + "TOD_9to1530_1 1.6632 0.064 25.814 0.000 1.537 1.789\n", + "TOD_9to1530_2 0.9464 0.050 19.002 0.000 0.849 1.044\n", + "TOD_9to1530_4 -0.7953 0.051 -15.446 0.000 -0.896 -0.694\n", + "TOD_9to1530_5 -0.9901 0.083 -11.996 0.000 -1.152 -0.828\n", + "TOD_1530to1830_1 2.9298 0.140 20.955 0.000 2.656 3.204\n", + "TOD_1530to1830_2 1.7683 0.130 13.615 0.000 1.514 2.023\n", + "TOD_1530to1830_4 -1.1271 0.217 -5.182 0.000 -1.553 -0.701\n", + "TOD_1830up_1 1.7121 0.187 9.160 0.000 1.346 2.078\n", + "TOD_1830up_4 -1.0139 0.211 -4.804 0.000 -1.428 -0.600\n", + "sector_mfg_1 -0.7079 0.143 -4.952 0.000 -0.988 -0.428\n", + "sector_mfg_2 -0.6238 0.106 -5.875 0.000 -0.832 -0.416\n", + "sector_retail_1 -0.7501 0.101 -7.433 0.000 -0.948 -0.552\n", + "sector_retail_5 -0.2827 0.109 -2.595 0.009 -0.496 -0.069\n", + "sector_transport_4 0.2460 0.099 2.496 0.013 0.053 0.439\n", + "sector_transport_5 0.3908 0.123 3.169 0.002 0.149 0.633\n", + "info_1 -0.5215 0.136 -3.829 0.000 -0.788 -0.255\n", + "finance_1 -0.5278 0.157 -3.358 0.001 -0.836 -0.220\n", + "finance_4 0.1900 0.085 2.240 0.025 0.024 0.356\n", + "finance_5 
-0.3907 0.153 -2.550 0.011 -0.691 -0.090\n", + "scitech_1 -0.5299 0.106 -4.978 0.000 -0.739 -0.321\n", + "scitech_2 -0.2690 0.079 -3.387 0.001 -0.425 -0.113\n", + "sector_edu_serv_2 0.3393 0.058 5.842 0.000 0.225 0.453\n", + "sector_edu_serv_4 -0.5565 0.058 -9.527 0.000 -0.671 -0.442\n", + "sector_edu_serv_5 -0.4902 0.092 -5.349 0.000 -0.670 -0.311\n", + "sector_healthcare_1 -0.4660 0.093 -5.024 0.000 -0.648 -0.284\n", + "sector_healthcare_2 -0.4117 0.077 -5.343 0.000 -0.563 -0.261\n", + "sector_healthcare_4 -0.2910 0.064 -4.542 0.000 -0.417 -0.165\n", + "sector_healthcare_5 0.3154 0.084 3.738 0.000 0.150 0.481\n", + "sector_gov_1 -0.6975 0.115 -6.070 0.000 -0.923 -0.472\n", + "sector_gov_2 -0.4604 0.082 -5.599 0.000 -0.622 -0.299\n", + "age_16less25_1 -0.4374 0.116 -3.762 0.000 -0.665 -0.210\n", + "age_16less25_2 0.3590 0.078 4.591 0.000 0.206 0.512\n", + "age_25less40_1 -0.6918 0.090 -7.710 0.000 -0.868 -0.516\n", + "age_40less50_1 -0.2679 0.081 -3.291 0.001 -0.427 -0.108\n", + "age_50less60_1 -0.2232 0.074 -3.032 0.002 -0.367 -0.079\n", + "female_[1, 2] 0.1611 0.040 4.075 0.000 0.084 0.239\n", + "female_5 -0.3918 0.056 -6.996 0.000 -0.502 -0.282\n", + "minority_1 -0.3457 0.064 -5.437 0.000 -0.470 -0.221\n", + "minority_2 -0.2746 0.045 -6.083 0.000 -0.363 -0.186\n", + "hh_inc_less75k_1 -0.1604 0.063 -2.541 0.011 -0.284 -0.037\n", + "hh_inc_less75k_4 -0.1140 0.043 -2.661 0.008 -0.198 -0.030\n", + "hh_inc_less75k_5 -0.1488 0.059 -2.513 0.012 -0.265 -0.033\n", + "hh_inc_75kless100k_5 -0.1505 0.073 -2.057 0.040 -0.294 -0.007\n", + "hh_inc_150kplus_1 0.2085 0.079 2.648 0.008 0.054 0.363\n", + "hh_inc_150kplus_2 0.1973 0.057 3.470 0.001 0.086 0.309\n", + "hh_inc_150kplus_4 0.1488 0.050 2.958 0.003 0.050 0.247\n", + "lessGED_GED_1 -0.1978 0.075 -2.627 0.009 -0.345 -0.050\n", + "Assoc_1 -0.2177 0.094 -2.312 0.021 -0.402 -0.033\n", + "Assoc_4 0.1568 0.054 2.916 0.004 0.051 0.262\n", + "HOURS_1 -0.0495 0.002 -20.905 0.000 -0.054 -0.045\n", + "HOURS_2 -0.0312 0.002 -15.814 
0.000 -0.035 -0.027\n", + "HOURS_4 0.0151 0.002 7.985 0.000 0.011 0.019\n", + "HOURS_5 0.0479 0.003 18.447 0.000 0.043 0.053\n", + "noveh_4 -0.4456 0.138 -3.227 0.001 -0.716 -0.175\n", + "hh_size_1per_4 0.1820 0.059 3.106 0.002 0.067 0.297\n", + "tenure_2_4 -0.1070 0.047 -2.298 0.022 -0.198 -0.016\n", + "========================================================================================\n" + ] + } + ], + "source": [ + "m.fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "m.name = 'dwell_work'" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Registering model step 'auto_ownership'\n", + "Registering model step 'dwell_work'\n", + "Registering model step 'TOD_choice'\n", + "Registering model step 'work_TOD_choice'\n", + "Registering model step 'primary_mode_choice'\n", + "Registering model step 'WLCM'\n" + ] + } + ], + "source": [ + "mm.initialize()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving 'dwell_work.yaml': /home/emma/activitysynth/activitysynth/configs\n", + "Model saved to configs/dwell_work-model-object.pkl\n", + "Registering model step 'dwell_work'\n" + ] + } + ], + "source": [ + "m.tags = ['dwell_work','emma']\n", + "mm.register(m)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Estimate the model for Home-to-Work Trip End Times" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "###model with p-values less than .01 (except hours4)\n", + "\n", + "m = SmallMultinomialLogitStep()\n", + "m.name = 'work_TOD_choice'\n", + "m.tables = ['tripsA']\n", + "m.choice_column = 'TOD'\n", + "m.model_expression = OrderedDict([\n", + " ('intercept', [0,1,3,4]), \n", + " \n", + " 
('sector_constr',[2,3]),\n", + " ('sector_mfg',[0,2,3]),\n", + " ('sector_retail',[2]),\n", + " ('sector_transport',[0]),\n", + " ('info',[0,2,3]),\n", + " ('finance',[0,2,3]),\n", + " ('scitech',[0,3]),\n", + " ('sector_edu_serv',[0,2,3]),\n", + " ('sector_healthcare',[0,2,3,4]),\n", + " ('sector_oth_serv',[0,3]),\n", + " ('sector_gov',[2,3]),\n", + " \n", + " ('age_16less25',[2,3,4]),\n", + " ('age_25less40',[0]),\n", + " \n", + " ('female',[0,3,4]),\n", + " \n", + " ('minority',[0,4]),\n", + " ('asian',[2]),\n", + " \n", + " ('hh_inc_less75k',[4]), \n", + "# ('75kless150k')\n", + " ('hh_inc_150kless250k',[0]),\n", + " ('hh_inc_250kplus',[0,2]),\n", + "\n", + " ('lessGED',[0,2,3]),\n", + " ('GED',[0,2,3]),\n", + " ('someBach',[0,2]),\n", + " ('Assoc',[0,2]),\n", + " ('no_higher_ed',[4]),\n", + " ('Bach',[0,2,4]),\n", + "# ('Grad')\n", + " \n", + " ('HOURS',[0,2,3,4]),\n", + "\n", + " ('noveh',[2]),\n", + "\n", + " ('hh_size_1per',[2]),\n", + " \n", + " ('tenure_2',[2]),\n", + " \n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Log-likelihood at zero: -28,878.1445\n", + "Initial Log-likelihood: -28,878.1445\n", + "Estimation Time for Point Estimation: 15.89 seconds.\n", + "Final log-likelihood: -16,303.6766\n", + " Multinomial Logit Model Regression Results \n", + "===================================================================================\n", + "Dep. Variable: _chosen No. 
Observations: 17,943\n", + "Model: Multinomial Logit Model Df Residuals: 17,878\n", + "Method: MLE Df Model: 65\n", + "Date: Sat, 23 Mar 2019 Pseudo R-squ.: 0.435\n", + "Time: 15:43:55 Pseudo R-bar-squ.: 0.433\n", + "AIC: 32,737.353 Log-Likelihood: -16,303.677\n", + "BIC: 33,244.025 LL-Null: -28,878.144\n", + "=========================================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "-----------------------------------------------------------------------------------------\n", + "intercept_0 -3.5975 0.186 -19.354 0.000 -3.962 -3.233\n", + "intercept_1 -0.5666 0.077 -7.322 0.000 -0.718 -0.415\n", + "intercept_3 -1.3824 0.161 -8.574 0.000 -1.698 -1.066\n", + "intercept_4 -5.3857 0.381 -14.143 0.000 -6.132 -4.639\n", + "sector_constr_2 -0.8889 0.127 -7.007 0.000 -1.138 -0.640\n", + "sector_constr_3 -1.5972 0.365 -4.371 0.000 -2.313 -0.881\n", + "sector_mfg_0 0.3854 0.093 4.156 0.000 0.204 0.567\n", + "sector_mfg_2 -0.4664 0.083 -5.639 0.000 -0.628 -0.304\n", + "sector_mfg_3 -0.5908 0.190 -3.109 0.002 -0.963 -0.218\n", + "sector_retail_2 0.5408 0.061 8.850 0.000 0.421 0.661\n", + "sector_transport_0 0.6952 0.113 6.162 0.000 0.474 0.916\n", + "info_0 -0.9847 0.235 -4.193 0.000 -1.445 -0.524\n", + "info_2 0.3645 0.077 4.738 0.000 0.214 0.515\n", + "info_3 -1.7137 0.418 -4.099 0.000 -2.533 -0.894\n", + "finance_0 -0.8665 0.220 -3.943 0.000 -1.297 -0.436\n", + "finance_2 -0.3262 0.088 -3.701 0.000 -0.499 -0.153\n", + "finance_3 -2.2459 0.508 -4.421 0.000 -3.242 -1.250\n", + "scitech_0 -0.4545 0.150 -3.036 0.002 -0.748 -0.161\n", + "scitech_3 -1.6604 0.288 -5.758 0.000 -2.226 -1.095\n", + "sector_edu_serv_0 -1.0373 0.146 -7.129 0.000 -1.323 -0.752\n", + "sector_edu_serv_2 -0.8126 0.058 -14.037 0.000 -0.926 -0.699\n", + "sector_edu_serv_3 -1.2649 0.158 -8.029 0.000 -1.574 -0.956\n", + "sector_healthcare_0 -0.6587 0.133 -4.956 0.000 -0.919 -0.398\n", + "sector_healthcare_2 -0.3930 0.060 -6.573 0.000 -0.510 
-0.276\n", + "sector_healthcare_3 -1.0317 0.172 -5.996 0.000 -1.369 -0.694\n", + "sector_healthcare_4 1.0408 0.176 5.904 0.000 0.695 1.386\n", + "sector_oth_serv_0 -0.7809 0.168 -4.649 0.000 -1.110 -0.452\n", + "sector_oth_serv_3 -0.8287 0.213 -3.893 0.000 -1.246 -0.411\n", + "sector_gov_2 -0.9998 0.073 -13.717 0.000 -1.143 -0.857\n", + "sector_gov_3 -1.2509 0.198 -6.312 0.000 -1.639 -0.863\n", + "age_16less25_2 0.9403 0.073 12.929 0.000 0.798 1.083\n", + "age_16less25_3 1.8572 0.111 16.767 0.000 1.640 2.074\n", + "age_16less25_4 1.0176 0.225 4.530 0.000 0.577 1.458\n", + "age_25less40_0 -0.4045 0.081 -4.998 0.000 -0.563 -0.246\n", + "female_0 -0.6178 0.069 -8.963 0.000 -0.753 -0.483\n", + "female_3 -0.2522 0.092 -2.728 0.006 -0.433 -0.071\n", + "female_4 -0.6180 0.154 -4.026 0.000 -0.919 -0.317\n", + "minority_0 0.1839 0.067 2.738 0.006 0.052 0.316\n", + "minority_4 0.4116 0.147 2.792 0.005 0.123 0.700\n", + "asian_2 0.2826 0.062 4.569 0.000 0.161 0.404\n", + "hh_inc_less75k_4 0.5729 0.157 3.644 0.000 0.265 0.881\n", + "hh_inc_150kless250k_0 -0.3779 0.113 -3.333 0.001 -0.600 -0.156\n", + "hh_inc_250kplus_0 -1.0206 0.289 -3.536 0.000 -1.586 -0.455\n", + "hh_inc_250kplus_2 0.3136 0.078 4.000 0.000 0.160 0.467\n", + "lessGED_0 1.9522 0.159 12.244 0.000 1.640 2.265\n", + "lessGED_2 -0.6981 0.101 -6.942 0.000 -0.895 -0.501\n", + "lessGED_3 0.6164 0.153 4.024 0.000 0.316 0.917\n", + "GED_0 1.5688 0.140 11.184 0.000 1.294 1.844\n", + "GED_2 -0.4103 0.064 -6.461 0.000 -0.535 -0.286\n", + "GED_3 0.4453 0.108 4.124 0.000 0.234 0.657\n", + "someBach_0 1.3898 0.140 9.945 0.000 1.116 1.664\n", + "someBach_2 -0.3665 0.060 -6.159 0.000 -0.483 -0.250\n", + "Assoc_0 1.1672 0.149 7.818 0.000 0.875 1.460\n", + "Assoc_2 -0.4979 0.067 -7.416 0.000 -0.630 -0.366\n", + "no_higher_ed_4 1.2515 0.294 4.256 0.000 0.675 1.828\n", + "Bach_0 0.6586 0.139 4.739 0.000 0.386 0.931\n", + "Bach_2 -0.2646 0.049 -5.435 0.000 -0.360 -0.169\n", + "Bach_4 1.0120 0.306 3.312 0.001 0.413 1.611\n", + 
"HOURS_0 0.0077 0.003 2.858 0.004 0.002 0.013\n", + "HOURS_2 -0.0267 0.002 -17.750 0.000 -0.030 -0.024\n", + "HOURS_3 -0.0482 0.004 -13.521 0.000 -0.055 -0.041\n", + "HOURS_4 -0.0160 0.006 -2.596 0.009 -0.028 -0.004\n", + "noveh_2 0.3511 0.114 3.084 0.002 0.128 0.574\n", + "hh_size_1per_2 0.2556 0.057 4.480 0.000 0.144 0.367\n", + "tenure_2_2 0.2035 0.045 4.566 0.000 0.116 0.291\n", + "=========================================================================================\n" + ] + } + ], + "source": [ + "m.fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "m.name = 'work_TOD_choice'" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving 'work_TOD_choice.yaml': /home/emma/activitysynth/activitysynth/configs\n", + "Model saved to configs/work_TOD_choice-model-object.pkl\n", + "Registering model step 'work_TOD_choice'\n" + ] + } + ], + "source": [ + "m.tags = ['work_TOD_choice','emma']\n", + "mm.register(m)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/activitysynth/notebooks/TOD_Work_Distribution_Estimation.ipynb b/activitysynth/notebooks/TOD_Work_Distribution_Estimation.ipynb new file mode 100644 index 0000000..c96bcb6 --- /dev/null +++ b/activitysynth/notebooks/TOD_Work_Distribution_Estimation.ipynb @@ -0,0 +1,1859 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import OrderedDict\n", + "from 
def _fit_best_distribution(data, distributions, fallback, bins=200, ax=None):
    """Fit each candidate distribution to ``data`` and return the best one.

    "Best" means the smallest sum of squared errors (SSE) between the
    density histogram of ``data`` and the fitted PDF evaluated at the
    histogram bin centres.

    Parameters
    ----------
    data : array-like
        One-dimensional sample to fit.
    distributions : iterable of scipy.stats continuous distributions
        Candidates to try, in order.
    fallback : scipy.stats continuous distribution
        Distribution reported when no candidate produces a finite,
        positive SSE. It is paired with the placeholder parameters
        ``(0.0, 1.0)``, which may not be valid for distributions that
        require shape parameters.
    bins : int or sequence, default 200
        Passed to ``numpy.histogram``.
    ax : matplotlib Axes, optional
        If given, every successfully fitted PDF is drawn on it.

    Returns
    -------
    (str, tuple)
        Name of the best distribution and its fitted parameters
        ``(*shapes, loc, scale)``.
    """
    # Density histogram of the data; evaluate candidates at the bin centres.
    observed, edges = np.histogram(data, bins=bins, density=True)
    centers = (edges[:-1] + edges[1:]) / 2.0

    best_distribution = fallback
    best_params = (0.0, 1.0)
    best_sse = np.inf

    for distribution in distributions:
        try:
            # Many candidates cannot be fitted to arbitrary data; the
            # resulting warnings are noise here.
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore')

                params = distribution.fit(data)
                shape_args, loc, scale = params[:-2], params[-2], params[-1]

                fitted = distribution.pdf(centers, *shape_args, loc=loc, scale=scale)
                sse = np.sum(np.power(observed - fitted, 2.0))

                if ax is not None:
                    # Plotting is best-effort: a drawing problem must not
                    # discard an otherwise valid fit.
                    try:
                        pd.Series(fitted, centers).plot(
                            ax=ax, label=distribution.name, legend=True)
                    except Exception:
                        pass
        except Exception:
            # Deliberate best-effort: skip candidates that fail to fit.
            continue

        # NaN or zero SSE never satisfies this chained comparison.
        if best_sse > sse > 0:
            best_distribution = distribution
            best_params = params
            best_sse = sse

    return (best_distribution.name, best_params)


# NOTE(review): a few candidate names used below (e.g. st.gilbrat, st.trapz,
# st.reciprocal) may be deprecated aliases in newer SciPy releases -- confirm
# against the installed version.

def best_fit_distribution1(data, bins=200, ax=None, distributions=None):
    """Best SSE fit among candidate group 1 (norm ... fisk); falls back to ``norm``.

    ``distributions`` optionally overrides the built-in candidate list.
    Returns ``(name, params)``.
    """
    candidates = distributions if distributions is not None else [
        st.norm, st.skewnorm,
        st.alpha, st.anglit, st.argus, st.betaprime, st.burr, st.burr12, st.cauchy,
        st.chi, st.chi2,
        st.cosine,
        st.erlang,
        st.exponnorm,
        st.exponweib, st.exponpow, st.f, st.fisk,
    ]
    return _fit_best_distribution(data, candidates, st.norm, bins, ax)


def best_fit_distribution2(data, bins=200, ax=None, distributions=None):
    """Best SSE fit among candidate group 2 (gausshyper ... invgauss); falls back to ``foldnorm``.

    ``distributions`` optionally overrides the built-in candidate list.
    Returns ``(name, params)``.
    """
    candidates = distributions if distributions is not None else [
        st.gausshyper,
        st.foldnorm, st.weibull_min, st.weibull_max, st.genlogistic,
        st.gennorm,
        st.genextreme, st.gamma, st.gengamma, st.gilbrat, st.gumbel_r,
        st.gumbel_l, st.hypsecant, st.invgamma, st.invgauss,
    ]
    return _fit_best_distribution(data, candidates, st.foldnorm, bins, ax)


def best_fit_distribution3(data, bins=200, ax=None, distributions=None):
    """Best SSE fit among candidate group 3 (johnsonsb ... ncf); falls back to ``johnsonsu``.

    ``distributions`` optionally overrides the built-in candidate list.
    Returns ``(name, params)``.
    """
    candidates = distributions if distributions is not None else [
        st.johnsonsb, st.johnsonsu, st.ksone, st.logistic, st.loggamma,
        st.lognorm, st.maxwell, st.mielke, st.nakagami, st.ncx2, st.ncf,
    ]
    return _fit_best_distribution(data, candidates, st.johnsonsu, bins, ax)


def best_fit_distribution4(data, bins=200, ax=None, distributions=None):
    """Best SSE fit among candidate group 4 (nct ... weibull_max); falls back to ``t``.

    ``distributions`` optionally overrides the built-in candidate list.
    Returns ``(name, params)``.
    """
    candidates = distributions if distributions is not None else [
        st.nct, st.pearson3, st.powerlognorm, st.powernorm,
        st.rayleigh, st.rice, st.recipinvgauss, st.t,
        st.vonmises, st.vonmises_line, st.wald, st.weibull_min, st.weibull_max,
    ]
    return _fit_best_distribution(data, candidates, st.t, bins, ax)


def best_fit_distribution5(data, bins=200, ax=None, distributions=None):
    """Best SSE fit among candidate group 5 (gompertz ... halflogistic); falls back to ``foldcauchy``.

    ``distributions`` optionally overrides the built-in candidate list.
    Returns ``(name, params)``.
    """
    candidates = distributions if distributions is not None else [
        st.gompertz,
        st.arcsine, st.beta, st.bradford, st.dgamma, st.dweibull, st.expon,
        st.fatiguelife, st.foldcauchy,
        st.genpareto, st.genexpon, st.genhalflogistic, st.halfcauchy, st.halflogistic,
    ]
    return _fit_best_distribution(data, candidates, st.foldcauchy, bins, ax)


def best_fit_distribution6(data, bins=200, ax=None, distributions=None):
    """Best SSE fit among candidate group 6 (semicircular ... wrapcauchy); falls back to ``loglaplace``.

    ``distributions`` optionally overrides the built-in candidate list.
    Returns ``(name, params)``.
    """
    candidates = distributions if distributions is not None else [
        st.semicircular,
        st.halfnorm, st.halfgennorm, st.kappa3, st.laplace, st.levy, st.levy_l,
        st.loglaplace,
        st.lomax, st.pareto, st.powerlaw, st.rdist, st.kappa4, st.invweibull,
        st.reciprocal, st.trapz, st.triang,
        st.truncexpon, st.truncnorm, st.tukeylambda, st.wrapcauchy,
        # left out by the author: st.levy_stable, st.crystalball, st.kstwobign
    ]
    return _fit_best_distribution(data, candidates, st.loglaplace, bins, ax)


def best_fit_distribution7(data, bins=200, ax=None, distributions=None):
    """Best SSE fit among the short-list used for home-to-work (HW) end times.

    Falls back to ``norm``. ``distributions`` optionally overrides the
    built-in candidate list. Returns ``(name, params)``.
    """
    candidates = distributions if distributions is not None else [
        # left out by the author: st.loglaplace, st.dweibull, st.gennorm
        st.burr12, st.fisk, st.skewnorm, st.johnsonsu,
        st.laplace, st.t, st.nct,
    ]
    return _fit_best_distribution(data, candidates, st.norm, bins, ax)


def make_pdf(dist, params, size=10000):
    """Evaluate a fitted distribution's PDF on an evenly spaced grid.

    Parameters
    ----------
    dist : scipy.stats continuous distribution
    params : sequence
        Fitted parameters laid out as ``(*shapes, loc, scale)``.
    size : int, default 10000
        Number of grid points.

    Returns
    -------
    pandas.Series
        PDF values indexed by the x grid. The grid spans the
        0.001-0.999 quantiles when the distribution has shape
        parameters and the 0.01-0.99 quantiles otherwise.
    """
    shape_args, loc, scale = params[:-2], params[-2], params[-1]

    # Trim the tails so the curve is not dominated by extreme quantiles.
    if shape_args:
        lower_q, upper_q = 0.001, 0.999
    else:
        lower_q, upper_q = 0.01, 0.99
    start = dist.ppf(lower_q, *shape_args, loc=loc, scale=scale)
    end = dist.ppf(upper_q, *shape_args, loc=loc, scale=scale)

    grid = np.linspace(start, end, size)
    density = dist.pdf(grid, *shape_args, loc=loc, scale=scale)
    return pd.Series(density, grid)
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# %matplotlib notebook\n", + "\n", + "# # Load data\n", + "data = HW\n", + "\n", + "# Plot for comparison\n", + "plt.figure(figsize=(12,8))\n", + "ax = data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True\n", + " #, color=plt.rcParams['axes.color_cycle'][1]\n", + " )\n", + "# Save plot limits\n", + "dataYLim = ax.get_ylim()\n", + "\n", + "# Find best fit distribution\n", + "best_fit_name, best_fir_paramms = best_fit_distribution7(data, 200, ax)\n", + "best_dist = getattr(st, best_fit_name)\n", + "\n", + "# Update plots\n", + "ax.set_ylim(dataYLim)\n", + "ax.set_title(u'Trips to Work\\n All Best Fitted Distributions')\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "# Make PDF\n", + "pdf = make_pdf(best_dist, best_fir_paramms)\n", + "\n", + "# Display\n", + "plt.figure(figsize=(12,8))\n", + "ax = pdf.plot(lw=2, label='PDF', legend=True)\n", + "data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True, ax=ax)\n", + "\n", + "param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']\n", + "param_str = ', '.join(['{}={:0.2f}'.format(k,v) for k,v in zip(param_names, best_fir_paramms)])\n", + "dist_str = '{}({})'.format(best_fit_name, param_str)\n", + "\n", + "ax.set_title(u'Trips to Work with best-fit distribution \\n' + dist_str)\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "print (dist_str)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Estimate distributions for Actual work dwell times" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "dwell_exact = trips1.work_dwell" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "dwell0 = 
trips1['work_dwell'].loc[trips1['TOD'].isin([0])]\n", + "dwell1 = trips1['work_dwell'].loc[trips1['TOD'].isin([1])]\n", + "dwell2 = trips1['work_dwell'].loc[trips1['TOD'].isin([2])]\n", + "dwell3 = trips1['work_dwell'].loc[trips1['TOD'].isin([3])]\n", + "dwell4 = trips1['work_dwell'].loc[trips1['TOD'].isin([4])]" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x = dwell_exact\n", + "\n", + "bins = np.linspace(0, 27, 100)\n", + "\n", + "plt.hist(x, bins, alpha=0.5, label='dwell_exact')\n", + "plt.legend(loc='upper right')\n", + "plt.figure(figsize=(40,20))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "v = dwell0\n", + "w = dwell1\n", + "x = dwell2\n", + "y = dwell3\n", + "z = dwell4\n", + "\n", + "bins = np.linspace(0, 27, 100)\n", + "\n", + "plt.hist(v, bins, alpha=0.5, label='dwell0')\n", + "plt.hist(w, bins, alpha=0.5, label='dwell1')\n", + "plt.hist(x, bins, alpha=0.5, label='dwell2')\n", + "plt.hist(y, bins, alpha=0.5, label='dwell3')\n", + "plt.hist(z, bins, alpha=0.5, label='dwell4')\n", + "plt.legend(loc='upper right')\n", + "plt.figure(figsize=(40,20))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [], + "source": [ + "#For HW\n", + "def best_fit_distribution12(data, bins=200, ax=None):\n", + " \"\"\"Model data by finding best fit distribution to data\"\"\"\n", + " # Get histogram of original data\n", + " y, x = np.histogram(data, bins=bins, density=True)\n", + " x = (x + np.roll(x, -1))[:-1] / 2.0\n", + "\n", + " # Distributions to check\n", + " DISTRIBUTIONS = [ \n", + " \n", + "# st.foldcauchy,\n", + "# st.cauchy,\n", + "# st.gennorm,\n", + " st.johnsonsu, \n", + "# st.burr,st.f,\n", + "# st.genlogistic,st.invgauss,\n", + "# st.t, \n", + "# st.tukeylambda, st.loglaplace\n", + " \n", + " ]\n", + "\n", + " # Best holders\n", + " best_distribution = st.norm\n", + " best_params = (0.0, 1.0)\n", + " best_sse = np.inf\n", + "\n", + " # Estimate distribution parameters from data\n", + " for distribution in DISTRIBUTIONS:\n", + "\n", + " # Try to fit the distribution\n", + " try:\n", + " # Ignore warnings from data that can't be fit\n", + " with warnings.catch_warnings():\n", + " warnings.filterwarnings('ignore')\n", + "\n", + " # fit dist to data\n", + " params = distribution.fit(data)\n", + "\n", + " # Separate parts of parameters\n", + " arg = params[:-2]\n", + " loc = params[-2]\n", + " scale = params[-1]\n", + "\n", + " # Calculate fitted PDF and error with fit in distribution\n", + " pdf = 
distribution.pdf(x, loc=loc, scale=scale, *arg)\n", + " sse = np.sum(np.power(y - pdf, 2.0))\n", + "\n", + " # if axis pass in add to plot\n", + " try:\n", + " if ax:\n", + " pd.Series(pdf, x).plot(ax=ax, label=distribution.name,legend=True)\n", + " end\n", + " except Exception:\n", + " pass\n", + "\n", + " # identify if this distribution is better\n", + " if best_sse > sse > 0:\n", + " best_distribution = distribution\n", + " best_params = params\n", + " best_sse = sse\n", + "\n", + " except Exception:\n", + " pass\n", + "\n", + " return (best_distribution.name, best_params)" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "johnsonsu(a=0.49, b=0.94, loc=9.29, scale=1.26)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# # Load data\n", + "data = dwell_exact\n", + "\n", + "# Plot for comparison\n", + "plt.figure(figsize=(12,8))\n", + "ax = data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True\n", + " #, color=plt.rcParams['axes.color_cycle'][1]\n", + " )\n", + "# Save plot limits\n", + "dataYLim = ax.get_ylim()\n", + "\n", + "# Find best fit distribution\n", + "best_fit_name, best_fir_paramms = best_fit_distribution12(data, 200, ax)\n", + "best_dist = getattr(st, best_fit_name)\n", + "\n", + "# Update plots\n", + "ax.set_ylim(dataYLim)\n", + "ax.set_title(u'Trips to Work\\n All Best Fitted Distributions')\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "# Make PDF\n", + "pdf = make_pdf(best_dist, best_fir_paramms)\n", + "\n", + "# Display\n", + "plt.figure(figsize=(12,8))\n", + "ax = pdf.plot(lw=2, label='PDF', legend=True)\n", + "data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True, ax=ax)\n", + "\n", + "param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']\n", + "param_str = ', '.join(['{}={:0.2f}'.format(k,v) for k,v in zip(param_names, best_fir_paramms)])\n", + "dist_str = '{}({})'.format(best_fit_name, param_str)\n", + "\n", + "ax.set_title(u'Trips to Work with best-fit distribution \\n' + dist_str)\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "print (dist_str)" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [], + "source": [ + "#For HW\n", + "def best_fit_distribution8(data, bins=200, ax=None):\n", + " \"\"\"Model data by finding best fit distribution to data\"\"\"\n", + " # Get histogram of original data\n", + " y, x = np.histogram(data, bins=bins, density=True)\n", + " x = (x + np.roll(x, -1))[:-1] / 2.0\n", + "\n", + " # Distributions to 
check\n", + " DISTRIBUTIONS = [ \n", + " \n", + " st.cauchy,st.fisk,\n", + " st.hypsecant,st.gennorm,\n", + " st.johnsonsu, \n", + " st.t, \n", + " st.foldcauchy,\n", + " st.tukeylambda\n", + " \n", + " ]\n", + "\n", + " # Best holders\n", + " best_distribution = st.norm\n", + " best_params = (0.0, 1.0)\n", + " best_sse = np.inf\n", + "\n", + " # Estimate distribution parameters from data\n", + " for distribution in DISTRIBUTIONS:\n", + "\n", + " # Try to fit the distribution\n", + " try:\n", + " # Ignore warnings from data that can't be fit\n", + " with warnings.catch_warnings():\n", + " warnings.filterwarnings('ignore')\n", + "\n", + " # fit dist to data\n", + " params = distribution.fit(data)\n", + "\n", + " # Separate parts of parameters\n", + " arg = params[:-2]\n", + " loc = params[-2]\n", + " scale = params[-1]\n", + "\n", + " # Calculate fitted PDF and error with fit in distribution\n", + " pdf = distribution.pdf(x, loc=loc, scale=scale, *arg)\n", + " sse = np.sum(np.power(y - pdf, 2.0))\n", + "\n", + " # if axis pass in add to plot\n", + " try:\n", + " if ax:\n", + " pd.Series(pdf, x).plot(ax=ax, label=distribution.name,legend=True)\n", + " end\n", + " except Exception:\n", + " pass\n", + "\n", + " # identify if this distribution is better\n", + " if best_sse > sse > 0:\n", + " best_distribution = distribution\n", + " best_params = params\n", + " best_sse = sse\n", + "\n", + " except Exception:\n", + " pass\n", + "\n", + " return (best_distribution.name, best_params)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "johnsonsu(a=-0.16, b=1.09, loc=9.37, scale=1.54)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtoAAAH/CAYAAABpZ3orAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3XmcnXV9/v/rmplMZiaZhKwEsgcSEgJhC+DGZpFCFbC1VgUptFqkSlu/tN8WFRWoFmutWCv9KVbqitalX4sIWhcQqAoECEuAkJUkJGTfJpPJbO/fH/d9ksMwy5nJueecM/N6Ph7zYM597uV9nzlDrvmc9/25HRECAAAAUFxVpS4AAAAAGIoI2gAAAEAGCNoAAABABgjaAAAAQAYI2gAAAEAGCNoAAABABgjaADJn+6O2v1jqOgaT7fNtry3yPnt9HW2/1/b9/djfBtvnFqO2/rL9Kdvb0xrm2G46zP0dPJdivt9sV9tusj0jffxN2zcWY9/p/v7d9oeLtT8A5YWgDaBf0tCR++q0vT/v8eXdbRMRfx8R1xS5jgEHWdu1tpttn5q37Erb0c2yZ4pQblHkv462j7VddjdCsP0J21/tY53Zkv5S0nERMS0iVkfE6LznH7J91UBrKPT9VshxIqIjIkZHxLqB1pN3vFf9IRQR742IfzjcfQMoTwRtAP2Sho7RaTBaJ+nivGXf6rq+7ZrBr7J3EdEq6WFJ5+QtPlvS890se6C/+y/Hcy4zMyVtiYhtpS6kN/wcARwugjaAokpHNP/T9rdt75X07vxRztxIrO0/s70x/fo/edu/xvbjtvfY3mz7n7o5xlhJP5I0I280fbLtOtuft73J9ku2P2u7todSH1ASpHPOkvSP3Sx7ID1mj/vOja7b/rDtlyV9uZua/4/tZ2wf3c1zG2yflH5/Vfr6zEsfX2P7+3mv7Vfz6s//hOH0Q7vzrbZ32V5t+4Iezj/nTNvP2d5p+yu2R+bVdYntJ9N9PWT7hLznPpz+7PbYft72ubbfIulvJV2e1vRYN+d6oaR7dehn9+/5o/O2/1HSayV9MX3+c90Vnb5OL9reZvv6Ls/lv98abN/ppE1ll+1HbE/s7ji2a9LX/v22V0p6Pm/ZrLxDTLL9C9t7bd9ne3p6rFd9ypAbNbd9oqQvSDorPd629PlXtKKkP++Vab0/tH1UujxXx/vS53fa/nzPP1YA5YCgDSALvy/pTkljJf1nD+ucLelYSRdJusGHeoX/VdI/RcSY9Pnvd90wInZLuljSurzR9C2SPiZpsaRFkk6R9HpJH+rh+A9IeoMTUyTVpMd6bd6yuTo0ot3XvqdJGi1phqT35x/I9k2SLpd0bkRs7KGW3PmfLWm1Do2sny3pV91sc3b6WuTO/9F0+eskPS1pgqRbJX2lh/PPuVzSm9JzXZg7pzS4f1nSe9N93SHpv5203SyU9D5Jp6Y/p4uU/CzulvRpSd9Kazqt68Ei4id65c/uvV2e/ztJv5F0Tfr8B7vuIy+0XiZpqqSjJU3p4fz+RFKDkp/PBCU/m5Y+jnOJpNMlndjDPt+t5P0wUdKzkr7Rw3r55/W0pGslPZgeb2I353WBpJsl/WF6Xhsldf2U6PcknabkPfhu2+f3dWwApUPQBpCFhyLiRxHRGRH7e1jnpohojognJX1N0rvS5W2S5tqeEBF7I+Lhfhz3ckk3RsTWNHjfLOmKHtb9jZI/BI5XMnL9YEQ0SdqQt2xlXjDua9/t6fOteeds2/8i6TxJb+ylVeJXOhSsz5J0S97jc9R90O7Jqoi4IyI6lLyu02y/KtTl+XxEbEhr+wcd+jlcLenfIuLRtE/5jnT56em51klaa
LsmItZExOp+1Hi43i7phxHxvxFxQNKHJbmHdduUBOJj0/NYkv6ce/MPEbGzl/fuj7oc++zcyPNhulzSv0fE0ohokXS9pHNsT8tb55aI2B0RayXdL+nkIhwXQEYI2gCysL6f67yoZFRSSkYgj5e0PP2Y//f6cdyj0n3l73dqdytGRLOkJUpGhs+W9GD61EN5y/L7s/va9+a09zvfBCUjwp+IiD291P0rJWFtqpIQ+30lLQbHKgm0T/eybVcv533fnP53dHcrpnr6OcyU9Hdpu8Uu27uUvAZTI2K5pL9W8sfGFidtQt2OKKctJbn2lif7cR69OTq/7jQ47+hh3a9K+rmk7zpp+fmU++697uv9m3/s3ZJ269DrdjiOVt57LH3P7NQr32ddf769/WwBlBhBG0AWCpkNY3re9zOUfEyuiFgeEe+UNFnSP0v6ge26Ao+xSUlAzN/vS73UkOvTPkuHgvaDecvyg3Zf++6unm1K2hC+afs1PRUREc8rCdgfkPSriNilJDj+qZKR9u72XawZR7r9OSgJkzdFxBF5Xw0R8d205m9GxOslzZZUrWQU/lV1RcT9ee0tJxVYU1/ntim/btujJY3vdkfJJww3RsQCSW9Q0taUmx2np+P0dfz8Y49V8snIRkn70mUNeevm/wHS1343Ku89ZrtR0jj1/h4GUMYI2gBK5aO269N+2yuV9nLbvsL2xIjoVDJSGJI6u9l+s6SJaRjJ+bakj6UXu02S9FFJ3+ylhgcknS/pyHSUVkpGtM9X0p+bH7T7u29JUkT8QtIfK+lvXtxHLdfqUJvI/V0ed7VFUtie01cNfbjW9lTbE5T0Z+d66m+X9AHbp6c966NtX2x7lO0Fts9zcuHk/vSrI91us6RZtntq5SjEZkm9ndf3JF1q+7VpDZ9QDyHW9httn2C7StIeJa0k+bUO5PW7uMuxH4qITUpGm19W0jtdbftqvfKPs81KWnlG9LDfb0t6j+1F6b5vUfKH1oYB1AigDBC0AZTKQ0ou+vsfJX2nv0yX/56k55zMWPIZSe/opiVDEfGMpB9IWpu2NkyWdJOkJ5W0WjylZAq/W7pu26WGcUr6tXP73azk4/qNEbEmb93+7ju/1p9I+jNJd9vuqaf2V5IadSjcd33cdZ970+M/nJ5/byG+N99W0lqxStJyJX3aSnvj/1zS/6fk9XhByUWAkjRSyUWP25QEy3GSbkif+09JtZJ22H5kgDV9TtK70vP6bNcnI+IpSX8l6btKRntzAbc7R0v6LyUhe1l6rt8u5Di9+KaSgL1NycWxV6R1hZKf84fT545V8j7J+ZmkFZI2O5mdput5/URJO87/UzJqP0OHRt8BVCB3/4kkAGQj7TteERGHM+IJAEDZY0QbAAAAyABBGwAAAMgArSMAAABABhjRBgAAADJA0AYqgO1lebco72mdc20PmWnAbL/P9udKXcdgsD3LdhRwI5WKYPsq2w+Vuo5KZ/urtj9R6joGwvZf2v5UqesASo2gDVSAiFgYEfeXuo7BYrtWyXRx/zQIx/od28/bbrZ9n+2Zvaz797aftt1u+8Zunp9k+850uridtr+VafGvPv4Jtn9qe5vtPvsCbZ9s+7H03B/Ln3rQ9hG2v2Z7S/p1Y6bFHwbb77W9Mr375E9s93iXRtvftL3J9h7bL9h+b95zl+fdxbIpfV3C9mmDcybZs32t7SW2D9j+ah/rXmW7o8trcm43652Tvk75fxTcrmQ+8cnFPQOgshC0AZSjSyU9HxGZ3hHP9kQlcyx/VMmdBZfo0A1burNS0t9K+nEPz/+XkvmcZyq5s+VnilZsYdqUzC39nr5WTP+Y+W8lc0KPk/Q1JTfVqU1XuVVSg6RZks6QdIXtP8mg5sNi+xwlc39fquRnuEaH5snuzi2SZkXEGCV37fxELkhHxLfy7mI5WtL7lcz1/niW5zDINiqZA/yOAtf/Tf5r0vUP/vTmO/+iV84XrohokXSvkps1AcMWQRuoALbX2
j7f9kjbn7O9Mf36XHoHufx1/zodgdyUH4zSj6Fvs/1j23ttP2z7mPQ527413W637adsn5A+N9b2121vtf2i7Ruc3GXvYIuA7c+kI7hrbF+Ud8yrbK9Oj7fG9uXp8httfzNvva6tExepyx0RbX/P9stpfQ/YXliEl/YPJC2LiO+lweBGSSfZnt/dyhHxtYi4V9Lers/ZvkDJrbn/b0Tsjoi2iHiin/X8afpz3WT7r/u5be729V9RcmOWvpwrqUbS5yLiQER8XpIlvTF9/mJJn46I5ohYK+krSm4J32+2X2f70fRn96jt1+U9N972f6TnvdP2D/u5+4slfS8ilqU3Nvp7SWfn3ttdpesdyD1Mv7pdV8kdS78eBcwa0MfvUL3tf05/f3anvzP16XMFv69tv8X2UiefmPza9qK+6uoqIv4rIn4oaXt/t+3BXyu56dTz3Tx3v6Q3F+k4QEUiaAOV5SOSXiPpZEknKRlpvCHv+SmSxkqaqmRU8zbb4/Kef5eSOxyOUzI6+8l0+QWSzpY0T9IRkt6hQ/8Q/2u6zzmSzlEyQpU/snmmkjsKTlRyt8CvpKFjlKTPS7ooIholvU7S0gLP88R0n/nulTRXyUjx45IOtmXYvj4NH91+9XKchUru9ihJioh9Su6QOJAQ/5q05q/Z3p4GynP6uY/zlJzjBZKut32+JNm+rLfzsz1jAPUulPRUlxD5lF557u7y/Qn9PYjt8Uo+Afi8pAmSPivpx05u+S5J31Aycr5Qyc/21nS7GX2c82V5dXWtU73VavvfbDcrCYebJN3TzTozlfxOfL3AU+3td+gzkk5T8jswXsmnIp3pcz2+r7vUc6qSUej3KXkdvyTpLqd/aNu+u5fX6u4Cz6E7pzhpRXrB9kfz/hjOvUZ/quRult15Tsn/p4Bhi6ANVJbLJd0cEVsiYquS0HxF3vNt6fNtEXGPpCZJx+U9/18R8UhEtCv5B/3kvO0aJc1XMu3ncxGxyXa1ksDwoYjYm45s/nOXY74YEV+OiA4l7QdHSToyfa5T0gm26yNiU0QUMtIqJUHlFaPGEXFHWsMBHRp5Hps+96mIOKKnr16OM1rS7i7LdqevRX9NUxK27lPyB88/K2nFmNiPfdwUEfsi4mlJ/6HkDyNFxJ29nV9ErBtAvX2d+0+UhP1GJ3fz/FMlgbi/3qzkTqDfiIj2iPi2koB7se2jlHx6cU1E7Ezft79Kz3ldH+d8Z7r/eyT9ke1F6Sjxx5SMUvdYa0S8Pz3Ps5S0+xzoZrU/lvRgRKwp8Dx7+h2qUvLa/VVEvBQRHRHx69yoem/v6y7+TNKXIuLhdB9fS+t+Tbqft/TyWr2lwHPo6gElf7BMlvQ2Je/H/5v3/OclfTQimnrYfq+SP9KBYYugDVSWoyW9mPf4xXRZzvY0ROc0KwlUOS9391xE/FLSFyTdJmmz7dttj1EySl3bzTGndrfPiGhOvx2djg6/Q9I1kjY5aVnptiWjGzuVF3ZtV9v+lO1VtvdIWps+VXCITUdID17UlS5ukjSmy6pj1E1rSAH2S1obEV9JA+N3JK2X9Pp+7GN93vddf7bF1te5/6WSc1qhpJf725IGMqtN1/esdOg9NF3SjojYOYD9SpIi4heSPi7pB+l+1yo5h15rTcPqQ0r+QPrzblb5YyV/OBZaR2+/Q3VKPil5hX6+r2dK+usun9RMV4bvkYhYHRFrIqIz/ePvZkl/mNZ+saTGiOjtmoZGvfqPOWBYIWgDlWWjkn9wc2akyw5bRHw+Ik5T8hH+PCUjV9uUjNR1PWZBFylGxE8j4k1KRrmfl/Tl9Kl9euWI45Qumz6V1pBzmZKL3c5XMkI2K11uSbL9Yb9yZoSmrqE6HSHNv9BNSnqZD360nba7HKPCepy7ekrJSOrhmJ73/cGfrV89G0bXr4G0jiyTtMh2ftvFonS5ImJHRFweEVMiYqGSfy8eGcBxur5npUPvofWSxtt+1acOXf8w6ubr8ty6EXFbRMyNi
MlKAneNpGcKrK9GXXq0bb9eSYD9fqEnmdbR0+9QS9djpHp9X3exXtInu4xUN6SfEMj2vb28Vvf25zx6O8W82n5H0mIn/eUvK/mj+oO2/ztv/QXKa80ChiOCNlBZvi3pBifTyE1U8jH5N/vYpk+2T7d9ppMZBPYpCQYdaTvIdyV9Mm0hmCnpukKOaftI25ek4fWAkhHUjvTppUouWJuRfkz+oS6b36OkHzynMd3HdiUB/R/yV46If4hXzowwuptQ3Z3/p6S15W2265S8nk9FRHcXdsn2iHS9Kkk1tuvS9prcvsbZvjIdqfxDJaO2/5tue6Pt+3upRZI+arvByQVxf6J0BpToMhtGN1/r0mM4ra82fVznLhfL5rlfyc/jL51cZHttuvyX6bbH2J6QnstFkq5WMltF7rW434VN+XePpHlO+sxrbL9D0vGS7o6ITUp6lP/N9rj09T07Ped1fZzzt/LO8YT03GcomVbuX7obJbc92fY7bY9Oz+t3lbRD/LLLqldK+kFE7O2y/VW213Z3kr38DnUq6a3+rO2j0+O+Nv259Pq+7uLLkq5Jj2Hbo2y/2XZj+npd1MtrlX+Bck36HqmWVJ2+ft3O3277IttHpt/PVzI7Ty5If1TJHxMnp193pTXmX79xjpKfLzBsEbSByvIJJVPQPSXpaSUXTxXjhhZjlPwjuVPJx+/bdWhqur9QEhxWS3pI0p0qbGqwKiUzEmyUtEPJP7rvl6SI+JmSEPmUpMckdb1Y60eS5vvQfMhfT+t6SdKzkn7b3xPsTiR97m9TclHoTiUXdr4z97ztL9r+Yt4mX1bSTvEuJRem7lfarx4RO5RMF/c3Sj4uv17SpRGxLd12utLQ3YtfKblI9ReSPhMR/9PPU5qZ1pQbkd+vvItK01HPD6f1tkp6q5IWiV1K+ojfmi6Xkov3nlbShnGLpMvjlT32hZyPImK7pLcoeS9sV3Ih4FvyXpcrlHxq8rykLZI+2L9TVp2S92STkhH33ygJgblz/nDeiG4oaRPZoOTn/RlJH4yI/85bv07SH6n7tpHezrm336G/UfJaPqrkd+Eflfx+FPy+joglSvq0v5AeY6Wkq3pavxc3KHlfXC/p3en3N0iv+BQh9wnJ70h6yvY+JX8w/ZfSPwbSvvKXc1/pfvalvwe51/H31I/2G2AocvQ9axGAErO9TtK7I+KBUtcyWGxfLen4iOhv8CpLtpdK+p00eFY029OUTKn32lLXMphs/4+SixqfK3Ut5c72X0iaHhF/W+pagFIiaANlzvYkSeskHRcDm10CAACUAK0jQBmzfbqSWR/+lZANAEBlYUQbAAAAyAAj2gAAAEAGCNoAAABABrqdO7MSTZw4MWbNmlXqMgAAADDEPfbYY9siYlJf6w2ZoD1r1iwtWbKk1GUAAABgiLP9YiHrZdo6YvtC28ttr7R9fTfPX2P7adtLbT9k+/h0+Szb+9PlS7vcMAIAAAAoe5mNaKe3Jb5N0puU3IXrUdt3RcSzeavdGRFfTNe/RNJnJV2YPrcqIk7Oqj4AAAAgS1mOaJ8haWVErE5v6fsdSZfmrxARe/IejlJye1wAAACg4mXZoz1V0vq8xxskndl1JdsfkHSdpFpJb8x7arbtJyTtkXRDRDyYYa0AAAAoQFtbmzZs2KCWlpZSl5K5uro6TZs2TSNGjBjQ9lkGbXez7FUj1hFxm6TbbF8m6QZJV0raJGlGRGy3fZqkH9pe2GUEXLavlnS1JM2YMaPY9QMAAKCLDRs2qLGxUbNmzZLdXdwbGiJC27dv14YNGzR79uwB7SPL1pENkqbnPZ4maWMv639H0lslKSIORMT29PvHJK2SNK/rBhFxe0QsjojFkyb1OcMKAAAADlNLS4smTJgwpEO2JNnWhAkTDmvkPsug/aikubZn266V9E5Jd+WvYHtu3sM3S1qRLp+UXkwp23MkzZW0OsNaAQAAUKChHrJzDvc8M2sdiYh229dK+qmkakl3RMQy2zdLW
hIRd0m61vb5ktok7VTSNiJJZ0u62Xa7pA5J10TEjqxqBQAAQOWorq7WiSeeqPb2di1YsEBf+9rX1NDQcHB5W1ubampqdOWVV+qDH/ygqqqqdP/99+vSSy892AYyceJE/fznP8+0zkxvWBMR90i6p8uyj+V9/1c9bPcDST/IsjYAAABUpvr6ei1dulSSdPnll+uLX/yirrvuulcs37Jliy677DLt3r1bN910kyTprLPO0t133z1odWZ6wxoAAAAgS2eddZZWrlz5quWTJ0/W7bffri984QuKKM0M0kPmFuwAAAAYXLOu/3Em+137qTcXtF57e7vuvfdeXXjhhd0+P2fOHHV2dmrLli2SpAcffFAnn5zcD/Htb3+7PvKRjxSn4B4QtAEAAFBR9u/ffzAwn3XWWXrPe97T47r5o9mD3TpC0AYAAMCAFDryXGz5vdi9Wb16taqrqzV58mQ999xzg1DZK9GjDQAAgCFn69atuuaaa3TttdeWbDpCRrQBAAAwJORaSnLT+11xxRW67rrrSlYPQRsAAAAVpampqdvlHR0dPW5z7rnn6txzz82oou7ROgIAAABkgKANAAAAZICgDQAAAGSAHm0Aw9d9t/S9znkfyr4OAMCQxIg2AAAAkAFGtAEMPYWMVAMAkDFGtAEAAFBRqqurdfLJJ2vhwoU66aST9NnPfladnZ29brN27Vrdeeedg1RhghFtAAAADFyxP0Us4NqY/Fuwb9myRZdddpl2796tm266qcdtckH7sssuK1qpfWFEGwAAABVr8uTJuv322/WFL3xBEaG1a9fqrLPO0qmnnqpTTz1Vv/71ryVJ119/vR588EGdfPLJuvXWW3tcr5gY0QYAAEBFmzNnjjo7O7VlyxZNnjxZP/vZz1RXV6cVK1boXe96l5YsWaJPfepT+sxnPqO7775bktTc3NztesVE0AYAAEDFiwhJUltbm6699lotXbpU1dXVeuGFF7pdv9D1DgdBGwAAABVt9erVqq6u1uTJk3XTTTfpyCOP1JNPPqnOzk7V1dV1u82tt95a0HqHgx5tAAAAVKytW7fqmmuu0bXXXivb2r17t4466ihVVVXpG9/4hjo6OiRJjY2N2rt378HtelqvmBjRBgAAQEXZv3+/Tj75ZLW1tammpkZXXHGFrrvuOknS+9//fr3tbW/T9773PZ133nkaNWqUJGnRokWqqanRSSedpKuuuqrH9YrJuX6WSrd48eIodgM7gApVzKmmuAU7ALzCc889pwULFpS6jEHT3fnafiwiFve1La0jAAAAQAYI2gAAAEAGCNoAAABABgjaAAAA6Jehco1fXw73PAnaAAAAKFhdXZ22b98+5MN2RGj79u2HNb820/sBAACgYNOmTdOGDRu0devWUpeSubq6Ok2bNm3A2xO0AQAAULARI0Zo9uzZpS6jItA6AgAAAGSAoA0AAABkgKANAAAAZICgDQAAAGSAoA0AAABkgKANAAAAZICgDQAAAGSAoA0AAABkgKANAAAAZICgDQAAAGSAoA0AAABkgKANAAAAZICgDQAAAGSAoA0AAABkgKANAAAAZICgDQAAAGSAoA0AAABkgKANAAAAZICgDQAAAGSAoA0AAABkgKANAAAAZICgDQAAAGSAoA0AAABkINOgbftC28ttr7R9fTfPX2P7adtLbT9k+/i85z6Ubrfc9u9mWScAAABQbJkFbdvVkm6TdJGk4yW9Kz9Ip+6MiBMj4mRJn5b02XTb4yW9U9JCSRdK+rd0fwAAAEBFyHJE+wxJKyNidUS0SvqOpEvzV4iIPXkPR0mK9PtLJX0nIg5ExBpJK9P9AQAAABWhJsN9T5W0Pu/xBklndl3J9gckXSepVtIb87b9bZdtp2ZTJgAAAFB8WY5ou5tl8aoFEbdFxDGS/k7SDf3Z1vbVtpfYXrJ169bDKhYAAAAopiyD9gZJ0/MeT5O0sZf1vyPprf3ZNiJuj4jFEbF40qRJh1kuAAAAUDxZBu1HJ
c21Pdt2rZKLG+/KX8H23LyHb5a0Iv3+LknvtD3S9mxJcyU9kmGtAAAAQFFl1qMdEe22r5X0U0nVku6IiGW2b5a0JCLuknSt7fMltUnaKenKdNtltr8r6VlJ7ZI+EBEdWdUKAAAAFFuWF0MqIu6RdE+XZR/L+/6vetn2k5I+mV11AAAAQHa4MyQAAACQAYI2AAAAkAGCNgAAAJABgjYAAACQAYI2AAAAkAGCNgAAAJABgjYAAACQAYI2AAAAkAGCNgAAAJABgjYAAACQAYI2AAAAkAGCNgAAAJABgjYAAACQAYI2AAAAkAGCNgAAAJABgjYAAACQAYI2AAAAkAGCNgAAAJABgjYAAACQAYI2AAAAkIGaUhcAAJXsR09u1Od/sUIvbm/WCVPH6O8unK8z50wodVkAgDLAiDYADNC/3b9Sf/HtJ7RiS5NaOzr1+LpduvzfH9a9T28qdWkAgDJA0AaAAbhv+RZ9+ifLZUs3XbJQSz/2Jr3nDbPV3hm67rtPasXmvaUuEQBQYgRtAOinlrYOffSHz0iS/uaC43Tl62bpiIZa3fDmBfr9U6Zqf1uHPvLDZxQRJa4UAFBKBG0A6Kev/nqtNuzcr/lTGvW+s+ccXG5bN168UBNG1eqRNTv0k2deLmGVAIBSI2gDQD+0tnfqjofWSJKuv2i+aqpf+b/RsQ0j9Ffnz5Uk3Xb/Ska1AWAYI2gDQD/c9eRGbdl7QPOnNOqceZO6XeePFk/XxNG1eualPfr1qu2DXCEAoFwQtAGgH7718IuSpD99w2zZ7naduhHVuuI1syRJdz6ybrBKAwCUGYI2ABRo7bZ9emLdLo2qrdbFi47udd23L56mKkv/s+xl7djXOkgVAgDKCUEbAAr0w6UvSZJ+94Qpqq+t7nXdo4+o19nzJqmtI/TDJ14ajPIAAGWGoA0ABYgI/ffSjZKk3z9lakHb/MGp0yRJ93ADGwAYlgjaAFCAVVubtGbbPo0fVavXHTOxoG3eOH+yamuq9Ni6ndqypyXjCgEA5YagDQAF+OXzWyRJ586bpOqq7i+C7Gr0yBqdPXeSIqSfLmNObQAYbgjaAFCAXzyXBO03Lpjcr+0uOmGKJOknBG0AGHYI2gDQh93NbVry4k5VV1lnze1+7uyevHH+ZNnSo2t2qrm1PaMKAQDliKANAH14aOU2dXSGFs8cp7H1I/q17bhRtVo07Qi1dnTqt6u5eQ0ADCcEbQDow29Wb5Mknd3DnSD7cs7c5OJszt78AAAgAElEQVTJXy3fWrSaAADlj6ANAH347eodkqTXzBk/oO1zAf2BFduKVhMAoPwRtAGgF1v3HtDKLU2qH1GtE6ceMaB9nDz9CDXW1WjNtn1av6O5yBUCAMoVQRsAevHwmqSvevGscaqtGdj/Mmuqq/TaORMkSb9ZRZ82AAwXBG0A6EXuAsbXpEF5oM5Mt394zY7DrgkAUBkI2gDQi8Ptz845c3ay/SNrGdEGgOGCoA0APdjVWq2VW5o0sqZqwP3ZOQuOGqPGkTVav2O/Nu3eX6QKAQDljKANAD1YurteknTi1LED7s/Oqa6yFs8aJ0l6hPYRABgWCNoA0IOluxskJbOGFMMZs5M+bYI2AAwPBG0A6METuaA9o1hBO+nT5oJIABgeCNoA0I0I6ckij2jnWlBWbmnS7ua2ouwTAFC+CNoA0I21zbXa1VajiaNHauoR9UXZZ21NlRYePUaS9OSGXUXZJwCgfNWUugAAKEe5/uxTZhwh231vcN8tfa9z3od08vQj9MS6XVq6ftfBW7MDAIYmRrQBoBtP7Cpu20hObn9L1zOiDQBDHUEbALrx5J5sgvYp05Mp/pau36WIKOq+AQDlhaANAF20d0rP762TJJ1w9Nii7nv6+HqNH1WrHftatX4HN64BgKGMoA0AXaxpHqkDnVWaVt+qsQ0jirpv2zolHSV/Yv3Oou4bAFBeMg3ati+0vdz2StvXd/P8dbaftf2U7V/Ynpn3XIftpenXX
VnWCQD5lu1JZhk5vjGbEedcO8oT6+jTBoChLLNZR2xXS7pN0pskbZD0qO27IuLZvNWekLQ4Ippt/7mkT0t6R/rc/og4Oav6AKAnz+5NgvbCrIJ2egMcpvgDgKEtyxHtMyStjIjVEdEq6TuSLs1fISLui4jm9OFvJU3LsB4AKMiyPUl/9vFjWjLZ/4lTk77v5zbtUXtHZybHAACUXpZBe6qk9XmPN6TLevIeSffmPa6zvcT2b22/tbsNbF+drrNk69ath18xgGEv4tCIdlatI0c01GrauHq1tHVq9bZ9mRwDAFB6WQbt7u7w0O1cVrbfLWmxpH/KWzwjIhZLukzS52wf86qdRdweEYsjYvGkSdz4AcDhe/nACO1sq9ERI9p1dF12t0nP3SFy2cbdmR0DAFBaWQbtDZKm5z2eJmlj15Vsny/pI5IuiYgDueURsTH972pJ90s6JcNaAUBSXttI434VckPIgcpNG/jMS3uyOwgAoKSyDNqPSppre7btWknvlPSK2UNsnyLpS0pC9pa85eNsj0y/nyjp9ZLyL6IEgEwcvBAyo/7snBOm5oI2I9oAMFRlNutIRLTbvlbSTyVVS7ojIpbZvlnSkoi4S0mryGhJ33MydLQuIi6RtEDSl2x3Kvlj4FNdZisBgExkPbVfzsKpSevIsxv3qLMzVFWV4fA5AKAkMgvakhQR90i6p8uyj+V9f34P2/1a0olZ1gYA3Xk2vSPkwjHZBu3JjXWa1DhSW/ce0PqdzZo5YVSmxwMADD7uDAkAqX3tVVq/f6Rq3anZDQf63uAwnZBeEEmfNgAMTQRtAEitaBopSZoz6oBGDML/HXN92sw8AgBDE0EbAFIvNCVtI/NGZ3shZE5uir9nNjKiDQBDEUEbAFK5oH1c42AF7XRE+6Xdiuj2NgMAgApG0AaA1PI0aM8dnX1/tiRNG1evsfUjtH1fqzbvGZxjAgAGD0EbAFK5Hu3jBql1xLbmT2mUJD3/Mu0jADDUELQBQNLutiq9fKBWdVWdml7fOmjHXXBU0qf9/Mt7B+2YAIDBQdAGAEkrDraNtGgw7x1zcER7EyPaADDUELQBQPn92YPTNpIznxFtABiyCNoAoLwZRwbpQsiceUeOli2t2tqk1vbOQT02ACBbBG0A0ODPoZ3TUFujmeMb1NYRWr2taVCPDQDIFkEbAFS6oC1J86ek7SObaB8BgKGEoA1g2Nt2oFrbW2s0urpDR9e1Dfrx5x+VXBD5HFP8AcCQQtAGMOy9kHchpAdxxpEcRrQBYGgiaAMY9lYcbBspzd0ZFxzFTWsAYCgiaAMY9lbtS+4IOdhT++VMH9eghtpqbd5zQDv3Dd7NcgAA2SJoAxj2VjcnQXvOqNKMaFdVWfOOzI1q0z4CAEMFQRvAsLc6HdGe01CaoC3RPgIAQxFBG8Cw1txubWyp1Qh3alp96do2uCASAIYegjaAYW1N2jYys6FVNSX8P+L8KYxoA8BQQ9AGMKzl2kZml6g/Oyc3ov3C5iZ1dkZJawEAFAdBG8CwVg792ZI0tmGEJjeO1P62Dr20a39JawEAFAdBG8Cwlptx5JgSj2hLOjjzyHJmHgGAIYGgDWBYOziiXQZBe+6RoyVJL2whaAPAUEDQBjBsRZRX0M6NaK/Y3FTiSgAAxUDQBjBsbTlQo30d1TpiRLvG13aUuhzNy41ob2ZEGwCGAoI2gGFrVRmNZkvSsZOTEe2VW5rUwcwjAFDxCNoAhq2Dt14v8YwjOWPrR2jKmDodaO/U+h3NpS4HAHCYCNoAhq1y6s/OmZfeuIb2EQCofDWlLgAASqUsg/bk0Xrgha164Tc/1gVbtvS84nkfGryiAAADwog2gGErF7TLYQ7tnNzMIy80jSxxJQCAw0XQBjAsHei0NuyvVZVCMxpaS13OQQfn0m6qK3ElAIDDRdAGMCyta65Vp6zp9a0aWVU+M3zMTUe0V+8bqfbOEhcDADgsBG0Aw1K5Te2XM3pkjabWtao1qvTif
tpHAKCSEbQBDEvleCFkztzRLZKkFfRpA0BFI2gDGJbKOWjPG53URJ82AFQ2gjaAYengzWrKMGjnRrQJ2gBQ2QjaAIalcpzaL+c4gjYADAkEbQDDzo7Wau1qq9Ho6g5Nqm0vdTmvcuyoJGiv2VerNmYeAYCKRdAGMOzk92fbJS6mGw01oen1B9QWVVrbzAWRAFCpCNoAhp1c0J5dhm0jOVwQCQCVj6ANYNg5OId2Q/kGbS6IBIDKR9AGMOyU84wjOfOYSxsAKh5BG8CwU85zaOfQOgIAlY+gDWBYae+U1jXXSpJml3HryDGjWmSF1jSP1IHOMrxiEwDQJ4I2gGFl/f5atUWVjq5rVUNNlLqcHtVXh2bUt6ojrDX7aB8BgEpE0AYwrFRC20jOvMbcBZEEbQCoRARtAMPKwQshy7htJOfQBZH0aQNAJSJoAxhWKmpEexQXRAJAJSNoAxhWVlVQ0J7LiDYAVDSCNoBhpZJGtOeMOqAqhdY216qlg5lHAKDSELQBDBt72qq0rXWE6qo6dXRdW6nL6VNddWhWQ6s65YN/IAAAKkemQdv2hbaX215p+/punr/O9rO2n7L9C9sz85670vaK9OvKLOsEMDzkwuqshgOqqpABYtpHAKByZRa0bVdLuk3SRZKOl/Qu28d3We0JSYsjYpGk70v6dLrteEkfl3SmpDMkfdz2uKxqBTA85GYcOaYC2kZycjOPvMCINgBUnJoM932GpJURsVqSbH9H0qWSns2tEBH35a3/W0nvTr//XUk/i4gd6bY/k3ShpG9nWC+AIW7NQPqz77slo2oKkxvRXr6XEW0AqDRZto5MlbQ+7/GGdFlP3iPp3gFuCwB9qqQLIXOOo3UEACpWliPa3XVAdnu/Y9vvlrRY0jn92db21ZKulqQZM2YMrEoAw0YlTe2XM3tUq2ocWre/Vvs7rPrq8r1tPADglbIc0d4gaXre42mSNnZdyfb5kj4i6ZKIONCfbSPi9ohYHBGLJ02aVLTCAQw9nSGtba68oF1bFZrVcEAha9U+RrUBoJJkGbQflTTX9mzbtZLeKemu/BVsnyLpS0pC9pa8p34q6QLb49KLIC9IlwHAgGxsGaGWzipNqm1TY01nqcvpl4MXRDZxQSQAVJKCgrbtE/q744hol3StkoD8nKTvRsQy2zfbviRd7Z8kjZb0PdtLbd+VbrtD0t8rCeuPSro5d2EkAAxEJfZn58wdza3YAaASFdqj/cV0VPqrku6MiF2FbBQR90i6p8uyj+V9f34v294h6Y4C6wOAXlVy0J7HBZEAUJEKGtGOiDdIulxJ3/QS23faflOmlQFAEQ2FoE3rCABUloJ7tCNihaQbJP2dktlBPm/7edt/kFVxAFAsuZvVzGmovKA9q+GARrhT6/ePVHN7hdzSEgBQcI/2Itu3Kum1fqOkiyNiQfr9rRnWBwBFUckj2iOqkmn+JGklM48AQMUodET7C5Iel3RSRHwgIh6XpIjYqGSUGwDKVnO7tbGlViPcqen1raUuZ0AOtY8QtAGgUhR6MeTvSdofER2SZLtKUl1ENEfENzKrDgCKYE3aNjKjoVU1WU5qmiGCNgBUnkL/yfm5pPq8xw3pMgAoe5XcNpLDBZEAUHkKDdp1EdGUe5B+35BNSQBQXAeDdgVeCJmTm0ubKf4AoHIUGrT32T4198D2aZL2Z1MSABRXbsaRYyp4RHtm/QHVulMvtdSqqb1C+18AYJgptEf7g0ru3rgxfXyUpHdkUxIAFNdQaB2pqUrqf76pXiuaRuqUUhcEAOhTQUE7Ih61PV/ScZIs6fmIaMu0MgAogoihEbSlpE87Cdp1BG0AqACFjmhL0umSZqXbnGJbEfH1TKoCgCLZcqBG+zqqdcSIdo2v7Sh1OYdlXtqnzcwjAFAZCgratr8h6RhJSyXl/qUKSQRtAGVt1RAZzZakuUzxBwAVpdAR7cWSjo+IyLIYACi2Sr71ele5Kf5W7GOKPwCoBIVeuv6MpClZF
gIAWRgq/dlScsOdkVWd2tRSqz0tXCYDAOWu0BHtiZKetf2IpIP/WkXEJZlUBQBFMpSCdrWlY0cd0LK99Vqxea9Omzm+1CUBAHpRaNC+McsiACAruaBdyXNo55s3ukXL9tbrhc1NBG0AKHOFTu/3K9szJc2NiJ/bbpBUnW1pAHB4WjqsDftrVe3QjIbWUpdTFAcviNy8t8SVAAD6UlCPtu0/k/R9SV9KF02V9MOsigKAYnixuVadsqbXt2pk1dC4lvvgBZGbm0pcCQCgL4VeDPkBSa+XtEeSImKFpMlZFQUAxTCU+rNzDs6lzYg2AJS9QoP2gYg4+Lmr7Rol82gDQNnKTe03VPqzJWlafavqqzu1Ze8B7W5m5hEAKGeFBu1f2f6wpHrbb5L0PUk/yq4sADh8q/YlN3YZCnNo51RZOnZU2qe9hVFtAChnhQbt6yVtlfS0pPdJukfSDVkVBQDFMJTuCpmPCyIBoDIUOutIp6Qvp18AUPYi8qf2aylxNcV1XNqnzQWRAFDeCgratteom57siJhT9IoAoAi2ttZob3u1xtS0a0JtR6nLKap5jGgDQEUo9IY1i/O+r5P0dkncKQFA2cqfccQucTFFRusIAFSGgnq0I2J73tdLEfE5SW/MuDYAGLBVQ+yOkPmm1rVpVG21tjW1ase+oXEjHgAYigptHTk172GVkhHuxkwqAoAiGIpzaOfY0rFHNurJ9bv0wua9es2cCaUuCQDQjUJbR/457/t2SWsl/VHRqwGAIlk9hEe0JWne5NF6cv0urSBoA0DZKnTWkfOyLgQAimkot45I0rwjkw8VX2DmEQAoW4W2jlzX2/MR8dnilAMAh+9Ap7Vhf62qFJrRMDR7mOdNSYL2ci6IBICy1Z9ZR06XdFf6+GJJD0han0VRAHA4XmyuVaesmfUHNLLqVTOTDgkL0qD9/KY9igh5qE2tAgBDQKFBe6KkUyNiryTZvlHS9yLivVkVBgADtaopbRsZPTTbRiRpUuNIjR9Vqx37WrVpd4uOPqK+1CUBALoo9BbsMyTlf/7aKmlW0asBgCJY3ZzOONIwdIO2bc3PjWq/vKfE1QAAulNo0P6GpEds32j745IelvT17MoCgIFbNYSn9ss3f8oYSdJzm+jTBoByVOisI5+0fa+ks9JFfxIRT2RXFgAM3Kp9dZKG7owjOfOPSka0n9vEiDYAlKNCR7QlqUHSnoj4F0kbbM/OqCYAGLCIGNI3q8m3IB3Rfv5lRrQBoBwVFLTTdpG/k/ShdNEISd/MqigAGKhtTa3a216txpoOTaxtL3U5mZp75GhVWVq9tUktbR2lLgcA0EWhI9q/L+kSSfskKSI2iluwAyhDq7YmN3A5ZlSLhvqMd3UjqjV74ih1hrRyCzeuAYByU2jQbo2IkBSSZHtUdiUBwMCt3rpP0tBvG8lZcFTugkj6tAGg3BQatL9r+0uSjrD9Z5J+LunL2ZUFAAOz+uCI9vAK2vRpA0D5KXTWkc/YfpOkPZKOk/SxiPhZppUBwACsHGZBm7m0AaB89Rm0bVdL+mlEnC+JcA2grOV6lY8dLkH7qENzaXMrdgAoL322jkREh6Rm22MHoR4AGLDm1nZt2LlfI9ypmUP4rpD5jh5bp8a6Gu3Y16qtTcPjnAGgUhTUOiKpRdLTtn+mdOYRSYqIv8ykKgAYgFVbkv89zWpo1Yj+3CWggtnWgilj9MjaHXp+015NbqwrdUkAgFShQfvH6RcAlK2VW5MLAueObilxJYNr/lGNSdB+eY/Onjep1OUAAFK9Bm3bMyJiXUR8bbAKAoCBWrE57c8ePQxaKO675eC383ePlzRNzy/9jdTx3Veud96HBAAojb4+XP1h7hvbP8i4FgA4LIcuhBxmI9qNyfk+t5e2EQAoJ30F7fzL1+dkWQgAHK5c0J47HEa08xyXtsqsbBqp1k5mHQGActFX0I4evgeAsnKgvUNrt+9TlaXZw2TGkZxRNZ2aWX9AbVGl1ftGlrocA
ECqr6B9ku09tvdKWpR+v8f2XtvcHQFA2Vi7rVmdIc2cMEp11cNvXID2EQAoP70G7YiojogxEdEYETXp97nHYwarSADoy4otyYwjx0waXeJKSmNB435J0nN760tcCQAgZ5jMNAtgqMvNODL3yOEZtBeOSUa0n9nDiDYAlItMg7btC20vt73S9vXdPH+27cdtt9v+wy7Pddhemn7dlWWdACrfyq1p0J48TIN2OqK9bE+9Yvh1zgBAWSr0hjX9Zrta0m2S3iRpg6RHbd8VEc/mrbZO0lWS/qabXeyPiJOzqg/A0LIyN4f25NHS7hIXUwJH1bVp/Ih27Wir0UstIzStvq3UJQHAsJfliPYZklZGxOqIaJX0HUmX5q8QEWsj4ilJnRnWAWCIa+/o1OptSdAerj3atrRwzKFRbQBA6WUZtKdKWp/3eEO6rFB1tpfY/q3ttxa3NABDybodzWrrCE09ol6jRmb2QV3ZO76RoA0A5STLf5G6u2tCfzoHZ0TERttzJP3S9tMRseoVB7CvlnS1JM2YMWPglQKoaCu25LWNDGMHR7SZeQQAykKWI9obJE3PezxN0sZCN46Ijel/V0u6X9Ip3axze0QsjojFkyZNOrxqAVSsg3eEHOZB+4SDrSPMPAIA5SDLEe1HJc21PVvSS5LeKemyQja0PU5Sc0QcsD1R0uslfTqzSgFUtINBe5hO7Zczq6FVo6o79PKBWm07UK2JIzuk+27pe8PzPpR9cQAwDGU2oh0R7ZKulfRTSc9J+m5ELLN9s+1LJMn26bY3SHq7pC/ZXpZuvkDSEttPSrpP0qe6zFYCAAflblZz7OTGEldSWlWWFqR3iKR9BABKL9OrhiLiHkn3dFn2sbzvH1XSUtJ1u19LOjHL2gAMDR2dcXBEe7j3aEtJ+8iSXaO0bE+9zpnYVOpyAGBY486QACra+h3Namnr1JQxdRpbP6LU5ZTc8VwQCQBlg6ANoKI9/3LSNnLclOHdNpKzkCn+AKBsELQBVLQXNidBez5BW5I0d/QB1bpTa5tHam87/4sHgFLi/8IAKtrydER73pEEbUmqrQrNSy+IfG4v0/wBQCkRtAFUtOdf3iOJ1pF8C3Mzj9A+AgAlRdAGULFa2jq0dnuzqsyMI/lyN655hqANACVF0AZQsVZtbVJHZ2jWxFGqG1Fd6nLKxkKCNgCUhUzn0QaALOX6syvmQshC7tJYBMc37leNQyua6tTcbjXUxKAcFwDwSoxoA6hYyzdzIWR36qpD80a3qFPWM8ynDQAlQ9AGULEqbkR7EJ00tlmS9NTuhhJXAgDDF0EbQMViar+eLUr7tJ8kaANAyRC0AVSk3fvbtGl3i+pGVGnmhFGlLqfsLDo4ok3rCACUCkEbQEXK3RFy7uRGVVe5xNWUn3mjWzSyqlMv7h+pXa3MyAIApUDQBlCRaBvp3YiqQ/NpP8U0fwBQEgRtABWJCyH7tmgMF0QCQCkRtAFUpIMj2gTtHp00Nr0gkhFtACgJgjaAihMRev7lPZIY0e7NIqb4A4CSImgDqDgbdu7XnpZ2TRhVq8mNI0tdTtma1dCqxpoObT4wQi+3cCNgABhsBG0AFefZTclo9vFHj5HNjCM9qfKhPm3m0waAwUfQBlBxnt14KGijd4vGMvMIAJQKQRtAxTk4on0UQbsv3IodAEqHoA2g4uRGtBcyot2n3MwjS3c3qDNKXAwADDMEbQAVZXdzm17atV91I6o0e+LoUpdT9o6qa9NRda3a216tVfu4cBQABhNBG0BFybWNHDdlDLdeL9CpRyTtI4/tGlXiSgBgeCFoA6goyzbulkR/dn+cmvZpP76LPm0AGEwEbQAVJX9qPxTm1CP2SSJoA8BgI2gDqCgHp/ZjRLtgC8e0qLaqUyv31WlXa3WpywGAYYOgDaBiHGjv0MotTbKlBUdx6/VC1VaFFo1JZh95gmn+AGDQELQBVIwVm5vU3hmaPXGUGmq5pXh/5C6IpH0EAAYPQRtAxeBGN
QNHnzYADD6CNoCKwa3XBy43or10d4M6uHENAAwKgjaAisHUfgM3eWS7ptW3al9HtZbvrSt1OQAwLBC0AVSEjs7QMy8lI9qLph1R4moq02m59hEuiASAQUHQBlARVm5p0v62Dk0bV6/xo2pLXU5FOnTjGu4QCQCDgaANoCI8tWGXJGnRtLElrqRynTYuvRX7Tka0AWAwELQBVISnX0r6s0+cStvIQM0fvV+jqjv04v6R2tzC9IgAkDWCNoCK8NSGJGgzoj1wNVXSaensIw/vpH0EALJG0AZQ9to6Og/OoX3C0QTtw3Hm+CZJ0iMEbQDIHEEbQNl7YfNetbZ3ataEBo1tGFHqcirameOSmUce2Tm6xJUAwNBH0AZQ9p5O20ZOZFq/w3bi2P0aWdWpF5rqtKO1utTlAMCQRtAGUPaeTIP2SfRnH7aRVaFT0mn+HqV9BAAyRdAGUPaefimZ2u/EqQTtYjhjfNI+wgWRAJAtgjaAstbS1qHlL++VLS0kaBfFoT5tgjYAZImgDaCsLX95r9o6QsdMGq3RI5n7uRhOGbtPNQ49u6dee9r4ZwAAssL/YQGUtYN3hGQ0u2gaakKLxjarU9Zj3I4dADJD0AZQ1h5flwTtU2Yw40gxnUH7CABkjqANoKw9vm6nJOmUGeNKXMnQkuvT/u0OgjYAZIWgDaBsbW86oBe3N6t+RLXmT2ksdTlDyunj9qnaoaf2NGhPS1upywGAIYmgDaBsPZG2jSyaNlY11fzvqphG13Tq5LHN6gjr4dU7Sl0OAAxJXMIPoGw9sZ62kSy9fkKTHts1Sv+7cpvedPyRxdnpfbcUtt55HyrO8QCgjDFEBKBsPf5iMqJ9KhdCZuINE/ZKkv535bYSVwIAQxNBG0BZau/o1JMbcjOOMKKdhZPH7ld9dadWbGnSlj0tpS4HAIYcgjaAsvTC5iY1t3Zo+vh6TWocWepyhqTaqtCZ45okSb9etb3E1QDA0JNp0LZ9oe3ltlfavr6b58+2/bjtdtt/2OW5K22vSL+uzLJOAOUnN63fqYxmZ+r1E5Kg/RDtIwBQdJkFbdvVkm6TdJGk4yW9y/bxXVZbJ+kqSXd22Xa8pI9LOlPSGZI+bpt/bYFhJDfjyCnT6c/O0uvHpyPaK7cpIkpcDQAMLVmOaJ8haWVErI6IVknfkXRp/goRsTYinpLU2WXb35X0s4jYERE7Jf1M0oUZ1gqgzDyRG9Geyd/YWZrf2KLxo2q1cXeL1mzbV+pyAGBIyTJoT5W0Pu/xhnRZ0ba1fbXtJbaXbN26dcCFAigv25sOaPW2faobUaX5U8aUupwhrcrS646ZIInZRwCg2LKcR9vdLCv0c8mCto2I2yXdLkmLFy/mM09giHh07aH+7NqaLuMBhc7TjIK94diJuvupTXpgxTZd8dpZpS4HAIaMLEe0N0ianvd4mqSNg7AtgAr3yJrkToWnzxpf4kqGh3OOmyQp6dM+0N5R4moAYOjIMmg/Kmmu7dm2ayW9U9JdBW77U0kX2B6XXgR5QboMwDDwyNpkqrkzZxO0B8NRY+s1f0qj9rV2aEn6aQIA4PBlFrQjol3StUoC8nOSvhsRy2zfbPsSSbJ9uu0Nkt4u6Uu2l6Xb7pD090rC+qOSbk6XARji9ra06dmNe1RTZW5UM4jOPW6yJOn+5VtKXAkADB2ZzqMdEfdExLyIOCYiPpku+1hE3JV+/2hETIuIURExISIW5m17R0Qcm379R5Z1Aigfj724U50hnThtrOprq0tdzrBxbto+ct9yLiwHgGLhzpAAykquP/sM2kYG1Wkzx6lxZI1WbmnS+h3NpS4HAIYEgjaAspIL2vRnD64R1VV6w9yJkqT7X2BUGwCKgaANoGy0tHXoqQ27ZUunzSRoD7Zc+8iv6NMGgKIgaAMoG0vX71JrR6fmTxmjsfUjSl3OsHPOvOSCyP9duV0tbUzzBwCHi6ANoGzQNlJaU8bWacFRY7S/rePgzwIAMHAEbQBlI3cL8
NfMIWiXyhvnJ+0jP39uc4krAYDKR9AGUBb2t3boiXW7ZEuvnTOx1OUMWxccP0WS9D/LNisiSlwNAFQ2gjaAsvDo2h1q7ejUiVPHamwD/dmlcuLUsZoy5v9v787j5KrqvI9/zq2tu3rfs3T2BLKRQBYEwiIKgiKi4gYOg44IzIPLjM5rRJ95ZnyNw0uY0ZEHFt0AACAASURBVEEUxgXwQXEQeYgiI0ggEDZZExISQpbO2ul0p/d9qfU8f1R1p7M0dEJX366q7/v1qtc9d61f56bq/urcc8/J4VDXAFsOdrodjohIWlOiLSITwmCzkXPmqDbbTY5juHhhFZCo1RYRkZOnRFtEJoS/7E4k2ufOVaLttg8tSibabx9yORIRkfSmRFtEXNfeG2ZrfRd+r8OKmSVuh5P13jerjIIcLzsbe9jb0ut2OCIiaUuJtoi47uU9rVgLy6eXkOPzuB1O1vN7HT4wP9Gn9lOq1RYROWlKtEXEdYPtsweHABf3De99RERETo4SbRFx3Uu7WwE4Z06Zy5HIoAtOrcDvcdhQ205zd8jtcERE0pISbRFx1YG2Pva29FKQ4+W0qUVuhyNJ+QEv584rx1p44q0Gt8MREUlLXrcDEJHs9uzOZgDOm1eO16Pf/hPJR5dM5pntTfzP5gauOXsmrPu+2yGJiKQVXdVExFXPbm8C4P2nVLociRzt4oVV+L0Or+9r41DngNvhiIikHdVoi4hrBiKxof6zLzi1wuVostgINdUFwIWlM1jTVMRjv7+fL80c16hERNKearRFxDWv7W1jIBJn0ZRCqgpz3A5HjuOjkzoA+NMhtZ8XETlRSrRFxDXrdiSbjag2e8L6YEUXuZ44GzvzqOv3uR2OiEhaUaItIq55dkfiQcgLT1X77Ikq6LV8oKILgMcOFbscjYhIelGiLSKu2NfSy96WXopyfZw+TQncRHZ5svnIow06TyIiJ0KJtoi44tlksxF16zfxvb+8mwJvjK3dudT0BNwOR0QkbejqJiKueGpbYmjvD8xXs5GJLsdjhx6KfPhgicvRiIikDyXaIjLuOvrCvLKnDY9jlGiniSuntAPwSEMJMetyMCIiaUKJtoiMu2e2NxGLW86aXUpx0O92ODIKy4v7mBkM0Rjy8WJrvtvhiIikBSXaIjLu1mw9BMAliya5HImMljHwyWSt9sMHS12ORkQkPSjRFpFx1R+O8dzORLd+Fy+scjkaORGDifaTTYV0RXT5EBF5N/qmFJFx9UJNMwOROEuri5hclOt2OHICqnMjnF3aQyjuqE9tEZFRUKItIuNqzdZEbyMfUrORtDT4UOTv1PuIiMi7UqItIuMmEovz9PZEon3JIjUbSUeXTeqgwBtjU2ceW7ty3A5HRGRCU6ItIuPmpd2tdPRFmFORx5wK9VyRjnI9dqhW+4G6MpejERGZ2JRoi8i4eXRTPQAfWzoVY4zL0cjJunpaKwCP1BfTE9VlRERkJPqGFJFxMRCJ8WSyW7/Ll052ORp5L07JD3FmSQ+9MQ9/bNBDkSIiI1GiLSLj4rmdzXSHoiyeWshsNRtJe1dXtwHwwIEyrEaKFBE5LiXaIjIu/ufNRLORy5dMcTkSGQuXVnVS4ouytTuXTZ3qplFE5HiUaItIyvWGoqzdluht5KNLlWhnghyP5TNTE7Xa99WWuxyNiMjEpERbRFJu7bZGBiJxVswoYWqxaj8zxTXTW3GwPHaomIYBn9vhiIhMOEq0RSTlVr9xEICPna7a7ExSnRvhw5M6iVrDr2rV1Z+IyNGUaItISjV09vNCTTN+j8PH1Gwk41w3oxmABw6U0quu/kREjqBvRRFJqd+/cRBr4eKFVRQH/W6HI2PsjOJ+lhX30hX1srpew7KLiAynRFtEUsZay/9bfwCAT62odjkaSZXrZrQA8H/3lxNXV38iIkOUaItIyqzf386+1j6qCgOcP6/C7XAkRT5U2Ul1bpi9fQGebCp0OxwRkQlDibaIpMxgbfYnl1Xjc
TTkeqbyOnD9zERb7Z/srtIANiIiSUq0RSQlekJRHtvcAMCnlqvZSKb7zNQ2KvwRtnbn8mxLgdvhiIhMCEq0RSQl/vBGHb3hGGfOLGWOhlzPeDkey/WzErXaP95dqVptERGUaItIClhruf+V/QBcc/YMl6OR8fL56lZKfFE2dubxclue2+GIiLhOibaIjLlX97axs7GHioIAlyya5HY4Mk6CXst1MwdrtatcjkZExH1KtEVkzA3WZl+1chp+r75mssk101op9EZ5pT2fv7SqyZCIZDddAUVkTDV1DbDmrUN4HMNV75vudjgyzgp9cW5IttX+952T1FZbRLKaEm0RGVO/ebWWaNxy0YJKJhfluh2OuOCL01so90d4syvIGvWrLSJZTIm2iIyZ/nCM+1/eB8AXV81yNRZxT9Br+fqcJgD+o2YS0bjLAYmIuCSlibYx5lJjzA5jzC5jzM3HWR8wxvwuuf5VY8zM5PKZxph+Y8ym5OtnqYxTRMbGwxsO0N4XYWl1Ee+bVep2OOKiz1a3MT03xO7eHH5fX+J2OCIirkhZom2M8QB3AR8GFgJXGWMWHrXZl4B2a+1c4HbgtmHrdltrT0++bkxVnCIyNmJxyz0v7gXg+vPnYIxGgsxmfsfyzbmNAPxw1yR6o7qBKiLZJ5XffGcCu6y1e6y1YeBB4IqjtrkC+FWy/DDwQaOrs0haWrP1EPtb+5heGuTSxerST+DyyR2cVthHY8jHT/dWuB2OiMi4S2WiPRU4MGy+LrnsuNtYa6NAJ1CWXDfLGLPRGPOcMea8FMYpIu+RtZafP78HgOvOm4XH0e9lAcfAd+fXA/CLfRXU9vldjkhEZHylMtE+3pX26I6eRtqmAZhurT0D+AbwgDHmmEfXjTHXG2PWG2PWNzc3v+eAReTkPF/TwpsHOijN8/Pp5dPcDkcmkOUlfXxicjvhuMMtOya7HY6IyLhKZaJdBwy/4lYD9SNtY4zxAkVAm7U2ZK1tBbDWbgB2A6cc/QbW2l9Ya1dYa1dUVOi2pIgbrLX8aO1OAG44fza5fo/LEclEc/MpDQQ9MdY0FWkQGxHJKqlMtF8H5hljZhlj/MDngEeP2uZR4Npk+VPAM9Zaa4ypSD5MiTFmNjAP2JPCWEXkJD23s5mNtR2U5fm55uwZbocjE1BVTpSbZie6+/s/26YQiqtpkYhkh5Ql2sk2118B1gDbgIestVuNMf9qjPlYcrN7gTJjzC4STUQGuwA8H9hsjHmTxEOSN1pr21IVq4icnERtdg0AN1wwm6Df63JEMlFdN7OFOXkD7OnN4a49lW6HIyIyLlJ6VbTWPg48ftSyfx5WHgA+fZz9VgOrUxmbiLx3z+5sZtOBRG32X52l2mwZWcCx3Lqojk+/Npef7qngo43dnFJV4HZYIiIppY5NReSkxOKW2/68HYAbL5ij2mx5VytL+ri6upWIdbh59Wbi8aOfjxcRySy6MorISVm9oY7th7qZWpyrttkyajef0sDa5kLeqO3g17++my/MaB154wu/PX6BiYikgGq0ReSE9YWj/PCpHQD846WnkuNTTyMyOoW+ON9bcBCA7++czK6egMsRiYikjmq0ReSE3fPCXhq7QiypLuLyJVPG5qDrvj82x5EJ75KqLj45pY3f15fy91umsfp9u/E7akYiIplHNdoickIaOvv52XO7AfjORxbgaBRIOQnfnV/P1JwwW7qC/Hi3eiERkcykRFtETsi//WkbfeEYlyyq4qzZZW6HI2mq0Bfn9iW1GCz/taeS19uDbockIjLmlGiLyKg9t7OZx7Y0kOvz8C+XL3I7HElzZ5b0ccOsZuIYvrZ5Oq1htfUXkcyiRFtERmUgEuNf/vgWAF+/aB5TinNdjkgywTfnHmJ5cS8NA36+vnk6MTXVFpEMoochRWRUfvrsbva19jGvMp+/WTUrsXC0DzCqmzYZgc+Bu5bu57KXT+HF1gLu2FXFN+Y1uh2WiMiYUI22iLyrrfWd3LVuF
wDf+/hi/F59dcjYmZQT5cdLanGw/HhPFc80a8RIEckMulqKyDsKR+N886E3icYt1549Qw9ASkqsKuvhG3MTNdlfe3M6O7rVv7aIpD8l2iLyjn7yTA3bD3UzvTTItz483+1wJIP9r9lNXDapg56Yh795YxbN3SG3QxIReU+UaIvIiDbWtvNfz+7GGPjBp5cS9OuxDkkdx8APFx9gaVEfBwf8XH//egYiMbfDEhE5aUq0ReS4OvsjfPW3G4nFLV9aNYszZ5W6HZJkgRyP5e4z9jE1J8zG2g6+8dAmYnF1RSIi6UmJtogcw1rLzas3U9fez5LqIv7xUjUZkfFTGYhyz7J9FAS8PL7lEP/7D1uwVsm2iKQfJdoicozfvLKfP791iIKAl59cdYZ6GZFxt6BggF9+cSU5PocHXz/ArX/ermRbRNKOGlyKyBE27G/je3/aBsCtVy5hRlneez/oaPvbFhlm5cxSfvr55Xz51+v5+fN7yA94+eoH553cwUbzf1D9vYvIGFM1lYgMOdjRzw33byAci/OFc2Zy2ZLJbockWe7C+ZXc/tnTMQZ++NROfrR2p2q2RSRtKNEWEQD6wlG+/Kv1tPSEOXduOf902QK3QxIB4PKlU/jPzyzFMfCjtTX84MkdSrZFJC0o0RYRYnHL3/9uE283dDGzLMidV5+B16OvB5k4PnFGNXd87gw8juGudbu55bFtxNUbiYhMcLqSimQ5ay3/9MhbrNnaSEGOl3uuXUFx0O92WCLHuHzpFO66ehk+j+GeF/fyjYc2EY7G3Q5LRGRESrRFstztT+3kt6/VEvA6/PILK5lbWeB2SCIjunTxJO65diV5fg+PbKrn2l++Rmd/xO2wRESOS4m2SBa798W9/PiZXXgcw51XL2PlTA1KIxPfBadU8LsbzqaiIMDLe1r5zM9epq69z+2wRESOoe79RLLUPS/s4d8eS3Tj9/1PnMbFC6tcjkjkKO/QJd9i4A9n+PjC9pXsaOzm8p+8yJ1XL2PV3PLxi09E5F2oRlskC/3sud1DSfYtn1jMZ1ZOczkikRNXnRth9Y3n8P5TK2jvi3DNva9y9/N71COJiEwYSrRFsoi1ltuf2smtf96OMXDblafx+ffNcDsskZNWFPRx77Ur+eoH5hK3cMvj27jpgTfo7FO7bRFxnxJtkSwRicX51urN3PF0DY6Bf79yCZ9dOd3tsETeM49j+OaHTuXn1ywnP+Dl8S2H+PAdz/Pqnla3QxORLKdEWyQL9IaiXPer9Ty0vo4cn8PPr1nBp1eouYhklksWTeKxr53L0mnF1HcOcNXdr/CDNTuIxNQFoIi4Qw9DimS4/a293HD/BrYf6qY0z889i99mWeMmaHyXHS/89rjEJzKWZpTl8fCNZ3PH2hruenYXd67bxdptjfz7zFyWFPW7HZ6IZBnVaItksHU7mrj8Jy+y/VA3s8vzWP2357CsWN2gSWbzeRz+4ZJT+e2Xz2JaaS7bD3Xz8VfmcsuOyfRFjdvhiUgWUaItkoGisTi3P7WTv7nvdboGoly8sIpHvrKKWeV5bocmMm7Oml3Gmr87n+vPnw3A3fsquOSlU3iisRB1TCIi40FNR0Qyybrvs7/Pz99vmcYbHXkYLN+c28hN1ZtxXn7K7ehExl3Q7+U7H1nAR8Nr+NbWarZ153LjppmcXdrDP8+vZ0HBgNshikgGU422SIaw1vLQwRI+8tI83ujIY1IgzH+v2MNX5zTh6G65ZLklRf38z1k1/OuCgxT7orzcls9lL83jO1uncmhAdU4ikhpKtEUywN6WXv7q3lf5x7em0RvzcFlVB0+cU8M5Zb1uhyYyYXgd+OvprTx77g6+ML0FY+CBujLOf2E+39s+mZaekNshikiGUaItksbC0Th3PlPDJT96nr/saqXYF+U/T6vlzqW1FPtjbocnMiEV+2N8d0E9a87ZyUeqOgjHHe7dX8F5t63j1j9vp7lbCbeIjA3dLxNJQ9ZannjrELc+sZ39r
YleRK5cVs13itZQpgRbZFTm5of4r9Nr2drVxO27JrG2uZCfPbebX/5lL588YyrXnTebuZX5bocpImlMibZImnmjtp1bHtvGhv3tAMypyON7VyzmnLnlsO5xl6MTST+LCge4Z9k+Ns39W+5K9rv94OsHePD1A1y0oJIvrprF2bPLcPSwg4icICXaImliw/527nymhnU7mgEoy/PzdxefwlUrp+H1qBWYyHt1+rRi7v7rFexu7uHeF/fy8IY61m5rYu22JmaWBbnqzOl8ank1ZfkBt0MVkTRhbIZ0JrpixQq7fv16t8OQiWzd98fuWOM0aqK1llf2tHHnuhr+sqsVgFxPnC9Ob+FvZzdR4E3h0NKj+RvH8t9UxG1H/Z9v6Qnx36/U8uDrtTR0JroB9HscPrSoik8um8p58yrw6UeuSFYyxmyw1q54t+1Uoy0yAQ1EYjy6qZ77XtrH2w1dAOR7Ylw7o4UvzWihVO2wRVKuPD/A1y+ax00XzuHZHc088Fotz+5o4k+bG/jT5gZKgj4uWzKZK06fyvLpJWpaIiLHUKItMoHsbenlofUHePC1Wtr7IkCiicg1Z8/gi/FHKPIpwRYZb16Pw0ULq7hoYRUHO/p5ZONBHtl4kJqmHn7zSi2/eaWWqcW5XLywiosXVnHmrFLVdIsIoKYjkk0maDOHzrP+gT9trmf1hjreqO0YWn7a1CK+cM5MLlsymRyfZ8LGL5IxTqBJmLWWbQ3d/HHTQR59s36oaQlAYY6XD8yv5OKFkzh3XjlFub6RDzSaz/VYNlUb7ffIODWPE0lXajoiMoF1RhzWNRfyRGMRzzy9lnA00dY6z+/hw6dN5qozp7NsejHG6Fa0yERkjGHhlEIWTinkW5fOZ1NdB09ubeSptw+xu7mXRzbV88imehwDS6qLOXduOavmlrNsRjEBr8ft8EVknCjRFhknjQNenmku5ImmQl5qzSdiE7eWDTFWlfZw5dR2Lq3sJOjdCHtIvERk/JzkXSMHWAYsy4Gbl8GeXj9PFXycp7c3sbG2nU0HOth0oIM71+0i1+dhxcwSVs4sZcWMEk6PGoLezLizLCLHUqItkiL9McOrbXm80FrAC60F7OzJGVrnYDmrpIdLqzq5pKqLyTkRFyMVkbE0Oy/MDRfM4YYL5tAbivLa3jZe3NXCizUt7Gjs5oWaFl6oaQHAYxazsKCf5cV9rCjpZUlhP9Nyw+hmlkhmUKItMkY6Ix7e6AiyviPI+vY8NnYECdvDD0QFPTHOKu3lkspOLqrs0giOIlkgL+DlwvmVXDi/EoCmrgFe3dvGhv3trN/fxrb6TrZ0BdnSFeS+2nIACr1RFhf2s7hwgEWF/Sxu7mFWWZ56NRFJQ0q005keanFNKG6o6clha1cOmzqDbOjIO6LGGsBgWVLYx3nl3ZxX1sOy4j78jm4Ri2SzysIcLl86hcuXTgGg96nb2NQZZH17kI2dQd7qyqUl7OOltgJeaitI7LT5OYJ+D3Mr85lbmc+8ygLmVeYzryqf6pIgHiXgIhOWEm2Rd9EW9rCrN8DbXbls7c5la1cuNT2BoTbWg/wmzpKifpYX97K8pI8Vxb3q71pE3lGeN86qsh5WlfUAYC00hby81ZWbeHXnsjVURX3nAJvrOtlc13nE/gGvw5yKfGaV5zGjLMiMsiDTSxPlSYU5qgUXcZkS7WwwUbuPGo1xqo2PxqF+wM/u3gC7ewPs6gmwuzeHXb0B2iPHfkwMltnBEAsL+zmtsJ8Vxb0sLuonoBprEXkPjIGqnChVOd18sLI7sfDCq+noC7OrqYedjT3UNHUny900doV4u6FraGCr4fxeh+mlQWaUBqkuyWVycS6T64uZnBtmck6ESYEIPnX3LZJSSrQlK0RjcZr6fdQN+DjQ56eu38+B/sPTQyEfMXv8mp88T4w5eSHmFwywqKCfRYX9LCgYIC+Vw5+LiAxTHPSzYmYpK2aWHrG8sz/CrqYe9rf2sr+1j9q2Pva39lLb1
kdLTyI539XUM2yP6UMlg6UiEGVyIJJIvHMilPujVAQilG9rpDw/QHlBgPJ8v7okFDlJSrQlrfWHY7T3hWnuDtHUHaKxa4CmroHD5e4QjV0hWntDWLtgxOMYLJMCYWbnhZiTF2JuXog5+YnypEBEPQCIyIRUlOtj+YwSls8oOWZdTyhKbWsftW291LX309A5wKGajdQP+GgY8NEUOvx68+gK8a1HDgBXmONNJt0BKvIDlOT5KM71Uxz0UZTroySYKCfmE2WNjimiRFsGufhgZdxCb8yhJ+qhO+rQHfXQHXHojHppv+8XtEe8tIc9tEW8dEQ8tIW9tEc8tIe9DMRH90VuDJT7I0zLDVOdOzhNvKblhpmSG1GzDxHJKPkB79CgOkPW/XGoGI1DUyiRdDeEfBwa8NES9tIS8tJScCrN3SFaekK09oTpGojSNRBlT3Pv6N/fE6PIF6O4tJyCHC/5AV9y6iU/OR2ar/kj+Z4YBd44+d4Y+clpjmOPrOjQw/2SZlKaaBtjLgXuADzAPdbaW49aHwB+DSwHWoHPWmv3Jdd9G/gSEAO+Zq1dk8pYZXTC0Tj9kRj94diwaZT+cGJ5XzjKQF0J/TGHvpjDQMyhP+7Qk0ygj54Oli0nV2XsN3FK/DFKS8uYVBigsiCHqsIAlYU5VBYEqCrMoaowh7J8P77nbxvjfw0RkfTldWBKboQpucfpx//Ca4aK8bilsz9CS0+I5u4QzT0hOvsjdGx9mo6Ih45kJcjQK5yY74klXgfrj20/fqyZI67J9cTJdeKJ6cZEDyw5Pg9Bv4dcX/KVLAf9HnKGlQNeD36vQ8DrJKdHzvs9DgGfQ8DjIeBLzOsBUhlLKUu0jTEe4C7gYqAOeN0Y86i19u1hm30JaLfWzjXGfA64DfisMWYh8DlgETAFWGuMOcVamzFdOFhricUt0eQrFrNE4/HDy46aj8UtkdiR89GWfGLWEIkbYtYQtSTmk8vC8cGpQzhuCCWXha0zbF1i/dC65HzEDp83hOIO/TGH6Jo/j+Kvm3bC/x7BYTUZBd5EudAbo9QfpcQXo+Q401JfjKAnfvxmHf3JV9MJhyIi8t6N5UPhY3msk+A4hpI8PyV5fuZVFRxeMTDyF2zcQk/UoTPioWPpl+kORegZiNITSry6B8vJaXfd2/REPfQk724OVsYMXnv6Yw5EgIGeEd9zrPhMHL9jCTgWf27+EYm51+Pgcwxej8HncfA6JrHMY/A6TmL54HRw+dA+R64fvnxwf49j8DgGr2NwHIPHGBwHPMYMrfM4BsccWfYOlo/ax+s4ifLwfYzRj4lxlMoa7TOBXdbaPQDGmAeBK4DhifYVwHeT5YeBO40xJrn8QWttCNhrjNmVPN7LI73ZwY5+vv37LVhriVtLLM5QOW5JTi3xOMcuswzbb/g8xOJ2qDy0X9wO7WstxIYde2g/e+R+sbg9IkmOxceimcLsMTjGifEYS9ATJ8eJE/QkahhyPINle7g8WPuQXB70xCgcTKR9h5PqQm+cPE8Mr5ryiYhkDMdAoS9OoS/OtOqid99h3fErceKWZKJt6I879C+/gf5wjL5wjIFIYpq4uxpN3lVNzA8ktwlF44SjcULRGOFYnFAkfnja3TJUCRUaqnByiFiHSAx6Y0BkYGz/YSYIYxhKuAeTeMccTsiNMRiTOI+OGVxGctmweY6cHz51DMOOc3jeMWBI/BgYei8Ov9fQNse8txm2TXKa/FsMh+ODw7EdsY7EMTjucobi4Hjrhs2TjGO0UploTwUODJuvA9430jbW2qgxphMoSy5/5ah9p77Tm7X1hvnta7XvNeZxNfyXq8dJ/PodPj/4S9l71HaDv5o9HfvwGovHWLwGvI7FayyOSfwS9xmL3xl8xYfKAcfiO2qZ3wwrj7BPrieuAVdERGTcOCbR13ieFyAGw2vU36vj3CmwFsLJO7qhmCF81leTiXoiYY/E44k7zrE4kXhyGkvccY7GE+XBO9KR5HbR5B3pa
Mwed//E8sPbxpOVcsMr6YbKFmLxOLF4otIvZu3QNBobXmF4eN/Y0HaJSsBEBSJErU38kpGUSmWifbx8/+gzOtI2o9kXY8z1wPXJ2Z79t310xwlFOLGVAy1uByHjSuc8++icZ580OeffyYJjjca3xuIgaXLO5QTNGM1GqUy06ziysW41UD/CNnXGGC9QBLSNcl+stb8AfjGGMU8Yxpj11toVbsch40fnPPvonGcfnfPso3Oe3VLZMvZ1YJ4xZpYxxk/i4cZHj9rmUeDaZPlTwDPWWptc/jljTMAYMwuYB7yWwlhFRERERMZUymq0k22uvwKsIdG93y+ttVuNMf8KrLfWPgrcC9yffNixjUQyTnK7h0g8OBkFbsqkHkdEREREJPOZRAWyTDTGmOuTTWMkS+icZx+d8+yjc559dM6zmxJtEREREZEUUO/FIiIiIiIpoER7AjLGXGqM2WGM2WWMudnteCT1jDH7jDFbjDGbjDHr3Y5Hxp4x5pfGmCZjzFvDlpUaY54yxtQkpyVuxihja4Rz/l1jzMHkZ32TMeYjbsYoY8cYM80Ys84Ys80Ys9UY8/Xkcn3Os5gS7Qlm2ND1HwYWAlclh6SXzHehtfZ0dQOVse4DLj1q2c3A09baecDTyXnJHPdx7DkHuD35WT/dWvv4OMckqRMFvmmtXQCcBdyUvH7rc57FlGhPPEND11trw8Dg0PUiksastc+T6F1puCuAXyXLvwI+Pq5BSUqNcM4lQ1lrG6y1byTL3cA2EqNa63OexZRoTzzHG7r+HYefl4xggSeNMRuSI55Kdqiy1jZA4iINVLocj4yPrxhjNieblqgZQQYyxswEzgBeRZ/zrKZEe+IZ1fDzknFWWWuXkWgydJMx5ny3AxKRlPgpMAc4HWgAfuhuODLWjDH5wGrg76y1XW7HI+5Soj3xjGr4ecks1tr65LQJ+AOJJkSS+RqNMZMBktMml+ORFLPWNlprY9baOHA3+qxnFGOMj0SS/d/W2t8nF+tznsWUaE88oxm6XjKIMSbPGFMwWAY+BLz1zntJhngUuDZZvhb4o4uxyDgYTLiSPoE+6xnDGGNIjHi9zVr7n8NW6XOexTRgzQSU7O7pRxweuv4Wl0OSFDLGAHMwJgAAAdlJREFUzCZRiw3gBR7QOc88xpjfAu8HyoFG4F+AR4CHgOlALfBpa60enssQI5zz95NoNmKBfcANg+13Jb0ZY84FXgC2APHk4u+QaKetz3mWUqItIiIiIpICajoiIiIiIpICSrRFRERERFJAibaIiIiISAoo0RYRERERSQEl2iIiIiIiKeB1OwARERk7xpgy4Onk7CQgBjQn5/ustee4EpiISBZS934iIhnKGPNdoMda+wO3YxERyUZqOiIikiWMMT3J6fuNMc8ZYx4yxuw0xtxqjPm8MeY1Y8wWY8yc5HYVxpjVxpjXk69V7v4FIiLpRYm2iEh2Wgp8HTgNuAY4xVp7JnAP8NXkNncAt1trVwJXJteJiMgoqY22iEh2en1w6G9jzG7gyeTyLcCFyfJFwEJjzOA+hcaYAmtt97hGKiKSppRoi4hkp9CwcnzYfJzD1wYHONta2z+egYmIZAo1HRERkZE8CXxlcMYYc7qLsYiIpB0l2iIiMpKvASuMMZuNMW8DN7odkIhIOlH3fiIiIiIiKaAabRERERGRFFCiLSIiIiKSAkq0RURERERSQIm2iIiIiEgKKNEWEREREUkBJdoiIiIiIimgRFtEREREJAWUaIuIiIiIpMD/B8bhWip+VqJtAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# # Load data\n", + "data = dwell0\n", + "\n", + "# Plot for comparison\n", + "plt.figure(figsize=(12,8))\n", + "ax = data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True\n", + " #, color=plt.rcParams['axes.color_cycle'][1]\n", + " )\n", + "# Save plot limits\n", + "dataYLim = ax.get_ylim()\n", + "\n", + "# Find best fit distribution\n", + "best_fit_name, best_fir_paramms = best_fit_distribution8(data, 200, ax)\n", + "best_dist = getattr(st, best_fit_name)\n", + "\n", + "# Update plots\n", + "ax.set_ylim(dataYLim)\n", + "ax.set_title(u'Trips to Work\\n All Best Fitted Distributions')\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "# Make PDF\n", + "pdf = make_pdf(best_dist, best_fir_paramms)\n", + "\n", + "# Display\n", + "plt.figure(figsize=(12,8))\n", + "ax = pdf.plot(lw=2, label='PDF', legend=True)\n", + "data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True, ax=ax)\n", + "\n", + "param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']\n", + "param_str = ', '.join(['{}={:0.2f}'.format(k,v) for k,v in zip(param_names, best_fir_paramms)])\n", + "dist_str = '{}({})'.format(best_fit_name, param_str)\n", + "\n", + "ax.set_title(u'Trips to Work with best-fit distribution \\n' + dist_str)\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "print (dist_str)" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "foldcauchy(c=12.34, loc=-0.05, scale=0.73)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtQAAAH/CAYAAAB3rkqYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XmcXGWd9v/r6u4sQMKaRJYAAUF+BIWIAReMAg/MoKMwPsrIIqKPijyaQQfnUUDHAWYUxgXUgRnEZVQUFdyGYVAEFQR1lKBBloAEjBADJATIvnXX9/fHOdV9urq6u5I6J9195/N+vfrVdZY6566qE7j6ru+5b0eEAAAAAGyZjpFuAAAAADCWEagBAACANhCoAQAAgDYQqAEAAIA2EKgBAACANhCoAQAAgDYQqAGUwvY/2L5qpNuxNdk+zvaiko855Pto+522b9uM4y22fXQZbdtcti+1vTxvw/62V7d5vN7XUub1ZrvT9mrb++TLX7d9YRnHzo/3RdsXlHU8AKMPgRrAAHm4qP/UbK8rLJ/e7DkR8U8RcXbJ7djiwGp7vO21tg8vrDvTdjRZd18JzS1F8X20fYDtUTdZgO1/tv2VYfbZT9I5kg6KiOkR8WhETCpsv9P227a0Da1eb62cJyJ6ImJSRDy2pe0pnG/AHzwR8c6I+Hi7xwYwehGoAQyQh4tJeQB6TNLrC+u+0bi/7a6t38qhRcRGSb+W9OrC6ldJerDJup9v7vFH42seZfaVtDQinh7phgyFzxFAGQjUADZb3kP5bdvftL1K0luKvZb1nlXb77K9JP/5u8LzX2b7t7ZX2n7K9iebnGMnSf8laZ9C7/g02xNtf872E7b/bPsy2+MHaerPlQXmujmS/qXJup/n5xz02PXectsX2H5S0heatPnvbN9ne88m2xbbPix//Lb8/XlBvny27e8U3tuvFNpf/MbgiL7D+XLbz9l+1PZfDPL6615qe4HtZ21/yfaEQrtOtH1Pfqw7bb+wsO2C/LNbaftB20fbfp2kD0o6PW/T3U1e6wmSfqi+z+6Lxd522/8i6eWSrsq3f6ZZo/P36U+2n7Z9XsO24vW2ve1rnZWXPGf7N7anNDuP7a78vX+P7YWSHiysm1E4xVTbP7G9yvbPbO+dn2vAtwb1XnDbL5J0haQ5+fmezrf3KyHJP++FeXt/YHuPfH29He/Otz9r+3ODf6wARgsCNYAt9QZJ10raSdK3B9nnVZIOkPQaSR9xXy3vv0r6ZETsmG//TuMTI2KFpNdLeqzQO75U0kclzZZ0qKQXSzpK0vmDnP/nkl7pzO6SuvJzvbyw7kD19VAPd+zpkiZJ2kfSe4onsn2RpNMlHR0RSwZpS/31v0rSo+rrKX+VpNubPOdV+XtRf/135etfIeleSbtJulzSlwZ5/XWnSzo+f62H1F9THtC/IOmd+bG+LOk/nZXLHCLp3ZIOzz+n1yj7LG6U9AlJ38jb9JLGk0XEj9T/s3tnw/YPSfqVpLPz7e9vPEYhnJ4maS9Je0rafZDX93ZJ2yv7fHZT9tmsH+Y8J0o6QtKLBjnmW5RdD1MkPSDpmkH2K76ueyXNlXRHfr4pTV7XX0i6WNKb8te1RFLjtz6vlfQSZdfgW2wfN9y5AYwsAjWALXVnRPxXRNQiYt0g+1wUEWsj4h5JX5V0ar5+k6QDbe8WEasi4tebcd7TJV0YEcvygH2xpDMG2fdXygL/TGU90XdExGpJiwvrFhYC8HDH7s63byy8Ztv+rKRjJB07RInD7eoL0HMkXVJYfrWaB+rBPBIRX46IHmXv63TbA8JbweciYnHeto+r73M4S9K/RcRdeR3xl/P1R+SvdaKkQ
2x3RcQfI+LRzWhju06W9IOI+EVEbJB0gSQPsu8mZcH3gPx1zMs/56F8PCKeHeLa/a+Gc7+q3pPcptMlfTEi5kfEeknnSXq17emFfS6JiBURsUjSbZJmlXBeABUiUAPYUo9v5j5/UtbLKGU9ijMlPZR/Pf/azTjvHvmxisfdq9mOEbFW0jxlPb2vknRHvunOwrpi/fRwx34qr80u2k1ZD+8/R8TKIdp9u7JQtpeysPodZaUBBygLrvcO8dxGTxYer81/T2q2Y26wz2FfSR/KyySes/2csvdgr4h4SNIHlP1RsdRZeU/THuK8FKRelnLPZryOoexZbHcekJ8ZZN+vSLpV0nXOSnUu9fC10cNdv8Vzr5C0Qn3vWzv2VOEay6+ZZ9X/Omv8fIf6bAGMAgRqAFuqldEn9i483kfZ19uKiIci4hRJ0yR9WtJ3bU9s8RxPKAuCxeP+eYg21Ouo56gvUN9RWFcM1MMdu1l7nlZWPvB12y8brBER8aCyIP1eSbdHxHPKAuL/UdZz3uzYZY3w0fRzUBYaL4qInQs/20fEdXmbvx4RR0naT1Knsl71Ae2KiNsKZSmHtdim4V7bE8V2254kademB8q+MbgwIg6W9Epl5Uj10WgGO89w5y+eeydl33QskbQmX7d9Yd/iHxrDHXeJCteY7cmSdtHQ1zCAUY5ADaBK/2B7u7we9kzltda2z7A9JSJqynr+QlKtyfOfkjQlDx1135T00fyms6mS/kHS14dow88lHSfpeXmvq5T1UB+nrH62GKg399iSpIj4iaS3Kqs/nj1MW+aqr7zjtoblRkslhe39h2vDMOba3sv2bsrqp+s171dLeq/tI/Ka8km2X297B9sH2z7G2Q2M6/Kfnvx5T0maYXuwEoxWPCVpqNd1vaSTbL88b8M/a5CwavtY2y+03SFppbISkGJbt+T9e33Due+MiCeU9R4/qay2udP2Wer/R9hTykpwxg1y3G9KeoftQ/NjX6LsD6rFW9BGAKMEgRpAle5UdvPdj5XVhf40X/9aSQucjRDyKUlvblJKoYi4T9J3JS3KSxKmSbpI0j3KSiR+r2xovEsan9vQhl2U1VPXj/uUsq/Zl0TEHwv7bu6xi239kaR3SbrR9mA1r7dLmqy+EN+43HjMVfn5f52//qHC+lC+qawk4hFJDymro1Zeu/5/Jf27svfjD8puxpOkCcpuPnxaWYDcRdJH8m3fljRe0jO2f7OFbfqMpFPz13VZ48aI+L2k90m6TlnvbT3INrOnpO8pC9P356/1m62cZwhfVxakn1Z2k+oZebtC2ed8Qb7tAGXXSd0tkh6W9JSz0WAaX9ePlJXRfF9ZL/w+6utNBzBGufm3jACw5fK64Icjop0eTAAAxgR6qAEAAIA2EKgBAACANlDyAQAAALSBHmoAAACgDQRqIFG2D7L9O9urbJ8zzL5vs33nENtvs/3OwbZXKZ80ZIuHFLM90/a8Mts0Ftj+iu1/Hul2pMb2orE6Fbjty2yfPdLtAFJEoAbS9UFJt0XE5Ij43Eg3ZgT9k7Kh+Uphew/bN9heYjtsz2jY/inbD+d/yDxo+61DHOsY2/fmQ7ott/39fCbFxv12tb1sqD96Rhvbs2zfbXtt/nvQ6bPz1/d922ts/8n2aYVtR9uuFWZiXG37zK3zKraOzXyvVjf89Nj+13zbTNvzbD+b/9xqe2bh6Z+U9GHb46t+TcC2hkANpGtfZWPybrNs7yHpGEk/KPGwNUk/kvTGQbavkfR6ZTPrnSnps7ZfMci+D0j6y4jYWdlYyg8rGxO60b9IWtBOo7emPLD9p7KxnHeR9FVlk94MFuSulLRR0vOUjcn877YPKWxfUpiJcVJEfLXC5m9Vm/teFd8HZe/XOmWT4EjZLIxvUjaj5BRJN0j6VuG5T0h6UNnMngBKRKAGEmT7p8qC5BV5L9YLbO9k+2t5T+efbH8kn1mu2fOPz3tXV9i+QpIbtr/L9oK8F/YB24fn68+z/Uhh/
RsKz7nQ9tcLyzPyHt6ufHlX2/+R9/w+a/sHDef8gO2ltp+w/fZ83RG2n6ofI1/3Rtvz88XjJf02ItYXtu9t+3v5+7A8f30ti4inIuLfJN01yPZ/jIgHI6KWT5xyh6SXD3GsJYVVPcomCim+7pdLeqGk/9icdjbKP7OFtp/Je9j3LGw7xPYt+banbF/QzrkkHS2pS9JnImJD/g2JJR3bpF07KPvj5B8iYnVE3KksCJ7RZhvkbMbLG/NvAJ6xfUf9mh/sOrD9fNs/zdc9bfsbtnce5PgdhWt+ue3rbDedHn0IR6vF96qJNymbTfMOSYqI5yJiUT75jNXkelI2O+dfbWYbAQyDQA0kKCKOVfY/2bl5b9YfJP2rsl7T/SW9WtlU2W9vfK7tKcpmJ/yIsl6uRyQdVdh+sqQL8+fvqKy3a3m++RFJc/LzXCTp63kvcSuukbS9pEMkTZN0eWHb7vkx95L0DklX2t4lIu7Kz318Yd+35MeSsqnF69ONy3anpBsl/UnSjPx438q3vTIPXoP9vLLF19HL9naSjtAQ3xTY3sf2c8p6Gv9e2eyExfZeqWx68i0eksn2scpmXPwbSXsoe/311z1Z2cyCP1LWS36ApJ/k204b5j3ZZ5BTHiLp99F/GKnf5+sbvUBST36N1t3TsO+0POj/0fbleQhvxQckLZY0VVlv7gXKpnIf9DpQFkQvyd+LgyXtrex6b+YcSX+t7N/Tnspmm7yyvnGY9+68fLfNea8anSnpaw3PVX49rVf2b/7jDc9ZIOmwFo4NYDN0Db8LgLEuDxBvlvTifDrrVbY/rawX8EsNu79W0gMR8Z38uZ9RFkzq3inpE3mYlaSF9Q0RcX1hv2/bPl/Skcq+0h6qfXtIeo2k3SLi2Xz17YVdNkm6OCK6Jd1ke7WkgyT9j7KvyN8i6Yd57+BfSnpP/ryd1Rf2lbdlT0n/Lz+WlE1NrrxntGlPZBuuUhYObx5sh4h4TNLOedvfpewr+bpzJP06Iu62/aI22nG6pC9HxG8lKf9cnnVW//1ySU9GxKfzfdcrn0o7Iq6VdO0WnG+SpBUN61Yom2Z9c/d9UNKs/Pe+yj7vyyS9u4V2bFL2B8S+EbFQeU+u7aGug4Xqu6aXOZuu/B8HOf67lf3Rujg/7oWSHrN9RkR056U8w9mc96pX/sfMq5X9gdlPROyc/9FxprI/GopWqfzrHNjmEaiBbcMUSePV/3+uf1LWM9doT0mP1xciImw/Xti+t7Ke6AGc3YB3rrJePykLC1NaaN/ekp4phOlGywvBR5LW5seWstrTBbYnKeuBvSOvFZWyHsNiMNlb0p8ajlUJ259UVqpxTGMPYjMR8Yztr0q6x9mNidOUBeqXlNCcPSX9tnCu1baXK/v8B/08W5X/gVM3U9JqZd9eFO2oLMw1GnLfiHhS0pP5+j/a/qCk/1ZrgfqTynqXf2xbkq6OiEs1xHVge5qkzyn7pmWysm9yB7su95X0fdu1wroeZb3hf26hfdLmvVdFb5V0Z0T8sdnGiFhj+yplfxQcHBFL802TJT3XYtsAtIiSD2Db8LSy3rp9C+v2UfP/6T+hLHBIkpwlkb0L2x+X9PzGJ9neV9IXlJUn7Jb3zt2nvvrrNcpKOup2bzjmroPVqg4lIv4s6VeS3qCsx/2awubfKyspKJ5nn2LNdaH9czxwBIXiz5xW22T7ImU97n8RESs34+V0KQvSOyrrTd9D0gO2n5T0WUlH2n4y/8ZhcyxR4bPPey93U/b5N/088/1OH+Y92Ufqf6Nc3uN+v6RD82un7lA1L335g6Qu2wcW1h02yL5SVvriQbb13zFiVUR8ICL2V3aj6Lm2/5eGuA6UlXuEpEMjYkdl334Mdr7HJb0mInYu/EzMr8lmI3IUf+p16pvzXhW9VVlv/VA6lP2bK/7hfLCyb00AlIhADWwDIqJH0nWSPmZ7ch5+z1XWu9vovyUdYvt/54HjHPUPv1+U9Pe2X+LMAfnxdlAWRJZJk
rMbB19YeN58Sa/Ka4Z3knR+oX1PSPqhpH+zvYvtcbZftRkv8WvKhgl8kaTvF9bfIulw2xPz5d8o+4PhUts72J5o+6i8DXc0BMPGnzvqB82PNyFfnFA4fr2c4jRJx0dEsdxkgPw9PsjZzW1TlZUy/C4insnfjxnKyh1mSfqopN9JmpV/nnJ2U+fRLbw/10p6u7Ph2SYoq6v9dUQsUlZLvLvt99uekF8fL83fk28M8548Nsj5blPWU3tOfsy5+fqfNu4YEWskfU/SxflncpSkk5T/YeRs2Lx98mttb0mXqlBC5Gy87a80a4Tt1+XXpyWtzNvUoyGuA2U9uKslPZd/U/D/hnhfr1L2b2rf/HxTbZ9UeG1DvXf12uaW36vC63qFspB8fcP6422/2Han7R2VXU/Pqv8IMa9Wdm0BKBGBGth2/K2yXuJHldWLXivpy407RcTTkk5WFlyWSzpQ0i8K26+X9LH8+auUDUm3a0Q8IOnTynqLn1IWbovPu0XSt5X1Gt+tLMgVnaGsF/1BZSMXvH8zXtv3lX/9nge0+jmfUhZMTsqXe5T1VB4g6TFlN6y9eTPOU7dOWehS3t51hW0fV9b7/3CT3sh6r2W9t3svZTcDrpJ0r7Ih+d6Qt3VDRDxZ/1FWV7spfyzb0/M23DtcYyPiJ5L+QdnNpk8o65E+Jd+2StlNna9XVlrxsLIRYrZYRGxUdrPeW5WVF/wfSX+dr5ftC2wXQ917JG2n7HP/pqT/GxH1HtrDlV1TayT9Utm3HsWJivZW4TprcKCyGy5X58f4t4i4bZjr4KL8nCuU/XH5vSFe6meVjUjyY9urlNX0v3SI/QfYgvdKymqjv5d/dkU7K3v/Vigr4zlA0gmRj3Lj7F6FmSp3GEkAktxCaR8AjHq2H5H07oi4tWH9TGVfjR/ZSi3zWGH7LZIOiYjzh905Uc7Gar5HWXnGppFuz2jn7EbkRyIb9hFAiQjUAMY8229UNvnJCyKiNtz+AACUiVE+AIxptm9T9jX2GYRpAMBIoIcaAAAAaAM3JQIAAABtIFADAAAAbRhzNdRTpkyJGTNmjHQzAAAAkLi777776YiYOtx+Yy5Qz5gxQ/PmzRvpZgAAACBxtv/Uyn6UfAAAAABtIFADAAAAbSBQAwAAAG0YczXUAAAAqN6mTZu0ePFirV+/fqSbUrmJEydq+vTpGjdu3BY9n0ANAACAARYvXqzJkydrxowZsj3SzalMRGj58uVavHix9ttvvy06BiUfAAAAGGD9+vXabbfdkg7TkmRbu+22W1s98QRqAAAANJV6mK5r93USqAEAADAqdXZ2atasWXrhC1+ok08+WWvXru23/pBDDtFhhx2myy67TLVaTZJ02223aaeddtKsWbM0a9YsHXfccZW3k0ANAACAUWm77bbT/Pnzdd9992n8+PG66qqr+q2///77dcstt+imm27SRRdd1Pu8OXPmaP78+Zo/f75uvfXWyttJoAYAAMCoN2fOHC1cuHDA+mnTpunqq6/WFVdcoYgYgZYxygcAAACGMeO8/67kuIsu/auW9uvu7tYPf/hDnXDCCU2377///qrValq6dKkk6Y477tCsWbMkSSeffLI+/OEPl9PgQRCoAQAAMCqtW7euNxjPmTNH73jHOwbdt9g7PWfOHN14442Vt6+OQA0AAIAhtdqTXLZ6rfRwHn30UXV2dmratGlasGDBVmhZf9RQAwAAYMxatmyZzj77bM2dO3fEhvmjhxoAAABjSr0UZNOmTerq6tIZZ5yhc889d8TaQ6AGAADAqLR69eqm63t6egZ9ztFHH62jjz66ohY1R8kHAAAA0AYCNQAAANAGAjUAAADQhvQD9c8uGekWAAAAIGHpB2oAAACgQgRqAAAAoA0EagAAAIxKnZ2dmjVrlg455BAddthhuuyyy1Sr1YZ8zqJFi3TttddupRZmGIcaAAAAwyv7vrRjzh92l+LU40uXLtVpp52mFStW6KKLLhr0OfVAfdppp
5XW1OHQQw0AAIBRb9q0abr66qt1xRVXKCK0aNEizZkzR4cffrgOP/xw/fKXv5QknXfeebrjjjs0a9YsXX755YPuVyZ6qAEAADAm7L///qrValq6dKmmTZumW265RRMnTtTDDz+sU089VfPmzdOll16qT33qU7rxxhslSWvXrm26X5kI1AAAABgzIkKStGnTJs2dO1fz589XZ2en/vCHPzTdv9X92kGgBgAAwJjw6KOPqrOzU9OmTdNFF12k5z3vebrnnntUq9U0ceLEps+5/PLLW9qvHdRQAwAAYNRbtmyZzj77bM2dO1e2tWLFCu2xxx7q6OjQNddco56eHknS5MmTtWrVqt7nDbZfmeihBgAAwKi0bt06zZo1S5s2bVJXV5fOOOMMnXvuuZKk97znPXrjG9+o66+/Xsccc4x22GEHSdKhhx6qrq4uHXbYYXrb29426H5lcr0OZayYPXt2bFYh+c8uaWlYFgAAAPRZsGCBDj744JFuxlbT7PXavjsiZg/3XEo+AAAAgDYQqAEAAIA2EKgBAACANhCoAQAA0NRYu9duS7X7OgnUAAAAGGDixIlavnx58qE6IrR8+fK2xqdm2DwAAAAMMH36dC1evFjLli0b6aZUbuLEiZo+ffoWP59ADQAAgAHGjRun/fbbb6SbMSZQ8gEAAAC0gUANAAAAtIFADQAAALSBQA0AAAC0gUANAAAAtCHpQL2pp6Zz7tlb3/vt4pFuCgAAABKVdKC++f4ndcOTu+jc6+4Z6aYAAAAgUUkH6u6etGf2AQAAwMhLOlB3dnikmwAAAIDEJR2oTZ4GAABAxdIO1CJRAwAAoFppB2ryNAAAACqWdKCmhBoAAABVSzpQmy5qAAAAVCztQD3SDQAAAEDykg7UHfRQAwAAoGJpB+qkXx0AAABGg6QjJ8PmAQAAoGppB2ryNAAAACqWdKAGAAAAqpZ0oI7i44hB9wMAAAC2VNKBupioa+RpAAAAVCDpQB2FRE0PNQAAAKqQdKCu1QqPydMAAACoQNKBul8NtUjUAAAAKF/SgboWxZKPEWwIAAAAkpV0oC6GaAI1AAAAqpB0oC4WfVDyAQAAgCokHahr9FADAACgYkkH6ug3DjWJGgAAAOVLO1D3K/kAAAAAypd0oKbkAwAAAFVLOlBHMFMiAAAAqpV4oG7+GAAAAChL2oGaGmoAAABULO1AzSgfAAAAqFilgdr2CbYfsr3Q9nlD7Pcm22F7dpnn56ZEAAAAVK2yQG27U9KVkl4jaaakU23PbLLfZEnnSPp12W3od1MiRR8AAACoQJU91EdKWhgRj0bERknfknRSk/3+SdInJK0vuwHFCE0PNQAAAKpQZaDeS9LjheXF+bpetl8sae+IuHGoA9k+y/Y82/OWLVvWcgP6D5vX8tMAAACAllUZqN1kXW+std0h6XJJHxjuQBFxdUTMjojZU6dObbkB3JQIAACAqlUZqBdL2ruwPF3SksLyZEkvlHSb7UWSXibphjJvTOx3U2JZBwUAAAAKqgzUd0k60PZ+tsdLOkXSDfWNEbEiIqZExIyImCHpfySdGBHzympAv3Go6aEGAABABSoL1BHRLWmupJslLZB0XUTcb/ti2ydWdd7+bWj+GAAAAChLV5UHj4ibJN3UsO6jg+x7dAXnLzwu++gAAABA6jMl9ntMogYAAED5kg7UtcJdiTXyNAAAACqQdKDuP7ELiRoAAADlSztQM2weAAAAKpZ0oK4Fw+YBAACgWkkH6iLyNAAAAKqQdKCm5AMAAABVSzpQF0s+anRRAwAAoAJJB+r+o3yMWDMAAACQsKQDdY2ZEgEAAFCxpAN1MURT8gEAAIAqJB2oAQAAgKolHaj7Tz1ODzUAAADKl3Sg5qZEAAAAVC3pQN3vpsQRbAcAAADSlXSg5qZEAAAAVC3tQF18TJ4GAABABdIO1P1SNIkaAAAA5Us8UPc9rpGnAQAAUIG0A7WYKREAAADVSjpQF3ulg0QNAACACiQdq
Cn5AAAAQNUSD9TFcahJ1AAAAChf2oF60AUAAACgHGkH6kIPNSUfAAAAqELSgbrfTYl0UQMAAKACSQfqiOaPAQAAgLIkHahr/Uo+SNQAAAAoX9KBuog4DQAAgCokHaj7TeZCogYAAEAFkg7U3JQIAACAqiUdqIshulYbwYYAAAAgWWkHaio+AAAAULGkA3W/kg9G+QAAAEAFkg7UxX5p4jQAAACqkHSgLtZN00ENAACAKiQdqIs3JVLyAQAAgCqkHai5KREAAAAVSzpQ978pceTaAQAAgHQlHaj7lXzQRw0AAIAKJB2oixm6Rp4GAABABZIO1LXgpkQAAABUK+lATYQGAABA1ZIO1NyUCAAAgKolHaiLZR41EjUAAAAqkHagLj4mTwMAAKACaQfq4k2JI9gOAAAApCvxQF18TKQGAABA+ZIO1P2HzRvBhgAAACBZSQfqfj3UFH0AAACgAmkH6uJj8jQAAAAqkHag5qZEAAAAVCzxQN38MQAAAFCWtAN14TETuwAAAKAKSQfqGiUfAAAAqFjSgTq4KxEAAAAVSzpQ00MNAACAqiUdqItqNSI1AAAAypd0oO4/sQsAAABQvqQDNVOPAwAAoGpJB2p6qAEAAFC1pAN1/x5qIjUAAADKl3SgZtQ8AAAAVC3pQK1+JR8kagAAAJQv6UDNTYkAAACoWtKBupihGYYaAAAAVUg7UPebKZFEDQAAgPIlHaiLvdKUfAAAAKAKSQdqMjQAAACqlnagLnRL1yiiBgAAQAUSD9SFxyPXDAAAACQs7UAths0DAABAtZIO1LVa32NG+QAAAEAVkg7UTD0OAACAqqUdqPvNlEiiBgAAQPkSD9SFxyPXDAAAACQs7UDNTYkAAACoWNKBut9MifRRAwAAoAJJB+p+E7uQpwEAAFCBSgO17RNsP2R7oe3zmmw/2/a9tufbvtP2zDLPzygfAAAAqFplgdp2p6QrJb1G0kxJpzYJzNdGxIsiYpakT0i6rMw2BCUfAAAAqFiVPdRHSloYEY9GxEZJ35J0UnGHiFhZWNxBJQ/GEQzzAQAAgIp1VXjsvSQ9XlheLOmljTvZfq+kcyWNl3RsmQ0o1k3XqPkAAABABarsoXaTdQNSbURcGRHPl/QhSR9peiD7LNvzbM9btmxZyw1g2DwAAABUrcpAvVjS3oXl6ZKWDLH/tyT9dbMNEXF1RMyOiNlTp05tuQFUfAAAAKBqVQbquyQdaHs/2+MlnSLphuIOtg8sLP7vhucrAAAeF0lEQVSVpIfLbEC/QE2iBgAAQAUqq6GOiG7bcyXdLKlT0pcj4n7bF0uaFxE3SJpr+zhJmyQ9K+nMktvQ+5gaagAAAFShypsSFRE3SbqpYd1HC4/fV+n5qzw4AAAAoMRnSiz2Sgc91AAAAKhA0oGamxIBAABQtaQDdY2bEgEAAFCxpAN1sV+amxIBAABQhaQDNSUfAAAAqFrSgbr/TYkj2BAAAAAkK+lAHUMsAQAAAGVIOlDXCncl1moj2BAAAAAkK+lAHf0e00MNAACA8iUdqMWweQAAAKhY0oG6302JI9gOAAAApCvpQF0M0YxDDQAAgCqkHaj7F1EDAAAApUs6UFPyAQAAgKolHaj7dVBT8gEAAIAKpB2oCyG6Rp4GAABABRIP1IXHI9cMAAAAJCztQF18TMkHAAAAKpB0oOamRAAAAFQt6UDNsHkAAACoWkuB2vYLq25I2RpLPJjYBQAAAFVotYf6Ktu/sf0e2ztX2qKSNOZn8jQAAACq0FKgjohXSjpd0t6S5tm+1vbxlbasTY35Oaj5AAAAQAVarqGOiIclfUTShyS9WtLnbD9o+39X1bh2NJZ40EMNAACAKrRaQ32o7cslLZB0rKTXR8TB+ePLK2zfFmsM0EzsAgAAgCp0tbjfFZK+IOmCiFhXXxkRS2x/pJKWtWlgiQeJGgAAAOVrNVC/VtK6iOiRJNsdkiZGxNqIuKay1rWBmxIBAACwNbRaQ32rpO0Ky9vn6
0atAYF6ZJoBAACAxLUaqCdGxOr6Qv54+2qaVI7GmxIZhxoAAABVaDVQr7F9eH3B9kskrRti/xE3oIKaPA0AAIAKtFpD/X5J19teki/vIenN1TSpHI0zJZKnAQAAUIWWAnVE3GX7/5N0kCRLejAiNlXasjY1DpPXGLABAACAMrTaQy1JR0iakT/nxbYVEV+rpFVlYJQPAAAAbAUtBWrb10h6vqT5knry1SFp1AbqxnGomXocAAAAVWi1h3q2pJkxhuomBpZ8jEw7AAAAkLZWR/m4T9LuVTakbANuSiRQAwAAoAKt9lBPkfSA7d9I2lBfGREnVtKqEjT2UDMONQAAAKrQaqC+sMpGVGFgDTUAAABQvlaHzbvd9r6SDoyIW21vL6mz2qa1acDMLiPSCgAAACSupRpq2++S9B1Jn89X7SXpB1U1qgwDbkokUQMAAKACrd6U+F5JR0laKUkR8bCkaVU1qgwDSj7I0wAAAKhAq4F6Q0RsrC/Y7tIoL6LgpkQAAABsDa0G6tttXyBpO9vHS7pe0n9V16z2DRg2b4TaAQAAgLS1GqjPk7RM0r2S3i3pJkkfqapRZWjskKaDGgAAAFVodZSPmqQv5D9jwoBAPTLNAAAAQOJaCtS2/6gmmTQi9i+9RSUZeFMikRoAAADla3Vil9mFxxMlnSxp1/KbUx5KPgAAALA1tFRDHRHLCz9/jojPSDq24ra1pXFUD8ahBgAAQBVaLfk4vLDYoazHenIlLSrJgIkSydMAAACoQKslH58uPO6WtEjS35TemhI11kw3jksNAAAAlKHVUT6OqbohZRtYQ02iBgAAQPlaLfk4d6jtEXFZOc0pD/EZAAAAW8PmjPJxhKQb8uXXS/q5pMeraFQZBtyUSMIGAABABVoN1FMkHR4RqyTJ9oWSro+Id1bVsHbVA7QVCnlAwAYAAADK0OrU4/tI2lhY3ihpRumtKVE9QHc6WyZOAwAAoAqt9lBfI+k3tr+vLJu+QdLXKmtVCeod0h0KSeamRAAAAFSi1VE+Pmb7h5Lm5KveHhG/q65Z5bElBT3UAAAAqEarJR+StL2klRHxWUmLbe9XUZtK0VfykUdpEjUAAAAq0FKgtv2Pkj4k6fx81ThJX6+qUWWoV3jUa6i5KREAAABVaLWH+g2STpS0RpIiYonGyNTjzh8RpwEAAFCFVgP1xsju6gtJsr1DdU0qx4BRPkjUAAAAqECrgfo625+XtLPtd0m6VdIXqmtW+6KhhjroowYAAEAFWh3l41O2j5e0UtJBkj4aEbdU2rI29Q6bV6+hro1cWwAAAJCuYQO17U5JN0fEcZJGdYguqvXelEjPNAAAAKozbMlHRPRIWmt7p63QntLUa6jrL5CJXQAAAFCFVmdKXC/pXtu3KB/pQ5Ii4pxKWlWCaOihJk4DAACgCq0G6v/Of8aMaBjlg3GoAQAAUIUhA7XtfSLisYj46tZqUFl6x6Gu91CTpwEAAFCB4Wqof1B/YPu7FbelVL3jUOfL5GkAAABUYbhA7cLj/atsSNkaR/mghxoAAABVGC5QxyCPR716DXWH+y8DAAAAZRrupsTDbK9U1lO9Xf5Y+XJExI6Vtq4NjPIBAACArWHIQB0RnUNtH81q9FADAABgKxh2Ypexih5qAAAAbA3JBurGUT5qNSI1AAAAypdsoB4wDvXINQUAAAAJSzdQN8yUSKIGAABAFZIN1APGoR7BtgAAACBdyQbq+k2JHb3LRGoAAACUL9lA3XtTYt5DzT2JAAAAqEKlgdr2CbYfsr3Q9nlNtp9r+wHbv7f9E9v7lnXuWkMNdVD0AQAAgApUFqhtd0q6UtJrJM2UdKrtmQ27/U7S7Ig4VNJ3JH2i/HbkNdTkaQAAAFSgyh7qIyUtjIhHI2KjpG9JOqm4Q0T8LCLW5ov/I2l6WSdvHIeaPA0AAIAqVBmo95L0eGF5cb5uMO+Q9MOyTj5gpkS6qAEAAFCBrgqP7SbrmqZa22+RNFvSqwfZfpaks
yRpn332aenk9ZsQO+o11ORpAAAAVKDKHurFkvYuLE+XtKRxJ9vHSfqwpBMjYkOzA0XE1RExOyJmT506taWTN47yQZ4GAABAFaoM1HdJOtD2frbHSzpF0g3FHWy/WNLnlYXppaWevbeHmpIPAAAAVKeyQB0R3ZLmSrpZ0gJJ10XE/bYvtn1ivtsnJU2SdL3t+bZvGORwm61x2DzGoQYAAEAVqqyhVkTcJOmmhnUfLTw+rqpz9049TrEHAAAAKpTsTIn1iVxcuDWSsg8AAACULdlAXe+htiSLyV0AAABQjWQDdT09dzh6x++rkagBAABQsmQDde841Oor+yBOAwAAoGwJB+q+Gup6DzUd1AAAAChbsoE6mtVQ00cNAACAkiUbqGvFGmqmHwcAAEBFkg3U/Xuo+68DAAAAypJuoFa9h7oQqCn5AAAAQMmSDdT9xqGm5AMAAAAVSTZQ95V8hDp6b0oEAAAAypVsoO67KVFM7AIAAIDKJBuooz4OtSj5AAAAQHUSDtTZ7w4XUjSBGgAAACVLNlAXpx6v11BT8gEAAICyJRyo+4b56HDDOgAAAKAkyQbqenTuUKjT9R7qkWsPAAAA0pRuoC7elNiwDgAAAChLwoE6+93Rr+Rj5NoDAACANCUbqPvGoY7ekT566KEGAABAyRIO1H2P6y+yRhc1AAAASpZsoI78tsQO9Y1FTQc1AAAAypZuoK6PmudCDzWJGgAAACVLNlDXyzs6FL03JVJDDQAAgLIlG6h7x6G25N6SDwI1AAAAypVsoC6Wd3T2rhuZtgAAACBdyQbqvnGo+4bNo4YaAAAAZUs4UPeN8lGfKbFWG7HmAAAAIFHJBup6eYdVnCmRHmoAAACUK9lA3TsOtaVOSj4AAABQkWQDdd8NiFEYh3qEGgMAAIBkJRuoe2uoC8Pm0UMNAACAsiUcqLPf2dTj9XUEagAAAJQr2UBd740uTj3ewygfAAAAKFnCgTr7zTjUAAAAqFK6gbrWNw51302JBGoAAACUK91AnYfnzkIPNXkaAAAAZUs2UPc0uSmxh3HzAAAAULJkA3Wtd9i8oOQDAAAAlUk3UNf6xqGm5AMAAABVSTdQ96uh7r8OAAAAKEuygbo+5rQlph4HAABAZZIN1FHooa5PPc5NiQAAAChbsoG6J/pqqDvzdUw9DgAAgLIlG6h7Z0pUcabEEWwQAAAAkpRuoC6M8mFuSgQAAEBF0g3UvTXUTD0OAACA6iQbqOs3IFqhzt6SDwI1AAAAypVsoK5n5073TT1eq41cewAAAJCmZAN1T2HqcYseagAAAFQj2UBdi+LU49k68jQAAADKlm6gro/yoeh9kT0kagAAAJQs3UDdr4aakg8AAABUI9lA3VMbWPLBxC4AAAAoW7KBureGWqGO/KZEph4HAABA2dIP1IUe6h66qAEAAFCyhAN19rvTfT3U5GkAAACULd1A3TtTYnHYPBI1AAAAypVuoM7Dc6ejcFMigRoAAADlSjZQ9xRrqCn5AAAAQEWSDdS1Wva7wyFzUyIAAAAqkm6g7h02Lyv7kKihBgAAQPmSD9TZKB/1dSPXHgAAAKQp2UDd01vyod6SD25KBAAAQNmSDdTRZKbEGl3UAAAAKFmygbo4ykdnbw/1CDYIAAAASUo2UNd7ozudjfQhUfIBAACA8qUbqPPsbIXcsA4AAAAoS8KBuq+HurN3HOraCLYIAAAAKUo2UNcncelw9I5D3UOeBgAAQMmSDdT1cunspkRqqAEAAFCNZAN1T2HYvK685KObkg8AAACULNlA3b+Gul7yQQ81AAAAypVkoI6I3pIPqy9Qd/cQqAEAAFCuJAN17w2JCrk4ygc11AAAAChZmoE6D85d7v+bkg8AAACULc1AXZ8lMX919ZkSuwnUAAAAKFmSgboenPt6qLP1PdRQAwAAoGRJBup6cK7fjNg7ygc11AAAAChZkoF6YA81NdQAAACoRqWB2vYJth+yvdD2eU22v8r2b213235TW
eftraHOSz06eid2IVADAACgXJUFatudkq6U9BpJMyWdantmw26PSXqbpGvLPHd9RsTGHuoagRoAAAAl66rw2EdKWhgRj0qS7W9JOknSA/UdImJRvq3UOcF7x6FuqKFm6nEAAACUrcqSj70kPV5YXpyvq1xfDXW23DuxCz3UAAAAKFmVgdpN1m1RorV9lu15tuctW7Zs2P1rtf49012MQw0AAICKVBmoF0vau7A8XdKSLTlQRFwdEbMjYvbUqVOH3b9xlI9OaqgBAABQkSoD9V2SDrS9n+3xkk6RdEOF5+vVOMpHV/4q6aEGAABA2SoL1BHRLWmupJslLZB0XUTcb/ti2ydKku0jbC+WdLKkz9u+v4xzN/ZQd4hxqAEAAFCNKkf5UETcJOmmhnUfLTy+S1kpSKl68tE8OjvyYN1BoAYAAEA10pwpsaexhjpbT6AGAABA2ZIM1ANqqBnlAwAAABVJMlAPqKE2JR8AAACoRpKBumfAONT91wMAAABlSTJQD5wpkZIPAAAAVCPJQF0f5aOjYWKX+noAAACgLEkG6sYa6npPNT3UAAAAKFuSgbpxlA+mHgcAAEBVkg7UfT3U1FADAACgGkkG6u6GUT46GOUDAAAAFUkyUA/WQ90TBGoAAACUK8lA3d1TH+VDvb+tUAS91AAAAChXkoF6U08Wmsd39IXncfnjTT0MnQcAAIDyJBqos9BcDNTj87KPjQRqAAAAlCjpQD2uoy8818P1pm4CNQAAAMqTZKDemJd8jHOzkg9qqAEAAFCeJAN1Xw81NdQAAACoVpKBemP34DXUGyj5AAAAQImSDNS9PdT9Sj5q/bYBAAAAZUg7UBd7qCn5AAAAQAWSDNQbu/ObEqmhBgAAQMWSDNS941AXSz7q41B3M8oHAAAAypN2oG5S8sHELgAAAChT0oG6ackHo3wAAACgREkG6r6JXQozJZoaagAAAJQvyUC9qck41OMo+QAAAEAF0gzUTUs+6uNQc1MiAAAAypN2oPbAmRI3UkMNAACAEiUZqOvTizMONQAAAKqWZKBuNmwegRoAAABVSDRQ10f5YBxqAAAAVCvJQL2xSclH77B5zJQIAACAEiUZqNd390iStuvs643uGzavZ0TaBAAAgDQlGajXbRwiUDPKBwAAAEqUXKCOiN5RPiYUSj62y8ehXr+JQA0AAIDyJBeoN/TOklhTh/vW13ur122i5AMAAADlSS5Q95Z7dPTviSZQAwAAoArJBer6DYkTO/uP5lEP1Os3EqgBAABQnuQCdbMbErPlLGDTQw0AAIAyJReo6zcdTmwo+ZiYB+y19FADAACgROkF6t6Sj4Ye6t5RPgjUAAAAKE96gTrvgZ7Y0byGmpIPAAAAlCm9QN1klkRJ2r4eqCn5AAAAQImSC9TrNuY11J3Na6jpoQYAAECZkgvUazd2Sxp8lA9qqAEAAFCm5AL16g1ZoJ7c1T9Qj3Oo06FNPaFNPUw/DgAAgHKkF6jXZ4F6Ulf/nmibkT4AAABQvvQCdV7yMalzYC/0Dnmvdb0XGwAAAGhXeoF6kB5qSdoxX7dqPYEaAAAA5UgvUA9SQy1Jk8dlgXrluk1btU0AAABIV3qBOu993qFJoK73UK9cT6AGAABAOZIL1Ks2DF/ysXIdJR8AAAAoR3qB+uk/S2p+U+KO4+ihBgAAQLmSC9QrNnVKknYeP7AXese8DIQaagAAAJQluUD9zMYuSdKu45qUfPT2UFPyAQAAgHIkFajXbezR+lqHJnTUtH2zko8uRvkAAABAuZIK1M+s3ShJ2nV8t+yB2+s91CsI1AAAAChJUoH62TVZoN6lSbmHJO06Liv1WL5641ZrEwAAANKWVKB+Zk1fD3Uz0yZk65ctfWKrtQkAAABpSypQL1+zQdLgPdRTJ2SlHss2dG21NgEAACBtSQXqJ1aslyTtMbF5jfSOXTWN76hpdU+n1m5kpA8AAAC0L61A/Vw9UDevkbYLZR+rNmy1dgEAACBdaQXqR
+6VNHgPtSRNHZ9tW0qgBgAAQAmSCtRL1o+TNHSgrm9b8ty6rdImAAAApC2ZQB0/vUSPrxsvSdprkJIPSdp3+2zboqfXbpV2AQAAIG3JBOplG7u0qrtTO3V1a7fxzUf5kKQZ22elHn9avmZrNQ0AAAAJSyZQL1w9QZJ0wKQNTWdJrOvtoSZQAwAAoATJBOpH1kyUJD1/h6FvNqz3UP/x6TWKiMrbBQAAgLQlE6jvXbmdJOmgSeuH3O95E7q1U1e3nl27SU+uHHpfAAAAYDjJBOrfPre9JOnwnYcu5bClQ3fKRvi45/EVlbcLAAAAaUsiUD+zZqMWrpmo8R01HbLj8L3OL9oxC9S/X/xc1U0DAABA4pII1D97cKkk6chd1mh8x/B10fVe7F89urzSdgEAACB9SQTqm2//uSTp+KkrW9r/5buu0XjXNP/x5/TMmsHHrAYAAACGM+YD9VM//KR+umxHdTr0l89rrSZ6h66aXrrrGkVItz7wVMUtBAAAQMrGfKD+/KKp6g7r+KkrtfvE7paf9/rds/rpb971WFVNAwAAwDZgTAfqex5/Tl97bIqs0N8+f/N6ml+3+3OaPLFLv3vsOc1b9ExFLQQAAEDqxmygvn/JCr3za/PUHdaZ+yxvaXSPou27Qm9/xQxJ0j/d+IC6e2oVtBIAAACpG3OBesW6Tfrgd+7RSVf8QstWbdArdl2lCw56YouO9W7/QM/bcYLuWbxCn7z5oZJbCgAAgG3BmAvUjz2zVtfNW6yeWk2n771c//GSRS0NldfMDl01fe6UF6uzw/r8zx/VJTctoKcaAAAAm2XMBeodu3r0vuc/pVuO+oM+NvPPmrCFYbrupfvvpsv+5rDeUP3Gf/+lfrnwaUW0d1wAAABsG7qqPLjtEyR9VlKnpC9GxKUN2ydI+pqkl0haLunNEbFoqGPuu/1G/d0B5Q51d9KsvTRl0gT9/fX36J7FK3TaF3+tA6ZN0usO3UNHHTBFL9prJ00c11nqOQEAAJAGV9UTa7tT0h8kHS9psaS7JJ0aEQ8U9nmPpEMj4mzbp0h6Q0S8eajjzj5oesy76r3lNfSY86WfXSIdc75Wrd+kr/5ykb78i0X9JnwZ39mhGVO21/5TJmm/qTto9x0nasqkCZo6eYKmTBqvyRPHafLELk3o6pDt8toGAACAEWP77oiYPex+FQbql0u6MCL+Ml8+X5Ii4pLCPjfn+/zKdpekJyVNjSEaVXqgLjrmfEnSpp6afrHwad264CnNW/SsHnpqlVp5mzo7rB3Gd2rShC7tMKFLE8Z1aFxnh8Z3dmh8V9/vcfXlrg51dVgdtjo7sh9b6syXO1zfJnXU97PV0WF1um9dRx7ibcm9v/MF1de5b1t9e76shu3Zc9zveFLjORr2L5xzqD8pBvuDY+jnDLFtkGcO/ZyhTrZ55xnqXFvyPmz58YbYuAXvEcYWPsp00CmTDj7JNMzaZ5eWAnWVJR97SXq8sLxY0ksH2ycium2vkLSbpKcrbNfgfpZl/XHHnK+jD5qmow+aJklavaFbf1y2Ro8+vVqLnl6rpavW6+nVG7Rs1QY9vXqjVm/o1uoN3drYXdPK9d1aub71CWYAAAAwtlUZqJv9cdbYz9vKPrJ9lqSz8sUNPvaC+9ps2zAuqPbwGElTNFJ/sCEFXD9oF9cQ2sH1s/Xt28pOVQbqxZL2LixPl7RkkH0W5yUfO0kaMG1hRFwt6WpJsj2vla53oBmuH7SD6wft4hpCO7h+Rq8qh827S9KBtvezPV7SKZJuaNjnBkln5o/fJOmnQ9VPAwAAAKNNZT3UeU30XEk3Kxs278sRcb/tiyXNi4gbJH1J0jW2FyrrmT6lqvYAAAAAVah0HOqIuEnSTQ3rPlp4vF7SyZt52KtLaBq2XVw/aAfXD9rFNYR2cP2MUpUNmwcAAABsC8bc1OMAAADAaDJmArXtE2w/ZHuh7fNGuj0YnWx/2fZS2/cV1u1q+xbbD+e/d8nX2/bn8mvq9
7YPH7mWYzSwvbftn9leYPt+2+/L13MNYVi2J9r+je178uvnonz9frZ/nV8/385v1JftCfnywnz7jJFsP0YH2522f2f7xnyZ62cMGBOBOp/G/EpJr5E0U9KptmeObKswSn1F0gkN686T9JOIOFDST/JlKbueDsx/zpL071upjRi9uiV9ICIOlvQySe/N/1vDNYRWbJB0bEQcJmmWpBNsv0zSv0i6PL9+npX0jnz/d0h6NiIOkHR5vh/wPkkLCstcP2PAmAjUko6UtDAiHo2IjZK+JemkEW4TRqGI+LkGjmV+kqSv5o+/KumvC+u/Fpn/kbSz7T22TksxGkXEExHx2/zxKmX/U9tLXENoQX4drM4Xx+U/IelYSd/J1zdeP/Xr6juS/peZe3ybZnu6pL+S9MV82eL6GRPGSqBuNo35XiPUFow9z4uIJ6QsMEmalq/nusKg8q9PXyzp1+IaQovyr+vnS1oq6RZJj0h6LiK6812K10jv9ZNvXyFpt63bYowyn5H0QUm1fHk3cf2MCWMlULc0RTmwmbiu0JTtSZK+K+n9EbFyqF2brOMa2oZFRE9EzFI2O/CRkg5utlv+m+sHvWy/TtLSiLi7uLrJrlw/o9BYCdStTGMODOap+tfw+e+l+XquKwxge5yyMP2NiPhevpprCJslIp6TdJuyWvydbdfnfSheI73XT759Jw0sWcO24yhJJ9pepKy09VhlPdZcP2PAWAnUrUxjDgymOMX9mZL+s7D+rflIDS+TtKL+tT62TXn94ZckLYiIywqbuIYwLNtTbe+cP95O0nHK6vB/JulN+W6N10/9unqTpJ8Gk0NssyLi/IiYHhEzlOWcn0bE6eL6GRPGzMQutl+r7C+1+jTmHxvhJmEUsv1NSUdLmiLpKUn/KOkHkq6TtI+kxySdHBHP5OHpCmWjgqyV9PaImDcS7cboYPuVku6QdK/6ahgvUFZHzTWEIdk+VNlNYp3KOqyui4iLbe+vrMdxV0m/k/SWiNhge6Kka5TV6j8j6ZSIeHRkWo/RxPbRkv4+Il7H9TM2jJlADQAAAIxGY6XkAwAAABiVCNQAAABAGwjUAAAAQBsI1AAAAEAbCNQAAABAG7qG3wUAMJrY3k3ST/LF3SX1SFqWL6+NiFeMSMMAYBvFsHkAMIbZvlDS6oj41Ei3BQC2VZR8AEBCbK/Ofx9t+3bb19n+g+1LbZ9u+ze277X9/Hy/qba/a/uu/OeokX0FADD2EKgBIF2HSXqfpBdJOkPSCyLiSElflPS3+T6flXR5RBwh6Y35NgDAZqCGGgDSdVdEPCFJth+R9ON8/b2SjskfHydpZjaLuiRpR9uTI2LVVm0pAIxhBGoASNeGwuNaYbmmvv/+d0h6eUSs25oNA4CUUPIBANu2H0uaW1+wPWsE2wIAYxKBGgC2bedImm3797YfkHT2SDcIAMYahs0DAAAA2kAPNQAAANAGAjUAAADQBgI1AAAA0AYCNQAAANAGAjUAAADQBgI1AAAA0AYCNQAAANAGAjUAAADQhv8fTgAX8hSX7nMAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# # Load data\n", + "data = dwell1\n", + "\n", + "# Plot for comparison\n", + "plt.figure(figsize=(12,8))\n", + "ax = data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True\n", + " #, color=plt.rcParams['axes.color_cycle'][1]\n", + " )\n", + "# Save plot limits\n", + "dataYLim = ax.get_ylim()\n", + "\n", + "# Find best fit distribution\n", + "best_fit_name, best_fir_paramms = best_fit_distribution8(data, 200, ax)\n", + "best_dist = getattr(st, best_fit_name)\n", + "\n", + "# Update plots\n", + "ax.set_ylim(dataYLim)\n", + "ax.set_title(u'Trips to Work\\n All Best Fitted Distributions')\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "# Make PDF\n", + "pdf = make_pdf(best_dist, best_fir_paramms)\n", + "\n", + "# Display\n", + "plt.figure(figsize=(12,8))\n", + "ax = pdf.plot(lw=2, label='PDF', legend=True)\n", + "data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True, ax=ax)\n", + "\n", + "param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']\n", + "param_str = ', '.join(['{}={:0.2f}'.format(k,v) for k,v in zip(param_names, best_fir_paramms)])\n", + "dist_str = '{}({})'.format(best_fit_name, param_str)\n", + "\n", + "ax.set_title(u'Trips to Work with best-fit distribution \\n' + dist_str)\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "print (dist_str)" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "#For HW\n", + "def best_fit_distribution9(data, bins=200, ax=None):\n", + " \"\"\"Model data by finding best fit distribution to data\"\"\"\n", + " # Get histogram of original data\n", + " y, x = np.histogram(data, bins=bins, density=True)\n", + " x = (x + np.roll(x, -1))[:-1] / 2.0\n", + "\n", + " # Distributions to 
check\n", + " DISTRIBUTIONS = [ \n", + " \n", + " st.burr, st.skewnorm,\n", + " st.genlogistic,\n", + " st.mielke,\n", + " st.gompertz\n", + " \n", + " ]\n", + "\n", + " # Best holders\n", + " best_distribution = st.norm\n", + " best_params = (0.0, 1.0)\n", + " best_sse = np.inf\n", + "\n", + " # Estimate distribution parameters from data\n", + " for distribution in DISTRIBUTIONS:\n", + "\n", + " # Try to fit the distribution\n", + " try:\n", + " # Ignore warnings from data that can't be fit\n", + " with warnings.catch_warnings():\n", + " warnings.filterwarnings('ignore')\n", + "\n", + " # fit dist to data\n", + " params = distribution.fit(data)\n", + "\n", + " # Separate parts of parameters\n", + " arg = params[:-2]\n", + " loc = params[-2]\n", + " scale = params[-1]\n", + "\n", + " # Calculate fitted PDF and error with fit in distribution\n", + " pdf = distribution.pdf(x, loc=loc, scale=scale, *arg)\n", + " sse = np.sum(np.power(y - pdf, 2.0))\n", + "\n", + " # if axis pass in add to plot\n", + " try:\n", + " if ax:\n", + " pd.Series(pdf, x).plot(ax=ax, label=distribution.name,legend=True)\n", + " end\n", + " except Exception:\n", + " pass\n", + "\n", + " # identify if this distribution is better\n", + " if best_sse > sse > 0:\n", + " best_distribution = distribution\n", + " best_params = params\n", + " best_sse = sse\n", + "\n", + " except Exception:\n", + " pass\n", + "\n", + " return (best_distribution.name, best_params)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "genlogistic(c=0.31, loc=8.97, scale=0.79)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# # Load data\n", + "data = dwell2\n", + "\n", + "# Plot for comparison\n", + "plt.figure(figsize=(12,8))\n", + "ax = data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True\n", + " #, color=plt.rcParams['axes.color_cycle'][1]\n", + " )\n", + "# Save plot limits\n", + "dataYLim = ax.get_ylim()\n", + "\n", + "# Find best fit distribution\n", + "best_fit_name, best_fir_paramms = best_fit_distribution9(data, 200, ax)\n", + "best_dist = getattr(st, best_fit_name)\n", + "\n", + "# Update plots\n", + "ax.set_ylim(dataYLim)\n", + "ax.set_title(u'Trips to Work\\n All Best Fitted Distributions')\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "# Make PDF\n", + "pdf = make_pdf(best_dist, best_fir_paramms)\n", + "\n", + "# Display\n", + "plt.figure(figsize=(12,8))\n", + "ax = pdf.plot(lw=2, label='PDF', legend=True)\n", + "data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True, ax=ax)\n", + "\n", + "param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']\n", + "param_str = ', '.join(['{}={:0.2f}'.format(k,v) for k,v in zip(param_names, best_fir_paramms)])\n", + "dist_str = '{}({})'.format(best_fit_name, param_str)\n", + "\n", + "ax.set_title(u'Trips to Work with best-fit distribution \\n' + dist_str)\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "print (dist_str)" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [], + "source": [ + "#For HW\n", + "def best_fit_distribution10(data, bins=200, ax=None):\n", + " \"\"\"Model data by finding best fit distribution to data\"\"\"\n", + " # Get histogram of original data\n", + " y, x = np.histogram(data, bins=bins, density=True)\n", + " x = (x + np.roll(x, -1))[:-1] / 2.0\n", + "\n", + " # Distributions to 
check\n", + " DISTRIBUTIONS = [ \n", + " \n", + "# st.gennorm,\n", + " st.exponnorm,\n", + " st.foldnorm, st.gumbel_l,\n", + " st.logistic,\n", + " st.t,\n", + " st.foldcauchy,\n", + " st.tukeylambda,st.skewnorm\n", + " \n", + " ]\n", + "\n", + " # Best holders\n", + " best_distribution = st.norm\n", + " best_params = (0.0, 1.0)\n", + " best_sse = np.inf\n", + "\n", + " # Estimate distribution parameters from data\n", + " for distribution in DISTRIBUTIONS:\n", + "\n", + " # Try to fit the distribution\n", + " try:\n", + " # Ignore warnings from data that can't be fit\n", + " with warnings.catch_warnings():\n", + " warnings.filterwarnings('ignore')\n", + "\n", + " # fit dist to data\n", + " params = distribution.fit(data)\n", + "\n", + " # Separate parts of parameters\n", + " arg = params[:-2]\n", + " loc = params[-2]\n", + " scale = params[-1]\n", + "\n", + " # Calculate fitted PDF and error with fit in distribution\n", + " pdf = distribution.pdf(x, loc=loc, scale=scale, *arg)\n", + " sse = np.sum(np.power(y - pdf, 2.0))\n", + "\n", + " # if axis pass in add to plot\n", + " try:\n", + " if ax:\n", + " pd.Series(pdf, x).plot(ax=ax, label=distribution.name,legend=True)\n", + " end\n", + " except Exception:\n", + " pass\n", + "\n", + " # identify if this distribution is better\n", + " if best_sse > sse > 0:\n", + " best_distribution = distribution\n", + " best_params = params\n", + " best_sse = sse\n", + "\n", + " except Exception:\n", + " pass\n", + "\n", + " return (best_distribution.name, best_params)" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "foldnorm(c=1.70, loc=0.05, scale=3.15)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# # Load data\n", + "data = dwell3\n", + "\n", + "# Plot for comparison\n", + "plt.figure(figsize=(12,8))\n", + "ax = data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True\n", + " #, color=plt.rcParams['axes.color_cycle'][1]\n", + " )\n", + "# Save plot limits\n", + "dataYLim = ax.get_ylim()\n", + "\n", + "# Find best fit distribution\n", + "best_fit_name, best_fir_paramms = best_fit_distribution10(data, 200, ax)\n", + "best_dist = getattr(st, best_fit_name)\n", + "\n", + "# Update plots\n", + "ax.set_ylim(dataYLim)\n", + "ax.set_title(u'Trips to Work\\n All Best Fitted Distributions')\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "# Make PDF\n", + "pdf = make_pdf(best_dist, best_fir_paramms)\n", + "\n", + "# Display\n", + "plt.figure(figsize=(12,8))\n", + "ax = pdf.plot(lw=2, label='PDF', legend=True)\n", + "data.plot(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True, ax=ax)\n", + "\n", + "param_names = (best_dist.shapes + ', loc, scale').split(', ') if best_dist.shapes else ['loc', 'scale']\n", + "param_str = ', '.join(['{}={:0.2f}'.format(k,v) for k,v in zip(param_names, best_fir_paramms)])\n", + "dist_str = '{}({})'.format(best_fit_name, param_str)\n", + "\n", + "ax.set_title(u'Trips to Work with best-fit distribution \\n' + dist_str)\n", + "ax.set_xlabel(u'Time')\n", + "ax.set_ylabel('Frequency')\n", + "\n", + "print (dist_str)" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [], + "source": [ + "#For HW\n", + "def best_fit_distribution11(data, bins=200, ax=None):\n", + " \"\"\"Model data by finding best fit distribution to data\"\"\"\n", + " # Get histogram of original data\n", + " y, x = np.histogram(data, bins=bins, density=True)\n", + " x = (x + np.roll(x, -1))[:-1] / 2.0\n", + "\n", + " # Distributions to 
check\n", + " DISTRIBUTIONS = [ \n", + " \n", + " st.cauchy,\n", + " st.gennorm,st.genlogistic, st.invgauss,\n", + " st.johnsonsu,\n", + " st.powernorm,\n", + " st.foldcauchy,\n", + " st.loglaplace\n", + " \n", + " ]\n", + "\n", + " # Best holders\n", + " best_distribution = st.norm\n", + " best_params = (0.0, 1.0)\n", + " best_sse = np.inf\n", + "\n", + " # Estimate distribution parameters from data\n", + " for distribution in DISTRIBUTIONS:\n", + "\n", + " # Try to fit the distribution\n", + " try:\n", + " # Ignore warnings from data that can't be fit\n", + " with warnings.catch_warnings():\n", + " warnings.filterwarnings('ignore')\n", + "\n", + " # fit dist to data\n", + " params = distribution.fit(data)\n", + "\n", + " # Separate parts of parameters\n", + " arg = params[:-2]\n", + " loc = params[-2]\n", + " scale = params[-1]\n", + "\n", + " # Calculate fitted PDF and error with fit in distribution\n", + " pdf = distribution.pdf(x, loc=loc, scale=scale, *arg)\n", + " sse = np.sum(np.power(y - pdf, 2.0))\n", + "\n", + " # if axis pass in add to plot\n", + " try:\n", + " if ax:\n", + " pd.Series(pdf, x).plot(ax=ax, label=distribution.name,legend=True)\n", + " end\n", + " except Exception:\n", + " pass\n", + "\n", + " # identify if this distribution is better\n", + " if best_sse > sse > 0:\n", + " best_distribution = distribution\n", + " best_params = params\n", + " best_sse = sse\n", + "\n", + " except Exception:\n", + " pass\n", + "\n", + " return (best_distribution.name, best_params)" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gennorm(beta=0.71, loc=8.42, scale=1.40)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
# Fit the candidate distributions to the dwell4 sample and plot the outcome.
data = dwell4

# Shared styling for the density histogram of the raw data
hist_style = dict(kind='hist', bins=50, density=True, alpha=0.5, label='Data', legend=True)

# Figure 1: data histogram with every fitted candidate PDF drawn on top
fig_candidates, ax = plt.subplots(figsize=(12, 8))
data.plot(ax=ax, **hist_style)

# Remember the histogram's y-limits so the overlays cannot rescale the axis
dataYLim = ax.get_ylim()

# best_fit_distribution11 draws each candidate on `ax` and returns the winner
best_fit_name, best_fir_paramms = best_fit_distribution11(data, 200, ax)
best_dist = getattr(st, best_fit_name)

ax.set_ylim(dataYLim)
ax.set(
    title=u'Trips to Work\n All Best Fitted Distributions',
    xlabel=u'Time',
    ylabel='Frequency',
)

# Curve of the winning distribution (make_pdf is defined earlier in the notebook)
pdf = make_pdf(best_dist, best_fir_paramms)

# Figure 2: winning PDF against the data histogram
fig_best, ax = plt.subplots(figsize=(12, 8))
pdf.plot(ax=ax, lw=2, label='PDF', legend=True)
data.plot(ax=ax, **hist_style)

# Human-readable "name(param=value, ...)" description of the fitted distribution
if best_dist.shapes:
    param_names = best_dist.shapes.split(', ') + ['loc', 'scale']
else:
    param_names = ['loc', 'scale']
param_str = ', '.join(f'{k}={v:0.2f}' for k, v in zip(param_names, best_fir_paramms))
dist_str = f'{best_fit_name}({param_str})'

ax.set(
    title=u'Trips to Work with best-fit distribution \n' + dist_str,
    xlabel=u'Time',
    ylabel='Frequency',
)

print(dist_str)

# ## Use distribution to simulate synthetic home-work trip end times
obs = pd.read_csv('/home/emma/ual_model_workspace/spring-2019-models/notebooks-emma/synthetic_032319.csv')
# Fixed seed so the synthetic draws are reproducible after Restart & Run All.
rng = np.random.RandomState(42)

# Johnson SU distributions, with the parameters used for home->work trip end
# times (HW_ET) and for work dwell durations (dwell_exact).
HW_ET_DIST = st.johnsonsu(a=-0.71, b=1.00, loc=7.12, scale=1.31)
DWELL_DIST = st.johnsonsu(a=0.49, b=0.94, loc=9.29, scale=1.26)

# Category code -> half-open window [lower, upper), in hours.
# TOD 4 runs past midnight (up to 27h) and is wrapped back onto the clock below.
HW_ET_WINDOWS = {0: (3, 6), 1: (6, 9), 2: (9, 15.5), 3: (15.5, 18.5), 4: (18.5, 27)}
# Lower edges of dwell categories 1..5; category 5 is capped at 24h when sampling.
DWELL_EDGES = [0, 4.5, 7.75, 9.0, 10.5]
DWELL_WINDOWS = {
    code: (lower, upper)
    for code, (lower, upper) in enumerate(zip(DWELL_EDGES, DWELL_EDGES[1:] + [24]), start=1)
}


def sample_truncated(dist, lower, upper, size, random_state=None):
    """Draw ``size`` values from ``dist`` restricted to ``[lower, upper)``.

    Uses inverse-CDF sampling: a uniform draw between ``cdf(lower)`` and
    ``cdf(upper)`` is mapped back through ``ppf``. This yields the same
    conditional distribution as redrawing until every value lands inside the
    window, but needs a single pass and always terminates.

    Parameters
    ----------
    dist : frozen scipy.stats continuous distribution
    lower, upper : float
        Window bounds, ``lower < upper``.
    size : int
        Number of draws.
    random_state : numpy.random.RandomState, optional
        Source of randomness; NumPy's global state is used when omitted.

    Raises
    ------
    ValueError
        If ``dist`` has no probability mass inside the window.
    """
    p_lower, p_upper = dist.cdf([lower, upper])
    if not p_upper > p_lower:
        raise ValueError(
            'distribution has no probability mass in [{}, {})'.format(lower, upper))
    source = np.random if random_state is None else random_state
    draws = dist.ppf(source.uniform(p_lower, p_upper, size=size))
    # Guard against floating-point round-off at the edges; keep the window half-open
    return np.clip(draws, lower, np.nextafter(upper, lower))


def draw_within_windows(categories, dist, windows, random_state=None):
    """Return one draw from ``dist`` per entry of ``categories``.

    Entries whose category appears in ``windows`` are drawn from ``dist``
    truncated to that category's ``[lower, upper)`` window. Entries with any
    other category keep an unrestricted draw.
    """
    codes = np.asarray(categories)
    values = np.asarray(dist.rvs(size=codes.shape[0], random_state=random_state), dtype=float)
    for code, (lower, upper) in windows.items():
        in_category = codes == code
        count = int(in_category.sum())
        if count:
            values[in_category] = sample_truncated(dist, lower, upper, count, random_state)
    return values


# Home->work trip end time, consistent with each row's TOD category.
# Modulo 24 folds the after-midnight part of the TOD 4 window back onto 0-24h.
obs['HW_ET'] = np.mod(draw_within_windows(obs['TOD'], HW_ET_DIST, HW_ET_WINDOWS, rng), 24)

# ## Assign synthetic work dwell times from distribution

# Dwell-time category of the trips in tripsIII:
#   1: [0, 4.5)  2: [4.5, 7.75)  3: [7.75, 9)  4: [9, 10.5)  5: >= 10.5
# Negative or missing work_dwell values get code 0.
work_dwell = tripsIII['work_dwell']
tripsIII['dwell_work'] = np.where(work_dwell.isna(), 0, np.digitize(work_dwell, DWELL_EDGES))

# NOTE(review): obs['dwell_work'] is not created in any visible cell; presumably
# it comes from the synthetic CSV or an upstream model step -- confirm before running.
# Category 1 is bounded below at 0 so that no negative dwell time is kept.
obs['dwell_exact'] = draw_within_windows(obs['dwell_work'], DWELL_DIST, DWELL_WINDOWS, rng)

# ## Add work dwell time to home-to-work trip end times to get work-to-home trip start times
obs['WH_ST'] = np.mod(obs['HW_ET'] + obs['dwell_exact'], 24)