diff --git a/class10/donow/najmabadi_shannon_10_donow.ipynb b/class10/donow/najmabadi_shannon_10_donow.ipynb
new file mode 100644
index 0000000..e154401
--- /dev/null
+++ b/class10/donow/najmabadi_shannon_10_donow.ipynb
@@ -0,0 +1,405 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create a classifier to predict the wine color from wine quality attributes using this dataset: http://archive.ics.uci.edu/ml/datasets/Wine+Quality"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## The data is in the database we've been using\n",
+ "+ host='training.c1erymiua9dx.us-east-1.rds.amazonaws.com'\n",
+ "+ database='training'\n",
+ "+ port=5432\n",
+ "+ user='dot_student'\n",
+ "+ password='qgis'\n",
+ "+ table name = 'winequality'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import pg8000\n",
+ "conn = pg8000.connect(host='training.c1erymiua9dx.us-east-1.rds.amazonaws.com', database='training', port=5432, user='dot_student', password='qgis')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "\n",
+ "from sklearn import datasets\n",
+ "from sklearn import tree\n",
+ "from sklearn import metrics\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from sklearn.cross_validation import cross_val_score"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Query for the data and create a numpy array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'ph', 'sulphates', 'alcohol', 'color']\n"
+ ]
+ }
+ ],
+ "source": [
+ "cursor = conn.cursor()\n",
+ "cursor.execute(\"SELECT * FROM information_schema.columns WHERE table_name= 'winequality'\")\n",
+ "column_names = []\n",
+ "for row in cursor.fetchall():\n",
+ " column_names.append(row[3])\n",
+ "print(column_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "statement = \"SELECT fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density, ph, sulphates, alcohol, color FROM winequality\"\n",
+ "cursor.execute(statement)\n",
+ "wine_quality = []\n",
+ "for row in cursor:\n",
+ " wine_quality.append(row)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " fixed_acidity | \n",
+ " volatile_acidity | \n",
+ " citric_acid | \n",
+ " residual_sugar | \n",
+ " chlorides | \n",
+ " free_sulfur_dioxide | \n",
+ " total_sulfur_dioxide | \n",
+ " density | \n",
+ " ph | \n",
+ " sulphates | \n",
+ " alcohol | \n",
+ " color | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 7 | \n",
+ " 0.27 | \n",
+ " 0.36 | \n",
+ " 20.7 | \n",
+ " 0.045 | \n",
+ " 45 | \n",
+ " 170 | \n",
+ " 1.001 | \n",
+ " 3 | \n",
+ " 0.45 | \n",
+ " 8.8 | \n",
+ " W | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 6.3 | \n",
+ " 0.3 | \n",
+ " 0.34 | \n",
+ " 1.6 | \n",
+ " 0.049 | \n",
+ " 14 | \n",
+ " 132 | \n",
+ " 0.994 | \n",
+ " 3.3 | \n",
+ " 0.49 | \n",
+ " 9.5 | \n",
+ " W | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 8.1 | \n",
+ " 0.28 | \n",
+ " 0.4 | \n",
+ " 6.9 | \n",
+ " 0.05 | \n",
+ " 30 | \n",
+ " 97 | \n",
+ " 0.9951 | \n",
+ " 3.26 | \n",
+ " 0.44 | \n",
+ " 10.1 | \n",
+ " W | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " fixed_acidity volatile_acidity citric_acid residual_sugar chlorides \\\n",
+ "0 7 0.27 0.36 20.7 0.045 \n",
+ "1 6.3 0.3 0.34 1.6 0.049 \n",
+ "2 8.1 0.28 0.4 6.9 0.05 \n",
+ "\n",
+ " free_sulfur_dioxide total_sulfur_dioxide density ph sulphates alcohol \\\n",
+ "0 45 170 1.001 3 0.45 8.8 \n",
+ "1 14 132 0.994 3.3 0.49 9.5 \n",
+ "2 30 97 0.9951 3.26 0.44 10.1 \n",
+ "\n",
+ " color \n",
+ "0 W \n",
+ "1 W \n",
+ "2 W "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.DataFrame(wine_quality)\n",
+ "df.columns = ['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'ph', 'sulphates', 'alcohol', 'color']\n",
+ "df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Split the data into features (x) and target (y, the last column in the table)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "array = np.array(df)\n",
+ "x = array[:,:11]\n",
+ "y = array[:,11]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Remember you can cast the results into an numpy array and then slice out what you want"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create a decision tree with the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "dt = DecisionTreeClassifier()\n",
+ "dt = dt.fit(x,y)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Run 10-fold cross validation on the model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 0.97538462, 0.98615385, 0.97692308, 0.98153846, 0.98153846,\n",
+ " 0.98307692, 0.97538462, 0.97230769, 0.98459168, 0.97685185])"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "scores = cross_val_score(dt,x,y,cv=10) \n",
+ "scores"
+ ]
+ },
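+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### A quick follow-up (sketch, not executed here): summarize the ten fold scores by their mean and spread"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Sketch (not executed above): average accuracy across the 10 folds, plus the spread between folds\n",
+ "print(\"Mean accuracy: %f (+/- %f)\" % (scores.mean(), scores.std()))"
+ ]
+ },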
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## If you have time, calculate the feature importance and graph based on the code in the [slides from last class](http://ledeprogram.github.io/algorithms/class9/#21)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['fixed_acidity',\n",
+ " 'volatile_acidity',\n",
+ " 'citric_acid',\n",
+ " 'residual_sugar',\n",
+ " 'chlorides',\n",
+ " 'free_sulfur_dioxide',\n",
+ " 'total_sulfur_dioxide',\n",
+ " 'density',\n",
+ " 'ph',\n",
+ " 'sulphates',\n",
+ " 'alcohol',\n",
+ " 'color']"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "columns = list(df.columns)\n",
+ "columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0, 1)"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEACAYAAABI5zaHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEAVJREFUeJzt3X2MXNdZx/HvszWl4JaINmrQbohxty1BkZIQlRAoLxOC\niVMkXPEHiZO0dRDIQk1aQQtOKi271oJoJRBtUyByMXGLXFI1rUSQEhr3ZYRa6iSFvEHsxFlvN/Zu\nmqq0pcSowrUf/pixdzLxemY247njM9+PNNLcO2fufXx2/ds755y7G5mJJKksY1UXIEnqP8Ndkgpk\nuEtSgQx3SSqQ4S5JBTLcJalAHcM9InZGxHMR8dhp2nw4Ig5ExCMRcWl/S5Qk9aqbK/c7gatXejEi\nrgEmM/MNwFbgjj7VJklapY7hnplfAr59miabgI832z4AnBMR5/WnPEnSavRjzH0CONSyvdjcJ0mq\niBOqklSgNX04xiLw4y3b5zf3vUhE+ItsJGkVMjN6ad/tlXs0H6dyD/B2gIi4AvhOZj53mgJ9ZDI9\nPV15DcPysC/sC/vi9I/V6HjlHhGfAGrAayLiGWAaeHkjp3NHZt4bEW+JiKeBI8BNq6pEktQ3HcM9\nM6/vos3N/SlHktQPTqhWpFarVV3C0LAvltkXy+yLlyZWO56zqpNF5CDPJ0kliAjyDE2oSpLOIoa7\nJBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtS\ngQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXI\ncJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVqKtwj4iNEbE/Ip6KiG2neP01EXFf\nRDwSEY9HxJa+VypJ6lpk5ukbRIwBTwFXAUvAQ8B1mbm/pc008IrMvC0izgWeBM7LzO+3HSs7nU+S\n9EIRQWZGL+/p5sr9cuBAZi5k5lHgLmBTW5uvA69qPn8V8F/twS5JGpw1XbSZAA61bB+mEfitPgp8\nPiKWgFcC1/anPEnSanQT7t24DXg0M6+MiElgT0RcnJnPtzecmZk5+bxWq1Gr1fpUgiSVoV6vU6/X\nX9IxuhlzvwKYycyNze1bgczMD7S0uRf408z8cnP788C2zPxq27Ecc5ekHp2pMfeHgNdHxLqIeDlw\nHXBPW5t9wK82izgPeCNwsJdCJEn903FYJjOPRcTNwP00fhjszMx9EbG18XLuAP4MuDMiHgUC+KPM\n/NaZLFyStLKOwzJ9PZnDMpLUszM1LCNJOssY7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrsk\nFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCtTxD2RL\nOvPm5xeYmtrF4uJxJibGmJ3dwvr166ouS2cx/0C2VLH5+QU2bLidubntwFrgCJOT0+zZc4sBL8A/\nkC2dlaamdrUEO8Ba5ua2MzW1q8KqdLYz3KWKLS4eZznYT1jL0tLxKspRIQx3qWITE2PAkba9Rxgf\n97+nVs/vHqlis7NbmJycZjngG2Pus7NbKqtJZz8nVKUhcGK1zNLSccbHXS2jF1rNhKrhLklDztUy\nkiTAcJekIhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUBdhXtEbIyI/RHxVERsW6FNLSIejoj/\niIgv9rdMSVIvOt6hGhFjwFPAVcAS8BBwXWbub2lzDvCvwK9l5mJEnJuZ3zzFsbxDVZJ6dKbuUL0c\nOJCZC5l5FLgL2NTW5nrg05m5CHCqYJckDU434T4BHGrZPtzc1+qNwKsj4osR8VBEvK1fBUqSetev\nv6G6BrgM+BUaf3XgKxHxlcx8uk/HlyT1oJtwXwQuaNk+v7mv1WHgm5n5PeB7EfEvwCXAi8J9Zmbm\n5PNarUatVuutYkkqXL1ep16vv6RjdDOh+jLgSRoTqs8CDwKbM3NfS5sLgduBjcAPAg8A12bmE23H\nckJVknq0mgnVjlfumXksIm4G7qcxRr8zM/dFxNbGy7kjM/dHxGeBx4BjwI72YJckDY5/rEOShpx/\nrEOSBBjuklQkw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3\nSSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJek\nAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgboK94jY\nGBH7I+KpiNh2mnY/ExFHI+I3+1eiJKlXHcM9IsaAjwBXAxcBmyPiwhXavR/4bL+LlCT1ppsr98uB\nA5m5kJlHgbuATadodwtwN/CNPtYnSVqFbsJ9AjjUsn24ue+kiBgH3pqZfwNE/8qTJK1GvyZUPwi0\njsUb8JJUoTVdtFkELmjZPr+5r9WbgLsiIoBzgWsi4mhm3tN+sJmZmZPPa7UatVqtx5IlqWz1ep16\nvf6SjhGZefoGES8DngSuAp4FHgQ2Z+a+FdrfCfxTZn7mFK9lp/NJkl4oIsjMnkZEOl65Z+axiLgZ\nuJ/GMM7OzNwXEVsbL+eO9rf0UoAkqf86Xrn39WReuUtSz1Zz5e4dqpJUIMNdkgpkuEtSgQx3SSqQ\n4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnu\nklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5J\nBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoHWVF2ARtf8/AJTU7tYXDzOxMQYs7NbWL9+XdVl\nSUWIzOzcKGIj8EEaV/o7M/MDba9fD2xrbv4P8HuZ+fgpjpPdnE/lm59fYMOG25mb2w6sBY4wOTnN\nnj23GPBSm4ggM6OX93QclomIMeAjwNXARcDmiLiwrdlB4Jcy8xLgT4CP9lKERs/U1K6WYAdYy9zc\ndqamdlVYlVSObsbcLwcOZOZCZh4F7gI2tTbIzL2Z+d/Nzb3ARH/LVGkWF4+zHOwnrGVp6XgV5UjF\n6SbcJ4BDLduHOX14/w5w30spSuWbmBgDjrTtPcL4uHP8Uj/0dUI1Iq4EbgJ+YaU2MzMzJ5/XajVq\ntVo/S9BZYnZ2C3v3Tr9ozH129paKK5OqV6/XqdfrL+kYHSdUI+IKYCYzNza3bwXyFJOqFwOfBjZm\n5twKx3JCVSedWC2ztHSc8XFXy0grWc2Eajfh/jLgSeAq4FngQWBzZu5raXMB8HngbZm59zTHMtwl\nqUerCfeOwzKZeSwibgbuZ3kp5L6I2
Np4OXcAU8Crgb+OiACOZublvf8TJEn90NU6976dzCt3SerZ\nGVnnLkk6+xjuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtS\ngQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXI\ncJekAhnuklQgw12SCrSm6gIGaX5+gampXSwuHmdiYozZ2S2sX7+u6rIkqe8iMwd3sogc5Plazc8v\nsGHD7czNbQfWAkeYnJxmz55bDHhJQy0iyMzo5T0jMywzNbWrJdgB1jI3t52pqV0VViVJZ8bIhPvi\n4nGWg/2EtSwtHa+iHEk6o0Ym3CcmxoAjbXuPMD4+Ml0gaYSMTLLNzm5hcnKa5YBvjLnPzm6prCZJ\nOlNGZkIVllfLLC0dZ3zc1TJSO1eULRumvljNhOpIhbuklbmibNmw9MWJHzC7d8+cmdUyEbExIvZH\nxFMRsW2FNh+OiAMR8UhEXNpLEZKqNywryubnF7jxxu1ceeU0N964nfn5hYGeH4ajL078gNm9+72r\nen/HcI+IMeAjwNXARcDmiLiwrc01wGRmvgHYCtyx0vGq+mINixPfuJde+o5K+2IY/gOdUK/XKzs3\n2BcnDMOKstZAq9evZPfu97Jhw+0D/5oMQ1+8+AdMjzLztA/gCuC+lu1bgW1tbe4Arm3Z3gecd4pj\nJTyfk5PvyYMHv5aj5uDBr+Xk5HsSnk+YrqwvXlhHVv41mZ6eruS8mfZFqxtumGnphzzZHzfcMFNR\nDdOV1PDiOqrpi1rtj1vOTWaHrG5/dDMsMwEcatk+3Nx3ujaLp2jTNLo3Dw3DR71hqmMY2BfLhmFF\n2TBcMcNw9MWpl293r6LfLTOaNw8NyzfusNQxDOyLZevXr2PPnluYmvrzlhVlg51AXA601q/J4O9H\nGYa+mJ3dwt69082Lj951XC0TEVcAM5m5sbl9K42PCB9oaXMH8MXM/GRzez/wy5n5XNuxXCojSauQ\nPa6W6ebK/SHg9RGxDngWuA7Y3NbmHuCdwCebPwy+0x7sqylOkrQ6HcM9M49FxM3A/TRW1+zMzH0R\nsbXxcu7IzHsj4i0R8TSNz1Q3ndmyJUmnM9CbmCRJgzGwWYpuboQaBRFxfkR8ISL+MyIej4h3VV1T\nlSJiLCL+PSLuqbqWqkXEORHxqYjY1/z++Nmqa6pCRNzW/Pc/FhG7I+LlVdc0SBGxMyKei4jHWvb9\naETcHxFPRsRnI+KcTscZSLh3cyPUCPk+8AeZeRHwc8A7R7gvAN4NPFF1EUPiQ8C9mflTwCU07hcZ\nKc25vd8FfjozL6YxdHxdtVUN3J00srLVrcDnMvMngS8At3U6yKCu3C8HDmTmQmYeBe4CNg3o3EMl\nM7+emY80nz9P4z/wCvcElC0izgfeAvxt1bVULSJ+BPjFzLwTIDO/n5nfrbisKnwX+D9gbUSsAX4Y\nWKq2pMHKzC8B327bvQn4WPP5x4C3djrOoMK9mxuhRk5E/ARwKfBAtZVU5i+BPwSc+IH1wDcj4s7m\nMNWOiPihqosatMz8NvAXwDM0bob8TmZ+rtqqhsJrT6xAzMyvA6/t9IaR+X3uwyYiXgncDby7eQU/\nUiLi14Hnmp9iovkYZWuAy4C/yszLgP+l8VF8pETE64DfB9YB48ArI+L6aqsaSh0viAYV7ovABS3b\n5zf3jaTmx827gb/PzH+sup6KvBn4jYg4CPwDcGVEfLzimqp0GDiUmV9tbt9NI+xHzZuAL2fmtzLz\nGPAZ4OcrrmkYPBcR5wFExI8B3+j0hkGF+8kboZoz39fRuPFpVP0d8ERmfqjqQqqSme/LzAsy83U0\nvh++kJlvr7quqjQ/ch+KiDc2d13FaE40PwlcERGviIig0Q8jN7HMiz/N3gNsaT5/B9DxonAgv1tm\npRuhBnHuYRMRbwZuAB6PiIdpfLx6X2b+c7WVaQi8C9gdET8AHGQEbwbMzEebn+D+DTgGPAzsqLaq\nwYqITwA14DUR8QwwDbwf+FRE/DawAPxWx+N4E5MklccJVUkqkOEuSQUy3CWpQIa7JBXIcJekAhnu\nklQgw12SCmS4S1KB/h+FQThUEJWxmgAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.plot(dt.feature_importances_, 'o')\n",
+ "plt.ylim(0,1)"
+ ]
+ },
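+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Optional refinement (sketch, not executed): the same importance plot with feature names on the x-axis, assuming the `columns` list from the cell above"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Sketch (not executed): label each importance dot with its feature name.\n",
+ "# Assumes `columns` from the cell above; the last entry ('color') is the target, so drop it.\n",
+ "feature_names = columns[:-1]\n",
+ "plt.plot(dt.feature_importances_, 'o')\n",
+ "plt.xticks(range(len(feature_names)), feature_names, rotation=90)\n",
+ "plt.ylim(0, 1)"
+ ]
+ },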
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Use [this tip for getting the column names from your cursor object](http://stackoverflow.com/questions/10252247/how-do-i-get-a-list-of-column-names-from-a-psycopg2-cursor)"
+ ]
+ },
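+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### A sketch of that tip (not executed here): DB-API cursors expose the last query's column names through `cursor.description`, where each entry's first item is the name (possibly returned as bytes, depending on the pg8000 version)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Sketch (not executed): column names straight from the DB-API cursor,\n",
+ "# instead of querying information_schema. Each description entry's first\n",
+ "# item is the column name; decode it if the driver returns bytes.\n",
+ "cursor.execute(\"SELECT * FROM winequality LIMIT 1\")\n",
+ "names = [col[0].decode() if isinstance(col[0], bytes) else col[0] for col in cursor.description]\n",
+ "print(names)"
+ ]
+ },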
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "#alternative:\n",
+ "\n",
+ "df = pd.read_sql('SELECT * FROM winequality', conn)\n",
+ "wine = df.as_matrix()\n",
+ "\n",
+ "x = wine[:,:-1]\n",
+ "y = wine[:,-1]"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/class9/homework/najmabadi_shannon_9_1.ipynb b/class9/homework/najmabadi_shannon_9_1.ipynb
new file mode 100644
index 0000000..c7e9f66
--- /dev/null
+++ b/class9/homework/najmabadi_shannon_9_1.ipynb
@@ -0,0 +1,153 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Assignment 1\n",
+ "\n",
+ "Use the pseudocode you came up with in class to write your own 5-fold cross-validation function that splits the data set into 5 equal-sized sets\n",
+ "Don't forget to shuffle the input before assigning to sets\n",
+ "You can use the fit(), predict(), and score() functions of your model in your functions\n",
+ "Test the results with the sklearn cross_val_score\n",
+ "In your PR, discuss what challenges you had creating this function and if it helped you better understand cross validation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "import random\n",
+ "import numpy as np\n",
+ "from sklearn import tree\n",
+ "from sklearn import metrics\n",
+ "from sklearn import datasets\n",
+ "from sklearn.cross_validation import cross_val_score\n",
+ "\n",
+ "iris = datasets.load_iris()\n",
+ "x = iris.data[:,2:]\n",
+ "y = iris.target"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "a = list(zip(x,y)) #Zip the dataa\n",
+ "random.shuffle(a) #Shuffle the data\n",
+ "x,y = zip(*a) #Unzip the data (*)"
+ ]
+ },
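+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "An aside (sketch, not used below): the same shuffle can be done with a NumPy index permutation, which keeps x and y as arrays rather than tuples of rows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Sketch (not used below): shuffle with one random permutation of indices so\n",
+ "# x and y stay NumPy arrays and remain aligned with each other.\n",
+ "idx = np.random.permutation(len(x))\n",
+ "x_shuffled = np.asarray(x)[idx]\n",
+ "y_shuffled = np.asarray(y)[idx]"
+ ]
+ },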
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Array 1: score of 0.933333333333\n",
+ "Array 2: score of 0.966666666667\n",
+ "Array 3: score of 0.9\n",
+ "Array 4: score of 0.966666666667\n",
+ "Array 5: score of 0.933333333333\n"
+ ]
+ }
+ ],
+ "source": [
+ "number_of_splices = 5 #Say how many splices we're dividing it into\n",
+ "list_length = len(a) #Make a variable name for the length of the list \n",
+ "splice_size = int(list_length / number_of_splices) #Set a variable for the size of each splice\n",
+ "\n",
+ "for i in range(1, number_of_splices + 1): #Loop through the splices\n",
+ " x_test = x[int(splice_size) * (i-1): int(splice_size * i)] #Separate out testing data. If we break this down, we have int: int, which is making smaller arrays in the size of [int, inclusive: int, exclusive]. The ints say where the index should start and end. In the first int, we multiply the splice size by i-1 because we want to start at index 0. \n",
+ " y_test = y[int(splice_size) * (i-1): int(splice_size * i)] \n",
+ " x_train = x[0: int(splice_size * (i-1))] + x[int(splice_size * i): int(list_length)- 1] #Separate out training data\n",
+ " y_train = y[0: int(splice_size * (i-1))] + y[int(splice_size * i): int(list_length) - 1] \n",
+ "\n",
+ " dt = tree.DecisionTreeClassifier().fit(x_train, y_train) \n",
+ " \n",
+ " y_pred = dt.predict(x_test) \n",
+ " score = metrics.accuracy_score(y_test, y_pred)\n",
+ " print(\"Array \" + str(i) + \": score of \" + str(score))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 164,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9447619047619048"
+ ]
+ },
+ "execution_count": 164,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "np.mean(score_list) #Get the average score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 181,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.95333333333333348"
+ ]
+ },
+ "execution_count": 181,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "scores = cross_val_score(dt,x,y,cv=5) #Test it using cross validation function\n",
+ "np.mean(scores) #Compare score means"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/class9/homework/najmabadi_shannon_9_2.ipynb b/class9/homework/najmabadi_shannon_9_2.ipynb
new file mode 100644
index 0000000..d37d9e8
--- /dev/null
+++ b/class9/homework/najmabadi_shannon_9_2.ipynb
@@ -0,0 +1,211 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Assignment 2\n",
+ "\n",
+ "Using the readings, try and create a RandomForestClassifier for the iris dataset\n",
+ "Using a 25/75 training/test split, compare the results with the original decision tree model and describe the result to the best of your ability in your PR"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from sklearn import tree\n",
+ "from sklearn import metrics\n",
+ "from sklearn import datasets\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.cross_validation import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "iris = datasets.load_iris()\n",
+ "x = iris.data[:,2:]\n",
+ "y = iris.target"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
+ " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
+ " min_samples_leaf=1, min_samples_split=2,\n",
+ " min_weight_fraction_leaf=0.0, n_estimators=5, n_jobs=1,\n",
+ " oob_score=False, random_state=42, verbose=0, warm_start=False)"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25,train_size=0.75)\n",
+ "forest = RandomForestClassifier(n_estimators=5, random_state=42)\n",
+ "forest.fit(x_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training set accuracy: 1.000000\n",
+ "Testing set accuracy: 0.947368\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Training set accuracy: %f\" % forest.score(x_train, y_train))\n",
+ "print(\"Testing set accuracy: %f\" % forest.score(x_test, y_test))"
+ ]
+ },
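+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A possible apples-to-apples check (sketch, not executed here): refit both models on one shared split so their test accuracies are directly comparable; the original decision tree comparison follows below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Sketch (not executed): evaluate both models on the exact same split.\n",
+ "# random_state is fixed only so the sketch would be repeatable.\n",
+ "x_tr, x_te, y_tr, y_te = train_test_split(x, y, test_size=0.25, train_size=0.75, random_state=42)\n",
+ "dt_cmp = tree.DecisionTreeClassifier(random_state=42).fit(x_tr, y_tr)\n",
+ "rf_cmp = RandomForestClassifier(n_estimators=5, random_state=42).fit(x_tr, y_tr)\n",
+ "print(\"Decision tree test accuracy: %f\" % dt_cmp.score(x_te, y_te))\n",
+ "print(\"Random forest test accuracy: %f\" % rf_cmp.score(x_te, y_te))"
+ ]
+ },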
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.25, train_size=0.25)\n",
+ "dt = tree.DecisionTreeClassifier()\n",
+ "dt = dt.fit(x_train, y_train)\n",
+ "\n",
+ "def measure_performance(x,y,dt, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):\n",
+ " y_pred=dt.predict(x)\n",
+ " if show_accuracy:\n",
+ " print(\"Accuracy:{0:.3f}\".format(metrics.accuracy_score(y, y_pred)),\"\\n\")\n",
+ " if show_classification_report:\n",
+ " print(\"Classification report\")\n",
+ " print(metrics.classification_report(y,y_pred),\"\\n\")\n",
+ " if show_confussion_matrix:\n",
+ " print(\"Confusion matrix\")\n",
+ " print(metrics.confusion_matrix(y,y_pred),\"\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training set:\n",
+ "Accuracy:1.000 \n",
+ "\n",
+ "Classification report\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 12\n",
+ " 1 1.00 1.00 1.00 12\n",
+ " 2 1.00 1.00 1.00 13\n",
+ "\n",
+ "avg / total 1.00 1.00 1.00 37\n",
+ " \n",
+ "\n",
+ "Confusion matrix\n",
+ "[[12 0 0]\n",
+ " [ 0 12 0]\n",
+ " [ 0 0 13]] \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Training set:\") \n",
+ "measure_performance(x_train, y_train,dt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Testing set:\n",
+ "Accuracy:0.974 \n",
+ "\n",
+ "Classification report\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 13\n",
+ " 1 0.93 1.00 0.96 13\n",
+ " 2 1.00 0.92 0.96 12\n",
+ "\n",
+ "avg / total 0.98 0.97 0.97 38\n",
+ " \n",
+ "\n",
+ "Confusion matrix\n",
+ "[[13 0 0]\n",
+ " [ 0 13 0]\n",
+ " [ 0 1 11]] \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Testing set:\")\n",
+ "measure_performance(x_test,y_test,dt)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}