Lab 7 - Support Vector Machines¶

In this lab you'll explore support vector machine (SVM) models in sklearn, paying particular attention to the impact of the kernel function, amount of regularization (slack), and weighting.

You'll need our standard set of libraries:

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import math

We'll again work with the Wisconsin Breast Cancer data set that was first introduced in Lab 4 when discussing classification performance metrics. Recall that the goal behind these data was to use predictive features derived from cell measurements to accurately classify malignant (cancerous) or benign (non-cancerous) tissue samples.

In [2]:
## Read the data
wbc = pd.read_csv("https://remiller1450.github.io/data/wisc_bc.csv")

## Train-test split
from sklearn.model_selection import train_test_split
train, test = train_test_split(wbc, test_size=0.2, random_state=7)

## Separate the target from the predictors and re-label the target
train_y = train['Label'].map({'M': 1, 'B': 0})
test_y = test['Label'].map({'M': 1, 'B': 0})
train_X = train.drop(['ID','Label'], axis = 1)[['Radius', 'Texture']]
test_X = test.drop(['ID','Label'], axis = 1)[['Radius', 'Texture']]

To allow us to visualize our classification models we'll focus on two features, Radius (the mean distance of the central point within each cell to its boundary) and Texture (the standard deviation of grayscale pixel values).

As shown in the scatter plot below, Radius is a strong predictor of malignancy, with larger cells being indicative of an increased likelihood that a tissue sample is malignant. Texture is a weaker predictor, but higher values of this feature also correspond with an increased likelihood of a sample being malignant:

In [3]:
## Scatter plot of radius vs. texture by label
plt.scatter(train_X['Radius'], train_X['Texture'], c = train_y)
plt.xlabel('Radius')
plt.ylabel('Texture')
plt.show()

A focal point of this lab will be understanding the role of different hyperparameters in SVM classifiers. Consequently, we'll use a couple of helper functions to more easily plot the prediction boundaries of SVM models:

In [4]:
## Function to make a grid spanning the min/max values of two variables
def make_grid(x1, x2, s=1):
    x1_min, x1_max = x1.min() - 1, x1.max() + 1  # The +/- 1 makes visualizations less tight
    x2_min, x2_max = x2.min() - 1, x2.max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, s), np.arange(x2_min, x2_max, s))
    return xx1, xx2

## Function to get predictions from a classifier over a grid and plot them as a filled contour
def plot_surface(ax, mod, xx1, xx2):
    Z = mod.predict(np.c_[xx1.ravel(), xx2.ravel()])
    Z = Z.reshape(xx1.shape)
    plot = ax.contourf(xx1, xx2, Z)
    return plot

Part 1 - Kernel Choice¶

The kernel function is the most important hyperparameter involved in developing an SVM classifier as it is used to determine the underlying shape of the model's decision boundary.

The SVC() function, which is used to train SVM classifiers, supports the following kernel functions:

  • linear - linear decision boundary
  • poly - polynomial kernel
  • rbf - radial basis function
  • sigmoid - sigmoid kernel

Some of these kernels have additional associated parameters. For example, degree with poly, and gamma with rbf, poly, and sigmoid.
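
For instance, here's a minimal sketch (not yet applied to our data) of how these kernels and their associated parameters might be specified when creating a classifier:

## Illustrative only -- specifying kernels and their associated parameters
from sklearn.svm import SVC
svm_poly = SVC(kernel='poly', degree=2)        # polynomial kernel of degree 2
svm_rbf = SVC(kernel='rbf', gamma=0.5)         # radial basis function kernel
svm_sig = SVC(kernel='sigmoid', gamma=0.5)     # sigmoid kernel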

Below is an example of a simple SVM with a linear decision boundary:

In [5]:
## Train a linear SVM w/ C=1 
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
train_XS = StandardScaler().fit_transform(train_X)
fitted_model = SVC(kernel='linear').fit(train_XS, train_y)

## Make a grid spanning our two predictors
xg1, xg2 = make_grid(train_XS[:,0], train_XS[:,1], s=0.05)

## Prediction boundary
fig, ax = plt.subplots()
plot_surface(ax, mod = fitted_model, xx1 = xg1, xx2 = xg2) # Show the surface
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)      # Add the data points on top of the surface
plt.show()

Recall that SVMs are sensitive to the scale of the data, so we included a standardization step prior to fitting the SVC() model.

The SVC() function provides most of the same methods and attributes as other classification models in sklearn, including the fit(), predict(), and predict_proba() methods. Note, however, that the SVM algorithm doesn't intrinsically compute probability estimates, so you must set probability=True for SVC() to produce predicted probabilities. Additionally, there are two attributes specific to SVMs that are worth being aware of:

  • support_ - indices of the support vectors
  • support_vectors_ - an array of the support vectors themselves
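
For example, here's a minimal sketch (reusing the standardized training data created above) of requesting predicted probabilities and inspecting these attributes:

## Illustrative sketch -- probability estimates require probability=True
prob_model = SVC(kernel='linear', probability=True).fit(train_XS, train_y)
print(prob_model.predict_proba(train_XS[:5]))  # predicted probabilities for the first 5 training points
print(prob_model.support_[:5])                 # indices of a few support vectors
print(prob_model.support_vectors_.shape)       # one row per support vector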

We can use these attributes to further understand the margin of a classifier:

In [6]:
## Show prediction surface, w/ support vectors plotted in blue
fig, ax = plt.subplots()
plot_surface(ax, mod = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
ax.scatter(fitted_model.support_vectors_[:,0], fitted_model.support_vectors_[:,1])
plt.show()

Note that a "hard margin" SVM would have support vectors that are exactly equidistant from the decision boundary, but these data are not perfectly separable using a linear kernel so we end up needing to use a "soft margin" SVM. Thus, the decision boundary is influenced by points within the margin and those which were incorrectly classified outside of the margin.
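
As a rough check on this idea, here's a minimal sketch (applicable only to the linear kernel, which exposes the coef_ attribute) that computes the margin's half-width as 1/||w|| and counts the support vectors:

## Illustrative sketch -- margin half-width and support vector count for the linear SVM above
w = fitted_model.coef_[0]                          # weights of the separating hyperplane
print('Margin half-width:', 1/np.linalg.norm(w))   # distance from the boundary to each margin edge
print('Number of support vectors:', len(fitted_model.support_))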

Question #1:

  • Part A: Modify the examples from this section to display the decision boundary of an SVM using a polynomial kernel function with a degree of 3 (the default). Based upon a visual assessment, does the added complexity of this kernel function seem worthwhile?
  • Part B: Now use a radial basis function kernel with the default value of gamma. Based upon a visual assessment, does this kernel seem to be more appropriate than the linear kernel? Does it seem to be better than the polynomial kernel?
  • Part C: Use a cross-validated grid search to assess the performance (classification accuracy) of the kernel functions you considered in Parts A and B as well as a linear kernel. Briefly comment upon how these kernel functions seem to compare to one another in terms of the bias-variance trade off.

Part 2 - Regularization (slack)¶

The hyperparameter C is a regularization parameter that controls the "error budget" or amount of slack that a soft margin SVM uses. More precisely, the sklearn documentation states:

The C parameter trades off correct classification of training examples against maximization of the decision function’s margin. For larger values of C, a smaller margin will be accepted if the decision function is better at classifying all training points correctly. A lower C will encourage a larger margin, therefore a simpler decision function, at the cost of training accuracy.

A more succinct practical description is given later on in the documentation:

A low C makes the decision surface smooth, while a high C aims at classifying all training examples correctly.

We can better understand the parameter by trying out a few examples:

In [7]:
## Example #1 - lower value of C
fitted_model = SVC(kernel='poly', C = 0.1).fit(train_XS, train_y)
fig, ax = plt.subplots()
plot_surface(ax, mod = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
plt.show()
In [8]:
## Example #2 - higher value of C
fitted_model = SVC(kernel='poly', C = 100).fit(train_XS, train_y)
fig, ax = plt.subplots()
plot_surface(ax, mod = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
plt.show()

We can see that the lower value of C leads to a smoother polynomial boundary that doesn't classify as many of the training data-points correctly, but might generalize better to new data.
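
One way to quantify this difference is to count the support vectors used by each model, since a lower C generally allows more points to fall inside the margin. A minimal sketch refitting the two models from above:

## Illustrative sketch -- number of support vectors at low and high values of C
for c in [0.1, 100]:
    m = SVC(kernel='poly', C=c).fit(train_XS, train_y)
    print('C =', c, '->', len(m.support_), 'support vectors')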

An important concept related to assessing different choices of C is whether to weight errors equally regardless of their class or to adjust for imbalanced classes. The argument class_weight = 'balanced' weights the error contribution of each data point inversely proportional to its class's frequency in the input data. This weights errors involving the minority class more heavily, leading to a decision boundary that is more strongly influenced by the minority class.
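
To see what "inversely proportional to the class frequencies" means in practice, here's a minimal sketch using sklearn's compute_class_weight() helper, which applies the same inverse-frequency formula, n_samples / (n_classes * count), described in the SVC() documentation:

## Illustrative sketch -- the per-class weights implied by class_weight='balanced'
from sklearn.utils.class_weight import compute_class_weight
weights = compute_class_weight('balanced', classes=np.array([0, 1]), y=train_y)
print(weights)  # the less frequent class receives the larger weight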

Question #2:

  • Part A: Using the rbf kernel function, perform a cross-validated grid search to determine the best value of C from the candidates [0.1,10,100,1000] and the best class weighting scheme from the options 'balanced' or None. Print the results of the grid search sorted by mean_test_score using classification accuracy as the scoring metric.
  • Part B: Plot the decision boundary of the best performing model using the examples given earlier in the lab (you might need to refit it rather than relying upon the best_estimator_).
  • Part C: Consider the value of C you selected in Part A and the visualization of this model's decision boundary in Part B. How do the bias and variance of this model compare with the rbf kernel SVM you visualized in Part B of Question #1?

Part 3 - Choosing gamma¶

For poly and rbf kernels the parameter gamma controls the influence that individual training samples exert on the shape of the decision boundary. The sklearn documentation states:

The gamma parameter can be seen as the inverse of the radius of influence of samples selected by the model as support vectors.

Thus, if gamma is very large, then each data-point will have a small area of influence, so the decision boundary will only be influenced by data-points that are very close to it, thereby leading to overfitting. Conversely, if gamma is too small, then each data-point will have a large area of influence and the decision boundary will be influenced by all or most of the training set, leading to a model that is overly smooth and will struggle to capture complexity.
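
To make the "radius of influence" idea concrete, here's a minimal sketch that evaluates the rbf kernel, exp(-gamma * d^2), for two points a fixed distance apart; larger values of gamma make the similarity (and hence a support vector's influence) decay much faster with distance:

## Illustrative sketch -- rbf similarity between two points 2 units apart
dist = 2.0
for g in [0.01, 0.1, 1, 10]:
    print('gamma =', g, '-> similarity =', math.exp(-g * dist**2))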

Let's look at the impact of a couple of different values of gamma on a polynomial kernel SVM:

In [10]:
## Example #1 - lower value of gamma
fitted_model = SVC(kernel='poly', gamma = 0.1).fit(train_XS, train_y)
fig, ax = plt.subplots()
plot_surface(ax, mod = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
plt.show()
In [11]:
## Example #2 - higher value of gamma
fitted_model = SVC(kernel='poly', gamma = 10).fit(train_XS, train_y)
fig, ax = plt.subplots()
plot_surface(ax, mod = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
plt.show()

As you can see, when gamma is larger the decision boundary looks more irregular, while when gamma is smaller the decision boundary is much smoother due to it being influenced by a larger share of the training data.
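
A quick way to see the overfitting tendency is to compare training accuracy at the two values of gamma used above; the more flexible boundary will typically fit the training data more closely. A minimal sketch:

## Illustrative sketch -- training accuracy at the two gamma values shown above
for g in [0.1, 10]:
    m = SVC(kernel='poly', gamma=g).fit(train_XS, train_y)
    print('gamma =', g, '-> training accuracy =', m.score(train_XS, train_y))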

Question #3:

  • Part A: Using the rbf kernel function, perform a cross-validated grid search to determine the optimal combination of C and gamma. Consider the values [0.1,10,100,1000] for C and [0.01, 0.1, 1, 10] for gamma.
  • Part B: Plot the decision boundary of the best fitting model from Part A (again you might need to refit this model rather than using best_estimator_). How do the bias and variance of this model compare with the best model you identified and explored in Parts B and C of Question #2?
  • Part C: Report the classification accuracy of the best model from Part A on the test set.

Part 4 - Support Vector Regression¶

While SVMs were developed for classification tasks, they fundamentally operate by finding a prediction boundary determined by a subset of the training data (the support vectors), with points outside of a certain margin not contributing to that boundary. These same principles can be applied to regression tasks.

Below is a simple illustration of support vector regression using the variable area.living and the outcome sale.amount in the Iowa City Homes data set:

In [12]:
## Read IC home sales data and split into training/test sets
ic = pd.read_csv("https://remiller1450.github.io/data/IowaCityHomeSales.csv")
train_ic, test_ic = train_test_split(ic, test_size=0.2, random_state=7)

## Create X and y
train_ic_y = train_ic['sale.amount']
train_ic_X = StandardScaler().fit_transform(train_ic[['area.living']])

## Train an SVR model
from sklearn.svm import SVR
ic_reg = SVR(kernel = 'linear', C=100000).fit(train_ic_X, train_ic_y)

## Grid of values used to generate the prediction line
xx = np.linspace(np.min(train_ic_X), np.max(train_ic_X), 100)
yy = ic_reg.predict(xx.reshape(-1, 1))

## Scatterplot w/ prediction line
plt.scatter(train_ic_X, train_ic_y)
plt.plot(xx, yy)
plt.show()

You should notice that I used a very large value of C in this example, so very little regularization was applied and the model looks similar to ordinary least squares regression (though the slope is still a bit shallower than it might otherwise be).
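
As a rough check on that claim, here's a minimal sketch comparing the fitted SVR slope against an ordinary least squares slope on the same standardized feature:

## Illustrative sketch -- compare the linear SVR slope with an OLS slope
from sklearn.linear_model import LinearRegression
ols = LinearRegression().fit(train_ic_X, train_ic_y)
print('SVR slope:', ic_reg.coef_[0][0])
print('OLS slope:', ols.coef_[0])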

A nice feature of SVR() is that you can easily fit non-linear models by changing the kernel function. Recall that with LinearRegression() you'd have to set up a feature expansion transformation to do this.

In [13]:
## Polynomial model
ic_reg = SVR(kernel = 'poly', C=100000).fit(train_ic_X, train_ic_y)
yy = ic_reg.predict(xx.reshape(-1, 1))  # Re-compute predictions using the polynomial model
plt.scatter(train_ic_X, train_ic_y)
plt.plot(xx, yy)
plt.show()
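
For comparison, here's a minimal sketch of the feature expansion approach mentioned above, combining PolynomialFeatures with LinearRegression in a pipeline (degree 3 to match the default degree of the poly kernel):

## Illustrative sketch -- the feature expansion alternative using LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = make_pipeline(PolynomialFeatures(degree=3), LinearRegression()).fit(train_ic_X, train_ic_y)
plt.scatter(train_ic_X, train_ic_y)
plt.plot(xx, poly_reg.predict(xx.reshape(-1, 1)))
plt.show()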

Question #4:

  • Part A: Using the rbf kernel function, try a few values of C until you find a model that you consider to have unacceptably high bias. Display this model on a scatter plot similar to the ones shown in this section of the lab.
  • Part B: For the value of C you identified in Part A, try modifying the gamma parameter to reduce the model's bias. Briefly comment upon whether you are able to obtain a reasonable fit to the training data by manipulating the gamma parameter when the amount of regularization is high.
  • Part C: Now find a value of C that you consider to have unacceptably high variance. Display this model on a scatter plot.
  • Part D: For the value of C you identified in Part C, try modifying the gamma parameter to reduce the model's variance. Briefly comment upon whether you are able to obtain a reasonable fit to the training data.
  • Part E: Perform a cross-validated grid search using the values of C you identified in Parts A and C as well as 3 values that fall in between them. You may use the default scoring metric. Display the best fitting model on a scatter plot.