Lab 8 - Support Vector Machines¶

This lab covers the implementation of support vector machines (SVM) in sklearn, paying particular attention to the choice of kernel function, the degree of regularization (or "slack"), and the importance given to individual data-points when using non-linear kernel functions (gamma).

In [1]:
## Standard Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

# turn off future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)  

We'll continue working with the Wisconsin breast cancer classification data set introduced in a few of our previous labs. As a reminder, the predictive features were derived from cell measurements for malignant and benign tissue samples.

In [2]:
## Read the data
wbc = pd.read_csv("https://remiller1450.github.io/data/wisc_bc.csv")

## Train-test split
from sklearn.model_selection import train_test_split
train, test = train_test_split(wbc, test_size=0.2, random_state=7)

## Separate the target from the predictors and re-label the target
train_y = train['Label'].map({'M': 1, 'B': 0})
test_y = test['Label'].map({'M': 1, 'B': 0})
train_X = train.drop(['ID','Label'], axis = 1)[['Radius', 'Texture']]
test_X = test.drop(['ID','Label'], axis = 1)[['Radius', 'Texture']]

To better understand how SVMs learn decision boundaries, we'll mostly work with just two of the available predictors: 'Radius' and 'Texture'.

Shown below is a scatter plot of these two features, colored by the outcome:

In [3]:
## Scatter plot of radius vs. texture by label
plt.scatter(train_X['Radius'], train_X['Texture'], c = train_y)
plt.show()

A substantial portion of this lab is devoted to visually understanding how different tuning parameters affect the behavior of SVM classifiers, so we will now define a couple of helper functions to plot the prediction boundaries of a classifier more efficiently:

In [4]:
## Function to create a grid spanning the min/max values of two variables
def make_grid(x1, x2, s=1):
    x1_min, x1_max = x1.min() - 1, x1.max() + 1  # The +/- will make visualizations look nicer
    x2_min, x2_max = x2.min() - 1, x2.max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, s), np.arange(x2_min, x2_max, s))
    return xx1, xx2

## Function to plot a classifier's predictions over a grid of x1, x2 values as a filled contour
def plot_surface(ax, clf, xx1, xx2, **params):
    Z = clf.predict(np.c_[xx1.ravel(), xx2.ravel()])
    Z = Z.reshape(xx1.shape)
    plot = ax.contourf(xx1, xx2, Z, **params)
    return plot

Part 1 - Choice of kernel function¶

The kernel function is generally the most important tuning parameter involved in support vector machines, as it determines the underlying shape of the model's decision boundaries. Recall that the kernel function provides a mapping of the original features into a higher dimensional space where linear separation can be found more easily. When this boundary is expressed in the original feature space it will be non-linear.

The SVC() class in sklearn is used to train SVM classifiers, and it supports linear, polynomial, radial basis function, and sigmoid kernels. Shown below is a simple example that trains a linear SVM and visualizes its prediction boundary:

In [5]:
## Train a linear SVM w/ C=1 
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
train_XS = StandardScaler().fit_transform(train_X)
fitted_model = SVC(kernel='linear').fit(train_XS, train_y)

## Make a grid spanning our two predictors
xg1, xg2 = make_grid(train_XS[:,0], train_XS[:,1], s=0.05)

## Show prediction surface
fig, ax = plt.subplots()
plot_surface(ax, clf = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
plt.show()

You should note that SVMs are sensitive to the scale of our data, so we standardized the data prior to fitting.

Additionally, you should recognize that the core methods of SVC() are consistent with other models in sklearn. That is, we'll primarily use the fit(), predict(), and predict_proba() methods to train and evaluate SVM classifiers and involve them in pipelines.
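For reference, here is a minimal sketch (not part of the original lab) of these methods in action. One detail worth knowing is that SVC() only provides predict_proba() when the model is created with probability=True, which calibrates probabilities using an internal cross-validation step and therefore slows training somewhat.

## A minimal sketch (not part of the original lab) of the core SVC() methods
## Note: predict_proba() requires the model to be created with probability=True
sketch_model = SVC(kernel='linear', probability=True).fit(train_XS, train_y)
print(sketch_model.predict(train_XS[:5]))        # predicted class labels
print(sketch_model.predict_proba(train_XS[:5]))  # estimated class probabilities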

That said, there are a couple of attributes unique to SVC() that are worth knowing about:

  • support_ - index positions of the support vectors
  • support_vectors_ - an array of the support vectors themselves

These attributes can help us understand the margin of a classifier:

In [6]:
## Show prediction surface, w/ support vectors plotted in blue
fig, ax = plt.subplots()
plot_surface(ax, clf = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
ax.scatter(fitted_model.support_vectors_[:,0], fitted_model.support_vectors_[:,1])
plt.show()

You should note that a "hard margin" SVM would have support vectors that are exactly equidistant from the decision boundary.

However, these data are not perfectly separable and we are using a "soft margin" SVM. Thus, points on the edge of the margin, points inside the margin, and misclassified points on the wrong side of the decision boundary all help determine the decision boundary and are considered support vectors.
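If you'd like to inspect the support vectors directly, here is a small sketch (not part of the original lab) that uses the attributes listed above, along with n_support_, which reports the number of support vectors belonging to each class:

## A small sketch (not part of the original lab) inspecting the support vectors of the fitted linear SVM
print(fitted_model.support_[:10])           # index positions of the first few support vectors
print(fitted_model.n_support_)              # number of support vectors in each class
print(fitted_model.support_vectors_.shape)  # the support vectors themselves (rows of the standardized training data)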

Question 1:

  • Part A: Modify the example given above to use a polynomial kernel function with a degree of 3 (the default). Qualitatively, does it seem like the added complexity of this kernel function (relative to a linear kernel function) is worthwhile? Briefly explain.
  • Part B: Modify the example given above again to use a radial basis function (RBF) kernel. Qualitatively, how does this kernel function compare to the polynomial and linear kernel functions in terms of effectiveness?
  • Part C: Create a pipeline that performs standardization followed by the fitting of a support vector machine classifier. Then, use this pipeline to conduct a cross-validated grid search to determine whether an RBF, polynomial, or linear kernel yields the best cross-validated classification accuracy. You should hold the regularization (or "slack") parameter "C" constant at 1 during your search.
  • Part D: Create a visualization of the best model found in Part C that displays its support vectors.

Part 2 - Regularization or "slack"¶

The tuning parameter C is a regularization parameter that controls the "error budget" or the amount of "slack" that is given to a soft-margin classifier. According to the sklearn documentation:

The C parameter trades off correct classification of training examples against maximization of the decision function’s margin. For larger values of C, a smaller margin will be accepted if the decision function is better at classifying all training points correctly. A lower C will encourage a larger margin, therefore a simpler decision function, at the cost of training accuracy.

We can better understand the importance of C by comparing the decision boundaries corresponding to a couple of different choices of C using a polynomial kernel:

In [7]:
## Train a poly SVM w/ C=0.5 
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
train_XS = StandardScaler().fit_transform(train_X)
fitted_model = SVC(kernel='poly', degree=3, C=0.5).fit(train_XS, train_y)

## Make a grid spanning our two predictors
xg1, xg2 = make_grid(train_XS[:,0], train_XS[:,1], s=0.05)

## Show prediction surface
fig, ax = plt.subplots()
plot_surface(ax, clf = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
ax.scatter(fitted_model.support_vectors_[:,0], fitted_model.support_vectors_[:,1])
plt.title("C=0.5")

## Train a poly SVM w/ C=1000
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
train_XS = StandardScaler().fit_transform(train_X)
fitted_model = SVC(kernel='poly', degree=3, C=1000).fit(train_XS, train_y)

## Make a grid spanning our two predictors
xg1, xg2 = make_grid(train_XS[:,0], train_XS[:,1], s=0.05)

## Show prediction surface
fig, ax = plt.subplots()
plot_surface(ax, clf = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
ax.scatter(fitted_model.support_vectors_[:,0], fitted_model.support_vectors_[:,1])
plt.title("C=1000")
plt.show()

Here we see that C=1000 provides very little regularization, thereby leading to a more irregular decision boundary that does well on the training data but might not generalize very well to new data.
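To check this intuition numerically, here is a rough sketch (not part of the original lab) comparing training and test accuracy for these two choices of C; it assumes the test predictors are transformed using a scaler fit to the training data:

## A rough sketch (not part of the original lab) comparing training vs. test accuracy for two choices of C
scaler = StandardScaler().fit(train_X)
for c in [0.5, 1000]:
    m = SVC(kernel='poly', degree=3, C=c).fit(scaler.transform(train_X), train_y)
    print("C =", c,
          "| train accuracy:", round(m.score(scaler.transform(train_X), train_y), 3),
          "| test accuracy:", round(m.score(scaler.transform(test_X), test_y), 3))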

One important thing to note is that, by default, classification errors in each class are penalized equally, which can be undesirable for data with substantial class imbalances. This can be changed using the argument class_weight = 'balanced'. The diagram below illustrates the impact of balancing by class weight for data with a highly imbalanced class distribution:

In [8]:
## Comparison of weighted vs. non-weighted with imbalanced classes
from IPython.display import Image
Image("C:\\Users\\millerry\\OneDrive - Grinnell College\\Documents\\STA-395_Intro_ML\\Spring24\\Labs\\class_weight.PNG")
Out[8]:
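Although the breast cancer training data are not this badly imbalanced, here is a brief sketch (not part of the original lab) showing how the class_weight argument is specified:

## A brief sketch (not part of the original lab) of how class weights are specified
weighted_model = SVC(kernel='poly', degree=3, C=0.5, class_weight='balanced').fit(train_XS, train_y)
print(weighted_model.n_support_)  # number of support vectors in each class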

Question 2:

  • Part A: Using a polynomial kernel function, perform a cross-validated grid search to find the optimal values of the tuning parameters C and class_weight. Consider the values [0.1, 1, 100, 10000] for C, and either 'balanced' or None for class_weight.
  • Part B: Visualize the prediction surface and decision boundary of the best model from your search in Part A, displaying the data and highlighting the support vectors in a manner similar to the examples in Part 1 of the lab.

Part 3 - Choice of gamma¶

For the polynomial and radial basis function kernels, the parameter gamma controls how much influence individual data-points exert on the shape of the decision boundary. When gamma is larger, the boundary is free to bend around individual data-points; when gamma is smaller, each point's influence is spread more broadly, yielding a smoother boundary.

While their roles might sound similar, gamma is fundamentally different from C, which governs the width of the classifier's margin via how much error is deemed acceptable. We can see the effects of different values of gamma in our polynomial kernel SVM with C=2:

In [9]:
## Train a poly SVM w/ gamma = 10
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
train_XS = StandardScaler().fit_transform(train_X)
fitted_model = SVC(kernel='poly', C=2, gamma = 10).fit(train_XS, train_y)

## Make a grid spanning our two predictors
xg1, xg2 = make_grid(train_XS[:,0], train_XS[:,1], s=0.05)

## Show prediction surface
fig, ax = plt.subplots()
plot_surface(ax, clf = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
plt.title("gamma = 10")

## Train a poly SVM w/ gamma = 0.1
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
train_XS = StandardScaler().fit_transform(train_X)
fitted_model = SVC(kernel='poly', C=2, gamma = 0.1).fit(train_XS, train_y)

## Make a grid spanning our two predictors
xg1, xg2 = make_grid(train_XS[:,0], train_XS[:,1], s=0.05)

## Show prediction surface
fig, ax = plt.subplots()
plot_surface(ax, clf = fitted_model, xx1 = xg1, xx2 = xg2)
ax.scatter(train_XS[:,0], train_XS[:,1], c = train_y)
plt.title("gamma = 0.1")
plt.show()

For larger gamma our polynomial-kernel boundary looks more irregular because it bends to accommodate individual data-points. For smaller gamma we see the opposite; the boundary appears much smoother.

For additional insight into the roles of the gamma and C parameters, I strongly encourage you to read this sklearn documentation page.

Question 3:

  • Part A: Starting with your best model from Question 2, try a few different values of gamma and visualize their decision boundaries. As your answer to this question, include a visualization of one of the values of gamma you found to produce a reasonable boundary.
  • Part B: Starting with your best model from Question 2 and using the insights you gained in Part A, tune gamma using a cross-validated grid search involving at least 4 sensibly chosen values.
  • Part C: Use a cross-validated grid search to identify a reasonable KNN model for these data (i.e., using just the two predictors of the tissue label that we've been working with). Record the tuning parameters of this model.
  • Part D: Use a cross-validated grid search to identify a reasonable decision tree model for these data. Record the tuning parameters of this model.
  • Part E: Compare the classification accuracy of the models in Parts B, C, and D on the test set. Which method seemed to perform best?

Part 4 - Support Vector Regression¶

A fundamental aspect of SVMs is that the decision boundary is determined by a subset of data-points, the support vectors. Data-points that are beyond the margin do not contribute to the model. This idea can be adapted to regression tasks by constructing a cost function that ignores data-points whose predictions are sufficiently close to their observed values.

Below is a simple illustration of support vector regression using the variable 'area.living' in the Iowa City home sales data to predict 'sale.amount':

In [10]:
## Read IC home sales data and split into training/test sets
ic = pd.read_csv("https://remiller1450.github.io/data/IowaCityHomeSales.csv")
train_ic, test_ic = train_test_split(ic, test_size=0.2, random_state=7)

## Create X and y
train_ic_y = train_ic['sale.amount']
train_ic_X = StandardScaler().fit_transform(train_ic[['area.living']])

## Train an SVR model
from sklearn.svm import SVR
ic_reg = SVR(kernel = 'linear', C=100000).fit(train_ic_X, train_ic_y)

## Grid of values used to generate the prediction line
xx = np.linspace(np.min(train_ic_X), np.max(train_ic_X), 100)
yy = ic_reg.predict(xx.reshape(-1, 1))

## Scatterplot w/ prediction line
plt.scatter(train_ic_X, train_ic_y)
plt.plot(xx, yy)
plt.show()

Because the amount of regularization applied is inversely proportional to the value of C, we'd need to use an extremely large value of C to produce a model resembling ordinary least squares regression. In this example you can see that even with C=100000 the line is "shrunken" to have a slope closer to zero.
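To see how much shrinkage is occurring, here is a quick sketch (not part of the original lab) comparing the SVR slope to an ordinary least squares fit on the same data; the coef_ attribute is available here because we used a linear kernel:

## A quick sketch (not part of the original lab) comparing the SVR slope to ordinary least squares
from sklearn.linear_model import LinearRegression
ols_reg = LinearRegression().fit(train_ic_X, train_ic_y)
print("OLS slope:", ols_reg.coef_[0])    # slope from ordinary least squares
print("SVR slope:", ic_reg.coef_[0][0])  # slope from the linear-kernel SVR fit above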

A nice feature of SVR() is that you can easily fit non-linear models by changing the kernel function:

In [11]:
## Non-linear kernel (RBF)
ic_reg = SVR(kernel = 'rbf', C=100000).fit(train_ic_X, train_ic_y)

## Grid of values used to generate the prediction line
xx = np.linspace(np.min(train_ic_X), np.max(train_ic_X), 100)
yy = ic_reg.predict(xx.reshape(-1, 1))

## Scatterplot w/ prediction line
plt.scatter(train_ic_X, train_ic_y)
plt.plot(xx, yy)
plt.show()
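Returning to the idea that SVR ignores data-points whose predictions fall close to their observed values: in sklearn, the width of this insensitive region is controlled by the epsilon argument of SVR(). Here is a short sketch (not part of the original lab) showing that as epsilon grows, more points fall inside the insensitive region and fewer points remain support vectors. Because 'sale.amount' is measured in dollars, epsilon needs to be on a dollar scale for the effect to be noticeable.

## A short sketch (not part of the original lab) of the effect of epsilon on the number of support vectors
for eps in [1000, 10000, 50000]:
    m = SVR(kernel='linear', C=100000, epsilon=eps).fit(train_ic_X, train_ic_y)
    print("epsilon =", eps, "-> number of support vectors:", len(m.support_))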

Question 4:

  • Part A: Modify the amount of regularization, or "slack", in the 'rbf' kernel SVR example shown above (In [11]). Include an example of a model you'd consider to have unacceptably high variance and a model you'd consider to have unacceptably high bias. Clearly label each of these models.
  • Part B: Explore a few values of gamma in the model you deemed "high bias" in part A. Are you able to use gamma to better balance the bias-variance trade-off for this choice of C? Briefly explain, using an example to justify your explanation.

Part 5 - Comparing many models¶

In this section you'll use the entire set of predictors in the Wisconsin breast cancer data set instead of just 'Radius' and 'Texture'. Your goal will be to find the best performing classifier and associated data preparation steps when considering three different classifiers: KNN, decision tree, and SVM.

In [12]:
## Overwrite train/test X to now include all predictors
train_X = train.drop(['ID','Label'], axis = 1)
test_X = test.drop(['ID','Label'], axis = 1)

Question 5:

  • Part A: Use the function pd.plotting.scatter_matrix() to create a scatter plot matrix showing the relationship between every pair of predictors in the training data. Use the argument c = train_y to color the points by the target variable. Use what you see in this plot to help you determine reasonable modeling steps in Parts B, C, and D.
  • Part B: Use a cross-validated grid search with accuracy as the scoring metric to determine a reasonable machine learning pipeline that uses a KNN classifier as the final step. You might consider standardization, normalization, dimension reduction, and KNN tuning parameters in your search. Report the best estimator and its performance.
  • Part C: Use a cross-validated grid search with accuracy as the scoring metric to determine a reasonable machine learning pipeline that uses a decision tree classifier as the final step. You should consider data preparation steps and tuning parameters in your search. Report the best estimator and its performance.
  • Part D: Use a cross-validated grid search with accuracy as the scoring metric to determine a reasonable machine learning pipeline that uses an SVM classifier as the final step. You should consider data preparation steps and tuning parameters in your search. Report the best estimator and its performance.
  • Part E: Now conduct a final cross-validated grid search that compares the three approaches you identified in Parts B, C, and D. Report the best performing method and its cross-validated classification accuracy.
  • Part F: Create an ROC curve displaying the performance of the best model identified in Part E using its predictions on the test data. Report the AUC and comment upon the effectiveness of this classifier.