Lab 10 - Introduction to Recurrent Neural Networks¶

This lab will cover a few basic implementations of recurrent neural networks in torch, using an application where the data are the surnames of individuals from six different nationalities.

To begin, you'll need to import the following libraries:

In [1]:
import torch
import torchvision
import string

Next, the data used throughout the lab is stored in a zipped folder available here: https://remiller1450.github.io/data/surnames.zip

This folder contains 6 different text files, one per nationality, each containing a list of unique surnames observed among members of that nationality (one name per line). After modifying the root path, you may use the following code to load these text files into Python lists:

In [2]:
root = 'C:/Users/millerry/OneDrive - Grinnell College/Documents/surnames/'
Chinese = open(root+'Chinese.txt', encoding='utf-8').read().strip().split('\n')
Japanese = open(root+'Japanese.txt', encoding='utf-8').read().strip().split('\n')
Korean = open(root+'Korean.txt', encoding='utf-8').read().strip().split('\n')
English = open(root+'English.txt', encoding='utf-8').read().strip().split('\n')
Irish = open(root+'Irish.txt', encoding='utf-8').read().strip().split('\n')
Russian = open(root+'Russian.txt', encoding='utf-8').read().strip().split('\n')

Part 1 - Data Preparation¶

Recurrent neural networks are designed to work with sequential data, and our models throughout the lab will treat each character within a name as a sequential observation. This framework requires us to represent the individual characters in a name using one-hot vectors.

To facilitate this process, we'll start by defining a helper function that converts a single line of text (name) into a tensor that contains one-hot vectors representing each character.

In [3]:
## We'll consider all ascii letters plus basic punctuation
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

## Function to iterate through a line of text and encode each letter as a 1 x 57 vector in an nchar x 1 x 57 tensor
def nameToTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

The code below demonstrates the behavior of this function on a simple example, the line "Aa". Notice that the output is a tensor with dimensions [2, 1, 57].

In [4]:
## Demonstration using the test name "Aa"; notice "A" is encoded in the 27th position and "a" in the 1st position
example = nameToTensor('Aa')
print(example)

## Also notice the first dimension of the tensor is the number of characters in the name
print(example.size())
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]]])
torch.Size([2, 1, 57])

Question #1¶

  • Part A - What is represented by the first dimension of the tensor produced by our nameToTensor function? Will the size/length of this dimension change if a different input is provided?
  • Part B - What is represented by the third dimension of the tensor produced by our nameToTensor function? Will the size/length of this dimension change if a different input is provided?

Part 2 - Model Architecture¶

Next, we'll define a network architecture to model our sequential data. Notice that this architecture is flexible enough to handle inputs of different lengths (since each surname contains a different number of characters).

In [5]:
from torch import nn
class my_rnn(nn.Module):
    
    ## Constructor commands
    def __init__(self, input_size, hidden_size, output_size):
        super(my_rnn, self).__init__()

        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
    
    ## Function to generate predictions
    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(hidden)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

The architecture is best understood through its forward method. Here, the current input and hidden state are combined into a single tensor, named combined, by concatenating them along dim = 1. This combined tensor is used as the input to i2h, which produces the next hidden state. The updated hidden state is then used to produce an output, which is transformed via the log-softmax before it is returned.

Another thing to notice is that the size of the hidden state is the only piece of this network's architecture that we might consider manipulating. Increasing the hidden size will provide the model with more flexibility to learn sequential patterns that exist within the training sequences (names), but too much flexibility could lead to overfitting.
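As a quick illustration of the concatenation step inside forward, the minimal sketch below (using placeholder tensors, not part of the lab's required code) shows how a 1 x 57 character vector and a 1 x 100 hidden state combine into the 1 x 157 input expected by i2h:

In [ ]:
## Minimal sketch: concatenating a one-hot character with a hidden state along dim = 1
x = torch.zeros(1, n_letters)      ## 1 x 57 one-hot character
h = torch.zeros(1, 100)            ## 1 x 100 hidden state
combined = torch.cat((x, h), 1)
print(combined.size())             ## torch.Size([1, 157])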

To explore how this model works, let's initialize it with randomly generated weights and see what it outputs for an example name:

In [6]:
## Initialize model with random weights
rnn = my_rnn(n_letters, 100, 6)

## Format an example input name (Albert)
test_input = nameToTensor('Albert')

## Provide an initial hidden state (all zeros this time)
hidden = torch.zeros(1, 100)

## Generate output from the RNN (using only the first character of the name)
output, next_hidden = rnn(test_input[0], hidden)
print(output)
tensor([[-1.8977, -1.8209, -1.8361, -1.7668, -1.7667, -1.6767]],
       grad_fn=<LogSoftmaxBackward0>)
In [7]:
## Print the top category (predicted class)
output.topk(1)
Out[7]:
torch.return_types.topk(
values=tensor([[-1.6767]], grad_fn=<TopkBackward0>),
indices=tensor([[5]]))
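Before moving on, the sketch below (a minimal illustration using the untrained rnn and test_input defined above) unrolls the network across every character of the name, carrying the hidden state forward at each step. The output after the final character is the prediction for the whole name:

In [ ]:
## Minimal sketch: unroll the untrained RNN over all characters of "Albert"
hidden = rnn.initHidden()
for i in range(test_input.size()[0]):
    output, hidden = rnn(test_input[i], hidden)

print(output)          ## log-softmax scores after the final character
print(output.topk(1))  ## top predicted category (meaningless until the model is trained)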

Question #2¶

  • Part A - In the given example, my_rnn was initialized with a value of 6 for output_size. Where did this value come from? Is it something you can change when tuning the network's architecture? Briefly explain.
  • Part B - In the given example, my_rnn was initialized with a value of 100 for hidden_size. Where did this value come from? Is it something you can change when tuning the network's architecture? Briefly explain.

Part 3 - Training¶

In order to train the model, we'll define the category labels and a dictionary that links our lists of names to each label.

In [8]:
## List of categories
category_labels = ['Chinese', 'Japanese', 'Korean', 'English', 'Irish', 'Russian']

## Dictionary of categories and names
category_lines = {'Chinese': Chinese,
                 'Japanese': Japanese,
                 'Korean': Korean,
                 'English': English,
                 'Irish': Irish,
                 'Russian': Russian}

Next, we'll aim to train our network by feeding it randomly selected example names and updating the network's weights and biases using back-propagation.

The function defined below will help facilitate the selection of randomly chosen input names during model training:

In [9]:
## Function to randomly sample a single example
import random
def randomTrainingExample():
    ## Randomly choose a category (ie: Chinese, etc.)
    category = category_labels[random.randint(0, len(category_labels)-1)]
    
    ## Randomly choose a name in that category
    name = category_lines[category][random.randint(0, len(category_lines[category])-1)]
    
    ## Convert the chosen example to a tensor
    category_tensor = torch.tensor([category_labels.index(category)], dtype=torch.long)
    line_tensor = nameToTensor(name)
    
    return category, name, category_tensor, line_tensor

## Try it out
randomTrainingExample()
Out[9]:
('Korean',
 'Gwang ',
 tensor([2]),
 tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 1., 0., 0., 0., 0.]]]))

Next, we'll set up another function to update the network's parameters after encountering a randomly selected training example:

In [10]:
## Set learning rate
learning_rate = 0.005

## Define cost func
cost_fn = nn.CrossEntropyLoss()

## Training function for a single input (name category, name)
def train(category_tensor, line_tensor):
    
    ## initialize the hidden state
    hidden = rnn.initHidden()
    
    ## set the gradient to zero
    rnn.zero_grad()

    ## loop through the letters in the input, getting a prediction and new hidden state each time
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    ## Calculate cost and gradients
    cost = cost_fn(output, category_tensor)
    cost.backward()

    # Update parameters
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha = -learning_rate) ## Gradient descent step: subtract LR times the gradient from each parameter

    ## Return the output and cost
    return output, cost.item()

Question #3¶

The train function defined above contains a for loop that iterates through the first dimension of the input line tensor (which is the tensor storing the input name).

  • Part A - In specific terms, what is being input to the model at each iteration of this loop?
  • Part B - In specific terms, what is being output from the model at each iteration of this loop?
  • Part C - Notice how hidden is initialized (ie: reset) every time train is called on a new training example. What is the purpose of this step? Briefly explain.

Finally, we're ready to train our model. We'll do this by repeatedly providing a randomly chosen name to the train function and tracking the accumulated cost (per 25 iterations).

In [11]:
## Initializations
n_iters = 10000
cost_every_n = 25
current_cost = 0
track_cost = []

### Iteratively update model from randomly chosen example
for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, cost = train(category_tensor, line_tensor)
    current_cost += cost
    
    # Save cost every 25 iterations
    if iter % cost_every_n == 0:
        track_cost.append(current_cost/cost_every_n)
        current_cost = 0

Next, we'll graph the costs throughout the training process to see if our model has learned anything from our training examples:

In [12]:
import matplotlib.pyplot as plt
plt.plot(track_cost)
plt.show()

From this graph, we can see that the model has found some patterns within surnames. In the next section we'll explore these further by using the trained model to make predictions.

Part 4 - Using the Model¶

The RNN we built and trained is designed to predict the labels of input sequences of characters. This means that we can give the trained model any valid sequence of characters and it will predict the nationality it believes that name belongs to.

To see this in action, we'll create a predict function that returns the top N predicted labels (and their associated outputs) for a given input name:

In [13]:
def predict(input_line, n_predictions=4):
    print('\n> %s' % input_line)
    
    ## Don't update gradient with any of these examples
    with torch.no_grad():
        
        ## Initialize new hidden state
        hidden = rnn.initHidden()
        
        ## Convert input str to tensor
        input_t = nameToTensor(input_line)
 
        ## Pass each character into `rnn`
        for i in range(input_t.size()[0]):
            output, hidden = rnn(input_t[i], hidden)

        # Get top N categories from output
        topv, topi = output.topk(n_predictions, 1, True)
        predictions = []

        ## Go through the category predictions and save info for printing
        for i in range(n_predictions):
            value = topv[0][i].item()
            category_index = topi[0][i].item()
            print('(%.2f) %s' % (value, category_labels[category_index]))
            predictions.append([value, category_labels[category_index]])

## Try it out on a few examples:
predict('Dovesky')
predict('Miller')
predict('Satoshi')
predict('ABCDEFGHIJKLMNOP')
> Dovesky
(-0.47) Russian
(-1.11) English
(-3.20) Irish
(-5.11) Japanese

> Miller
(-0.34) English
(-1.93) Russian
(-2.00) Irish
(-5.77) Japanese

> Satoshi
(-0.25) Japanese
(-2.33) Russian
(-2.52) English
(-3.23) Irish

> ABCDEFGHIJKLMNOP
(-0.68) English
(-0.78) Russian
(-3.47) Irish
(-5.80) Japanese

Question #4¶

  • Try out this predict function on 1 or 2 names of your choosing. Include your code and output, and write 1-2 sentences reflecting upon whether you are satisfied or surprised by the results.

Part 5 - Creating a Generative RNN¶

This section provides a brief illustration of a simple generative RNN. The network will be trained using the surname data we've been working with, and it will be set up to generate a predicted name when given an initial character.

For our previous model, we prepared our data using one-hot encoding to represent each unique letter. This time, we'll expand the letter set slightly (adding the hyphen) and include an extra position that does not correspond to any letter to function as a "stop character", which signals the model to stop generating new characters:

In [20]:
n_categories = len(category_labels)
all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1

Next, we'll define the model's architecture, which is somewhat more complicated than in our previous example.

In [21]:
from torch import nn

class my_gen_rnn(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(my_gen_rnn, self).__init__()
        self.hidden_size = hidden_size

        self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

This model involves three key components that are explained below:

  • i2h takes a combined input tensor containing the category, the current input character, and the current hidden state, and outputs a new hidden state
  • i2o takes the same combined input as i2h, but it produces an intermediate output that will ultimately contribute to a new predicted character
  • o2o is an extra layer that takes the combined outputs of i2h and i2o to generate a predicted character.

The recurrent structure of the network can be more easily understood using the diagram below:

In [16]:
from IPython.display import HTML
HTML('<img src="https://i.imgur.com/jzVrf7f.png">')
Out[16]:

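As a quick shape check before training (a minimal sketch using a temporary, randomly initialized instance rather than the model we'll train below), a single forward step takes a 1 x 6 category tensor, a 1 x 59 character tensor, and a 1 x 128 hidden state, and returns a 1 x 59 output along with the next 1 x 128 hidden state:

In [ ]:
## Minimal sketch: one forward step through a temporary, untrained generative RNN
tmp_rnn = my_gen_rnn(n_letters, 128, n_letters)
cat_t = torch.zeros(1, n_categories)    ## 1 x 6 category tensor
char_t = torch.zeros(1, n_letters)      ## 1 x 59 character tensor
hid_t = tmp_rnn.initHidden()            ## 1 x 128 hidden state

out, next_hid = tmp_rnn(cat_t, char_t, hid_t)
print(out.size(), next_hid.size())      ## torch.Size([1, 59]) torch.Size([1, 128])
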
For each training example we'll need a set of input letters (the complete surname), a set of output letters (the surname offset by 1), and the category label (nationality).

For example, if the name is "Kasparov", the input letters would be a one-hot representation of the letters in "Kasparov", and the output letters would be the positions of the letters in "asparov" followed by the end-of-string marker "<EOS>" (the extra position we added to our set of letters). We'll also need a tensor to store the category label.

The functions defined below will create the input letters, output letters, and category tensor for a given name:

In [22]:
def inputTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

def outputTensor(line):
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1)  ## append the stop character index
    return torch.LongTensor(letter_indexes)

def categoryTensor(category):
    li = category_labels.index(category)
    tensor = torch.zeros(1, n_categories)
    tensor[0][li] = 1
    return tensor
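As a quick sanity check (a minimal sketch using the made-up name "Kim"), the target indices produced by outputTensor are the input characters shifted by one position, ending with the stop index n_letters - 1 = 58:

In [ ]:
## Minimal sketch: targets are the input shifted by one letter, ending in the stop index
print(inputTensor('Kim').size())   ## torch.Size([3, 1, 59])
print(outputTensor('Kim'))         ## tensor([ 8, 12, 58]) -- indices of 'i', 'm', then the stop character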

We will also define a couple of functions to help us select random examples during training:

In [23]:
# Random item from a list
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

# Get a random category and random line from that category
def randomTrainingPair():
    category = randomChoice(category_labels)
    line = randomChoice(category_lines[category])
    return category, line

# Make category, input, and target tensors from a random category, line pair
def randomTrainingExample():
    category, line = randomTrainingPair()
    category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line)
    target_line_tensor = outputTensor(line)
    return category_tensor, input_line_tensor, target_line_tensor

To get a basic understanding of these functions, we can display a random training example:

In [24]:
## Try it out
randomTrainingExample()
Out[24]:
(tensor([[1., 0., 0., 0., 0., 0.]]),
 tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0.]],
 
         [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0.]]]),
 tensor([20,  0, 13, 58]))

In order, these three tensor outputs are:

  1. A one-hot vector recording the name's category (nationality)
  2. A tensor that encodes the input name
  3. A tensor of integer labels for each character in the output name. This should always end in 58, which is the index we created to be the "stop character"

Next, we'll define a function that generates an output name from the network when given an initial character. We'll limit the generated name to at most 15 characters beyond the starting letter, and we'll use the network architecture we previously defined (with randomly initialized weights) to come up with the name.

In [25]:
max_length = 15
gen_rnn = my_gen_rnn(n_letters, 128, n_letters)

# Sample using a given category and starting letter
def sample(category, start_letter):
    
    ## We are just sampling, so we don't want to store info used in gradient calculations
    with torch.no_grad(): 
        category_tensor = categoryTensor(category)  ## create category tensor of input category
        input = inputTensor(start_letter)           ## initialize input tensor as an encoding of the start letter
        hidden = gen_rnn.initHidden()               ## reset the initial hidden state
        output_name = start_letter                  ## Use start letter as first piece of the output name
        
        ## Loop until reaching the max length or the stop character 
        for i in range(max_length):
            output, hidden = gen_rnn(category_tensor, input[0], hidden)  ## Get the next output and hidden state
            topv, topi = output.topk(1)                                  ## Identify the top predicted character's value and index position
            topi = topi[0][0]                                            ## Extract integer id of predicted char
            if topi == n_letters - 1:                                    ## Stop if it's the stop character's ID
                break
            else:
                letter = all_letters[topi]                               ## Convert integer id to the character
                output_name += letter                                    ## Add this character to the output 
            input = inputTensor(letter)                                  ## Prep this letter as the next input

        return output_name

We can see this function in action by providing a valid category label and initial character:

In [26]:
sample('English', 'B')
Out[26]:
'BRiPTPiPTPiPTPiP'

Question #5¶

  • Part A - The generated name doesn't appear to be an English surname. Is this something you'd expect to see for other input characters? Briefly explain.
  • Part B - Why do we want the function to exit the for loop when topi == n_letters - 1? Briefly explain.

Similar to our previous example, we'll create a function that we can use to help train our network:

In [27]:
cost_fn = nn.CrossEntropyLoss()
gen_rnn = my_gen_rnn(n_letters, 128, n_letters)
learning_rate = 0.001

def train(category_tensor, input_line_tensor, target_line_tensor):
    target_line_tensor.unsqueeze_(-1)
    hidden = gen_rnn.initHidden()

    gen_rnn.zero_grad()
    cost = 0

    for i in range(input_line_tensor.size(0)):
        output, hidden = gen_rnn(category_tensor, input_line_tensor[i], hidden)
        l = cost_fn(output, target_line_tensor[i])
        cost += l

    cost.backward()

    ## Gradient descent step: subtract LR times the gradient from each parameter
    for p in gen_rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)

    return output, cost.item() / input_line_tensor.size(0)

Next, we'll train the network using 10,000 randomly chosen training examples:

In [28]:
n_iters = 10000
cost_every_n = 25
current_cost = 0
track_cost = []

for iter in range(1, n_iters + 1):
    cat, il, ol = randomTrainingExample()
    if -1 in ol:                               ### Skip examples containing characters outside all_letters (find() returns -1)
        continue  
    output, cost = train(cat, il, ol)
    current_cost += cost
    
    # Save the cost every 25 iterations
    if iter % cost_every_n == 0:
        track_cost.append(current_cost/cost_every_n)
        current_cost = 0

As shown below, we can see that the network's parameters have reached a point where the cost is no longer improving:

In [29]:
plt.plot(track_cost)
plt.show()

At this point, we can use the sample function we created earlier to explore some of the names we can generate. The code below provides a template for looking at various names that are generated from a given test letter.

In [58]:
test_letter = 'M'
print('Korean:',sample('Korean', test_letter), 
      '\nJapanese:', sample('Japanese', test_letter),
      '\nChinese:', sample('Chinese', test_letter),
      '\nEnglish:', sample('English', test_letter),
      '\nIrish:', sample('Irish', test_letter),
      '\nRussian:', sample('Russian', test_letter))
Korean: Mon 
Japanese: Mana 
Chinese: Man 
English: Mane 
Irish: Manan 
Russian: Manakov

Notice that you can re-run the same commands several times and see slightly different results because the dropout layer used in the network's forward pass remains active while sampling.
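If you'd like reproducible samples, the minimal sketch below (optional, not part of the lab's required code) puts the network in evaluation mode, which disables the dropout layer, and then switches back to training mode afterward:

In [ ]:
## Minimal sketch: disable dropout for deterministic sampling, then re-enable it
gen_rnn.eval()                    ## evaluation mode turns off dropout
print(sample('Russian', test_letter))
gen_rnn.train()                   ## restore training mode (dropout active again)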

Question #6¶

  • To verify that you've tried training this model and using it to generate output, modify the print command given above to use a test_letter of your choice. Include your printed results, and provide 1-2 sentences commenting upon how you view the effectiveness of this model.

Acknowledgements: The contents of this lab were adapted from the following tutorials:

  • https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html
  • https://pytorch.org/tutorials/beginner/former_torchies/nnft_tutorial.html