Introduction to PyTorch - Part 4 (recurrent neural networks)¶

This lab is optional and is intended to expose you to a few basic implementations of recurrent neural networks in PyTorch.

The sequential data used throughout this lab are surnames (last names) sampled from individuals of six different nationalities.

To begin, you'll need to import the following libraries:

In [1]:
import torch
import torchvision
import string

Next, you'll need to download and extract the data in the zipped folder below:

  • https://remiller1450.github.io/data/surnames.zip

This folder contains 6 different text files, one per nationality, that each contain the unique surnames observed among the members of that nationality (with one name per line).

You should be able to modify the root path and use the code given below to load these text files into Python lists:

In [2]:
root = 'C:/Users/millerry/OneDrive - Grinnell College/Documents/surnames/'
Chinese = open(root+'Chinese.txt', encoding='utf-8').read().strip().split('\n')
Japanese = open(root+'Japanese.txt', encoding='utf-8').read().strip().split('\n')
Korean = open(root+'Korean.txt', encoding='utf-8').read().strip().split('\n')
English = open(root+'English.txt', encoding='utf-8').read().strip().split('\n')
Irish = open(root+'Irish.txt', encoding='utf-8').read().strip().split('\n')
Russian = open(root+'Russian.txt', encoding='utf-8').read().strip().split('\n')
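
Alternatively, if you'd rather not repeat the open() call for each file, the short sketch below loads the same six files in a loop (the names nationalities and names_by_nationality are just illustrative helpers and are not used later in the lab):

## Optional sketch: load all six files in a loop
nationalities = ['Chinese', 'Japanese', 'Korean', 'English', 'Irish', 'Russian']
names_by_nationality = {}
for nat in nationalities:
    with open(root + nat + '.txt', encoding='utf-8') as f:
        names_by_nationality[nat] = f.read().strip().split('\n')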

Part 1 - Data Preparation¶

Recurrent neural networks are designed to work with sequential data, and our models throughout the lab will consider each character in a name as a sequential observation. This framework requires us to represent the individual characters in a name using one-hot vectors.

To facilitate this process, we'll start by defining a helper function that converts a single line of text (ie: a name) into a tensor comprised of one-hot vectors representing each character.

In [3]:
## We'll consider all ascii letters plus basic punctuation
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

## Function to iterate through a line of text and encode each letter as a 1 x 57 vector in an nchar x 1 x 57 tensor
def nameToTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

The code below demonstrates the behavior of this function on a simple example, the line "Aa". Notice how the output is a tensor with dimensions [2, 1, 57].

In [4]:
## Demonstration of the test name "Aa", notice the "A" is encoded as the 27th position, and "a" is the 1st position
example = nameToTensor('Aa')
print(example)

## Notice the dimensions of the output and where 1's appear
print(example.size())
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]]])
torch.Size([2, 1, 57])

Because data preparation is a crucial step in using models like RNNs, the question below will check your understanding of the tensors produced by nameToTensor():

Question 1:

  • Part A - What is represented by the first dimension of the tensor produced by our nameToTensor() function? Will the size/length of this dimension change if a different input text is used?
  • Part B - What is represented by the third dimension of the tensor produced by our nameToTensor() function? Will the size/length of this dimension change if a different input text is provided?

Part 2 - Model Architecture¶

Next, we'll define a simple network architecture to model our sequential data. You should notice that this architecture is flexible enough to handle inputs of different lengths (since each surname may contain a different number of characters).

In [5]:
from torch import nn
class my_rnn(nn.Module):
    
    ## Constructor commands
    def __init__(self, input_size, hidden_size, output_size):
        super(my_rnn, self).__init__()

        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
    
    ## Function to generate predictions
    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(hidden)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

To understand this architecture, it's easiest to look at the forward method:

  • First, the current input and hidden state are combined into a single tensor, named combined, by concatenating them along dim = 1.
  • Next, this combined tensor is used as the input to i2h, which produces the next hidden state.
  • This updated hidden state is passed to i2o which produces an output.
  • Finally, the output is transformed via softmax before it and the hidden state are returned.

In this simple architecture the only thing we might consider manipulating is the size of the hidden state (since the input size is out of our control). Increasing the hidden size will provide the model with more flexibility to learn sequential patterns that exist within the training sequences (names), but too much flexibility could lead to overfitting.
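
To get a rough sense of how hidden_size affects model capacity, the optional sketch below counts the trainable parameters for two different hidden sizes using the my_rnn class defined above (the sizes 50 and 200 are arbitrary choices, not values used later in the lab):

## Optional sketch: compare parameter counts for two arbitrary hidden sizes
small = my_rnn(n_letters, 50, 6)
large = my_rnn(n_letters, 200, 6)
print(sum(p.numel() for p in small.parameters()))   ## fewer parameters -> less flexibility
print(sum(p.numel() for p in large.parameters()))   ## more parameters -> more flexibility, more risk of overfitting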

To explore how this model works, let's initialize it with randomly generated weights and see what it outputs for an example name:

In [6]:
## Initialize model with random weights
h_size = 100
rnn = my_rnn(n_letters, h_size, 6)

## Format an example input name (Albert)
test_input = nameToTensor('Albert')

## Provide an initial hidden state (all zeros as an example)
hidden = torch.zeros(1, h_size)

## Generate output from the RNN
output, next_hidden = rnn(test_input[0], hidden)
print(output)
print(next_hidden)
tensor([[-1.7671, -1.7803, -1.8638, -1.7610, -1.7642, -1.8184]],
       grad_fn=<LogSoftmaxBackward0>)
tensor([[ 0.0279,  0.0997,  0.0669, -0.0740,  0.0734, -0.0247,  0.0918, -0.0393,
         -0.0519, -0.0655,  0.0390,  0.0533,  0.0768, -0.0368,  0.1451, -0.1040,
          0.0086, -0.0298, -0.0467,  0.0262, -0.0342,  0.1191,  0.0169, -0.0956,
         -0.0450, -0.0689, -0.1303, -0.0004, -0.0771,  0.0377, -0.0166,  0.0728,
          0.1355, -0.0284,  0.0032,  0.0716, -0.0305, -0.0089,  0.0056,  0.0286,
          0.1051, -0.0363,  0.0083,  0.0122, -0.0552,  0.0127, -0.1028,  0.0086,
         -0.0755, -0.0582, -0.0622, -0.0276,  0.0176,  0.0867, -0.0195,  0.0708,
          0.1206, -0.0892,  0.1336, -0.0294,  0.1311, -0.0191, -0.1166,  0.0508,
         -0.0424,  0.1239,  0.0770, -0.0385, -0.0518,  0.0393, -0.0180, -0.0700,
          0.0412, -0.0844,  0.0238,  0.0569,  0.0029, -0.0004,  0.0342,  0.0089,
         -0.0638,  0.0079,  0.0153,  0.1120,  0.0331, -0.0466,  0.1128,  0.1140,
         -0.1246,  0.0625, -0.0129,  0.0414,  0.1342, -0.0621, -0.1112, -0.0609,
         -0.0932,  0.0053, -0.0452,  0.0416]], grad_fn=<AddmmBackward0>)

For illustration purposes, we can look at the predicted class of this output:

In [7]:
## Print the top category (predicted class)
output.topk(1)
Out[7]:
torch.return_types.topk(
values=tensor([[-1.7610]], grad_fn=<TopkBackward0>),
indices=tensor([[3]]))

Question 2:

  • Part A - In the given example, my_rnn was initialized with a value of 6 for output_size. Where did this value come from? Is it something you can change when tuning the network's architecture? Briefly explain.
  • Part B - In the given example, my_rnn was initialized with a value of 100 for hidden_size. Where did this value come from? Is it something you can change when tuning the network's architecture? Briefly explain.

Part 3 - Training¶

To facilitate the training of our model, we'll define the category labels and a dictionary that links our lists of names to each label:

In [8]:
## List of categories
category_labels = ['Chinese', 'Japanese', 'Korean', 'English', 'Irish', 'Russian']

## Dictionary of categories and names
category_lines = {'Chinese': Chinese,
                 'Japanese': Japanese,
                 'Korean': Korean,
                 'English': English,
                 'Irish': Irish,
                 'Russian': Russian}

Next, we'll train our network by feeding it randomly selected names and then updating the network's weights and biases using back-propagation.

The function defined below will facilitate the selection of randomly chosen input names during model training:

In [9]:
## Function to randomly sample a single example
import random
def randomTrainingExample():
    ## Randomly choose a category index (ie: Chinese, etc.)
    category = category_labels[random.randint(0, len(category_labels)-1)]
    
    ## Randomly choose a name in that category
    name = category_lines[category][random.randint(0, len(category_lines[category])-1)]
    
    ## Convert the chosen example to a tensor
    category_tensor = torch.tensor([category_labels.index(category)], dtype=torch.long)
    line_tensor = nameToTensor(name)
    
    return category, name, category_tensor, line_tensor

## Try it out
randomTrainingExample()
Out[9]:
('Russian',
 'Zhvanetsky',
 tensor([5]),
 tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           1., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]]]))

Now we'll set up a function, train(), that updates the network's parameters after being given a randomly selected training example:

In [10]:
## Set learning rate
learning_rate = 0.005

## Define cost func
cost_fn = nn.CrossEntropyLoss()

## Training function for a single input (name category, name)
def train(category_tensor, line_tensor):
    
    ## initialize the hidden state
    hidden = rnn.initHidden()
    
    ## set the gradient to zero
    rnn.zero_grad()

    ## loop through the letters in the input, getting a prediction and new hidden state each time
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    ## Calculate cost and gradients
    cost = cost_fn(output, category_tensor)
    cost.backward()

    # Update parameters
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha = -learning_rate) ## This subtracts the learning rate times the gradient from each parameter

    ## Return the output and cost
    return output, cost.item()

Question 3: The train() function defined above involves a for loop that iterates through the first dimension of line_tensor (which is the tensor storing the input name).

  • Part A - In specific terms, what is being input to the model at each iteration of this loop?
  • Part B - In specific terms, what is being output from the model at each iteration of this loop?
  • Part C - Notice how hidden is initialized (ie: reset) every time train() is called on a new training example. Why is it important to do this when training the model?

We are now ready to train the model. To do so, we'll use 10,000 randomly selected names and record the model's cost every 25 iterations.

In [11]:
## Initializations
n_iters = 10000
cost_every_n = 25
current_cost = 0
track_cost = []

### Iteratively update model from randomly chosen example
for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, cost = train(category_tensor, line_tensor)
    current_cost += cost
    
    # Save cost every 25 iterations
    if iter % cost_every_n == 0:
        track_cost.append(current_cost/cost_every_n)
        current_cost = 0

Next, we'll graph the costs throughout the training process to see if our model has learned anything from our training examples:

In [12]:
import matplotlib.pyplot as plt
plt.plot(track_cost)
plt.show()

We can see that the model appears to have learned some patterns contained in surnames, but there's quite a bit of variability in the cost observed from one set of 25 names to the next.
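
To see the overall trend more clearly, one option is to smooth the recorded costs with a simple running average before plotting. The sketch below uses the track_cost list from above; the window size of 20 is an arbitrary choice.

## Optional sketch: plot a running average of the recorded costs
window = 20
smoothed = [sum(track_cost[i:i + window]) / window for i in range(len(track_cost) - window + 1)]
plt.plot(smoothed)
plt.show()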

Part 4 - Using the Model¶

The RNN we built and trained is designed to predict the labels of input sequences of characters. This means that we can give the trained model any valid sequence of characters and it will predict the nationality it believes that name belongs to.

To see this in action, we'll create a predict function that prints the top N predicted labels (and their associated output values) for a given input name:

In [13]:
def predict(input_line, n_predictions=4):
    print('\n> %s' % input_line)
    
    ## Don't update gradient with any of these examples
    with torch.no_grad():
        
        ## Initialize new hidden state
        hidden = rnn.initHidden()
        
        ## Convert input str to tensor
        input_t = nameToTensor(input_line)
 
        ## Pass each character into `rnn`
        for i in range(input_t.size()[0]):
            output, hidden = rnn(input_t[i], hidden)

        # Get top N categories from output
        topv, topi = output.topk(n_predictions, 1, True)
        predictions = []

        ## Go through the category predictions and save info for printing
        for i in range(n_predictions):
            value = topv[0][i].item()
            category_index = topi[0][i].item()
            print('(%.2f) %s' % (value, category_labels[category_index]))
            predictions.append([value, category_labels[category_index]])

## Try it out on a few examples:
predict('Dovesky')
predict('Miller')
predict('Satoshi')
predict('ABCDEFGHIJKLMNOP')
> Dovesky
(-0.21) Russian
(-2.11) English
(-3.35) Japanese
(-3.49) Irish

> Miller
(-0.45) English
(-1.70) Irish
(-1.78) Russian
(-4.28) Japanese

> Satoshi
(-0.04) Japanese
(-3.78) Russian
(-4.80) Irish
(-5.52) English

> ABCDEFGHIJKLMNOP
(-0.66) Russian
(-1.26) English
(-2.11) Japanese
(-3.40) Irish

Question 4:

  • Part A: Try out this predict function on 1 or 2 names of your choosing. Include your code and output, and write 1-2 sentences reflecting upon whether you are satisfied/surprised by the results.
  • Part B: Increase the hidden_size of the network and retrain it using more than 10,000 training iterations. Try out the same names you considered in Part A and briefly reflect upon whether the results appear to be better, worse, or roughly the same.

Part 5 - Introduction to Generative RNNs¶

This section provides a brief illustration of a simple generative RNN. The network will be trained using the same surnames data that we've been working with, and it will be set up to generate a predicted name when given an initial string of characters.

For our previous model, we prepared our data using one-hot encoding to represent each unique letter. This time, we'll add an extra position that does not correspond to any letter to serve as a "stop character", which signals the model to stop generating new characters:

In [14]:
## Set up number of category labels and number of letters (plus 1 for a stop char)
n_categories = len(category_labels)
all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1

Next we'll define the model's architecture, which is somewhat more complicated than our previous example. This added complexity should allow it to learn more patterns from the training data.

In [15]:
from torch import nn

class my_gen_rnn(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(my_gen_rnn, self).__init__()
        self.hidden_size = hidden_size

        self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

This model involves three key components that are explained below:

  • i2h takes a combined tensor containing the category, the current input character, and the current hidden state, and outputs a new hidden state
  • i2o takes the same combined input as i2h, but it produces an intermediate output that will ultimately contribute to a new predicted character
  • o2o is an extra layer that takes the combined outputs of i2h and i2o to generate a predicted character (see the sketch below this list).
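
To make these shapes concrete, the optional sketch below traces a single forward call through an untrained copy of the network (the hidden size of 128 and the all-zero placeholder tensors are arbitrary choices for illustration only).

## Optional sketch: trace tensor shapes through one forward call of my_gen_rnn
tmp_rnn = my_gen_rnn(n_letters, 128, n_letters)
cat_t = torch.zeros(1, n_categories)   ## placeholder 1 x 6 category tensor
in_t = torch.zeros(1, n_letters)       ## placeholder 1 x n_letters character tensor
hid_t = tmp_rnn.initHidden()           ## 1 x 128 hidden state of zeros
out_t, hid_t = tmp_rnn(cat_t, in_t, hid_t)
print(out_t.size(), hid_t.size())      ## output is 1 x n_letters, hidden is 1 x 128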

The recurrent structure of the network can be more easily understood using the diagram below:

In [16]:
from IPython.display import HTML
HTML('<img src="https://i.imgur.com/jzVrf7f.png">')
Out[16]:

For each training example we'll need a set of input letters (the complete surname), a set of output letters (the surname offset by 1), and the category label (nationality).

For example, if the name is "Kasparaov", the input letters would be a one-hot representation of the letters in "Kasparaov", and the output letters would be "asparaov" followed by an end-of-string marker, "<EOS>" (the extra position we added to our set of letters). We'll also need a tensor to store the category label.

The functions defined below will create the input letters, output letters, and category tensor for a given name:

In [17]:
def inputTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

def outputTensor(line):
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1) 
    return torch.LongTensor(letter_indexes)

def categoryTensor(category):
    li = category_labels.index(category)
    tensor = torch.zeros(1, n_categories)
    tensor[0][li] = 1
    return tensor
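
As a quick check of these helpers (the name "Lee" is just an arbitrary example), the sketch below shows the offset-by-one target indices ending in the stop index, along with the shapes of the other two tensors.

## Optional sketch: inspect the three tensors for a short example name
print(outputTensor('Lee'))          ## indices of 'e', 'e', then the stop index (n_letters - 1)
print(inputTensor('Lee').size())    ## torch.Size([3, 1, 59]): one one-hot vector per letter
print(categoryTensor('Irish'))      ## 1 x 6 one-hot vector marking the 'Irish' category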

Similar to before, we will also define a couple of functions to help us select random examples during training:

In [18]:
# Random item from a list
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

# Get a random category and random line from that category
def randomTrainingPair():
    category = randomChoice(category_labels)
    line = randomChoice(category_lines[category])
    return category, line

# Make category, input, and target tensors from a random category, line pair
def randomTrainingExample():
    category, line = randomTrainingPair()
    category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line)
    target_line_tensor = outputTensor(line)
    return category_tensor, input_line_tensor, target_line_tensor

To get a basic understanding of these functions, we can consider a randomly chosen training sample:

In [20]:
## Try it out
randomTrainingExample()
Out[20]:
(tensor([[1., 0., 0., 0., 0., 0.]]),
 tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0.]],
 
         [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0.]]]),
 tensor([ 0, 13,  6, 58]))

In order, these three tensor outputs are:

  1. A one-hot vector recording the name's category (nationality)
  2. A tensor that encodes the input name
  3. A tensor of integer labels for each character in the output name. This should always end in 58, which is the index we added for the "stop character"

Next we'll define a function that will generate an output name from the network when given an initial character. We'll arbitrarily limit the generated name to a maximum length of 15, and we'll use the network architecture we previously defined (with randomly initialized weights) to generate a name:

In [23]:
max_length = 15
gen_rnn = my_gen_rnn(n_letters, 128, n_letters)

# Sample using a given category and starting letter
def sample(category, start_letter):
    
    ## We are just sampling, so we don't want to store info used in gradient calculations
    with torch.no_grad(): 
        category_tensor = categoryTensor(category)  ## create category tensor of input category
        input = inputTensor(start_letter)           ## intialize input tensor as an encoding of the start letter
        hidden = gen_rnn.initHidden()               ## reset the initial hidden state
        output_name = start_letter                  ## Use start letter as first piece of the output name
        
        ## Loop until reaching the max length or the stop character 
        for i in range(max_length):
            output, hidden = gen_rnn(category_tensor, input[0], hidden)  ## Get the next output and hidden state
            topv, topi = output.topk(1)                                  ## Identify the top predicted character's value and index position
            topi = topi[0][0]                                            ## Extract integer id of predicted char
            if topi == n_letters - 1:                                    ## Stop if it's the stop character's ID
                break
            else:
                letter = all_letters[topi]                               ## Convert integer id to the character
                output_name += letter                                    ## Add this character to the output 
            input = inputTensor(letter)                                  ## Prep this letter as the next input

        return output_name

We can see this function in action by providing a valid category label and initial character:

In [25]:
sample('English', 'B')
Out[25]:
'BqqqqqqqqqqSqqSq'

Question 5:

  • Part A - The generated name doesn't appear to be an English surname. Is this something you'd expect to see for other input characters? Briefly explain.
  • Part B - Why do we want the function to exit the for loop when topi == n_letters - 1? Briefly explain.

Part 6 - Training and Using Our Generative RNN¶

Similar to our previous example, we'll create a function that we can use to help train our network:

In [26]:
cost_fn = nn.CrossEntropyLoss()
gen_rnn = my_gen_rnn(n_letters, 256, n_letters)
learning_rate = 0.001

def train(category_tensor, input_line_tensor, target_line_tensor):
    target_line_tensor.unsqueeze_(-1)
    hidden = gen_rnn.initHidden()

    gen_rnn.zero_grad()
    cost = 0

    for i in range(input_line_tensor.size(0)):
        output, hidden = gen_rnn(category_tensor, input_line_tensor[i], hidden)
        l = cost_fn(output, target_line_tensor[i])
        cost += l

    cost.backward()

    for p in gen_rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)

    return output, cost.item() / input_line_tensor.size(0)

We'll again train the network, this time using 20,000 randomly selected training examples:

In [27]:
n_iters = 20000
cost_every_n = 25
current_cost = 0
track_cost = []

for iter in range(1, n_iters + 1):
    cat, il, ol = randomTrainingExample()
    if -1 in ol:                               ### If an example happens to contain an unusual character we'll skip it
        continue  
    output, cost = train(cat, il, ol)
    current_cost += cost
    
    # Save the cost every 25 iterations
    if iter % cost_every_n == 0:
        track_cost.append(current_cost/cost_every_n)
        current_cost = 0

As shown below, the network's parameters have reached a point where the cost is no longer improving:

In [28]:
import matplotlib.pyplot as plt
plt.plot(track_cost)
plt.show()

At this point, we can use the sample function we created earlier to explore some of the names we can generate. The code below provides a template for looking at various names that are generated from some seed characters:

In [44]:
test_letter = 'Br'
print('Korean:',sample('Korean', test_letter), 
      '\nJapanese:', sample('Japanese', test_letter),
      '\nChinese:', sample('Chinese', test_letter),
      '\nEnglish:', sample('English', test_letter),
      '\nIrish:', sample('Irish', test_letter),
      '\nRussian:', sample('Russian', test_letter))
Korean: Bron 
Japanese: Braka 
Chinese: Bran 
English: Brare 
Irish: Branghan 
Russian: Braranov

You can re-run the same commands several times and see slightly different results due to the dropout layer involved in the creation of the network's output.
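
If you'd like repeatable output when experimenting, one option (a sketch, not required for the lab) is to switch the network into evaluation mode, which disables the dropout layer; switching back to training mode re-enables it.

## Optional sketch: disable dropout for deterministic sampling, then restore training mode
gen_rnn.eval()
print(sample('English', 'Br'))   ## repeated calls now give identical output
gen_rnn.train()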

Question 6: To verify that you've explored training this model and using it to generate output, modify the print command given above so that test_letter is a seed string of your choice. Include your printed results, and write 1-2 sentences commenting upon how you view the effectiveness of this model.

Acknowledgements: The contents of this lab were adapted from the following tutorials:

  • https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html
  • https://pytorch.org/tutorials/beginner/former_torchies/nnft_tutorial.html