{"id":326527,"date":"2021-07-16T03:00:11","date_gmt":"2021-07-16T03:00:11","guid":{"rendered":"http:\/\/savepearlharbor.com\/?p=326527"},"modified":"-0001-11-30T00:00:00","modified_gmt":"-0001-11-29T21:00:00","slug":"","status":"publish","type":"post","link":"https:\/\/savepearlharbor.com\/?p=326527","title":{"rendered":"\u041c\u043e\u0434\u0435\u043b\u0438 \u0433\u043b\u0443\u0431\u043e\u043a\u0438\u0445 \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u044b\u0445 \u0441\u0435\u0442\u0435\u0439 sequence-to-sequence \u043d\u0430 PyTorch (\u0427\u0430\u0441\u0442\u044c 4)"},"content":{"rendered":"\n<div class=\"post__text post__text_v2\" id=\"post-content-body\">\n<h2>4 &#8212; \u0423\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0435 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438, \u043c\u0430\u0441\u043a\u0438\u0440\u043e\u0432\u043a\u0430, \u0432\u044b\u0432\u043e\u0434 \u0438 \u043c\u0435\u0442\u043e\u0434 \u043e\u0446\u0435\u043d\u043a\u0438 BLEU<\/h2>\n<p>\u0412 \u044d\u0442\u043e\u0439 \u0447\u0430\u0441\u0442\u0438 \u043c\u044b \u0434\u043e\u0431\u0430\u0432\u0438\u043c \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0443\u043b\u0443\u0447\u0448\u0435\u043d\u0438\u0439 \u2014 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0435 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438 \u0438 \u043c\u0430\u0441\u043a\u0438\u0440\u043e\u0432\u043a\u0430 \u2014 \u043a \u043c\u043e\u0434\u0435\u043b\u0438 \u0438\u0437 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0435\u0433\u043e \u0440\u0430\u0437\u0434\u0435\u043b\u0430. 
\u0423\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0435 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044e\u0442\u0441\u044f, \u0447\u0442\u043e\u0431\u044b \u0441\u043e\u043e\u0431\u0449\u0438\u0442\u044c \u043d\u0430\u0448\u0435\u0439 RNN, \u0447\u0442\u043e \u043d\u0443\u0436\u043d\u043e \u043f\u0440\u043e\u043f\u0443\u0441\u043a\u0430\u0442\u044c \u043c\u0430\u0440\u043a\u0435\u0440\u044b \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u0432 \u043d\u0430\u0448\u0435\u043c \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u0449\u0438\u043a\u0435. \u041c\u0430\u0441\u043a\u0438\u0440\u043e\u0432\u043a\u0430 \u044f\u0432\u043d\u043e \u0437\u0430\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442 \u043c\u043e\u0434\u0435\u043b\u044c \u0438\u0433\u043d\u043e\u0440\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u043d\u044b\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f, \u0442\u0430\u043a\u0438\u0435 \u043a\u0430\u043a \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u043a \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u0430\u043c \u0441 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0435\u043c. 
\u041e\u0431\u0430 \u044d\u0442\u0438 \u043c\u0435\u0442\u043e\u0434\u0430 \u043e\u0431\u044b\u0447\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044e\u0442\u0441\u044f \u0432 \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0435 \u0435\u0441\u0442\u0435\u0441\u0442\u0432\u0435\u043d\u043d\u043e\u0433\u043e \u044f\u0437\u044b\u043a\u0430 (NLP).<\/p>\n<p>\u041a\u0440\u043e\u043c\u0435 \u0442\u043e\u0433\u043e, \u043c\u044b \u0440\u0430\u0441\u0441\u043c\u043e\u0442\u0440\u0438\u043c \u043a\u0430\u043a \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u043d\u0430\u0448\u0443 \u043c\u043e\u0434\u0435\u043b\u044c \u0434\u043b\u044f \u0432\u044b\u0432\u043e\u0434\u0430 \u0446\u0435\u043b\u0435\u0432\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f, \u0434\u0430\u0432\u0430\u044f \u0435\u0439 \u0432\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435, \u0432\u0438\u0434\u044f \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442 \u0435\u0451 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u0430, \u0438 \u0432\u044b\u044f\u0441\u043d\u044f\u044f, \u043d\u0430 \u0447\u0442\u043e \u0438\u043c\u0435\u043d\u043d\u043e \u043e\u043d\u0430 \u043e\u0431\u0440\u0430\u0449\u0430\u0435\u0442 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u043f\u0440\u0438 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u0435 \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0441\u043b\u043e\u0432\u0430.<\/p>\n<p>\u041d\u0430\u043a\u043e\u043d\u0435\u0446, \u043c\u044b \u0431\u0443\u0434\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u043c\u0435\u0442\u0440\u0438\u043a\u0443 BLEU \u0434\u043b\u044f \u0438\u0437\u043c\u0435\u0440\u0435\u043d\u0438\u044f \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0430 \u043d\u0430\u0448\u0438\u0445 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u043e\u0432.<\/p>\n<p>\u041a\u0430\u043a \u0438 
\u0440\u0430\u043d\u0435\u0435, \u0435\u0441\u043b\u0438 \u0432\u0438\u0437\u0443\u0430\u043b\u044c\u043d\u044b\u0439 \u0444\u043e\u0440\u043c\u0430\u0442 \u043f\u043e\u0441\u0442\u0430 \u0432\u0430\u0441 \u043d\u0435 \u0443\u0434\u043e\u0432\u043b\u0435\u0442\u0432\u043e\u0440\u044f\u0435\u0442, \u0442\u043e \u043d\u0438\u0436\u0435 \u0441\u0441\u044b\u043b\u043a\u0438 \u043d\u0430 \u0430\u043d\u0433\u043b\u0438\u0439\u0441\u043a\u0443\u044e \u0438 \u0440\u0443\u0441\u0441\u043a\u0443\u044e \u0432\u0435\u0440\u0441\u0438\u044e jupyter notebook:<\/p>\n<p><a href=\"https:\/\/github.com\/bentrevett\/pytorch-seq2seq\/blob\/master\/4%20-%20Packed%20Padded%20Sequences%2C%20Masking%2C%20Inference%20and%20BLEU.ipynb\" rel=\"noopener noreferrer nofollow\">\u0418\u0441\u0445\u043e\u0434\u043d\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f<\/a> <a href=\"https:\/\/colab.research.google.com\/github\/bentrevett\/pytorch-seq2seq\/blob\/master\/4%20-%20Packed%20Padded%20Sequences%2C%20Masking%2C%20Inference%20and%20BLEU.ipynb\" rel=\"noopener noreferrer nofollow\">Open jupyter notebook In Colab<\/a><\/p>\n<p><a href=\"https:\/\/github.com\/vasiliyeskin\/bentrevett-pytorch-seq2seq_ru\/blob\/master\/4%20-%20Packed%20Padded%20Sequences%2C%20Masking%2C%20Inference%20and%20BLEU.ipynb\" rel=\"noopener noreferrer nofollow\">\u0420\u0443\u0441\u0441\u043a\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f<\/a> <a href=\"https:\/\/colab.research.google.com\/github\/vasiliyeskin\/bentrevett-pytorch-seq2seq_ru\/blob\/master\/4%20-%20Packed%20Padded%20Sequences%2C%20Masking%2C%20Inference%20and%20BLEU.ipynb\" rel=\"noopener noreferrer nofollow\">Open jupyter notebook In Colab<\/a><\/p>\n<p><strong>\u0417\u0430\u043c\u0435\u0447\u0430\u043d\u0438\u0435<\/strong>: \u0440\u0443\u0441\u0441\u043a\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f jupyter notebook \u043e\u0442\u043b\u0438\u0447\u0430\u0435\u0442\u0441\u044f \u043e\u0442 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0439 
\u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u043d\u044b\u043c \u0432 \u043a\u043e\u043d\u0446\u0435 \u0442\u0435\u0441\u0442\u043e\u043c \u043d\u0430 \u0438\u043d\u0432\u0435\u0440\u0441\u0438\u044e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f. \u041e\u0447\u0435\u043d\u044c \u043f\u0440\u043e\u0441\u0442\u0430\u044f \u0434\u043b\u044f \u0447\u0435\u043b\u043e\u0432\u0435\u043a\u0430 \u0437\u0430\u0434\u0430\u0447\u0430, \u043d\u043e, \u043f\u043e\u0440\u043e\u0439, \u043d\u0435\u043f\u0440\u0435\u043e\u0434\u043e\u043b\u0438\u043c\u0430 \u0434\u043b\u044f \u0438\u0441\u043a\u0443\u0441\u0441\u0442\u0432\u0435\u043d\u043d\u044b\u0445 \u0441\u0438\u0441\u0442\u0435\u043c.<\/p>\n<h3>\u041f\u043e\u0434\u0433\u043e\u0442\u043e\u0432\u043a\u0430 \u0434\u0430\u043d\u043d\u044b\u0445<\/h3>\n<p>\u0421\u043d\u0430\u0447\u0430\u043b\u0430 \u043c\u044b \u0438\u043c\u043f\u043e\u0440\u0442\u0438\u0440\u0443\u0435\u043c \u0432\u0441\u0435 \u043c\u043e\u0434\u0443\u043b\u0438, \u043a\u0430\u043a \u0438 \u0440\u0430\u043d\u044c\u0448\u0435, \u0441 \u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0438\u0435\u043c \u043c\u043e\u0434\u0443\u043b\u0435\u0439 <code>matplotlib<\/code>, \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u0445 \u0434\u043b\u044f \u043f\u0440\u043e\u0441\u043c\u043e\u0442\u0440\u0430 \u0432\u0435\u043a\u0442\u043e\u0440\u0430 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f.<\/p>\n<pre><code class=\"python\">import torch import torch.nn as nn import torch.optim as optim import torch.nn.functional as F  from torchtext.legacy.datasets import Multi30k from torchtext.legacy.data import Field, BucketIterator, TabularDataset  import matplotlib.pyplot as plt import matplotlib.ticker as ticker  import spacy import numpy as np  import random import math import time<\/code><\/pre>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u0443\u0441\u0442\u0430\u043d\u043e\u0432\u0438\u043c 
\u0441\u043b\u0443\u0447\u0430\u0439\u043d\u043e\u0435 \u043d\u0430\u0447\u0430\u043b\u044c\u043d\u043e\u0435 \u0447\u0438\u0441\u043b\u043e \u0434\u043b\u044f \u0432\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u043c\u043e\u0441\u0442\u0438.<\/p>\n<pre><code>SEED = 1234  random.seed(SEED) np.random.seed(SEED) torch.manual_seed(SEED) torch.cuda.manual_seed(SEED) torch.backends.cudnn.deterministic = True<\/code><\/pre>\n<p>\u0417\u0430\u0433\u0440\u0443\u0437\u0438\u0442\u0435 \u043d\u0435\u043c\u0435\u0446\u043a\u0443\u044e \u0438 \u0430\u043d\u0433\u043b\u0438\u0439\u0441\u043a\u0443\u044e \u043c\u043e\u0434\u0435\u043b\u0438 spaCy.<\/p>\n<pre><code class=\"python\">python -m spacy download en_core_web_sm python -m spacy download de_core_news_sm<\/code><\/pre>\n<p>\u0414\u043b\u044f \u0437\u0430\u0433\u0440\u0443\u0437\u043a\u0438 \u0432 Google Colab \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0438\u0435 \u043a\u043e\u043c\u0430\u043d\u0434\u044b (\u041f\u043e\u0441\u043b\u0435 \u0437\u0430\u0433\u0440\u0443\u0437\u043a\u0438 \u043e\u0431\u044f\u0437\u0430\u0442\u0435\u043b\u044c\u043d\u043e \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0441\u0442\u0438\u0442\u0435 colab runtime! 
\u041d\u0430\u0438\u0431\u044b\u0441\u0442\u0440\u0435\u0439\u0448\u0438\u0439 \u0441\u043f\u043e\u0441\u043e\u0431 \u0447\u0435\u0440\u0435\u0437 \u043a\u043e\u0440\u043e\u0442\u043a\u0443\u044e \u043a\u043e\u043c\u0430\u043d\u0434\u0443: <strong>Ctrl + M + .<\/strong>):<\/p>\n<pre><code class=\"python\">!pip install -U spacy==3.0 !python -m spacy download en_core_web_sm !python -m spacy download de_core_news_sm<\/code><\/pre>\n<p>\u041a\u0430\u043a \u0438 \u0440\u0430\u043d\u044c\u0448\u0435, \u043c\u044b \u0438\u043c\u043f\u043e\u0440\u0442\u0438\u0440\u0443\u0435\u043c spaCy \u0438 \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0438\u043c \u0442\u043e\u043a\u0435\u043d\u0438\u0437\u0430\u0442\u043e\u0440\u044b \u0434\u043b\u044f \u043d\u0435\u043c\u0435\u0446\u043a\u043e\u0433\u043e \u0438 \u0430\u043d\u0433\u043b\u0438\u0439\u0441\u043a\u043e\u0433\u043e \u044f\u0437\u044b\u043a\u043e\u0432.<\/p>\n<pre><code class=\"python\">spacy_de = spacy.load('de_core_news_sm') spacy_en = spacy.load('en_core_web_sm')<\/code><\/pre>\n<pre><code class=\"python\">def tokenize_de(text):     \"\"\"     Tokenizes German text from a string into a list of strings     \"\"\"     return [tok.text for tok in spacy_de.tokenizer(text)]  def tokenize_en(text):     \"\"\"     Tokenizes English text from a string into a list of strings     \"\"\"     return [tok.text for tok in spacy_en.tokenizer(text)]<\/code><\/pre>\n<p>\u041f\u0440\u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u0438 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0445 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0445 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439 \u043d\u0430\u043c \u043d\u0443\u0436\u043d\u043e \u0441\u043e\u043e\u0431\u0449\u0438\u0442\u044c PyTorch, \u043a\u0430\u043a\u043e\u0432\u0430 \u0434\u043b\u0438\u043d\u0430 
\u0444\u0430\u043a\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0445 (\u043d\u0435 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0445) \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439. \u041a \u0441\u0447\u0430\u0441\u0442\u044c\u044e \u0434\u043b\u044f \u043d\u0430\u0441, \u043e\u0431\u044a\u0435\u043a\u0442\u044b TorchText <code>Field<\/code> \u043f\u043e\u0437\u0432\u043e\u043b\u044f\u044e\u0442 \u043d\u0430\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442 <code>include_lengths<\/code>, \u044d\u0442\u043e \u043f\u0440\u0438\u0432\u0435\u0434\u0435\u0442 \u043a \u0442\u043e\u043c\u0443, \u0447\u0442\u043e \u043d\u0430\u0448 <code>batch.src<\/code> \u0431\u0443\u0434\u0435\u0442 \u043a\u043e\u0440\u0442\u0435\u0436\u0435\u043c. \u041f\u0435\u0440\u0432\u044b\u0439 \u044d\u043b\u0435\u043c\u0435\u043d\u0442 \u043a\u043e\u0440\u0442\u0435\u0436\u0430 \u0442\u0430\u043a\u043e\u0439 \u0436\u0435, \u043a\u0430\u043a \u0438 \u0440\u0430\u043d\u044c\u0448\u0435, \u043f\u0430\u043a\u0435\u0442 \u0447\u0438\u0441\u043b\u043e\u0432\u044b\u0445 \u0438\u0441\u0445\u043e\u0434\u043d\u044b\u0445 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0439 \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u0442\u0435\u043d\u0437\u043e\u0440\u0430, \u0430 \u0432\u0442\u043e\u0440\u043e\u0439 \u044d\u043b\u0435\u043c\u0435\u043d\u0442 \u2014 \u044d\u0442\u043e \u0434\u043b\u0438\u043d\u044b \u0431\u0435\u0437 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0439 \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u0432 \u043f\u0430\u043a\u0435\u0442\u0435.<\/p>\n<pre><code class=\"python\">SRC = Field(tokenize = tokenize_de,              init_token = '&lt;sos&gt;',              eos_token = 
'&lt;eos&gt;',              lower = True,              include_lengths = True)  TRG = Field(tokenize = tokenize_en,              init_token = '&lt;sos&gt;',              eos_token = '&lt;eos&gt;',              lower = True)<\/code><\/pre>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u0437\u0430\u0433\u0440\u0443\u0436\u0430\u0435\u043c \u0434\u0430\u043d\u043d\u044b\u0435.<\/p>\n<pre><code class=\"python\">train_data, valid_data, test_data = Multi30k.splits(exts = ('.de', '.en'),                                                      fields = (SRC, TRG))<\/code><\/pre>\n<p>\u0418 \u0437\u0430\u043f\u043e\u043b\u043d\u044f\u0435\u043c \u0441\u043b\u043e\u0432\u0430\u0440\u044c.<\/p>\n<pre><code class=\"python\">SRC.build_vocab(train_data, min_freq = 2) TRG.build_vocab(train_data, min_freq = 2)<\/code><\/pre>\n<p>\u0414\u0430\u043b\u0435\u0435 \u043c\u044b \u043f\u043e\u0434\u0433\u043e\u0442\u0430\u0432\u043b\u0438\u0432\u0430\u0435\u043c \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u044b.<\/p>\n<p>\u041e\u0434\u043d\u0430 \u0438\u0437 \u043e\u0441\u043e\u0431\u0435\u043d\u043d\u043e\u0441\u0442\u0435\u0439 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0445 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439 \u0441 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f\u043c\u0438 \u0437\u0430\u043a\u043b\u044e\u0447\u0430\u0435\u0442\u0441\u044f \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e \u0432\u0441\u0435 \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u044b (\u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f) \u0432 \u043f\u0430\u043a\u0435\u0442\u0435 \u0434\u043e\u043b\u0436\u043d\u044b \u0431\u044b\u0442\u044c \u043e\u0442\u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u043d\u044b \u043f\u043e \u0438\u0445 \u0434\u043b\u0438\u043d\u0435 \u0431\u0435\u0437 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0439 \u0432 
\u043f\u043e\u0440\u044f\u0434\u043a\u0435 \u0443\u0431\u044b\u0432\u0430\u043d\u0438\u044f, \u0442.\u0435. \u043f\u0435\u0440\u0432\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 \u0432 \u043f\u0430\u043a\u0435\u0442\u0435 \u0434\u043e\u043b\u0436\u043d\u043e \u0431\u044b\u0442\u044c \u0441\u0430\u043c\u044b\u043c \u0434\u043b\u0438\u043d\u043d\u044b\u043c. \u041c\u044b \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u0434\u0432\u0430 \u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442\u0430 \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u0430 \u0434\u043b\u044f \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0438 \u044d\u0442\u043e\u0433\u043e: <code>sort_within_batch<\/code>, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0441\u043e\u043e\u0431\u0449\u0430\u0435\u0442 \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u0443, \u0447\u0442\u043e \u0441\u043e\u0434\u0435\u0440\u0436\u0438\u043c\u043e\u0435 \u043f\u0430\u043a\u0435\u0442\u0430 \u043d\u0435\u043e\u0431\u0445\u043e\u0434\u0438\u043c\u043e \u043e\u0442\u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u0442\u044c, \u0438 <code>sort_key<\/code> &#8212; \u0444\u0443\u043d\u043a\u0446\u0438\u044e, \u043a\u043e\u0442\u043e\u0440\u0430\u044f \u0441\u043e\u043e\u0431\u0449\u0430\u0435\u0442 \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u0443, \u043a\u0430\u043a \u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u044b \u0432 \u043f\u0430\u043a\u0435\u0442\u0435. 
\u0417\u0434\u0435\u0441\u044c \u043c\u044b \u0441\u043e\u0440\u0442\u0438\u0440\u0443\u0435\u043c \u043f\u043e \u0434\u043b\u0438\u043d\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f <code>src<\/code>.<\/p>\n<pre><code class=\"python\">BATCH_SIZE = 128  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  train_iterator, valid_iterator, test_iterator = BucketIterator.splits(     (train_data, valid_data, test_data),       batch_size = BATCH_SIZE,      sort_within_batch = True,      sort_key = lambda x : len(x.src),      device = device)<\/code><\/pre>\n<h3>\u041f\u043e\u0441\u0442\u0440\u043e\u0435\u043d\u0438\u0435 \u043c\u043e\u0434\u0435\u043b\u0438<\/h3>\n<h4>\u041a\u043e\u0434\u0438\u0440\u043e\u0432\u0449\u0438\u043a<\/h4>\n<p>\u0414\u0430\u043b\u0435\u0435 \u043c\u044b \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u044f\u0435\u043c \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u0449\u0438\u043a.<\/p>\n<p>\u0412\u0441\u0435 \u0438\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u044f \u0437\u0434\u0435\u0441\u044c \u043f\u0440\u043e\u0438\u0441\u0445\u043e\u0434\u044f\u0442 \u0432 \u043c\u0435\u0442\u043e\u0434\u0435 <code>forward<\/code>. 
\u0422\u0435\u043f\u0435\u0440\u044c \u043e\u043d \u043f\u0440\u0438\u043d\u0438\u043c\u0430\u0435\u0442 \u0434\u043b\u0438\u043d\u0443 \u0438\u0441\u0445\u043e\u0434\u043d\u044b\u0445 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0439 \u0438 \u0441\u0430\u043c\u0438 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f.<\/p>\n<p>\u041f\u043e\u0441\u043b\u0435 \u0442\u043e\u0433\u043e, \u043a\u0430\u043a \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 (\u0430\u0432\u0442\u043e\u043c\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u043e\u0435 \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u043e\u043c) \u0431\u044b\u043b\u043e \u043f\u0440\u043e\u043f\u0443\u0449\u0435\u043d\u043e \u0447\u0435\u0440\u0435\u0437 \u044d\u043c\u0431\u0435\u0434\u0434\u0438\u043d\u0433, \u043c\u044b \u043c\u043e\u0436\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0434\u043b\u044f \u043d\u0435\u0433\u043e <code>pack_padded_sequence<\/code> \u0441 \u0434\u043b\u0438\u043d\u0430\u043c\u0438 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0439. \u041e\u0431\u0440\u0430\u0442\u0438\u0442\u0435 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435, \u0447\u0442\u043e \u0442\u0435\u043d\u0437\u043e\u0440, \u0441\u043e\u0434\u0435\u0440\u0436\u0430\u0449\u0438\u0439 \u0434\u043b\u0438\u043d\u044b \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439, \u0434\u043e\u043b\u0436\u0435\u043d \u0431\u044b\u0442\u044c \u0442\u0435\u043d\u0437\u043e\u0440\u043e\u043c CPU \u0432 \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u0439 \u0432\u0435\u0440\u0441\u0438\u0438 PyTorch, \u0447\u0442\u043e \u043c\u044b \u044f\u0432\u043d\u043e \u0434\u0435\u043b\u0430\u0435\u043c \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e <code>to('cpu')<\/code>. 
<code>packed_embedded<\/code> \u0431\u0443\u0434\u0435\u0442 \u043d\u0430\u0448\u0435\u0439 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u043e\u0439 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c\u044e. \u0417\u0430\u0442\u0435\u043c \u043f\u0435\u0440\u0435\u0434\u0430\u0451\u043c \u0435\u0451 \u0432 \u043d\u0430\u0448\u0443 RNN \u043a\u0430\u043a \u043e\u0431\u044b\u0447\u043d\u043e, \u0438 \u043e\u043d\u0430 \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u0435\u0442 <code>packed_outputs<\/code>, \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0439 \u0442\u0435\u043d\u0437\u043e\u0440, \u0441\u043e\u0434\u0435\u0440\u0436\u0430\u0449\u0438\u0439 \u0432\u0441\u0435 \u0441\u043a\u0440\u044b\u0442\u044b\u0435 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u0438\u0437 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438, \u0438 <code>hidden<\/code> &#8212; \u044d\u0442\u043e \u043f\u0440\u043e\u0441\u0442\u043e \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u0435 \u0441\u043a\u0440\u044b\u0442\u043e\u0435 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435 \u0438\u0437 \u043d\u0430\u0448\u0435\u0439 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438. 
<code>hidden<\/code> \u0441\u0442\u0430\u043d\u0434\u0430\u0440\u0442\u043d\u044b\u0439 \u0442\u0435\u043d\u0437\u043e\u0440 \u0438 \u043d\u0438\u043a\u0430\u043a \u043d\u0435 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0439, \u0435\u0434\u0438\u043d\u0441\u0442\u0432\u0435\u043d\u043d\u0430\u044f \u0440\u0430\u0437\u043d\u0438\u0446\u0430 \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e \u043d\u0430 \u0432\u0445\u043e\u0434\u0435 \u0431\u044b\u043b\u0430 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u0430\u044f \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c, \u044d\u0442\u043e\u0442 \u0442\u0435\u043d\u0437\u043e\u0440 \u0438\u0437 \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u0433\u043e <strong>\u044d\u043b\u0435\u043c\u0435\u043d\u0442\u0430 \u0431\u0435\u0437 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0439<\/strong> \u0432 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438.<\/p>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u0440\u0430\u0441\u043f\u0430\u043a\u043e\u0432\u044b\u0432\u0430\u0435\u043c \u043d\u0430\u0448\u0438 <code>packed_outputs<\/code> \u0441 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u0435\u043c <code>pad_packed_sequence<\/code> \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u0435\u0442 <code>outputs<\/code> \u0438 \u0434\u043b\u0438\u043d\u044b \u043a\u0430\u0436\u0434\u043e\u0433\u043e, \u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u043d\u0430\u043c \u043d\u0435 \u043d\u0443\u0436\u043d\u044b.<\/p>\n<p>\u041f\u0435\u0440\u0432\u043e\u0435 \u0438\u0437\u043c\u0435\u0440\u0435\u043d\u0438\u0435 <code>outputs<\/code> &#8212; \u044d\u0442\u043e \u0434\u043b\u0438\u043d\u044b \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0445 
\u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439.  \u041e\u0434\u043d\u0430\u043a\u043e \u0438\u0437-\u0437\u0430 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u043e\u0439 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u043e\u0439 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0442\u0435\u043d\u0437\u043e\u0440\u043e\u0432 \u0431\u0443\u0434\u0443\u0442 \u043d\u0443\u043b\u0435\u0432\u044b\u043c\u0438, \u0432 \u0441\u043b\u0443\u0447\u0430\u0435 \u043a\u043e\u0433\u0434\u0430 \u043d\u0430 \u0432\u0445\u043e\u0434\u0435 \u0431\u044b\u043b \u043c\u0430\u0440\u043a\u0435\u0440 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f.<\/p>\n<pre><code class=\"python\">class Encoder(nn.Module):     def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout):         super().__init__()                  self.embedding = nn.Embedding(input_dim, emb_dim)                  self.rnn = nn.GRU(emb_dim, enc_hid_dim, bidirectional = True)                  self.fc = nn.Linear(enc_hid_dim * 2, dec_hid_dim)                  self.dropout = nn.Dropout(dropout)              def forward(self, src, src_len):                  #src = [src len, batch size]         #src_len = [batch size]                  embedded = self.dropout(self.embedding(src))                  #embedded = [src len, batch size, emb dim]                          #need to explicitly put lengths on cpu!         
packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, src_len.to('cpu'))                          packed_outputs, hidden = self.rnn(packed_embedded)                                           #packed_outputs is a packed sequence containing all hidden states         #hidden is now from the final non-padded element in the batch                      outputs, _ = nn.utils.rnn.pad_packed_sequence(packed_outputs)                       #outputs is now a non-packed sequence, all hidden states obtained         #  when the input is a pad token are all zeros                      #outputs = [src len, batch size, hid dim * num directions]         #hidden = [n layers * num directions, batch size, hid dim]                  #hidden is stacked [forward_1, backward_1, forward_2, backward_2, ...]         #outputs are always from the last layer                  #hidden [-2, :, : ] is the last of the forwards RNN          #hidden [-1, :, : ] is the last of the backwards RNN                  #initial decoder hidden is final hidden state of the forwards and backwards          #  encoder RNNs fed through a linear layer         hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)))                  #outputs = [src len, batch size, enc hid dim * 2]         #hidden = [batch size, dec hid dim]                  return outputs, hidden<\/code><\/pre>\n<h4>\u0412\u043d\u0438\u043c\u0430\u043d\u0438\u0435<\/h4>\n<p>\u041c\u043e\u0434\u0443\u043b\u044c \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u2014 \u044d\u0442\u043e \u0442\u0430 \u0447\u0430\u0441\u0442\u044c, \u0432 \u043a\u043e\u0442\u043e\u0440\u043e\u0439 \u043c\u044b \u0432\u044b\u0447\u0438\u0441\u043b\u044f\u0435\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u043f\u043e \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u043c\u0443 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044e.<\/p>\n<p>\u0420\u0430\u043d\u0435\u0435 
\u043c\u044b \u043f\u043e\u0437\u0432\u043e\u043b\u044f\u043b\u0438 \u044d\u0442\u043e\u043c\u0443 \u043c\u043e\u0434\u0443\u043b\u044e \u00ab\u043e\u0431\u0440\u0430\u0449\u0430\u0442\u044c \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435\u00bb \u043d\u0430 \u0442\u043e\u043a\u0435\u043d\u044b \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u0432 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u043c \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0438. \u041e\u0434\u043d\u0430\u043a\u043e, \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044f <em>\u043c\u0430\u0441\u043a\u0438\u0440\u043e\u0432\u043a\u0443<\/em>, \u043c\u044b \u043c\u043e\u0436\u0435\u043c \u0437\u0430\u0441\u0442\u0430\u0432\u0438\u0442\u044c \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u043a\u043e\u043d\u0446\u0435\u043d\u0442\u0440\u0438\u0440\u043e\u0432\u0430\u0442\u044c\u0441\u044f \u0442\u043e\u043b\u044c\u043a\u043e \u043d\u0430 \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u0430\u0445, \u043d\u0435 \u044f\u0432\u043b\u044f\u044e\u0449\u0438\u0445\u0441\u044f \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0435\u043c.<\/p>\n<p>\u041c\u0435\u0442\u043e\u0434 <code>forward<\/code> \u0442\u0435\u043f\u0435\u0440\u044c \u043f\u0440\u0438\u043d\u0438\u043c\u0430\u0435\u0442 \u043d\u0430 \u0432\u0445\u043e\u0434\u0435 <code>mask<\/code>. 
\u042d\u0442\u043e \u0442\u0435\u043d\u0437\u043e\u0440 <strong>[batch size, source sentence length]<\/strong>, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0440\u0430\u0432\u0435\u043d 1, \u0435\u0441\u043b\u0438 \u0442\u043e\u043a\u0435\u043d \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u043d\u0435 \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u0442\u043e\u043a\u0435\u043d\u043e\u043c \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f, \u0438 0, \u043a\u043e\u0433\u0434\u0430 \u044d\u0442\u043e \u043c\u0430\u0440\u043a\u0435\u0440 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f. \u041d\u0430\u043f\u0440\u0438\u043c\u0435\u0440, \u0435\u0441\u043b\u0438 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 [&#171;hello&#187;, &#171;how&#187;, &#171;are&#187;, &#171;you&#187;, &#171;?&#187;, <code>&lt;pad&gt;<\/code>, <code>&lt;pad&gt;<\/code>], \u0442\u043e \u043c\u0430\u0441\u043a\u0430 \u0431\u0443\u0434\u0435\u0442 [1, 1, 1, 1, 1, 0, 0].<\/p>\n<p>\u041c\u044b \u043f\u0440\u0438\u043c\u0435\u043d\u044f\u0435\u043c \u043c\u0430\u0441\u043a\u0443 \u043f\u043e\u0441\u043b\u0435 \u0442\u043e\u0433\u043e, \u043a\u0430\u043a \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u0431\u044b\u043b\u043e \u0440\u0430\u0441\u0441\u0447\u0438\u0442\u0430\u043d\u043e, \u043d\u043e \u0434\u043e \u0442\u043e\u0433\u043e, \u043a\u0430\u043a \u043e\u043d\u043e \u0431\u0443\u0434\u0435\u0442 \u043d\u043e\u0440\u043c\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u043d\u043e \u0444\u0443\u043d\u043a\u0446\u0438\u0435\u0439 <code>softmax<\/code>. \u042d\u0442\u043e \u0440\u0435\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u043d\u043e \u0441 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u0435\u043c <code>masked_fill<\/code>. 
\u0422\u0435\u043d\u0437\u043e\u0440 \u0437\u0430\u043f\u043e\u043b\u043d\u044f\u0435\u0442\u0441\u044f \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u0430, \u0434\u043b\u044f \u043a\u043e\u0442\u043e\u0440\u043e\u0433\u043e \u0432\u044b\u043f\u043e\u043b\u043d\u044f\u0435\u0442\u0441\u044f \u0443\u0441\u043b\u043e\u0432\u0438\u0435 \u0434\u043b\u044f \u043f\u0435\u0440\u0432\u043e\u0433\u043e \u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442\u0430 <code>mask == 0<\/code>, \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c, \u0437\u0430\u0434\u0430\u043d\u043d\u044b\u043c \u0432\u0442\u043e\u0440\u044b\u043c \u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442\u043e\u043c <code>-1e10<\/code>. \u0414\u0440\u0443\u0433\u0438\u043c\u0438 \u0441\u043b\u043e\u0432\u0430\u043c\u0438, \u043e\u043d \u043f\u0440\u0438\u043c\u0435\u0442 \u043d\u0435\u043d\u043e\u0440\u043c\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u0438 \u0438\u0437\u043c\u0435\u043d\u0438\u0442 \u0438\u0445 \u0434\u043b\u044f \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u043e\u0432 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u043d\u0430 <code>-1e10<\/code>. 
\u041f\u043e\u0441\u043a\u043e\u043b\u044c\u043a\u0443 \u044d\u0442\u0438 \u0447\u0438\u0441\u043b\u0430 \u0431\u0443\u0434\u0443\u0442 \u043c\u0438\u0437\u0435\u0440\u043d\u044b\u043c\u0438 \u043f\u043e \u0441\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u044e \u0441 \u0434\u0440\u0443\u0433\u0438\u043c\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f\u043c\u0438, \u043e\u043d\u0438 \u0441\u0442\u0430\u043d\u0443\u0442 \u0440\u0430\u0432\u043d\u044b\u043c\u0438 \u043d\u0443\u043b\u044e \u043f\u0440\u0438 \u043f\u0440\u043e\u0445\u043e\u0436\u0434\u0435\u043d\u0438\u0438 \u0447\u0435\u0440\u0435\u0437 \u0441\u043b\u043e\u0439 <code>softmax<\/code>, \u0447\u0442\u043e \u0433\u0430\u0440\u0430\u043d\u0442\u0438\u0440\u0443\u0435\u0442 \u043e\u0442\u0441\u0443\u0442\u0441\u0442\u0432\u0438\u0435 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u043a \u043c\u0430\u0440\u043a\u0435\u0440\u0430\u043c \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u0432 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u043c \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0438.<\/p>\n<pre><code class=\"python\">class Attention(nn.Module):     def __init__(self, enc_hid_dim, dec_hid_dim):         super().__init__()                  self.attn = nn.Linear((enc_hid_dim * 2) + dec_hid_dim, dec_hid_dim)         self.v = nn.Linear(dec_hid_dim, 1, bias = False)              def forward(self, hidden, encoder_outputs, mask):                  #hidden = [batch size, dec hid dim]         #encoder_outputs = [src len, batch size, enc hid dim * 2]                  batch_size = encoder_outputs.shape[1]         src_len = encoder_outputs.shape[0]                  #repeat decoder hidden state src_len times         hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)            encoder_outputs = encoder_outputs.permute(1, 0, 2)                  #hidden = [batch size, src len, dec hid dim]         #encoder_outputs = [batch size, src len, enc hid dim * 2]                  energy = 
torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim = 2)))                   #energy = [batch size, src len, dec hid dim]          attention = self.v(energy).squeeze(2)                  #attention = [batch size, src len]                  attention = attention.masked_fill(mask == 0, -1e10)                  return F.softmax(attention, dim = 1)<\/code><\/pre>\n<h4>\u0414\u0435\u043a\u043e\u0434\u0435\u0440<\/h4>\n<p>\u0412 \u0434\u0435\u043a\u043e\u0434\u0435\u0440\u0435 \u043d\u0443\u0436\u043d\u043e \u0432\u043d\u0435\u0441\u0442\u0438 \u043b\u0438\u0448\u044c \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u043d\u0435\u0431\u043e\u043b\u044c\u0448\u0438\u0445 \u0438\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u0439. \u0415\u043c\u0443 \u043d\u0435\u043e\u0431\u0445\u043e\u0434\u0438\u043c\u043e \u043f\u0440\u0438\u043d\u044f\u0442\u044c \u043c\u0430\u0441\u043a\u0443 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u0438 \u043f\u0435\u0440\u0435\u0434\u0430\u0442\u044c \u0435\u0435 \u043c\u043e\u0434\u0443\u043b\u044e \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f. 
\u041f\u043e\u0441\u043a\u043e\u043b\u044c\u043a\u0443 \u043c\u044b \u0445\u043e\u0442\u0438\u043c \u043f\u0440\u043e\u0441\u043c\u043e\u0442\u0440\u0435\u0442\u044c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u0432\u043e \u0432\u0440\u0435\u043c\u044f \u0432\u044b\u0432\u043e\u0434\u0430, \u043c\u044b \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u0435\u043c \u0438 \u0442\u0435\u043d\u0437\u043e\u0440 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f.<\/p>\n<pre><code class=\"python\">class Decoder(nn.Module):     def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout, attention):         super().__init__()          self.output_dim = output_dim         self.attention = attention                  self.embedding = nn.Embedding(output_dim, emb_dim)                  self.rnn = nn.GRU((enc_hid_dim * 2) + emb_dim, dec_hid_dim)                  self.fc_out = nn.Linear((enc_hid_dim * 2) + dec_hid_dim + emb_dim, output_dim)                  self.dropout = nn.Dropout(dropout)              def forward(self, input, hidden, encoder_outputs, mask):                       #input = [batch size]         #hidden = [batch size, dec hid dim]         #encoder_outputs = [src len, batch size, enc hid dim * 2]         #mask = [batch size, src len]                  input = input.unsqueeze(0)                  #input = [1, batch size]                  embedded = self.dropout(self.embedding(input))                  #embedded = [1, batch size, emb dim]                  a = self.attention(hidden, encoder_outputs, mask)                          #a = [batch size, src len]                  a = a.unsqueeze(1)                  #a = [batch size, 1, src len]                  encoder_outputs = encoder_outputs.permute(1, 0, 2)                  #encoder_outputs = [batch size, src len, enc hid dim * 2]                  weighted = torch.bmm(a, encoder_outputs)                  #weighted = [batch size, 1, enc hid dim * 2]            
      weighted = weighted.permute(1, 0, 2)                  #weighted = [1, batch size, enc hid dim * 2]                  rnn_input = torch.cat((embedded, weighted), dim = 2)                  #rnn_input = [1, batch size, (enc hid dim * 2) + emb dim]                      output, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))                  #output = [seq len, batch size, dec hid dim * n directions]         #hidden = [n layers * n directions, batch size, dec hid dim]                  #seq len, n layers and n directions will always be 1 in this decoder, therefore:         #output = [1, batch size, dec hid dim]         #hidden = [1, batch size, dec hid dim]         #this also means that output == hidden         assert (output == hidden).all()                  embedded = embedded.squeeze(0)         output = output.squeeze(0)         weighted = weighted.squeeze(0)                  prediction = self.fc_out(torch.cat((output, weighted, embedded), dim = 1))                  #prediction = [batch size, output dim]                  return prediction, hidden.squeeze(0), a.squeeze(1)<\/code><\/pre>\n<h4>Seq2Seq<\/h4>\n<p>\u041e\u0431\u0449\u0430\u044f \u043c\u043e\u0434\u0435\u043b\u044c seq2seq \u043d\u0443\u0436\u0434\u0430\u0435\u0442\u0441\u044f \u0432 \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u044b\u0445 \u0438\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u044f\u0445 \u0434\u043b\u044f \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0445 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0445 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439, \u043c\u0430\u0441\u043a\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f \u0438 \u043b\u043e\u0433\u0438\u0447\u0435\u0441\u043a\u043e\u0433\u043e \u0432\u044b\u0432\u043e\u0434\u0430.<\/p>\n<p>\u041d\u0430\u043c \u043d\u0443\u0436\u043d\u043e \u0441\u043e\u043e\u0431\u0449\u0438\u0442\u044c \u043c\u043e\u0434\u0435\u043b\u0438, 
\u043a\u0430\u043a\u043e\u0432\u044b \u0438\u043d\u0434\u0435\u043a\u0441\u044b \u0434\u043b\u044f \u0442\u043e\u043a\u0435\u043d\u0430 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f, \u0430 \u0442\u0430\u043a\u0436\u0435 \u043f\u0435\u0440\u0435\u0434\u0430\u0442\u044c \u0434\u043b\u0438\u043d\u0443 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u0432\u0445\u043e\u0434\u043d\u044b\u0445 \u0434\u0430\u043d\u043d\u044b\u0445 \u0432 \u043c\u0435\u0442\u043e\u0434 <code>forward<\/code>.<\/p>\n<p>\u041c\u044b \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u0438\u043d\u0434\u0435\u043a\u0441 \u0442\u043e\u043a\u0435\u043d\u0430 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u0434\u043b\u044f \u0441\u043e\u0437\u0434\u0430\u043d\u0438\u044f \u043c\u0430\u0441\u043e\u043a, \u0441\u043e\u0437\u0434\u0430\u0432\u0430\u044f \u0442\u0435\u043d\u0437\u043e\u0440 \u043c\u0430\u0441\u043a\u0438, \u0440\u0430\u0432\u043d\u044b\u0439 1, \u0432\u0435\u0437\u0434\u0435, \u0433\u0434\u0435 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 \u043d\u0435 \u0440\u0430\u0432\u043d\u043e \u0442\u043e\u043a\u0435\u043d\u0443 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f. 
\u0412\u0441\u0435 \u044d\u0442\u043e \u0434\u0435\u043b\u0430\u0435\u0442\u0441\u044f \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e \u0444\u0443\u043d\u043a\u0446\u0438\u0438 <code>create_mask<\/code>.<\/p>\n<p>\u0414\u043b\u0438\u043d\u044b \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439, \u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u043d\u0435\u043e\u0431\u0445\u043e\u0434\u0438\u043c\u043e \u043f\u0435\u0440\u0435\u0434\u0430\u0442\u044c \u043a\u043e\u0434\u0435\u0440\u0443 \u0434\u043b\u044f \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0445 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0445 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439.<\/p>\n<p>\u0412\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u043d\u0430 \u043a\u0430\u0436\u0434\u043e\u043c \u0432\u0440\u0435\u043c\u0435\u043d\u043d\u043e\u043c \u0448\u0430\u0433\u0435 \u0441\u043e\u0445\u0440\u0430\u043d\u044f\u0435\u0442\u0441\u044f \u0432 <code>attentions<\/code><\/p>\n<pre><code class=\"python\">class Seq2Seq(nn.Module):     def __init__(self, encoder, decoder, src_pad_idx, device):         super().__init__()                  self.encoder = encoder         self.decoder = decoder         self.src_pad_idx = src_pad_idx         self.device = device              def create_mask(self, src):         mask = (src != self.src_pad_idx).permute(1, 0)         return mask              def forward(self, src, src_len, trg, teacher_forcing_ratio = 0.5):                  #src = [src len, batch size]         #src_len = [batch size]         #trg = [trg len, batch size]         #teacher_forcing_ratio is probability to use teacher forcing         #e.g. 
if teacher_forcing_ratio is 0.75 we use teacher forcing 75% of the time                              batch_size = src.shape[1]         trg_len = trg.shape[0]         trg_vocab_size = self.decoder.output_dim                  #tensor to store decoder outputs         outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)                  #encoder_outputs is all hidden states of the input sequence, back and forwards         #hidden is the final forward and backward hidden states, passed through a linear layer         encoder_outputs, hidden = self.encoder(src, src_len)                          #first input to the decoder is the &lt;sos&gt; tokens         input = trg[0,:]                  mask = self.create_mask(src)          #mask = [batch size, src len]                          for t in range(1, trg_len):                          #insert input token embedding, previous hidden state, all encoder hidden states              #  and mask             #receive output tensor (predictions) and new hidden state             output, hidden, _ = self.decoder(input, hidden, encoder_outputs, mask)                          #place predictions in a tensor holding predictions for each token             outputs[t] = output                          #decide if we are going to use teacher forcing or not             teacher_force = random.random() &lt; teacher_forcing_ratio                          #get the highest predicted token from our predictions             top1 = output.argmax(1)                           #if teacher forcing, use actual next token as next input             #if not, use predicted token             input = trg[t] if teacher_force else top1                      return outputs<\/code><\/pre>\n<h3>\u041e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u043c\u043e\u0434\u0435\u043b\u0438 Seq2Seq<\/h3>\n<p>\u0417\u0430\u0442\u0435\u043c \u0438\u043d\u0438\u0446\u0438\u0430\u043b\u0438\u0437\u0438\u0440\u0443\u0435\u043c \u043c\u043e\u0434\u0435\u043b\u044c 
\u0438 \u0440\u0430\u0437\u043c\u0435\u0449\u0430\u0435\u043c \u0435\u0435 \u043d\u0430 GPU.<\/p>\n<pre><code class=\"python\">INPUT_DIM = len(SRC.vocab) OUTPUT_DIM = len(TRG.vocab) ENC_EMB_DIM = 256 DEC_EMB_DIM = 256 ENC_HID_DIM = 512 DEC_HID_DIM = 512 ENC_DROPOUT = 0.5 DEC_DROPOUT = 0.5 SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]  attn = Attention(ENC_HID_DIM, DEC_HID_DIM) enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT) dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)  model = Seq2Seq(enc, dec, SRC_PAD_IDX, device).to(device)<\/code><\/pre>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u0438\u043d\u0438\u0446\u0438\u0430\u043b\u0438\u0437\u0438\u0440\u0443\u0435\u043c \u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u044b \u043c\u043e\u0434\u0435\u043b\u0438.<\/p>\n<pre><code class=\"python\">def init_weights(m):     for name, param in m.named_parameters():         if 'weight' in name:             nn.init.normal_(param.data, mean=0, std=0.01)         else:             nn.init.constant_(param.data, 0)              model.apply(init_weights)<\/code><\/pre>\n<p>\u041c\u044b \u0440\u0430\u0441\u043f\u0435\u0447\u0430\u0442\u0430\u0435\u043c \u043a\u043e\u043b\u0438\u0447\u0435\u0441\u0442\u0432\u043e \u043e\u0431\u0443\u0447\u0430\u0435\u043c\u044b\u0445 \u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u043e\u0432 \u0432 \u043c\u043e\u0434\u0435\u043b\u0438, \u0437\u0430\u043c\u0435\u0442\u0438\u0432, \u0447\u0442\u043e \u043e\u043d\u0430 \u0438\u043c\u0435\u0435\u0442 \u0442\u043e\u0447\u043d\u043e \u0442\u0430\u043a\u043e\u0435 \u0436\u0435 \u043a\u043e\u043b\u0438\u0447\u0435\u0441\u0442\u0432\u043e \u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u043e\u0432, \u0447\u0442\u043e \u0438 \u043c\u043e\u0434\u0435\u043b\u044c \u0431\u0435\u0437 \u0443\u043b\u0443\u0447\u0448\u0435\u043d\u0438\u0439.<\/p>\n<pre><code class=\"python\">def count_parameters(model):     return sum(p.numel() for p in 
model.parameters() if p.requires_grad)  print(f'The model has {count_parameters(model):,} trainable parameters')<\/code><\/pre>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u044f\u0435\u043c \u043d\u0430\u0448 \u043e\u043f\u0442\u0438\u043c\u0438\u0437\u0430\u0442\u043e\u0440 \u0438 \u043a\u0440\u0438\u0442\u0435\u0440\u0438\u0439 \u043e\u043f\u0442\u0438\u043c\u0438\u0437\u0430\u0446\u0438\u0438.<\/p>\n<p><code>ignore_index<\/code> \u0434\u043b\u044f \u043a\u0440\u0438\u0442\u0435\u0440\u0438\u044f \u0434\u043e\u043b\u0436\u0435\u043d \u0431\u044b\u0442\u044c \u0438\u043d\u0434\u0435\u043a\u0441\u043e\u043c \u0442\u043e\u043a\u0435\u043d\u0430 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u0434\u043b\u044f \u0446\u0435\u043b\u0435\u0432\u043e\u0433\u043e \u044f\u0437\u044b\u043a\u0430, \u043d\u043e \u043d\u0435 \u0434\u043b\u044f \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e.<\/p>\n<pre><code class=\"python\">optimizer = optim.Adam(model.parameters())<\/code><\/pre>\n<pre><code class=\"python\">TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]  criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)<\/code><\/pre>\n<p>\u0414\u0430\u043b\u0435\u0435 \u043c\u044b \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0438\u043c \u043d\u0430\u0448\u0438 \u0446\u0438\u043a\u043b\u044b \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f \u0438 \u043e\u0446\u0435\u043d\u043a\u0438.<\/p>\n<p>\u041f\u043e\u0441\u043a\u043e\u043b\u044c\u043a\u0443 \u043c\u044b \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c <code>include_lengths = True<\/code> \u0434\u043b\u044f \u043d\u0430\u0448\u0435\u0433\u043e \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u043e\u043b\u044f,<code> batch.src<\/code> \u0442\u0435\u043f\u0435\u0440\u044c \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u043a\u043e\u0440\u0442\u0435\u0436\u0435\u043c, \u043f\u0435\u0440\u0432\u044b\u0439 
\u044d\u043b\u0435\u043c\u0435\u043d\u0442 \u043a\u043e\u0442\u043e\u0440\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442 \u0441\u043e\u0431\u043e\u0439 \u0447\u0438\u0441\u043b\u043e\u0432\u043e\u0439 \u0442\u0435\u043d\u0437\u043e\u0440, \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u044f\u044e\u0449\u0438\u0439 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435, \u0430 \u0432\u0442\u043e\u0440\u043e\u0439 \u044d\u043b\u0435\u043c\u0435\u043d\u0442 \u2014 \u0434\u043b\u0438\u043d\u044b \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u0432 \u043f\u0430\u043a\u0435\u0442\u0435.<\/p>\n<p>\u041d\u0430\u0448\u0430 \u043c\u043e\u0434\u0435\u043b\u044c \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u0435\u0442, \u0432 \u0442\u043e\u043c \u0447\u0438\u0441\u043b\u0435, \u0432\u0435\u043a\u0442\u043e\u0440\u044b \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u0434\u043b\u044f \u0431\u0430\u0442\u0447\u0430 \u0438\u0441\u0445\u043e\u0434\u043d\u044b\u0445 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0439 \u043d\u0430 \u043a\u0430\u0436\u0434\u043e\u043c \u0432\u0440\u0435\u043c\u0435\u043d\u043d\u043e\u043c \u0448\u0430\u0433\u0435 \u0434\u0435\u043a\u043e\u0434\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f. 
\u041c\u044b \u043d\u0435 \u0431\u0443\u0434\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0438\u0445 \u0432\u043e \u0432\u0440\u0435\u043c\u044f \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f \u0438 \u043e\u0446\u0435\u043d\u043a\u0438 \u0442\u043e\u0447\u043d\u043e\u0441\u0442\u0438 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f, \u043d\u043e \u043c\u044b \u0431\u0443\u0434\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0438\u0445 \u043f\u043e\u0437\u0436\u0435 \u0434\u043b\u044f \u0432\u044b\u0432\u043e\u0434\u0430.<\/p>\n<p>\u0414\u043b\u044f \u043e\u0442\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u0438\u044f \u043f\u0440\u043e\u0433\u0440\u0435\u0441\u0441\u0430 \u0432 Google Colab \u0443\u0441\u0442\u0430\u043d\u0430\u0432\u043b\u0438\u0432\u0430\u0435\u043c tensorboard<\/p>\n<pre><code class=\"python\">!pip install tensorboardX<\/code><\/pre>\n<pre><code class=\"python\">from tensorboardX import SummaryWriter import datetime<\/code><\/pre>\n<pre><code class=\"python\">%load_ext tensorboard<\/code><\/pre>\n<pre><code class=\"python\">def train(model, iterator, optimizer, criterion, clip):          model.train()          epoch_loss = 0          for i, batch in enumerate(iterator):                  src, src_len = batch.src         trg = batch.trg                  optimizer.zero_grad()                  output = model(src, src_len, trg)                  #trg = [trg len, batch size]         #output = [trg len, batch size, output dim]                  output_dim = output.shape[-1]                  output = output[1:].view(-1, output_dim)         trg = trg[1:].view(-1)                  #trg = [(trg len - 1) * batch size]         #output = [(trg len - 1) * batch size, output dim]                  loss = criterion(output, trg)                  loss.backward()                  torch.nn.utils.clip_grad_norm_(model.parameters(), clip)                  optimizer.step()                  
epoch_loss += loss.item()              return epoch_loss \/ len(iterator)<\/code><\/pre>\n<pre><code class=\"python\">def evaluate(model, iterator, criterion):          model.eval()          epoch_loss = 0          with torch.no_grad():              for i, batch in enumerate(iterator):              src, src_len = batch.src             trg = batch.trg              output = model(src, src_len, trg, 0) #turn off teacher forcing                          #trg = [trg len, batch size]             #output = [trg len, batch size, output dim]              output_dim = output.shape[-1]                          output = output[1:].view(-1, output_dim)             trg = trg[1:].view(-1)              #trg = [(trg len - 1) * batch size]             #output = [(trg len - 1) * batch size, output dim]              loss = criterion(output, trg)              epoch_loss += loss.item()              return epoch_loss \/ len(iterator)<\/code><\/pre>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0438\u043c \u0444\u0443\u043d\u043a\u0446\u0438\u044e \u0434\u043b\u044f \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u0438\u044f \u043f\u0440\u043e\u0434\u043e\u043b\u0436\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438 \u044d\u043f\u043e\u0445.<\/p>\n<pre><code class=\"python\">def epoch_time(start_time, end_time):     elapsed_time = end_time - start_time     elapsed_mins = int(elapsed_time \/ 60)     elapsed_secs = int(elapsed_time - (elapsed_mins * 60))     return elapsed_mins, elapsed_secs<\/code><\/pre>\n<p>\u0417\u0430\u043f\u0443\u0441\u043a\u0430\u0435\u043c tensorboard. \u041e\u0442\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u0438\u0435 \u0434\u0430\u043d\u043d\u044b\u0445 \u043d\u0430\u0447\u043d\u0451\u0442\u0441\u044f \u043f\u043e\u0441\u043b\u0435 \u0437\u0430\u043f\u0443\u0441\u043a\u0430 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f. 
\u041e\u0431\u043d\u043e\u0432\u0438\u0442\u044c \u0434\u0430\u043d\u043d\u044b\u0435 \u043c\u043e\u0436\u043d\u043e \u043a\u043b\u0438\u043a\u043d\u0443\u0432 \u043f\u043e \u0438\u043a\u043e\u043d\u043a\u0435 \u0441\u043f\u0440\u0430\u0432\u0430 \u0432\u0432\u0435\u0440\u0445\u0443<\/p>\n<pre><code class=\"python\">!rm -rf .\/logs\/ %tensorboard --logdir runs<\/code><\/pre>\n<p>\u041f\u0440\u0435\u0434\u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0438\u0439 \u0448\u0430\u0433 \u2014 \u043e\u0431\u0443\u0447\u0438\u0442\u044c \u043d\u0430\u0448\u0443 \u043c\u043e\u0434\u0435\u043b\u044c. \u041e\u0431\u0440\u0430\u0442\u0438\u0442\u0435 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435, \u0447\u0442\u043e \u044d\u0442\u043e \u0437\u0430\u043d\u0438\u043c\u0430\u0435\u0442 \u0442\u043e\u043b\u044c\u043a\u043e \u043f\u043e\u0447\u0442\u0438 \u043f\u043e\u043b\u043e\u0432\u0438\u043d\u0443 \u0432\u0440\u0435\u043c\u0435\u043d\u0438 \u043f\u043e \u0441\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u044e \u0441 \u043d\u0430\u0448\u0435\u0439 \u043c\u043e\u0434\u0435\u043b\u044c\u044e \u0431\u0435\u0437 \u0443\u043b\u0443\u0447\u0448\u0435\u043d\u0438\u0439, \u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u043d\u044b\u0445 \u0432 \u044d\u0442\u043e\u0442 \u0440\u0430\u0437\u0434\u0435\u043b.<\/p>\n<pre><code class=\"python\">N_EPOCHS = 10 CLIP = 1  best_valid_loss = float('inf') writer = SummaryWriter()  for epoch in range(N_EPOCHS):          start_time = time.time()          train_loss = train(model, train_iterator, optimizer, criterion, CLIP)     valid_loss = evaluate(model, valid_iterator, criterion)          end_time = time.time()          epoch_mins, epoch_secs = epoch_time(start_time, end_time)          if valid_loss &lt; best_valid_loss:         best_valid_loss = valid_loss         torch.save(model.state_dict(), 'tut4-model.pt')          print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')     print(f'\\tTrain Loss: {train_loss:.3f} | Train PPL: 
{math.exp(train_loss):7.3f}')     print(f'\\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')     writer.add_scalar(\"Train Loss\", train_loss, epoch+1)     writer.add_scalar(\"Train PPL\", math.exp(train_loss), epoch+1)     writer.add_scalar(\"Val. Loss\", valid_loss, epoch+1)     writer.add_scalar(\"Val. PPL\", math.exp(valid_loss), epoch+1)  writer.close()<\/code><\/pre>\n<p>\u041d\u0430\u043a\u043e\u043d\u0435\u0446, \u043c\u044b \u0437\u0430\u0433\u0440\u0443\u0436\u0430\u0435\u043c \u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u044b \u0438\u0437 \u043d\u0430\u0448\u0435\u0439 \u043b\u0443\u0447\u0448\u0435\u0439 \u043c\u043e\u0434\u0435\u043b\u0438 \u0438 \u043f\u0440\u043e\u0432\u043e\u0434\u0438\u043c \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0443 \u043d\u0430 \u0442\u0435\u0441\u0442\u043e\u0432\u043e\u043c \u043d\u0430\u0431\u043e\u0440\u0435.<\/p>\n<p>\u041c\u044b \u043f\u043e\u043b\u0443\u0447\u0430\u0435\u043c \u043c\u043e\u0434\u0435\u043b\u044c \u0441 \u043b\u0443\u0447\u0448\u0438\u043c\u0438 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u0430\u043c\u0438, \u043d\u043e \u043f\u0440\u0438 \u044d\u0442\u043e\u043c \u043f\u043e\u0447\u0442\u0438 \u0432\u0434\u0432\u043e\u0435 \u0431\u044b\u0441\u0442\u0440\u0435\u0435!<\/p>\n<pre><code class=\"python\">model.load_state_dict(torch.load('tut4-model.pt'))  test_loss = evaluate(model, test_iterator, criterion)  print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')<\/code><\/pre>\n<h3>\u0412\u044b\u0432\u043e\u0434<\/h3>\n<p>\u0422\u0435\u043f\u0435\u0440\u044c \u043c\u044b \u043c\u043e\u0436\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u043d\u0430\u0448\u0443 \u043e\u0431\u0443\u0447\u0435\u043d\u043d\u0443\u044e \u043c\u043e\u0434\u0435\u043b\u044c \u0434\u043b\u044f \u0441\u043e\u0437\u0434\u0430\u043d\u0438\u044f 
\u043f\u0435\u0440\u0435\u0432\u043e\u0434\u043e\u0432.<\/p>\n<p><strong>\u041f\u0440\u0438\u043c\u0435\u0447\u0430\u043d\u0438\u0435:<\/strong> \u044d\u0442\u0438 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0431\u0443\u0434\u0443\u0442 \u0445\u0443\u0436\u0435, \u0447\u0435\u043c \u043f\u043e\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0435 \u0432 \u0441\u0442\u0430\u0442\u044c\u0435, \u043f\u043e\u0441\u043a\u043e\u043b\u044c\u043a\u0443 \u0430\u0432\u0442\u043e\u0440\u044b \u0432 \u0441\u0442\u0430\u0442\u044c\u0435 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043b\u0438 \u0441\u043a\u0440\u044b\u0442\u044b\u0435 \u0440\u0430\u0437\u043c\u0435\u0440\u044b 1000 \u0438 \u0442\u0440\u0435\u043d\u0438\u0440\u043e\u0432\u0430\u043b\u0438 \u043c\u043e\u0434\u0435\u043b\u044c \u0432 \u0442\u0435\u0447\u0435\u043d\u0438\u0435 4 \u0434\u043d\u0435\u0439! \u041e\u043d\u0438 \u0431\u044b\u043b\u0438 \u043f\u043e\u0434\u043e\u0431\u0440\u0430\u043d\u044b \u0434\u043b\u044f \u0442\u043e\u0433\u043e, \u0447\u0442\u043e\u0431\u044b \u043f\u0440\u043e\u0434\u0435\u043c\u043e\u043d\u0441\u0442\u0440\u0438\u0440\u043e\u0432\u0430\u0442\u044c, \u043a\u0430\u043a \u0434\u043e\u043b\u0436\u043d\u043e \u0432\u044b\u0433\u043b\u044f\u0434\u0435\u0442\u044c \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u043d\u0430 \u043c\u043e\u0434\u0435\u043b\u0438 \u0434\u043e\u0441\u0442\u0430\u0442\u043e\u0447\u043d\u043e \u0431\u043e\u043b\u044c\u0448\u043e\u0433\u043e \u0440\u0430\u0437\u043c\u0435\u0440\u0430.<\/p>\n<p>\u041d\u0430\u0448 <code>translate_sentence<\/code> \u0431\u0443\u0434\u0435\u0442 \u0434\u0435\u043b\u0430\u0442\u044c \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0435:<\/p>\n<ul>\n<li>\n<p>\u0443\u0431\u0435\u0434\u0438\u0442\u0441\u044f, \u0447\u0442\u043e \u043d\u0430\u0448\u0430 \u043c\u043e\u0434\u0435\u043b\u044c \u043d\u0430\u0445\u043e\u0434\u0438\u0442\u0441\u044f \u0432 \u0440\u0435\u0436\u0438\u043c\u0435 
\u043e\u0446\u0435\u043d\u043a\u0438, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0432\u0441\u0435\u0433\u0434\u0430 \u0434\u043e\u043b\u0436\u0435\u043d \u0431\u044b\u0442\u044c \u0432\u043a\u043b\u044e\u0447\u0451\u043d \u0434\u043b\u044f \u0432\u044b\u0432\u043e\u0434\u0430<\/p>\n<\/li>\n<li>\n<p>\u0442\u043e\u043a\u0435\u043d\u0438\u0437\u0438\u0440\u0443\u0435\u0442 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435, \u0435\u0441\u043b\u0438 \u043e\u043d\u043e \u043d\u0435 \u0431\u044b\u043b\u043e \u0442\u043e\u043a\u0435\u043d\u0438\u0437\u0438\u0440\u043e\u0432\u0430\u043d\u043e (\u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u0441\u0442\u0440\u043e\u043a\u043e\u0439)<\/p>\n<\/li>\n<li>\n<p>&#171;\u043e\u0446\u0438\u0444\u0440\u0443\u0435\u0442&#187; \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435<\/p>\n<\/li>\n<li>\n<p>\u043f\u0440\u0435\u043e\u0431\u0440\u0430\u0437\u0443\u0435\u0442 \u0435\u0433\u043e \u0432 \u0442\u0435\u043d\u0437\u043e\u0440 \u0438 \u0434\u043e\u0431\u0430\u0432\u0438\u0442 \u0440\u0430\u0437\u043c\u0435\u0440 \u0431\u0430\u0442\u0447\u0430<\/p>\n<\/li>\n<li>\n<p>\u043f\u043e\u043b\u0443\u0447\u0438\u0442 \u0434\u043b\u0438\u043d\u0443 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u0438 \u043f\u0440\u0435\u043e\u0431\u0440\u0430\u0437\u0443\u0435\u0442 \u0435\u0433\u043e \u0432 \u0442\u0435\u043d\u0437\u043e\u0440<\/p>\n<\/li>\n<li>\n<p>\u043f\u0435\u0440\u0435\u0434\u0430\u0441\u0442 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 \u0432 \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u0449\u0438\u043a<\/p>\n<\/li>\n<li>\n<p>\u0441\u043e\u0437\u0434\u0430\u0441\u0442 \u043c\u0430\u0441\u043a\u0443 \u0434\u043b\u044f 
\u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f<\/p>\n<\/li>\n<li>\n<p>\u0441\u043e\u0437\u0434\u0430\u0441\u0442 \u0441\u043f\u0438\u0441\u043e\u043a \u0434\u043b\u044f \u0445\u0440\u0430\u043d\u0435\u043d\u0438\u044f \u0432\u044b\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f, \u0438\u043d\u0438\u0446\u0438\u0430\u043b\u0438\u0437\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u043e\u0433\u043e \u0442\u043e\u043a\u0435\u043d\u043e\u043c <code>&lt;sos&gt;<\/code><\/p>\n<\/li>\n<li>\n<p>\u0441\u043e\u0437\u0434\u0430\u0441\u0442 \u0442\u0435\u043d\u0437\u043e\u0440 \u0434\u043b\u044f \u0443\u0434\u0435\u0440\u0436\u0430\u043d\u0438\u044f \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0439 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f<\/p>\n<\/li>\n<li>\n<p>\u043f\u043e\u043a\u0430 \u043c\u044b \u043d\u0435 \u0434\u043e\u0441\u0442\u0438\u0433\u043b\u0438 \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u043e\u0439 \u0434\u043b\u0438\u043d\u044b<\/p>\n<ul>\n<li>\n<p>\u043f\u043e\u043b\u0443\u0447\u0438\u0442 \u0432\u0445\u043e\u0434\u043d\u043e\u0439 \u0442\u0435\u043d\u0437\u043e\u0440, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0434\u043e\u043b\u0436\u0435\u043d \u0431\u044b\u0442\u044c \u043b\u0438\u0431\u043e <code>&lt;sos&gt;<\/code>, \u043b\u0438\u0431\u043e \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0438\u043c \u043f\u0440\u0435\u0434\u0441\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u043c \u0442\u043e\u043a\u0435\u043d\u043e\u043c<\/p>\n<\/li>\n<li>\n<p>\u043f\u0435\u0440\u0435\u0434\u0430\u0441\u0442 \u0432\u0445\u043e\u0434\u043d\u043e\u0439 \u0442\u0435\u043d\u0437\u043e\u0440, \u0432\u0441\u0435 \u0432\u044b\u0445\u043e\u0434\u044b \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u0449\u0438\u043a\u0430, \u0441\u043a\u0440\u044b\u0442\u043e\u0435 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435 \u0438 
\u043c\u0430\u0441\u043a\u0443 \u0432 \u0434\u0435\u043a\u043e\u0434\u0435\u0440<\/p>\n<\/li>\n<li>\n<p>\u0441\u043e\u0445\u0440\u0430\u043d\u0438\u0442 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f<\/p>\n<\/li>\n<li>\n<p>\u043f\u043e\u043b\u0443\u0447\u0438\u0442 \u043f\u0440\u0435\u0434\u0441\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0438\u0439 \u0442\u043e\u043a\u0435\u043d<\/p>\n<\/li>\n<li>\n<p>\u0434\u043e\u0431\u0430\u0432\u0438\u0442 \u043f\u0440\u0435\u0434\u0441\u043a\u0430\u0437\u0430\u043d\u0438\u0435 \u043a \u043f\u0440\u0435\u0434\u0441\u043a\u0430\u0437\u0430\u043d\u0438\u044e \u0442\u0435\u043a\u0443\u0449\u0435\u0433\u043e \u0432\u044b\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f<\/p>\n<\/li>\n<li>\n<p>\u043f\u0440\u0435\u0440\u0432\u0451\u0442 \u0432\u044b\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0435, \u0435\u0441\u043b\u0438 \u043f\u0440\u0435\u0434\u0441\u043a\u0430\u0437\u0430\u043d\u0438\u0435 \u0431\u044b\u043b\u043e \u0442\u043e\u043a\u0435\u043d\u043e\u043c <code>&lt;eos&gt;<\/code><\/p>\n<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u043f\u0440\u0435\u043e\u0431\u0440\u0430\u0437\u0443\u0435\u0442 \u0432\u044b\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 \u0438\u0437 \u0438\u043d\u0434\u0435\u043a\u0441\u043e\u0432 \u0432 \u0442\u043e\u043a\u0435\u043d\u044b<\/p>\n<\/li>\n<li>\n<p>\u0432\u0435\u0440\u043d\u0451\u0442 \u0432\u044b\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 (\u0441 \u0443\u0434\u0430\u043b\u0435\u043d\u043d\u044b\u043c \u0442\u043e\u043a\u0435\u043d\u043e\u043c <code>&lt;sos&gt;<\/code>) \u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u0434\u043b\u044f 
\u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438<\/p>\n<\/li>\n<\/ul>\n<pre><code class=\"python\">def translate_sentence(sentence, src_field, trg_field, model, device, max_len = 50):      model.eval()              if isinstance(sentence, str):         nlp = spacy.load('de')         tokens = [token.text.lower() for token in nlp(sentence)]     else:         tokens = [token.lower() for token in sentence]      tokens = [src_field.init_token] + tokens + [src_field.eos_token]              src_indexes = [src_field.vocab.stoi[token] for token in tokens]          src_tensor = torch.LongTensor(src_indexes).unsqueeze(1).to(device)      src_len = torch.LongTensor([len(src_indexes)])          with torch.no_grad():         encoder_outputs, hidden = model.encoder(src_tensor, src_len)      mask = model.create_mask(src_tensor)              trg_indexes = [trg_field.vocab.stoi[trg_field.init_token]]      attentions = torch.zeros(max_len, 1, len(src_indexes)).to(device)          for i in range(max_len):          trg_tensor = torch.LongTensor([trg_indexes[-1]]).to(device)                          with torch.no_grad():             output, hidden, attention = model.decoder(trg_tensor, hidden, encoder_outputs, mask)          attentions[i] = attention                      pred_token = output.argmax(1).item()                  trg_indexes.append(pred_token)          if pred_token == trg_field.vocab.stoi[trg_field.eos_token]:             break          trg_tokens = [trg_field.vocab.itos[i] for i in trg_indexes]          return trg_tokens[1:], attentions[:len(trg_tokens)-1]<\/code><\/pre>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u0441\u043e\u0437\u0434\u0430\u0434\u0438\u043c \u0444\u0443\u043d\u043a\u0446\u0438\u044e, \u043a\u043e\u0442\u043e\u0440\u0430\u044f \u043e\u0442\u043e\u0431\u0440\u0430\u0436\u0430\u0435\u0442 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u043c\u043e\u0434\u0435\u043b\u0438 \u043a 
\u0438\u0441\u0445\u043e\u0434\u043d\u043e\u043c\u0443 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044e \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0441\u0433\u0435\u043d\u0435\u0440\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u043e\u0433\u043e \u0446\u0435\u043b\u0435\u0432\u043e\u0433\u043e \u0442\u043e\u043a\u0435\u043d\u0430.<\/p>\n<pre><code class=\"python\">def display_attention(sentence, translation, attention):          fig = plt.figure(figsize=(10,10))     ax = fig.add_subplot(111)          attention = attention.squeeze(1).cpu().detach().numpy()          cax = ax.matshow(attention, cmap='bone')         ax.tick_params(labelsize=15)          x_ticks = [''] + ['&lt;sos&gt;'] + [t.lower() for t in sentence] + ['&lt;eos&gt;']     y_ticks = [''] + translation           ax.set_xticklabels(x_ticks, rotation=45)     ax.set_yticklabels(y_ticks)      ax.xaxis.set_major_locator(ticker.MultipleLocator(1))     ax.yaxis.set_major_locator(ticker.MultipleLocator(1))      plt.show()     plt.close()<\/code><\/pre>\n<p>\u0422\u0435\u043f\u0435\u0440\u044c \u043c\u044b \u0432\u043e\u0437\u044c\u043c\u0435\u043c \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u043e\u0432 \u0438\u0437 \u043d\u0430\u0448\u0435\u0433\u043e \u043d\u0430\u0431\u043e\u0440\u0430 \u0434\u0430\u043d\u043d\u044b\u0445 \u0438 \u043f\u043e\u0441\u043c\u043e\u0442\u0440\u0438\u043c, \u043d\u0430\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0445\u043e\u0440\u043e\u0448\u043e \u0440\u0430\u0431\u043e\u0442\u0430\u0435\u0442 \u043d\u0430\u0448\u0430 \u043c\u043e\u0434\u0435\u043b\u044c. 
\u041e\u0431\u0440\u0430\u0442\u0438\u0442\u0435 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435: \u0437\u0434\u0435\u0441\u044c \u043c\u044b \u0441\u043e\u0431\u0438\u0440\u0430\u0435\u043c\u0441\u044f \u0432\u044b\u0431\u0440\u0430\u0442\u044c \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0440\u0430\u0444\u0438\u043d\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u043f\u0440\u0438\u043c\u0435\u0440\u044b, \u0447\u0442\u043e\u0431\u044b \u0431\u044b\u043b\u043e \u0447\u0442\u043e-\u0442\u043e \u0438\u043d\u0442\u0435\u0440\u0435\u0441\u043d\u043e\u0435, \u043d\u043e \u043d\u0435 \u0441\u0442\u0435\u0441\u043d\u044f\u0439\u0442\u0435\u0441\u044c \u0438\u0437\u043c\u0435\u043d\u044f\u0442\u044c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 <code>example_idx<\/code>, \u0447\u0442\u043e\u0431\u044b \u043f\u043e\u0441\u043c\u043e\u0442\u0440\u0435\u0442\u044c \u043d\u0430 \u0434\u0440\u0443\u0433\u0438\u0435 \u043f\u0440\u0438\u043c\u0435\u0440\u044b.<\/p>\n<p>\u0421\u043d\u0430\u0447\u0430\u043b\u0430 \u043c\u044b \u043f\u043e\u043b\u0443\u0447\u0438\u043c \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u0438 \u0446\u0435\u043b\u0435\u0432\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u0438\u0437 \u043d\u0430\u0448\u0435\u0433\u043e \u043d\u0430\u0431\u043e\u0440\u0430 \u0434\u0430\u043d\u043d\u044b\u0445.<\/p>\n<pre><code class=\"python\">example_idx = 12  src = vars(train_data.examples[example_idx])['src'] trg = vars(train_data.examples[example_idx])['trg']  print(f'src = {src}') print(f'trg = {trg}')<\/code><\/pre>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u0432\u043e\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u0441\u044f \u0444\u0443\u043d\u043a\u0446\u0438\u0435\u0439 <code>translate_sentence<\/code>, \u0447\u0442\u043e\u0431\u044b \u043f\u043e\u043b\u0443\u0447\u0438\u0442\u044c \u043f\u0440\u0435\u0434\u0441\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0439 
\u043f\u0435\u0440\u0435\u0432\u043e\u0434 \u0438 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435. \u041c\u044b \u043f\u043e\u043a\u0430\u0437\u044b\u0432\u0430\u0435\u043c \u044d\u0442\u043e \u0433\u0440\u0430\u0444\u0438\u0447\u0435\u0441\u043a\u0438, \u0440\u0430\u0441\u043f\u043e\u043b\u0430\u0433\u0430\u044f \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 \u043d\u0430 \u043e\u0441\u0438 x \u0438 \u043f\u0440\u043e\u0433\u043d\u043e\u0437\u0438\u0440\u0443\u0435\u043c\u044b\u0439 \u043f\u0435\u0440\u0435\u0432\u043e\u0434 \u043d\u0430 \u043e\u0441\u0438 y. \u0427\u0435\u043c \u0441\u0432\u0435\u0442\u043b\u0435\u0435 \u043a\u0432\u0430\u0434\u0440\u0430\u0442 \u043d\u0430 \u043f\u0435\u0440\u0435\u0441\u0435\u0447\u0435\u043d\u0438\u0438 \u0434\u0432\u0443\u0445 \u0441\u043b\u043e\u0432, \u0442\u0435\u043c \u0431\u043e\u043b\u044c\u0448\u0435 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u043c\u043e\u0434\u0435\u043b\u044c \u0443\u0434\u0435\u043b\u044f\u043b\u0430 \u044d\u0442\u043e\u043c\u0443 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u043c\u0443 \u0441\u043b\u043e\u0432\u0443 \u043f\u0440\u0438 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u0435 \u044d\u0442\u043e\u0433\u043e \u0446\u0435\u043b\u0435\u0432\u043e\u0433\u043e \u0441\u043b\u043e\u0432\u0430.<\/p>\n<p>\u041d\u0438\u0436\u0435 \u043f\u0440\u0438\u0432\u0435\u0434\u0435\u043d \u043f\u0440\u0438\u043c\u0435\u0440, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u043c\u043e\u0434\u0435\u043b\u044c \u043f\u044b\u0442\u0430\u043b\u0430\u0441\u044c \u043f\u0435\u0440\u0435\u0432\u0435\u0441\u0442\u0438, \u043e\u043d\u0430 \u043f\u043e\u043b\u0443\u0447\u0430\u0435\u0442 \u043f\u0440\u0430\u0432\u0438\u043b\u044c\u043d\u044b\u0439 \u043f\u0435\u0440\u0435\u0432\u043e\u0434, \u0437\u0430 \u0438\u0441\u043a\u043b\u044e\u0447\u0435\u043d\u0438\u0435\u043c \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u044b\u0445 
\u0441\u043b\u043e\u0432.<\/p>\n<pre><code class=\"python\">translation, attention = translate_sentence(src, SRC, TRG, model, device)  print(f'predicted trg = {translation}')  display_attention(src, translation, attention)<\/code><\/pre>\n<p>\u041f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0438\u0437 \u043e\u0431\u0443\u0447\u0430\u044e\u0449\u0435\u0439 \u0432\u044b\u0431\u043e\u0440\u043a\u0438 \u043c\u043e\u0434\u0435\u043b\u044c \u043c\u043e\u0433\u043b\u0430 \u043f\u0440\u043e\u0441\u0442\u043e \u0437\u0430\u043f\u043e\u043c\u043d\u0438\u0442\u044c. \u0422\u0430\u043a \u0447\u0442\u043e \u0441\u043f\u0440\u0430\u0432\u0435\u0434\u043b\u0438\u0432\u043e, \u0447\u0442\u043e \u043c\u044b \u0442\u0430\u043a\u0436\u0435 \u0441\u043c\u043e\u0442\u0440\u0438\u043c \u043d\u0430 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0438\u0437 \u043d\u0430\u0431\u043e\u0440\u0430 \u0434\u043b\u044f \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0438 \u0438 \u0442\u0435\u0441\u0442\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f.<\/p>\n<pre><code class=\"python\">example_idx = 14  src = vars(valid_data.examples[example_idx])['src'] trg = vars(valid_data.examples[example_idx])['trg']  print(f'src = {src}') print(f'trg = {trg}')<\/code><\/pre>\n<h3>BLEU<\/h3>\n<p>\u0420\u0430\u043d\u044c\u0448\u0435 \u043c\u044b \u0437\u0430\u0431\u043e\u0442\u0438\u043b\u0438\u0441\u044c \u0442\u043e\u043b\u044c\u043a\u043e \u043e \u043f\u043e\u0442\u0435\u0440\u044f\u0445\/\u0434\u043e\u0441\u0442\u043e\u0432\u0435\u0440\u043d\u043e\u0441\u0442\u0438 \u043c\u043e\u0434\u0435\u043b\u0438. 
\u041e\u0434\u043d\u0430\u043a\u043e \u0435\u0441\u0442\u044c \u043f\u043e\u043a\u0430\u0437\u0430\u0442\u0435\u043b\u0438, \u0441\u043f\u0435\u0446\u0438\u0430\u043b\u044c\u043d\u043e \u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u0430\u043d\u043d\u044b\u0435 \u0434\u043b\u044f \u0438\u0437\u043c\u0435\u0440\u0435\u043d\u0438\u044f \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0430 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u0430. \u041d\u0430\u0438\u0431\u043e\u043b\u0435\u0435 \u043f\u043e\u043f\u0443\u043b\u044f\u0440\u043d\u044b\u043c \u0438\u0437 \u043d\u0438\u0445 \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f <em>BLEU<\/em>. \u041d\u0435 \u0432\u0434\u0430\u0432\u0430\u044f\u0441\u044c \u0432 \u043f\u043e\u0434\u0440\u043e\u0431\u043d\u043e\u0441\u0442\u0438, BLEU \u0440\u0430\u0441\u0441\u043c\u0430\u0442\u0440\u0438\u0432\u0430\u0435\u0442 \u0441\u043e\u0432\u043f\u0430\u0434\u0435\u043d\u0438\u0435 \u043f\u0440\u0435\u0434\u0441\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0445 \u0438 \u0444\u0430\u043a\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0445 \u0446\u0435\u043b\u0435\u0432\u044b\u0445 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439 \u0441 \u0442\u043e\u0447\u043a\u0438 \u0437\u0440\u0435\u043d\u0438\u044f \u0438\u0445 n-\u0433\u0440\u0430\u043c\u043c. 
\u041e\u043d \u0434\u0430\u0441\u0442 \u043d\u0430\u043c \u0447\u0438\u0441\u043b\u043e \u043e\u0442 0 \u0434\u043e 1 \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0439 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438, \u0433\u0434\u0435 1 \u043e\u0437\u043d\u0430\u0447\u0430\u0435\u0442 \u043f\u043e\u043b\u043d\u043e\u0435 \u0441\u043e\u0432\u043f\u0430\u0434\u0435\u043d\u0438\u0435, \u0442\u043e \u0435\u0441\u0442\u044c \u0438\u0434\u0435\u0430\u043b\u044c\u043d\u044b\u0439 \u043f\u0435\u0440\u0435\u0432\u043e\u0434 (\u0445\u043e\u0442\u044f \u043e\u0431\u044b\u0447\u043d\u043e \u043e\u0442\u043e\u0431\u0440\u0430\u0436\u0430\u044e\u0442 \u0432 \u043f\u0440\u043e\u0446\u0435\u043d\u0442\u0430\u0445 \u043e\u0442 0 \u0434\u043e 100). BLEU \u0431\u044b\u043b \u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u0430\u043d \u0434\u043b\u044f \u0440\u0430\u0431\u043e\u0442\u044b \u0441 \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u0438\u043c\u0438 \u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442\u0430\u043c\u0438 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u043e\u0432 \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0439 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0439 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438, \u043e\u0434\u043d\u0430\u043a\u043e \u0432 \u043d\u0430\u0448\u0435\u043c \u043d\u0430\u0431\u043e\u0440\u0435 \u0434\u0430\u043d\u043d\u044b\u0445 \u0443 \u043d\u0430\u0441 \u0435\u0441\u0442\u044c \u0442\u043e\u043b\u044c\u043a\u043e \u043e\u0434\u0438\u043d \u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442.<\/p>\n<p>\u041c\u044b \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u044f\u0435\u043c \u0444\u0443\u043d\u043a\u0446\u0438\u044e <code>calculate_bleu<\/code>, \u043a\u043e\u0442\u043e\u0440\u0430\u044f \u0432\u044b\u0447\u0438\u0441\u043b\u044f\u0435\u0442 \u043e\u0446\u0435\u043d\u043a\u0443 BLEU \u043f\u043e 
\u043f\u0440\u0435\u0434\u043e\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u043d\u043e\u043c\u0443 \u043d\u0430\u0431\u043e\u0440\u0443 \u0434\u0430\u043d\u043d\u044b\u0445 TorchText. \u042d\u0442\u0430 \u0444\u0443\u043d\u043a\u0446\u0438\u044f \u0441\u043e\u0437\u0434\u0430\u0435\u0442 \u043a\u043e\u0440\u043f\u0443\u0441 \u0444\u0430\u043a\u0442\u0438\u0447\u0435\u0441\u043a\u043e\u0433\u043e \u0438 \u043f\u0440\u043e\u0433\u043d\u043e\u0437\u0438\u0440\u0443\u0435\u043c\u043e\u0433\u043e \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u0430 \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f, \u0430 \u0437\u0430\u0442\u0435\u043c \u0432\u044b\u0447\u0438\u0441\u043b\u044f\u0435\u0442 \u043e\u0446\u0435\u043d\u043a\u0443 BLEU.<\/p>\n<pre><code class=\"python\">from torchtext.data.metrics import bleu_score  def calculate_bleu(data, src_field, trg_field, model, device, max_len = 50):          trgs = []     pred_trgs = []          for datum in data:                  src = vars(datum)['src']         trg = vars(datum)['trg']                  pred_trg, _ = translate_sentence(src, src_field, trg_field, model, device, max_len)                  #cut off &lt;eos&gt; token         pred_trg = pred_trg[:-1]                  pred_trgs.append(pred_trg)         trgs.append([trg])              return bleu_score(pred_trgs, trgs)<\/code><\/pre>\n<p>\u041c\u044b \u043f\u043e\u043b\u0443\u0447\u0430\u0435\u043c BLEU \u043e\u043a\u043e\u043b\u043e 28. 
\u0415\u0441\u043b\u0438 \u043c\u044b \u0441\u0440\u0430\u0432\u043d\u0438\u043c \u044d\u0442\u043e\u0442 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442 \u0441\u043e \u0441\u0442\u0430\u0442\u044c\u0451\u0439, \u0438\u0437 \u043a\u043e\u0442\u043e\u0440\u043e\u0439 \u043f\u044b\u0442\u0430\u0435\u043c\u0441\u044f \u0432\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u0435\u0441\u0442\u0438 \u043c\u043e\u0434\u0435\u043b\u044c \u0441 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435\u043c, \u0442\u043e \u0442\u0430\u043c \u0430\u0432\u0442\u043e\u0440\u044b \u0434\u043e\u0441\u0442\u0438\u0433\u0430\u044e\u0442 26,75 \u0431\u0430\u043b\u043b\u0430 \u043f\u043e \u0448\u043a\u0430\u043b\u0435 BLEU. \u042d\u0442\u043e \u043f\u043e\u0445\u043e\u0436\u0435 \u043d\u0430 \u043d\u0430\u0448\u0443 \u043e\u0446\u0435\u043d\u043a\u0443, \u043e\u0434\u043d\u0430\u043a\u043e \u043e\u043d\u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044e\u0442 \u0441\u043e\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u043e \u0434\u0440\u0443\u0433\u043e\u0439 \u043d\u0430\u0431\u043e\u0440 \u0434\u0430\u043d\u043d\u044b\u0445, \u0430 \u0440\u0430\u0437\u043c\u0435\u0440 \u0438\u0445 \u043c\u043e\u0434\u0435\u043b\u0438 \u043d\u0430\u043c\u043d\u043e\u0433\u043e \u0431\u043e\u043b\u044c\u0448\u0435 &#8212; 1000 \u0441\u043a\u0440\u044b\u0442\u044b\u0445 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0439, \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u043a\u043e\u0442\u043e\u0440\u044b\u0445 \u0437\u0430\u043d\u0438\u043c\u0430\u0435\u0442 4 \u0434\u043d\u044f! 
&#8212; \u0442\u0430\u043a \u0447\u0442\u043e \u043c\u044b \u043d\u0435 \u043c\u043e\u0436\u0435\u043c \u0441\u0440\u0430\u0432\u043d\u0438\u0432\u0430\u0442\u044c \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u044b \u0441 \u043d\u0438\u043c\u0438.<\/p>\n<p>\u042d\u0442\u043e \u0447\u0438\u0441\u043b\u043e \u043d\u0430 \u0441\u0430\u043c\u043e\u043c \u0434\u0435\u043b\u0435 \u043d\u0435 \u043f\u043e\u0434\u0434\u0430\u0435\u0442\u0441\u044f \u0438\u043d\u0442\u0435\u0440\u043f\u0440\u0435\u0442\u0430\u0446\u0438\u0438, \u043c\u044b \u043d\u0435 \u043c\u043e\u0436\u0435\u043c \u0441\u043a\u0430\u0437\u0430\u0442\u044c \u043e \u043d\u0435\u043c \u043c\u043d\u043e\u0433\u043e. \u0421\u0430\u043c\u0430\u044f \u043f\u043e\u043b\u0435\u0437\u043d\u0430\u044f \u0447\u0430\u0441\u0442\u044c \u043e\u0446\u0435\u043d\u043a\u0438 BLEU \u0437\u0430\u043a\u043b\u044e\u0447\u0430\u0435\u0442\u0441\u044f \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e \u0435\u0451 \u043c\u043e\u0436\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0434\u043b\u044f \u0441\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u044f \u0440\u0430\u0437\u043d\u044b\u0445 \u043c\u043e\u0434\u0435\u043b\u0435\u0439 \u0432 \u043e\u0434\u043d\u043e\u043c \u043d\u0430\u0431\u043e\u0440\u0435 \u0434\u0430\u043d\u043d\u044b\u0445, \u0433\u0434\u0435 \u043c\u043e\u0434\u0435\u043b\u044c \u0441 \u0431\u043e\u043b\u0435\u0435 \u0432\u044b\u0441\u043e\u043a\u043e\u0439 \u043e\u0446\u0435\u043d\u043a\u043e\u0439 BLEU \u00ab\u043b\u0443\u0447\u0448\u0435\u00bb.<\/p>\n<pre><code class=\"python\">bleu_score = calculate_bleu(test_data, SRC, TRG, model, device)  print(f'BLEU score = {bleu_score*100:.2f}')<\/code><\/pre>\n<h3>\u041e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0441\u0435\u0442\u0438 \u0438\u043d\u0432\u0435\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f<\/h3>\n<p>\u0412 
\u043a\u043e\u043d\u0446\u0435 \u043f\u0440\u0438\u0432\u0435\u0434\u0443 \u043e\u0434\u0438\u043d \u0438\u0437 \u043c\u043e\u0438\u0445 \u043b\u044e\u0431\u0438\u043c\u044b\u0445 \u0442\u0435\u0441\u0442\u043e\u0432: \u0442\u0435\u0441\u0442 \u043d\u0430 \u0438\u043d\u0432\u0435\u0440\u0441\u0438\u044e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f. \u041e\u0447\u0435\u043d\u044c \u043f\u0440\u043e\u0441\u0442\u0430\u044f \u0434\u043b\u044f \u0447\u0435\u043b\u043e\u0432\u0435\u043a\u0430 \u0437\u0430\u0434\u0430\u0447\u0430 (\u0443\u0447\u0435\u043d\u0438\u043a\u0438 \u043d\u0430\u0447\u0430\u043b\u044c\u043d\u043e\u0439 \u0448\u043a\u043e\u043b\u044b \u043e\u0431\u0443\u0447\u0430\u044e\u0442\u0441\u044f \u0437\u0430 10-15 \u043f\u0440\u0438\u043c\u0435\u0440\u043e\u0432), \u043d\u043e \u043f\u043e\u0440\u043e\u0439 \u043d\u0435\u043f\u0440\u0435\u043e\u0434\u043e\u043b\u0438\u043c\u0430\u044f \u0434\u043b\u044f \u0438\u0441\u043a\u0443\u0441\u0441\u0442\u0432\u0435\u043d\u043d\u044b\u0445 \u0441\u0438\u0441\u0442\u0435\u043c.<\/p>\n<p>\u0414\u043b\u044f Google Colab \u0441\u043a\u0430\u0447\u0430\u0435\u043c \u043e\u0431\u0443\u0447\u0430\u044e\u0449\u0438\u0435 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438<\/p>\n<pre><code class=\"python\">!wget https:\/\/raw.githubusercontent.com\/vasiliyeskin\/bentrevett-pytorch-seq2seq_ru\/master\/toy_revert\/train.csv -P toy_revert !wget https:\/\/raw.githubusercontent.com\/vasiliyeskin\/bentrevett-pytorch-seq2seq_ru\/master\/toy_revert\/val.csv -P toy_revert !wget https:\/\/raw.githubusercontent.com\/vasiliyeskin\/bentrevett-pytorch-seq2seq_ru\/master\/toy_revert\/test.csv -P toy_revert<\/code><\/pre>\n<p>\u0412\u043d\u0430\u0447\u0430\u043b\u0435 \u043e\u0431\u0443\u0447\u0438\u043c \u0441\u0435\u0442\u044c \u0438\u043d\u0432\u0435\u0440\u0441\u0438\u0438 \u0438 \u043f\u043e\u0441\u043c\u043e\u0442\u0440\u0438\u043c \u043d\u0430 
\u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442.<\/p>\n<pre><code class=\"python\">SRC = Field(tokenize=\"spacy\",             init_token='&lt;sos&gt;',             eos_token='&lt;eos&gt;',             lower=True,              include_lengths = True)  TRG = Field(tokenize=\"spacy\",             init_token='&lt;sos&gt;',             eos_token='&lt;eos&gt;',             lower=True)  data_fields = [('src', SRC), ('trg', TRG)] # load the dataset in csv format train_data, valid_data, test_data = TabularDataset.splits(     path='toy_revert',     train='train.csv',     validation='val.csv',     test='test.csv',     format='csv',     fields=data_fields,     skip_header=True )  SRC.build_vocab(train_data) TRG.build_vocab(train_data)<\/code><\/pre>\n<pre><code class=\"python\">BATCH_SIZE = 128  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  train_iterator, valid_iterator, test_iterator = BucketIterator.splits(     (train_data, valid_data, test_data),       batch_size = BATCH_SIZE,      sort_within_batch = True,      sort_key = lambda x : len(x.src),      device = device)  ################## create DNN Seq2Seq ############################### INPUT_DIM = len(SRC.vocab) OUTPUT_DIM = len(TRG.vocab) ENC_EMB_DIM = 256 DEC_EMB_DIM = 256 ENC_HID_DIM = 512 DEC_HID_DIM = 512 ENC_DROPOUT = 0.5 DEC_DROPOUT = 0.5 SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token] TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]  attn = Attention(ENC_HID_DIM, DEC_HID_DIM) enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT) dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)  model = Seq2Seq(enc, dec, SRC_PAD_IDX, device).to(device)   ####################################################################   ####### initial weights model.apply(init_weights); # print(model)  print(f'The model has {count_parameters(model):,} trainable parameters')  optimizer = optim.Adam(model.parameters()) criterion = 
nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)  N_EPOCHS = 10 CLIP = 1  best_valid_loss = float('inf')  for epoch in range(N_EPOCHS):          start_time = time.time()          train_loss = train(model, train_iterator, optimizer, criterion, CLIP)     valid_loss = evaluate(model, valid_iterator, criterion)          end_time = time.time()          epoch_mins, epoch_secs = epoch_time(start_time, end_time)          if valid_loss &lt; best_valid_loss:         best_valid_loss = valid_loss         torch.save(model.state_dict(), 'tut4-model.pt')      print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')     print(f'\\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')     print(f'\\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')      # writer.add_scalar(\"Train_loss_average_per_epoch\", train_loss, epoch)     # writer.add_scalar(\"Validate_loss_average_per_epoch\", valid_loss, epoch)  model.load_state_dict(torch.load('tut4-model.pt'))  test_loss = evaluate(model, test_iterator, criterion)  print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')<\/code><\/pre>\n<pre><code class=\"python\">from torchtext.data.metrics import bleu_score  def calculate_bleu(data, src_field, trg_field, model, device, max_len = 50):          trgs = []     pred_trgs = []          for datum in data:                  src = vars(datum)['src']         trg = vars(datum)['trg']                  pred_trg, _ = translate_sentence(src, src_field, trg_field, model, device, max_len)                  #cut off &lt;eos&gt; token         pred_trg = pred_trg[:-1]                  pred_trgs.append(pred_trg)         trgs.append([trg])              return bleu_score(pred_trgs, trgs)<\/code><\/pre>\n<pre><code class=\"python\">example_idx = 10  src = vars(test_data.examples[example_idx])['src'] trg = vars(test_data.examples[example_idx])['trg']  print(f'src = {src}') print(f'trg = {trg}')  print(f'source        = {src}') translation, 
attention =  translate_sentence(src, SRC, TRG, model, device) display_attention(src, translation, attention)  print(f'predicted trg = {translation}')  src = ['a', 'b', 'c', 'a', 'd'] print(f'source        = {src}') translation, attention =  translate_sentence(src, SRC, TRG, model, device) display_attention(src, translation, attention) print(f'predicted trg = {translation}')  src = 'd b c d'.split(' ') print(f'source        = {src}') translation, attention =  translate_sentence(src, SRC, TRG, model, device) display_attention(src, translation, attention) print(f'predicted trg = {translation}')  src = ['a', 'a', 'a', 'a', 'd'] print(f'source        = {src}') translation, attention = translate_sentence(src, SRC, TRG, model, device) print(f'predicted trg = {translation}')  src = ['d', 'b', 'c', 'a'] print(f'source        = {src}') translation, attention = translate_sentence(src, SRC, TRG, model, device) print(f'predicted trg = {translation}')  src = ['d', 'd', 'd', 'd', 'd', 'd', 'd', 'd'] print(f'source        = {src}') translation, attention = translate_sentence(src, SRC, TRG, model, device) print(f'predicted trg = {translation}')   bleu_score = calculate_bleu(test_data, SRC, TRG, model, device) print(f'BLEU score = {bleu_score * 100:.2f}')<\/code><\/pre>\n<p>\u0412 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0438\u0445 \u0440\u0430\u0437\u0434\u0435\u043b\u0430\u0445 \u043c\u044b \u043e\u0442\u043e\u0439\u0434\u0435\u043c \u043e\u0442 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f \u0440\u0435\u043a\u0443\u0440\u0440\u0435\u043d\u0442\u043d\u044b\u0445 \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u044b\u0445 \u0441\u0435\u0442\u0435\u0439 \u0438 \u043d\u0430\u0447\u043d\u0435\u043c \u0438\u0441\u043a\u0430\u0442\u044c \u0434\u0440\u0443\u0433\u0438\u0435 \u0441\u043f\u043e\u0441\u043e\u0431\u044b \u043f\u043e\u0441\u0442\u0440\u043e\u0435\u043d\u0438\u044f \u043c\u043e\u0434\u0435\u043b\u0435\u0439 sequence-to-sequence. 
\u0412 \u0447\u0430\u0441\u0442\u043d\u043e\u0441\u0442\u0438, \u0432 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0439 \u0447\u0430\u0441\u0442\u0438 \u043c\u044b \u0431\u0443\u0434\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0441\u0432\u0451\u0440\u0442\u043e\u0447\u043d\u044b\u0435 \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u044b\u0435 \u0441\u0435\u0442\u0438.<\/p>\n<\/div>\n<p> \u0441\u0441\u044b\u043b\u043a\u0430 \u043d\u0430 \u043e\u0440\u0438\u0433\u0438\u043d\u0430\u043b \u0441\u0442\u0430\u0442\u044c\u0438 <a href=\"https:\/\/habr.com\/ru\/post\/567998\/\"> https:\/\/habr.com\/ru\/post\/567998\/<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"\n<div class=\"post__text post__text_v2\" id=\"post-content-body\">\n<h2>4 &#8212; \u0423\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0435 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438, \u043c\u0430\u0441\u043a\u0438\u0440\u043e\u0432\u043a\u0430, \u0432\u044b\u0432\u043e\u0434 \u0438 \u043c\u0435\u0442\u043e\u0434 \u043e\u0446\u0435\u043d\u043a\u0438 BLEU<\/h2>\n<p>\u0412 \u044d\u0442\u043e\u0439 \u0447\u0430\u0441\u0442\u0438 \u043c\u044b \u0434\u043e\u0431\u0430\u0432\u0438\u043c \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0443\u043b\u0443\u0447\u0448\u0435\u043d\u0438\u0439 \u2014 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0435 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438 \u0438 \u043c\u0430\u0441\u043a\u0438\u0440\u043e\u0432\u043a\u0430 \u2014 \u043a \u043c\u043e\u0434\u0435\u043b\u0438 \u0438\u0437 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0435\u0433\u043e \u0440\u0430\u0437\u0434\u0435\u043b\u0430. 
\u0423\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0435 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044e\u0442\u0441\u044f, \u0447\u0442\u043e\u0431\u044b \u0441\u043e\u043e\u0431\u0449\u0438\u0442\u044c \u043d\u0430\u0448\u0435\u0439 RNN, \u0447\u0442\u043e \u043d\u0443\u0436\u043d\u043e \u043f\u0440\u043e\u043f\u0443\u0441\u043a\u0430\u0442\u044c \u043c\u0430\u0440\u043a\u0435\u0440\u044b \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u0432 \u043d\u0430\u0448\u0435\u043c \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u0449\u0438\u043a\u0435. \u041c\u0430\u0441\u043a\u0438\u0440\u043e\u0432\u043a\u0430 \u044f\u0432\u043d\u043e \u0437\u0430\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442 \u043c\u043e\u0434\u0435\u043b\u044c \u0438\u0433\u043d\u043e\u0440\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u043d\u044b\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f, \u0442\u0430\u043a\u0438\u0435 \u043a\u0430\u043a \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u043a \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u0430\u043c \u0441 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0435\u043c. 
\u041e\u0431\u0430 \u044d\u0442\u0438 \u043c\u0435\u0442\u043e\u0434\u0430 \u043e\u0431\u044b\u0447\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044e\u0442\u0441\u044f \u0432 \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0435 \u0435\u0441\u0442\u0435\u0441\u0442\u0432\u0435\u043d\u043d\u043e\u0433\u043e \u044f\u0437\u044b\u043a\u0430 (NLP).<\/p>\n<p>\u041a\u0440\u043e\u043c\u0435 \u0442\u043e\u0433\u043e, \u043c\u044b \u0440\u0430\u0441\u0441\u043c\u043e\u0442\u0440\u0438\u043c \u043a\u0430\u043a \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u043d\u0430\u0448\u0443 \u043c\u043e\u0434\u0435\u043b\u044c \u0434\u043b\u044f \u0432\u044b\u0432\u043e\u0434\u0430 \u0446\u0435\u043b\u0435\u0432\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f, \u0434\u0430\u0432\u0430\u044f \u0435\u0439 \u0432\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435, \u0432\u0438\u0434\u044f \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442 \u0435\u0451 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u0430, \u0438 \u0432\u044b\u044f\u0441\u043d\u044f\u044f, \u043d\u0430 \u0447\u0442\u043e \u0438\u043c\u0435\u043d\u043d\u043e \u043e\u043d\u0430 \u043e\u0431\u0440\u0430\u0449\u0430\u0435\u0442 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u043f\u0440\u0438 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u0435 \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0441\u043b\u043e\u0432\u0430.<\/p>\n<p>\u041d\u0430\u043a\u043e\u043d\u0435\u0446, \u043c\u044b \u0431\u0443\u0434\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u043c\u0435\u0442\u0440\u0438\u043a\u0443 BLEU \u0434\u043b\u044f \u0438\u0437\u043c\u0435\u0440\u0435\u043d\u0438\u044f \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0430 \u043d\u0430\u0448\u0438\u0445 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u043e\u0432.<\/p>\n<p>\u041a\u0430\u043a \u0438 
\u0440\u0430\u043d\u0435\u0435, \u0435\u0441\u043b\u0438 \u0432\u0438\u0437\u0443\u0430\u043b\u044c\u043d\u044b\u0439 \u0444\u043e\u0440\u043c\u0430\u0442 \u043f\u043e\u0441\u0442\u0430 \u0432\u0430\u0441 \u043d\u0435 \u0443\u0434\u043e\u0432\u043b\u0435\u0442\u0432\u043e\u0440\u044f\u0435\u0442, \u0442\u043e \u043d\u0438\u0436\u0435 \u0441\u0441\u044b\u043b\u043a\u0438 \u043d\u0430 \u0430\u043d\u0433\u043b\u0438\u0439\u0441\u043a\u0443\u044e \u0438 \u0440\u0443\u0441\u0441\u043a\u0443\u044e \u0432\u0435\u0440\u0441\u0438\u044e jupyter notebook:<\/p>\n<p><a href=\"https:\/\/github.com\/bentrevett\/pytorch-seq2seq\/blob\/master\/4%20-%20Packed%20Padded%20Sequences%2C%20Masking%2C%20Inference%20and%20BLEU.ipynb\" rel=\"noopener noreferrer nofollow\">\u0418\u0441\u0445\u043e\u0434\u043d\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f<\/a> <a href=\"https:\/\/colab.research.google.com\/github\/bentrevett\/pytorch-seq2seq\/blob\/master\/4%20-%20Packed%20Padded%20Sequences%2C%20Masking%2C%20Inference%20and%20BLEU.ipynb\" rel=\"noopener noreferrer nofollow\">Open jupyter notebook In Colab<\/a><\/p>\n<p><a href=\"https:\/\/github.com\/vasiliyeskin\/bentrevett-pytorch-seq2seq_ru\/blob\/master\/4%20-%20Packed%20Padded%20Sequences%2C%20Masking%2C%20Inference%20and%20BLEU.ipynb\" rel=\"noopener noreferrer nofollow\">\u0420\u0443\u0441\u0441\u043a\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f<\/a> <a href=\"https:\/\/colab.research.google.com\/github\/vasiliyeskin\/bentrevett-pytorch-seq2seq_ru\/blob\/master\/4%20-%20Packed%20Padded%20Sequences%2C%20Masking%2C%20Inference%20and%20BLEU.ipynb\" rel=\"noopener noreferrer nofollow\">Open jupyter notebook In Colab<\/a><\/p>\n<p><strong>\u0417\u0430\u043c\u0435\u0447\u0430\u043d\u0438\u0435<\/strong>: \u0440\u0443\u0441\u0441\u043a\u0430\u044f \u0432\u0435\u0440\u0441\u0438\u044f jupyter notebook \u043e\u0442\u043b\u0438\u0447\u0430\u0435\u0442\u0441\u044f \u043e\u0442 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0439 
\u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u043d\u044b\u043c \u0432 \u043a\u043e\u043d\u0446\u0435 \u0442\u0435\u0441\u0442\u043e\u043c \u043d\u0430 \u0438\u043d\u0432\u0435\u0440\u0441\u0438\u044e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f. \u041e\u0447\u0435\u043d\u044c \u043f\u0440\u043e\u0441\u0442\u0430\u044f \u0434\u043b\u044f \u0447\u0435\u043b\u043e\u0432\u0435\u043a\u0430 \u0437\u0430\u0434\u0430\u0447\u0430, \u043d\u043e, \u043f\u043e\u0440\u043e\u0439, \u043d\u0435\u043f\u0440\u0435\u043e\u0434\u043e\u043b\u0438\u043c\u0430 \u0434\u043b\u044f \u0438\u0441\u043a\u0443\u0441\u0441\u0442\u0432\u0435\u043d\u043d\u044b\u0445 \u0441\u0438\u0441\u0442\u0435\u043c.<\/p>\n<h3>\u041f\u043e\u0434\u0433\u043e\u0442\u043e\u0432\u043a\u0430 \u0434\u0430\u043d\u043d\u044b\u0445<\/h3>\n<p>\u0421\u043d\u0430\u0447\u0430\u043b\u0430 \u043c\u044b \u0438\u043c\u043f\u043e\u0440\u0442\u0438\u0440\u0443\u0435\u043c \u0432\u0441\u0435 \u043c\u043e\u0434\u0443\u043b\u0438, \u043a\u0430\u043a \u0438 \u0440\u0430\u043d\u044c\u0448\u0435, \u0441 \u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0438\u0435\u043c \u043c\u043e\u0434\u0443\u043b\u0435\u0439 <code>matplotlib<\/code>, \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u0445 \u0434\u043b\u044f \u043f\u0440\u043e\u0441\u043c\u043e\u0442\u0440\u0430 \u0432\u0435\u043a\u0442\u043e\u0440\u0430 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f.<\/p>\n<pre><code class=\"python\">import torch import torch.nn as nn import torch.optim as optim import torch.nn.functional as F  from torchtext.legacy.datasets import Multi30k from torchtext.legacy.data import Field, BucketIterator, TabularDataset  import matplotlib.pyplot as plt import matplotlib.ticker as ticker  import spacy import numpy as np  import random import math import time<\/code><\/pre>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u0443\u0441\u0442\u0430\u043d\u043e\u0432\u0438\u043c 
\u0441\u043b\u0443\u0447\u0430\u0439\u043d\u043e\u0435 \u043d\u0430\u0447\u0430\u043b\u044c\u043d\u043e\u0435 \u0447\u0438\u0441\u043b\u043e \u0434\u043b\u044f \u0432\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u043c\u043e\u0441\u0442\u0438.<\/p>\n<pre><code>SEED = 1234  random.seed(SEED) np.random.seed(SEED) torch.manual_seed(SEED) torch.cuda.manual_seed(SEED) torch.backends.cudnn.deterministic = True<\/code><\/pre>\n<p>\u0417\u0430\u0433\u0440\u0443\u0437\u0438\u0442\u0435 \u043d\u0435\u043c\u0435\u0446\u043a\u0443\u044e \u0438 \u0430\u043d\u0433\u043b\u0438\u0439\u0441\u043a\u0443\u044e \u043c\u043e\u0434\u0435\u043b\u0438 spaCy.<\/p>\n<pre><code class=\"python\">python -m spacy download en_core_web_sm python -m spacy download de_core_news_sm<\/code><\/pre>\n<p>\u0414\u043b\u044f \u0437\u0430\u0433\u0440\u0443\u0437\u043a\u0438 \u0432 Google Colab \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0438\u0435 \u043a\u043e\u043c\u0430\u043d\u0434\u044b (\u041f\u043e\u0441\u043b\u0435 \u0437\u0430\u0433\u0440\u0443\u0437\u043a\u0438 \u043e\u0431\u044f\u0437\u0430\u0442\u0435\u043b\u044c\u043d\u043e \u043f\u0435\u0440\u0435\u0437\u0430\u043f\u0443\u0441\u0442\u0438\u0442\u0435 colab runtime! 
\u041d\u0430\u0438\u0431\u044b\u0441\u0442\u0440\u0435\u0439\u0448\u0438\u0439 \u0441\u043f\u043e\u0441\u043e\u0431 \u0447\u0435\u0440\u0435\u0437 \u043a\u043e\u0440\u043e\u0442\u043a\u0443\u044e \u043a\u043e\u043c\u0430\u043d\u0434\u0443: <strong>Ctrl + M + .<\/strong>):<\/p>\n<pre><code class=\"python\">!pip install -U spacy==3.0 !python -m spacy download en_core_web_sm !python -m spacy download de_core_news_sm<\/code><\/pre>\n<p>\u041a\u0430\u043a \u0438 \u0440\u0430\u043d\u044c\u0448\u0435, \u043c\u044b \u0438\u043c\u043f\u043e\u0440\u0442\u0438\u0440\u0443\u0435\u043c spaCy \u0438 \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0438\u043c \u0442\u043e\u043a\u0435\u043d\u0438\u0437\u0430\u0442\u043e\u0440\u044b \u0434\u043b\u044f \u043d\u0435\u043c\u0435\u0446\u043a\u043e\u0433\u043e \u0438 \u0430\u043d\u0433\u043b\u0438\u0439\u0441\u043a\u043e\u0433\u043e \u044f\u0437\u044b\u043a\u043e\u0432.<\/p>\n<pre><code class=\"python\">spacy_de = spacy.load('de_core_news_sm') spacy_en = spacy.load('en_core_web_sm')<\/code><\/pre>\n<pre><code class=\"python\">def tokenize_de(text):     \"\"\"     Tokenizes German text from a string into a list of strings     \"\"\"     return [tok.text for tok in spacy_de.tokenizer(text)]  def tokenize_en(text):     \"\"\"     Tokenizes English text from a string into a list of strings     \"\"\"     return [tok.text for tok in spacy_en.tokenizer(text)]<\/code><\/pre>\n<p>\u041f\u0440\u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u0438 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0445 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0445 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439 \u043d\u0430\u043c \u043d\u0443\u0436\u043d\u043e \u0441\u043e\u043e\u0431\u0449\u0438\u0442\u044c PyTorch, \u043a\u0430\u043a\u043e\u0432\u0430 \u0434\u043b\u0438\u043d\u0430 
\u0444\u0430\u043a\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0445 (\u043d\u0435 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0445) \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439. \u041a \u0441\u0447\u0430\u0441\u0442\u044c\u044e \u0434\u043b\u044f \u043d\u0430\u0441, \u043e\u0431\u044a\u0435\u043a\u0442\u044b TorchText <code>Field<\/code> \u043f\u043e\u0437\u0432\u043e\u043b\u044f\u044e\u0442 \u043d\u0430\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442 <code>include_lengths<\/code>, \u044d\u0442\u043e \u043f\u0440\u0438\u0432\u0435\u0434\u0435\u0442 \u043a \u0442\u043e\u043c\u0443, \u0447\u0442\u043e \u043d\u0430\u0448 <code>batch.src<\/code> \u0431\u0443\u0434\u0435\u0442 \u043a\u043e\u0440\u0442\u0435\u0436\u0435\u043c. \u041f\u0435\u0440\u0432\u044b\u0439 \u044d\u043b\u0435\u043c\u0435\u043d\u0442 \u043a\u043e\u0440\u0442\u0435\u0436\u0430 \u0442\u0430\u043a\u043e\u0439 \u0436\u0435, \u043a\u0430\u043a \u0438 \u0440\u0430\u043d\u044c\u0448\u0435, \u043f\u0430\u043a\u0435\u0442 \u0447\u0438\u0441\u043b\u043e\u0432\u044b\u0445 \u0438\u0441\u0445\u043e\u0434\u043d\u044b\u0445 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0439 \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u0442\u0435\u043d\u0437\u043e\u0440\u0430, \u0430 \u0432\u0442\u043e\u0440\u043e\u0439 \u044d\u043b\u0435\u043c\u0435\u043d\u0442 \u2014 \u044d\u0442\u043e \u0434\u043b\u0438\u043d\u044b \u0431\u0435\u0437 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0439 \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u0432 \u043f\u0430\u043a\u0435\u0442\u0435.<\/p>\n<pre><code class=\"python\">SRC = Field(tokenize = tokenize_de,              init_token = '&lt;sos&gt;',              eos_token = 
'&lt;eos&gt;',              lower = True,              include_lengths = True)  TRG = Field(tokenize = tokenize_en,              init_token = '&lt;sos&gt;',              eos_token = '&lt;eos&gt;',              lower = True)<\/code><\/pre>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u0437\u0430\u0433\u0440\u0443\u0436\u0430\u0435\u043c \u0434\u0430\u043d\u043d\u044b\u0435.<\/p>\n<pre><code class=\"python\">train_data, valid_data, test_data = Multi30k.splits(exts = ('.de', '.en'),                                                      fields = (SRC, TRG))<\/code><\/pre>\n<p>\u0418 \u0437\u0430\u043f\u043e\u043b\u043d\u044f\u0435\u043c \u0441\u043b\u043e\u0432\u0430\u0440\u044c.<\/p>\n<pre><code class=\"python\">SRC.build_vocab(train_data, min_freq = 2) TRG.build_vocab(train_data, min_freq = 2)<\/code><\/pre>\n<p>\u0414\u0430\u043b\u0435\u0435 \u043c\u044b \u043f\u043e\u0434\u0433\u043e\u0442\u0430\u0432\u043b\u0438\u0432\u0430\u0435\u043c \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u044b.<\/p>\n<p>\u041e\u0434\u043d\u0430 \u0438\u0437 \u043e\u0441\u043e\u0431\u0435\u043d\u043d\u043e\u0441\u0442\u0435\u0439 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0445 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439 \u0441 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f\u043c\u0438 \u0437\u0430\u043a\u043b\u044e\u0447\u0430\u0435\u0442\u0441\u044f \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e \u0432\u0441\u0435 \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u044b (\u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f) \u0432 \u043f\u0430\u043a\u0435\u0442\u0435 \u0434\u043e\u043b\u0436\u043d\u044b \u0431\u044b\u0442\u044c \u043e\u0442\u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u043d\u044b \u043f\u043e \u0438\u0445 \u0434\u043b\u0438\u043d\u0435 \u0431\u0435\u0437 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0439 \u0432 
\u043f\u043e\u0440\u044f\u0434\u043a\u0435 \u0443\u0431\u044b\u0432\u0430\u043d\u0438\u044f, \u0442.\u0435. \u043f\u0435\u0440\u0432\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 \u0432 \u043f\u0430\u043a\u0435\u0442\u0435 \u0434\u043e\u043b\u0436\u043d\u043e \u0431\u044b\u0442\u044c \u0441\u0430\u043c\u044b\u043c \u0434\u043b\u0438\u043d\u043d\u044b\u043c. \u041c\u044b \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u0434\u0432\u0430 \u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442\u0430 \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u0430 \u0434\u043b\u044f \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0438 \u044d\u0442\u043e\u0433\u043e: <code>sort_within_batch<\/code>, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0441\u043e\u043e\u0431\u0449\u0430\u0435\u0442 \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u0443, \u0447\u0442\u043e \u0441\u043e\u0434\u0435\u0440\u0436\u0438\u043c\u043e\u0435 \u043f\u0430\u043a\u0435\u0442\u0430 \u043d\u0435\u043e\u0431\u0445\u043e\u0434\u0438\u043c\u043e \u043e\u0442\u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u0442\u044c, \u0438 <code>sort_key<\/code> &#8212; \u0444\u0443\u043d\u043a\u0446\u0438\u044e, \u043a\u043e\u0442\u043e\u0440\u0430\u044f \u0441\u043e\u043e\u0431\u0449\u0430\u0435\u0442 \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u0443, \u043a\u0430\u043a \u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u044b \u0432 \u043f\u0430\u043a\u0435\u0442\u0435. 
\u0417\u0434\u0435\u0441\u044c \u043c\u044b \u0441\u043e\u0440\u0442\u0438\u0440\u0443\u0435\u043c \u043f\u043e \u0434\u043b\u0438\u043d\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f <code>src<\/code>.<\/p>\n<pre><code class=\"python\">BATCH_SIZE = 128  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  train_iterator, valid_iterator, test_iterator = BucketIterator.splits(     (train_data, valid_data, test_data),       batch_size = BATCH_SIZE,      sort_within_batch = True,      sort_key = lambda x : len(x.src),      device = device)<\/code><\/pre>\n<h3>\u041f\u043e\u0441\u0442\u0440\u043e\u0435\u043d\u0438\u0435 \u043c\u043e\u0434\u0435\u043b\u0438<\/h3>\n<h4>\u041a\u043e\u0434\u0438\u0440\u043e\u0432\u0449\u0438\u043a<\/h4>\n<p>\u0414\u0430\u043b\u0435\u0435 \u043c\u044b \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u044f\u0435\u043c \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u0449\u0438\u043a.<\/p>\n<p>\u0412\u0441\u0435 \u0438\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u044f \u0437\u0434\u0435\u0441\u044c \u043f\u0440\u043e\u0438\u0441\u0445\u043e\u0434\u044f\u0442 \u0432 \u043c\u0435\u0442\u043e\u0434\u0435 <code>forward<\/code>. 
\u0422\u0435\u043f\u0435\u0440\u044c \u043e\u043d \u043f\u0440\u0438\u043d\u0438\u043c\u0430\u0435\u0442 \u0434\u043b\u0438\u043d\u0443 \u0438\u0441\u0445\u043e\u0434\u043d\u044b\u0445 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0439 \u0438 \u0441\u0430\u043c\u0438 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f.<\/p>\n<p>\u041f\u043e\u0441\u043b\u0435 \u0442\u043e\u0433\u043e, \u043a\u0430\u043a \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 (\u0430\u0432\u0442\u043e\u043c\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u043e\u0435 \u0438\u0442\u0435\u0440\u0430\u0442\u043e\u0440\u043e\u043c) \u0431\u044b\u043b\u043e \u043f\u0440\u043e\u043f\u0443\u0449\u0435\u043d\u043e \u0447\u0435\u0440\u0435\u0437 \u044d\u043c\u0431\u0435\u0434\u0434\u0438\u043d\u0433, \u043c\u044b \u043c\u043e\u0436\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0434\u043b\u044f \u043d\u0435\u0433\u043e <code>pack_padded_sequence<\/code> \u0441 \u0434\u043b\u0438\u043d\u0430\u043c\u0438 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0439. \u041e\u0431\u0440\u0430\u0442\u0438\u0442\u0435 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435, \u0447\u0442\u043e \u0442\u0435\u043d\u0437\u043e\u0440, \u0441\u043e\u0434\u0435\u0440\u0436\u0430\u0449\u0438\u0439 \u0434\u043b\u0438\u043d\u044b \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439, \u0434\u043e\u043b\u0436\u0435\u043d \u0431\u044b\u0442\u044c \u0442\u0435\u043d\u0437\u043e\u0440\u043e\u043c CPU \u0432 \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u0439 \u0432\u0435\u0440\u0441\u0438\u0438 PyTorch, \u0447\u0442\u043e \u043c\u044b \u044f\u0432\u043d\u043e \u0434\u0435\u043b\u0430\u0435\u043c \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e <code>to('cpu')<\/code>. 
<code>packed_embedded<\/code> \u0431\u0443\u0434\u0435\u0442 \u043d\u0430\u0448\u0435\u0439 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u043e\u0439 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c\u044e. \u0417\u0430\u0442\u0435\u043c \u043f\u0435\u0440\u0435\u0434\u0430\u0451\u043c \u0432 \u043d\u0430\u0448\u0443 RNN \u043a\u0430\u043a \u043e\u0431\u044b\u0447\u043d\u043e, \u0438 \u043e\u043d\u0430 \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u0435\u0442 <code>packed_outputs<\/code>, \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0439 \u0442\u0435\u043d\u0437\u043e\u0440, \u0441\u043e\u0434\u0435\u0440\u0436\u0430\u0449\u0438\u0439 \u0432\u0441\u0435 \u0441\u043a\u0440\u044b\u0442\u044b\u0435 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u0438\u0437 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438, \u0438 <code>hidden<\/code> &#8212; \u044d\u0442\u043e \u043f\u0440\u043e\u0441\u0442\u043e \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u0435 \u0441\u043a\u0440\u044b\u0442\u043e\u0435 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435 \u0438\u0437 \u043d\u0430\u0448\u0435\u0439 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438. 
<code>hidden<\/code> \u0441\u0442\u0430\u043d\u0434\u0430\u0440\u0442\u043d\u044b\u0439 \u0442\u0435\u043d\u0437\u043e\u0440 \u0438 \u043d\u0438\u043a\u0430\u043a \u043d\u0435 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u044b\u0439, \u0435\u0434\u0438\u043d\u0441\u0442\u0432\u0435\u043d\u043d\u0430\u044f \u0440\u0430\u0437\u043d\u0438\u0446\u0430 \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e \u043d\u0430 \u0432\u0445\u043e\u0434\u0435 \u0431\u044b\u043b\u0430 \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u0430\u044f \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c, \u044d\u0442\u043e\u0442 \u0442\u0435\u043d\u0437\u043e\u0440 \u0438\u0437 \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u0433\u043e <strong>\u044d\u043b\u0435\u043c\u0435\u043d\u0442\u0430 \u0431\u0435\u0437 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0439<\/strong> \u0432 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438.<\/p>\n<p>\u0417\u0430\u0442\u0435\u043c \u043c\u044b \u0440\u0430\u0441\u043f\u0430\u043a\u043e\u0432\u044b\u0432\u0430\u0435\u043c \u043d\u0430\u0448\u0438 <code>packed_outputs<\/code> \u0441 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u0435\u043c <code>pad_packed_sequence<\/code>, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u0435\u0442 <code>outputs<\/code> \u0438 \u0434\u043b\u0438\u043d\u044b \u043a\u0430\u0436\u0434\u043e\u0433\u043e, \u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u043d\u0430\u043c \u043d\u0435 \u043d\u0443\u0436\u043d\u044b.<\/p>\n<p>\u041f\u0435\u0440\u0432\u043e\u0435 \u0438\u0437\u043c\u0435\u0440\u0435\u043d\u0438\u0435 <code>outputs<\/code> &#8212; \u044d\u0442\u043e \u0434\u043b\u0438\u043d\u044b \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u044b\u0445 
\u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0435\u0439.  \u041e\u0434\u043d\u0430\u043a\u043e \u0438\u0437-\u0437\u0430 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f \u0443\u043f\u0430\u043a\u043e\u0432\u0430\u043d\u043d\u043e\u0439 \u0434\u043e\u043f\u043e\u043b\u043d\u0435\u043d\u043d\u043e\u0439 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0442\u0435\u043d\u0437\u043e\u0440\u043e\u0432 \u0431\u0443\u0434\u0443\u0442 \u043d\u0443\u043b\u0435\u0432\u044b\u043c\u0438, \u0432 \u0441\u043b\u0443\u0447\u0430\u0435 \u043a\u043e\u0433\u0434\u0430 \u043d\u0430 \u0432\u0445\u043e\u0434\u0435 \u0431\u044b\u043b \u043c\u0430\u0440\u043a\u0435\u0440 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f.<\/p>\n<pre><code class=\"python\">class Encoder(nn.Module):     def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout):         super().__init__()                  self.embedding = nn.Embedding(input_dim, emb_dim)                  self.rnn = nn.GRU(emb_dim, enc_hid_dim, bidirectional = True)                  self.fc = nn.Linear(enc_hid_dim * 2, dec_hid_dim)                  self.dropout = nn.Dropout(dropout)              def forward(self, src, src_len):                  #src = [src len, batch size]         #src_len = [batch size]                  embedded = self.dropout(self.embedding(src))                  #embedded = [src len, batch size, emb dim]                          #need to explicitly put lengths on cpu!         
packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, src_len.to('cpu'))                          packed_outputs, hidden = self.rnn(packed_embedded)                                           #packed_outputs is a packed sequence containing all hidden states         #hidden is now from the final non-padded element in the batch                      outputs, _ = nn.utils.rnn.pad_packed_sequence(packed_outputs)                       #outputs is now a non-packed sequence, all hidden states obtained         #  when the input is a pad token are all zeros                      #outputs = [src len, batch size, hid dim * num directions]         #hidden = [n layers * num directions, batch size, hid dim]                  #hidden is stacked [forward_1, backward_1, forward_2, backward_2, ...]         #outputs are always from the last layer                  #hidden [-2, :, : ] is the last of the forwards RNN          #hidden [-1, :, : ] is the last of the backwards RNN                  #initial decoder hidden is final hidden state of the forwards and backwards          #  encoder RNNs fed through a linear layer         hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)))                  #outputs = [src len, batch size, enc hid dim * 2]         #hidden = [batch size, dec hid dim]                  return outputs, hidden<\/code><\/pre>\n<h4>\u0412\u043d\u0438\u043c\u0430\u043d\u0438\u0435<\/h4>\n<p>\u041c\u043e\u0434\u0443\u043b\u044c \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u2014 \u044d\u0442\u043e \u0442\u0430 \u0447\u0430\u0441\u0442\u044c, \u0432 \u043a\u043e\u0442\u043e\u0440\u043e\u0439 \u043c\u044b \u0432\u044b\u0447\u0438\u0441\u043b\u044f\u0435\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u043f\u043e \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u043c\u0443 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044e.<\/p>\n<p>\u0420\u0430\u043d\u0435\u0435 
\u043c\u044b \u043f\u043e\u0437\u0432\u043e\u043b\u044f\u043b\u0438 \u044d\u0442\u043e\u043c\u0443 \u043c\u043e\u0434\u0443\u043b\u044e \u00ab\u043e\u0431\u0440\u0430\u0449\u0430\u0442\u044c \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435\u00bb \u043d\u0430 \u0442\u043e\u043a\u0435\u043d\u044b \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u0432 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u043c \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0438. \u041e\u0434\u043d\u0430\u043a\u043e, \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044f <em>\u043c\u0430\u0441\u043a\u0438\u0440\u043e\u0432\u043a\u0443<\/em>, \u043c\u044b \u043c\u043e\u0436\u0435\u043c \u0437\u0430\u0441\u0442\u0430\u0432\u0438\u0442\u044c \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u043a\u043e\u043d\u0446\u0435\u043d\u0442\u0440\u0438\u0440\u043e\u0432\u0430\u0442\u044c\u0441\u044f \u0442\u043e\u043b\u044c\u043a\u043e \u043d\u0430 \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u0430\u0445, \u043d\u0435 \u044f\u0432\u043b\u044f\u044e\u0449\u0438\u0445\u0441\u044f \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0435\u043c.<\/p>\n<p>\u041c\u0435\u0442\u043e\u0434 <code>forward<\/code> \u0442\u0435\u043f\u0435\u0440\u044c \u043f\u0440\u0438\u043d\u0438\u043c\u0430\u0435\u0442 \u043d\u0430 \u0432\u0445\u043e\u0434\u0435 <code>mask<\/code>. 
\u042d\u0442\u043e \u0442\u0435\u043d\u0437\u043e\u0440 <strong>[batch size, source sentence length]<\/strong>, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0440\u0430\u0432\u0435\u043d 1, \u0435\u0441\u043b\u0438 \u0442\u043e\u043a\u0435\u043d \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u043d\u0435 \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u0442\u043e\u043a\u0435\u043d\u043e\u043c \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f, \u0438 0, \u043a\u043e\u0433\u0434\u0430 \u044d\u0442\u043e \u043c\u0430\u0440\u043a\u0435\u0440 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f. \u041d\u0430\u043f\u0440\u0438\u043c\u0435\u0440, \u0435\u0441\u043b\u0438 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 [&#171;hello&#187;, &#171;how&#187;, &#171;are&#187;, &#171;you&#187;, &#171;?&#187;, <code>&lt;pad&gt;<\/code>, <code>&lt;pad&gt;<\/code>], \u0442\u043e \u043c\u0430\u0441\u043a\u0430 \u0431\u0443\u0434\u0435\u0442 [1, 1, 1, 1, 1, 0, 0].<\/p>\n<p>\u041c\u044b \u043f\u0440\u0438\u043c\u0435\u043d\u044f\u0435\u043c \u043c\u0430\u0441\u043a\u0443 \u043f\u043e\u0441\u043b\u0435 \u0442\u043e\u0433\u043e, \u043a\u0430\u043a \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435 \u0431\u044b\u043b\u043e \u0440\u0430\u0441\u0441\u0447\u0438\u0442\u0430\u043d\u043e, \u043d\u043e \u0434\u043e \u0442\u043e\u0433\u043e, \u043a\u0430\u043a \u043e\u043d\u043e \u0431\u0443\u0434\u0435\u0442 \u043d\u043e\u0440\u043c\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u043d\u043e \u0444\u0443\u043d\u043a\u0446\u0438\u0435\u0439 <code>softmax<\/code>. \u042d\u0442\u043e \u0440\u0435\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u043d\u043e \u0441 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u0435\u043c <code>masked_fill<\/code>. 
\u0422\u0435\u043d\u0437\u043e\u0440 \u0437\u0430\u043f\u043e\u043b\u043d\u044f\u0435\u0442\u0441\u044f \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u0430, \u0434\u043b\u044f \u043a\u043e\u0442\u043e\u0440\u043e\u0433\u043e \u0432\u044b\u043f\u043e\u043b\u043d\u044f\u0435\u0442\u0441\u044f \u0443\u0441\u043b\u043e\u0432\u0438\u0435 \u0434\u043b\u044f \u043f\u0435\u0440\u0432\u043e\u0433\u043e \u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442\u0430 <code>mask == 0<\/code>, \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c, \u0437\u0430\u0434\u0430\u043d\u043d\u044b\u043c \u0432\u0442\u043e\u0440\u044b\u043c \u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442\u043e\u043c <code>-1e10<\/code>. \u0414\u0440\u0443\u0433\u0438\u043c\u0438 \u0441\u043b\u043e\u0432\u0430\u043c\u0438, \u043e\u043d \u043f\u0440\u0438\u043c\u0435\u0442 \u043d\u0435\u043d\u043e\u0440\u043c\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u0438 \u0438\u0437\u043c\u0435\u043d\u0438\u0442 \u0438\u0445 \u0434\u043b\u044f \u044d\u043b\u0435\u043c\u0435\u043d\u0442\u043e\u0432 \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u043d\u0430 <code>-1e10<\/code>. 
\u041f\u043e\u0441\u043a\u043e\u043b\u044c\u043a\u0443 \u044d\u0442\u0438 \u0447\u0438\u0441\u043b\u0430 \u0431\u0443\u0434\u0443\u0442 \u043c\u0438\u0437\u0435\u0440\u043d\u044b\u043c\u0438 \u043f\u043e \u0441\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u044e \u0441 \u0434\u0440\u0443\u0433\u0438\u043c\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f\u043c\u0438, \u043e\u043d\u0438 \u0441\u0442\u0430\u043d\u0443\u0442 \u0440\u0430\u0432\u043d\u044b\u043c\u0438 \u043d\u0443\u043b\u044e \u043f\u0440\u0438 \u043f\u0440\u043e\u0445\u043e\u0436\u0434\u0435\u043d\u0438\u0438 \u0447\u0435\u0440\u0435\u0437 \u0441\u043b\u043e\u0439 <code>softmax<\/code>, \u0447\u0442\u043e \u0433\u0430\u0440\u0430\u043d\u0442\u0438\u0440\u0443\u0435\u0442 \u043e\u0442\u0441\u0443\u0442\u0441\u0442\u0432\u0438\u0435 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u044f \u043a \u043c\u0430\u0440\u043a\u0435\u0440\u0430\u043c \u0437\u0430\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u0432 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u043c \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0438.<\/p>\n<pre><code class=\"python\">class Attention(nn.Module):     def __init__(self, enc_hid_dim, dec_hid_dim):         super().__init__()                  self.attn = nn.Linear((enc_hid_dim * 2) + dec_hid_dim, dec_hid_dim)         self.v = nn.Linear(dec_hid_dim, 1, bias = False)              def forward(self, hidden, encoder_outputs, mask):                  #hidden = [batch size, dec hid 
dim]<\/code><\/pre>\n<\/div>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[],"tags":[],"class_list":["post-326527","post","type-post","status-publish","format-standard","hentry"],"_links":{"self":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts\/326527","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=326527"}],"version-history":[{"count":0,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts\/326527\/revisions"}],"wp:attachment":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=326527"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=326527"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=326527"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}