У меня есть следующий код в моей облачной функции -
import os
import numpy as np
import requests
import torch
from torch import nn
from torch.nn import functional as F
import math
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import TensorDataset
from transformers import AdamW, XLNetTokenizer, XLNetModel, XLNetLMHeadModel, XLNetConfig
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
def polarization(request):
    """Cloud Function entry point: predict the polarization label of an article.

    The model checkpoint is downloaded to ``/tmp`` (the only writable
    location in the function sandbox), loaded into a fresh
    ``XLNetForPolarizationClassification``, and applied to the text found
    under the ``"embeddedArticle"`` key of the request.

    Args:
        request: Either a mapping with an ``"embeddedArticle"`` entry, or an
            object exposing ``get_json()`` (e.g. the ``flask.Request`` passed
            to HTTP-triggered functions) whose JSON body has that key.

    Returns:
        Whatever ``generate_predictions`` returns for the single article.

    Raises:
        requests.HTTPError: If the checkpoint download does not succeed.
        KeyError: If ``"embeddedArticle"`` is missing from the payload.
    """
    MODEL_URL = 'https://polarization.s3-us-west-1.amazonaws.com/classifier_state_dict.pt'
    model_path = "/tmp/model.pth"
    print(MODEL_URL)

    # Stream the checkpoint to disk instead of holding it all in memory, and
    # fail loudly on a bad status so an error page is never saved as a model.
    # The timeout is (connect, read-between-chunks), not a total deadline.
    with requests.get(MODEL_URL, stream=True, timeout=(10, 120)) as r:
        print(r)
        r.raise_for_status()
        # Cloud function vm is a read only s/m. The only writable place is the tmp folder
        with open(model_path, "wb") as model_file:
            for chunk in r.iter_content(chunk_size=1 << 20):
                model_file.write(chunk)
    print("Wrote to the tmp file")

    # State dict requires model object
    model = XLNetForPolarizationClassification(num_labels=1)
    # map_location keeps loading working on a CPU-only host even if the
    # checkpoint was saved from a GPU.
    model.load_state_dict(torch.load(model_path, map_location="cpu"))

    # HTTP-triggered functions receive a request object that is not
    # subscriptable; plain mappings are still accepted unchanged.
    if hasattr(request, "get_json"):
        payload = request.get_json(silent=True) or {}
    else:
        payload = request

    # Tokenize the embedded article
    embeddedArticle = payload["embeddedArticle"]
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=True)
    textIds = tokenize_inputs(embeddedArticle, tokenizer, num_embeddings=250)

    # Generate the attention masks and padding
    masks = create_attn_masks(textIds)
    article = pd.DataFrame()
    article["features"] = textIds.tolist()
    article["masks"] = masks

    # Call generate_predictions
    # NOTE(review): the prediction is returned as-is; confirm the caller (or
    # the HTTP layer) can serialize it.
    pred = generate_predictions(model, article, 1)
    return pred
## Extracting parameter and returning prediction
def generate_predictions(model, df, num_labels, device="cpu"):
    """Run ``model`` over the rows of ``df`` and return rounded probabilities.

    Args:
        model: A ``torch.nn.Module`` called as
            ``model(input_ids=..., attention_mask=...)`` that returns logits.
        df: DataFrame with a ``"features"`` column (token-id lists) and a
            ``"masks"`` column (attention-mask lists), one row per sample.
        num_labels: Accepted for interface compatibility; not used here.
        device: Torch device on which the model and inputs are placed.

    Returns:
        A NumPy array of the sigmoid of the logits, rounded element-wise to
        0.0 or 1.0.
    """
    model.to(device)
    model.eval()

    # Read from the ``df`` argument (the original referenced an undefined name).
    X = torch.tensor(df["features"].values.tolist(), device=device)
    masks = torch.tensor(df["masks"].values.tolist(), dtype=torch.long, device=device)

    with torch.no_grad():
        # Run the model with the input_ids and attention_masks separately
        logits = model(input_ids=X, attention_mask=masks)

    probabilities = logits.sigmoid().detach().cpu().numpy()
    # The builtin round() does not accept arrays; np.round works element-wise.
    return np.round(probabilities)
class XLNetForPolarizationClassification(torch.nn.Module):
    """XLNet encoder with a mean-pooled linear classification head.

    The head emits ``num_labels`` logits per sequence. With ``num_labels=1``
    the parameter layout is the same as a 768 -> 1 linear layer on top of
    ``xlnet-base-cased``.
    """

    def __init__(self, num_labels=2):
        """Load the pretrained ``xlnet-base-cased`` encoder and build the head.

        Args:
            num_labels: Number of output logits per sequence.
        """
        super().__init__()
        self.num_labels = num_labels
        self.xlnet = XLNetModel.from_pretrained('xlnet-base-cased')
        # Size the head from the encoder config and from num_labels so the
        # logits always match the reshape done in the loss computation.
        self.classifier = torch.nn.Linear(self.xlnet.config.d_model, num_labels)
        torch.nn.init.xavier_normal_(self.classifier.weight)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
        """Return logits, or the BCE-with-logits loss when ``labels`` is given.

        Args:
            input_ids: Token ids passed to the encoder.
            token_type_ids: Optional segment ids passed to the encoder.
            attention_mask: Optional attention mask passed to the encoder.
            labels: Optional targets; reshaped to ``(-1, num_labels)``.

        Returns:
            The loss tensor if ``labels`` is not ``None``, else the logits.
        """
        last_hidden_state = self.xlnet(input_ids=input_ids,
                                       attention_mask=attention_mask,
                                       token_type_ids=token_type_ids)
        mean_last_hidden_state = self.pool_hidden_state(last_hidden_state)
        logits = self.classifier(mean_last_hidden_state)

        if labels is None:
            return logits

        loss_fct = BCEWithLogitsLoss()
        # BCEWithLogitsLoss needs targets of the same floating dtype as logits.
        targets = labels.view(-1, self.num_labels).type_as(logits)
        return loss_fct(logits.view(-1, self.num_labels), targets)

    def pool_hidden_state(self, last_hidden_state):
        """Average the first element of the encoder output over dimension 1.

        Args:
            last_hidden_state: Encoder output; its element ``[0]`` is the
                tensor that gets averaged.

        Returns:
            The mean of that tensor along dimension 1.
        """
        # NOTE(review): the mean includes every position along dim 1, padded
        # ones too — presumably matching how the checkpoint was trained.
        hidden = last_hidden_state[0]
        return torch.mean(hidden, 1)
def create_attn_masks(input_ids):
    """Build one attention mask per sequence of token ids.

    Each position becomes 1.0 when its id is greater than zero and 0.0
    otherwise, so positions holding 0 are masked out.

    Args:
        input_ids: Iterable of sequences of numeric token ids.

    Returns:
        A list of lists of floats with the same layout as ``input_ids``.
    """
    return [[float(token_id > 0) for token_id in row] for row in input_ids]
def tokenize_inputs(text, tokenizer, num_embeddings=250):
    """Convert texts into a zero-padded matrix of token ids.

    Each text is tokenized, truncated to leave room for two special tokens,
    mapped to ids, wrapped with the tokenizer's special tokens, and then
    right-padded with zeros (or cut at the end) to ``num_embeddings`` columns.

    Args:
        text: An iterable of strings, or a single string (treated as one
            text rather than being iterated character by character).
        tokenizer: Object providing ``tokenize``, ``convert_tokens_to_ids``
            and ``build_inputs_with_special_tokens``.
        num_embeddings: Number of columns in the returned matrix.

    Returns:
        ``numpy.ndarray`` of dtype int64 with shape
        ``(number_of_texts, num_embeddings)``.
    """
    if isinstance(text, str):
        text = [text]

    # Reserve two positions for the special tokens; never slice with a
    # negative bound when num_embeddings is smaller than 2.
    body_len = max(num_embeddings - 2, 0)

    rows = []
    for t in text:
        # tokenize the text, then truncate sequence to the desired length
        tokens = tokenizer.tokenize(t)[:body_len]
        # convert tokenized text into numeric ids for the appropriate LM
        ids = tokenizer.convert_tokens_to_ids(tokens)
        # let the tokenizer add its special tokens
        ids = tokenizer.build_inputs_with_special_tokens(ids)
        rows.append(ids[:num_embeddings])

    # Post-pad with zeros using NumPy, so this helper does not need Keras.
    input_ids = np.zeros((len(rows), num_embeddings), dtype=np.int64)
    for i, ids in enumerate(rows):
        input_ids[i, :len(ids)] = ids
    return input_ids
и requirements.txt
certifi==2020.6.20
chardet==3.0.4
click==7.1.2
cycler==0.10.0
filelock==3.0.12
future==0.18.2
h5py==2.10.0
idna==2.10
joblib==0.16.0
Keras==2.4.3
kiwisolver==1.2.0
matplotlib==3.3.0
numpy==1.19.1
packaging==20.4
Pillow==7.2.0
pyparsing==2.4.7
python-dateutil==2.8.1
PyYAML==5.3.1
regex==2020.7.14
requests==2.24.0
sacremoses==0.0.43
scipy==1.5.2
sentencepiece==0.1.91
six==1.15.0
tokenizers==0.8.1rc1
torch==1.6.0
tqdm==4.48.2
transformers==3.0.2
urllib3==1.25.10
Однако при развёртывании сборка завершается по тайм-ауту. В журналах нет ошибок: видно, что каждая зависимость из файла requirements.txt собрана. Первый оператор print также не попадает в журнал. Я не понимаю, какая часть модели или зависимостей вызывает тайм-аут. Вот скриншот журналов — ошибок нет, все записи имеют уровень info вплоть до превышения срока контекста (context deadline exceeded). Насколько я понимаю, я не могу поделиться самими журналами, не предоставив доступ к функции. Я установил тайм-аут на 9 минут (540 секунд). [1]
def function_name(request)
или def function_name(data, context)
, она отсутствует? - person Dustin Ingram   schedule 13.08.2020