What is the difference between PyTorch and TensorFlow?
| PyTorch | TensorFlow (Keras) | Creates / implements |
|---|---|---|
| nn.Linear | keras.layers.Dense | Dense (fully connected) layer |
| nn.Module | keras.layers.Layer | Base class for building models |
| nn.Dropout | keras.layers.Dropout | Dropout layer |
| nn.LayerNorm | keras.layers.LayerNormalization | Layer normalization |
| nn.Embedding | keras.layers.Embedding | Embedding layer |
| nn.GELU | keras.activations.gelu | GELU activation function |
| torch.bmm | tf.matmul | Batch matrix multiplication |
| model.forward | model.call | Model forward pass |
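To see the mapping concretely, here is a minimal sketch (assuming both torch and tensorflow are installed) comparing a fully connected layer and batch matrix multiplication in the two frameworks; the shapes are chosen only for illustration:

import torch
import torch.nn as nn
import tensorflow as tf

# Fully connected layer: nn.Linear(in, out) vs keras.layers.Dense(out)
# (Keras infers the input dimension on the first call.)
torch_fc = nn.Linear(784, 512)
keras_fc = tf.keras.layers.Dense(512)

x_torch = torch.randn(32, 784)
x_tf = tf.random.normal((32, 784))
print(torch_fc(x_torch).shape)      # torch.Size([32, 512])
print(keras_fc(x_tf).shape)         # (32, 512)

# Batch matrix multiplication: torch.bmm vs tf.matmul
a = torch.randn(8, 3, 4)
b = torch.randn(8, 4, 5)
print(torch.bmm(a, b).shape)        # torch.Size([8, 3, 5])

a_tf = tf.random.normal((8, 3, 4))
b_tf = tf.random.normal((8, 4, 5))
print(tf.matmul(a_tf, b_tf).shape)  # (8, 3, 5)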
How to define a model
tensorflow.keras
import tensorflow as tf

tf.debugging.set_log_device_placement(True)

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.flatten = tf.keras.layers.Flatten(input_shape=(28, 28))
        self.fc1 = tf.keras.layers.Dense(512, activation='relu')
        self.dropout = tf.keras.layers.Dropout(0.2)
        self.fc2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs):
        x = self.flatten(inputs)
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

model = MyModel()
cf) In TensorFlow, tf.debugging.set_log_device_placement(True) logs which device (CPU/GPU) each operation is placed on.
PyTorch
import torch
from torch import nn

# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, inputs):
        x = self.flatten(inputs)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
Let's compare the differences when defining a model.
| | GPU allocation | Base class | Fully connected layer | Forward-pass method | Model instance creation |
|---|---|---|---|---|---|
| PyTorch | cuda.is_available() | nn.Module | nn.Linear | forward() | model = NeuralNetwork().to(device) |
| TensorFlow | automatic | keras.Model | keras.layers.Dense | call() | model = MyModel() |
PyTorch includes the device variable, which records whether a GPU is available, when assigning the model object:
model = NeuralNetwork().to(device)
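One consequence is that the input tensors must be moved to the same device as the model; a minimal sketch (the dummy input shape here is illustrative only):

# The model's parameters now live on `device`, so inputs must be moved there too,
# otherwise PyTorch raises a device-mismatch error.
x = torch.rand(1, 28, 28)        # dummy input batch (illustrative shape)
logits = model(x.to(device))     # move the batch to the same device as the model
print(logits.shape)              # torch.Size([1, 10])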
How to compile a model
tensorflow.keras
- TensorFlow defines the optimizer, loss function, and metrics through the compile method of the model object.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
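The string arguments are shortcuts for Keras objects; a roughly equivalent call with explicit objects looks like the sketch below (useful when you want to change, for example, the learning rate):

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)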
PyTorch
- PyTorch stores the optimizer and loss function in variables.
- These variables are then used directly inside the training function.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
How to train a model
The two frameworks differ most in how models are trained.
tensorflow.keras
- TensorFlow can run training with a single call to the fit() method.
model.fit(train_images, train_labels, epochs=5)
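Evaluation is equally compact; a minimal sketch, assuming test_images and test_labels are prepared the same way as the training arrays:

# Evaluate on held-out data; returns the loss and the metrics passed to compile().
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print(f"Test accuracy: {test_acc:.4f}")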
PyTorch
- PyTorch requires you to write the training (train) and evaluation (test) code yourself, including the loops inside each function.
PyTorch takes more effort to implement, but it is more intuitive and easier to learn from.
- optimizer.zero_grad(): resets the gradients.
- loss.backward(): computes and stores the gradients of the loss.
- optimizer.step(): updates the parameters (weights).
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")
Ex) CIFAR-100 with PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the CIFAR-100 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# Define the neural network model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.relu2 = nn.ReLU()
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(512, 100)

    def forward(self, x):
        x = self.pool(self.relu1(self.conv1(x)))
        x = self.pool(self.relu2(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return x

model = CNN().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(trainloader)}], Loss: {running_loss/100:.4f}')
            running_loss = 0.0

print("Training finished!")

# Evaluate the model on the test set
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on test set: {accuracy:.2f}%')
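For comparison, here is a minimal tf.keras sketch of roughly the same CNN and training setup. It is not a line-for-line port: the tf.keras.datasets loader, the [-1, 1] scaling, and the from_logits loss are choices made here to mirror the PyTorch code above.

import tensorflow as tf

# Load CIFAR-100 and scale to [-1, 1] to mirror Normalize((0.5,...), (0.5,...)) above.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
x_train = (x_train.astype("float32") / 255.0 - 0.5) / 0.5
x_test = (x_test.astype("float32") / 255.0 - 0.5) / 0.5

# Same architecture as the PyTorch CNN: two conv+pool blocks, then two dense layers.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu", input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D(2),
    tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu"),
    tf.keras.layers.MaxPooling2D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation="relu"),
    tf.keras.layers.Dense(100),  # logits; softmax is folded into the loss below
])

model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

# fit() replaces the explicit PyTorch training loop; evaluate() replaces the test loop.
model.fit(x_train, y_train, batch_size=128, epochs=100)
test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=100)
print(f"Accuracy on test set: {100 * test_acc:.2f}%")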