What is different between Pytorch and Tensorflow?

Pytorch Tensorflow(keras) 생성 / 구현
nn.Linear keras.layers.Dense Dense
nn.Module keras.layers.Layer 모델구성 기반클래스
nn.Dropout keras.layers.Dropout Dropout
nn.LayerNorm keras.layers.LayerNormalization Layer Normalization
nn.Embedding keras.layers.Embedding Embedding
nn.GELU keras.activations.gelu GELU 활성화 함수
torch.bmm tf.matmul Batch 행렬곱셈
model.forward model.call 모델 정방향 패스

 

 

모델 정의 방법

tensorflow.keras

tf.debugging.set_log_device_placement(True)

class MyModel(tf.keras.Model):
    """Simple MLP classifier for 28x28 images using the Keras subclassing API."""

    def __init__(self):
        super().__init__()
        # Flatten 28x28 images into 784-dim vectors.
        self.flatten = tf.keras.layers.Flatten(input_shape=(28,28))
        # One ReLU hidden layer, dropout for regularization, softmax output over 10 classes.
        self.fc1 = tf.keras.layers.Dense(512,activation='relu')
        self.dropout = tf.keras.layers.Dropout(0.2)
        self.fc2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self,inputs):
        """Forward pass: flatten -> dense(relu) -> dropout -> dense(softmax)."""
        hidden = self.fc1(self.flatten(inputs))
        return self.fc2(self.dropout(hidden))

model = MyModel()

cf) tensorflow는 tf.debugging.set_log_device_placement(True) 코드를 통해서 어디에 할당되어있는지를 확인할 수 있다.


PyTorch

# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Define model
class NeuralNetwork(nn.Module):
    """MLP for 28x28 images: flatten -> 512 -> 512 -> 10 unnormalized logits."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Two ReLU hidden layers followed by the 10-class output layer.
        hidden = 512
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 10),
        )

    def forward(self, inputs):
        """Return class logits for a batch of images."""
        flat = self.flatten(inputs)
        return self.linear_relu_stack(flat)

model = NeuralNetwork().to(device)


모델 정의 시 차이점을 비교해 보자.

  GPU 할당 상속 클래스 fully connected layer 순방향 계산 함수명 모델 인스턴스 할당
PyTorch cuda.is_available() nn.Module nn.Linear forward() model.to(device)
Tensorflow 자동 keras.Model keras.layers.Dense call() model()



PyTorch는 GPU 탑재 여부를 담은 변수 device를 모델 객체에 할당할 때 포함시킵니다.
model = NeuralNetwork().to(device)


모델 컴파일 방법

tensorflow.keras

  • tensorflow는 정의한 모델 객체의 compile 메서드를 통해서 optimizer , loss function , metrics를 정의.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


PyTorch

  • PyTorch는 optimizer , loss function 변수에 저장
  • 저장된 변수를 학습 함수에서 직접 사용.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)





모델 학습 방법

차이점은 모델 학습 부분에서 가장 큰 차이를 보인다.

tensorflow.keras

  • tensorflow 는 fit() 함수로 간단하게 학습을 사용할 수 있다.
model.fit(train_images, train_labels, epochs=5)


PyTorch

  • PyTorch는 함수 속 for 문을 정의하여 모델 학습(train)과 검증(test) 코드를 구현해야 합니다.
    PyTorch가 구현하기 어렵지만 더욱더 직관적이고 학습하기 용이하다.
  • optimizer.zero_grad() : Gradient를 초기화합니다.
    loss.backward() : Loss를 기준으로 Gradient를 저장합니다.
    optimizer.step() : 파라미터(weight) 업데이트.
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch: forward pass, loss, backprop, and optimizer step per batch.

    Relies on a module-level `device` for tensor placement.
    """
    size = len(dataloader.dataset)
    model.train()
    for step, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass and loss computation.
        preds = model(inputs)
        loss = loss_fn(preds, targets)

        # Backward pass: reset grads, backprop, update parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log progress every 100 batches.
        if step % 100 == 0:
            seen = step * len(inputs)
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")
         
def test(dataloader, model, loss_fn):
    """Evaluate the model: print average loss and accuracy over the dataloader.

    Relies on a module-level `device` for tensor placement.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    total_loss = 0.0
    hits = 0.0
    # No gradients needed during evaluation.
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            preds = model(inputs)
            total_loss += loss_fn(preds, targets).item()
            hits += (preds.argmax(1) == targets).type(torch.float).sum().item()
    avg_loss = total_loss / num_batches
    accuracy = hits / size
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {avg_loss:>8f} \n")
    
# Drive training and evaluation for a fixed number of epochs.
epochs = 5
for epoch in range(epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")




 

 

Ex) CIFAR100 with Pytorch

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Select GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# CIFAR-100: convert images to tensors and normalize each RGB channel
# with mean/std of 0.5 (maps pixel values to roughly [-1, 1]).
channel_stats = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_stats, channel_stats),
])

trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# 신경망 모델 정의
class CNN(nn.Module):
    """Small conv net for 32x32 RGB images: two conv+pool stages, then two FC layers."""

    def __init__(self):
        super().__init__()
        # Two 3x3 convolutions (padding=1 keeps spatial size), each followed by 2x2 max pooling.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.relu2 = nn.ReLU()
        # After two poolings a 32x32 input becomes 8x8 with 64 channels.
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(512, 100)

    def forward(self, x):
        """Return 100-class logits for a batch of (3, 32, 32) images."""
        x = self.conv1(x)
        x = self.pool(self.relu1(x))
        x = self.conv2(x)
        x = self.pool(self.relu2(x))
        x = x.view(-1, 64 * 8 * 8)
        x = self.relu3(self.fc1(x))
        return self.fc2(x)

# Build the network and move it to the selected device.
model = CNN().to(device)

# Loss and optimizer: cross-entropy over the 100 classes, SGD with momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.001)

# Train for a fixed number of epochs, logging a running-average loss.
num_epochs = 100

for ep in range(num_epochs):
    loss_sum = 0.0
    for step, batch in enumerate(trainloader):
        images, targets = batch[0].to(device), batch[1].to(device)
        optimizer.zero_grad()

        # Forward, loss, backward, parameter update.
        logits = model(images)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        # Every 100 steps, print the mean loss over that window and reset it.
        if (step + 1) % 100 == 0:
            print(f'Epoch [{ep+1}/{num_epochs}], Step [{step+1}/{len(trainloader)}], Loss: {loss_sum/100:.4f}')
            loss_sum = 0.0

print("Training finished!")

# Evaluate the trained model on the test set (no gradient tracking needed).
correct = 0
total = 0

with torch.no_grad():
    for batch in testloader:
        images, targets = batch[0].to(device), batch[1].to(device)
        logits = model(images)
        # Predicted class is the index of the maximum logit.
        _, predicted = torch.max(logits.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on test set: {accuracy:.2f}%')

 

 

 

 

+ Recent posts