Build and Save Sentiment Model
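The script below fine-tunes bert-base-uncased with a small two-layer classification head on a toy sentiment dataset, evaluates it on a held-out split, and saves the weights to sentiment_model.pth.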
#!/usr/bin/env python
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from transformers import BertTokenizer, BertModel
from sklearn.metrics import accuracy_score, classification_report
# Define the TextDataset class
class TextDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        tokens = self.tokenizer(
            text,
            padding='max_length',
            max_length=128,
            truncation=True,
            return_tensors='pt'
        )
        # Drop the batch dimension added by return_tensors='pt'
        input_ids = tokens['input_ids'].squeeze(0)
        attention_mask = tokens['attention_mask'].squeeze(0)
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.tensor(label, dtype=torch.float)
        }
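# Optional sanity check (not part of the original flow): a single dataset
# item should yield fixed-length tensors matching max_length=128.
_sample = TextDataset(["A quick test sentence."], [1])[0]
assert _sample['input_ids'].shape == (128,)
assert _sample['attention_mask'].shape == (128,)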
# Expanded dataset with both positive and negative examples
text_data = [
"This movie is amazing!", # positive
"I really disliked the plot.", # negative
"The acting was superb.", # positive
"This book is a masterpiece.", # positive
"What a terrible waste of time.", # negative
"I love this!", # positive
"This was the worst movie ever.", # negative
"Fantastic experience, loved it.", # positive
"Not good at all.", # negative
"Absolutely fantastic.", # positive
"I hated every minute of it.", # negative
"It was a decent watch.", # neutral/positive
"Great movie!", # positive
"I will never watch this again.", # negative
"Loved every second of it.", # positive
"Awful plot, horrible acting.", # negative
"Brilliant direction and storytelling.",# positive
"Worst experience I've had.", # negative
"The movie was quite boring.", # negative
"Best performance of the year." # positive
]
labels = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1]
# Split into train and test sets (80% training, 20% testing)
train_size = int(0.8 * len(text_data))
train_dataset = TextDataset(text_data[:train_size], labels[:train_size])
test_dataset = TextDataset(text_data[train_size:], labels[train_size:])
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)  # Shuffle training batches
test_dataloader = DataLoader(test_dataset, batch_size=4)
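# Optional: peek at one training batch to confirm the batched shapes.
_batch = next(iter(train_dataloader))
assert _batch['input_ids'].shape == (4, 128)
assert _batch['labels'].shape == (4,)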
# Define the Model with two fully connected layers
class SentimentClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.fc1 = nn.Linear(self.bert.config.hidden_size, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        x = torch.relu(self.fc1(pooled_output))  # Add non-linearity
        output = self.fc2(x)
        return output  # Return raw logits, no sigmoid here
# Initialize the Model
model = SentimentClassifier()
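# Optional shape check: the head should return one raw logit per example.
# (The dummy inputs below are placeholders; the values are meaningless.)
with torch.no_grad():
    _dummy = torch.ones(2, 128, dtype=torch.long)  # fake token ids and mask
    assert model(_dummy, _dummy).shape == (2, 1)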
# Loss Function and Optimizer
criterion = nn.BCEWithLogitsLoss()  # Applies the sigmoid internally, so the model returns raw logits
optimizer = torch.optim.Adam(model.parameters(), lr=3e-5)
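# BCEWithLogitsLoss folds the sigmoid into the loss for numerical stability.
# Optional check (not in the original script) that it matches an explicit
# sigmoid followed by BCELoss:
_logits = torch.tensor([2.0, -1.0])
_targets = torch.tensor([1.0, 0.0])
assert torch.allclose(nn.BCEWithLogitsLoss()(_logits, _targets),
                      nn.BCELoss()(torch.sigmoid(_logits), _targets))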
# Train the model or load the existing model
train_model = True # Set to False if you want to skip training and load the model
if train_model:
    # Training Loop
    num_epochs = 3
    for epoch in range(num_epochs):
        model.train()  # Ensure the model is in training mode
        for batch in train_dataloader:
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            batch_labels = batch['labels'].float()  # Avoid shadowing the global labels list
            optimizer.zero_grad()
            # Get model outputs
            outputs = model(input_ids, attention_mask)
            # Squeeze the outputs to remove the extra dimension
            outputs = outputs.squeeze(1)
            # Compute the loss
            loss = criterion(outputs, batch_labels)
            # Backpropagation
            loss.backward()
            optimizer.step()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')  # Loss of the last batch
    # Save the trained model
    torch.save(model.state_dict(), 'sentiment_model.pth')
    print("Model saved as 'sentiment_model.pth'")
else:
    # Load the saved model
    model.load_state_dict(torch.load('sentiment_model.pth'))
    model.eval()  # Set to evaluation mode
    print("Model loaded from 'sentiment_model.pth'")
# Evaluation
model.eval() # Set the model to evaluation mode
all_preds = []
all_labels = []
threshold = 0.5 # Set a threshold for classification
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        outputs = model(input_ids, attention_mask)
        # Squeeze the outputs and apply sigmoid + threshold to get predictions
        outputs = torch.sigmoid(outputs.squeeze(1))
        preds = (outputs > threshold).float()
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch['labels'].cpu().numpy())
# Print accuracy and classification report
print(f'Accuracy: {accuracy_score(all_labels, all_preds)}')
print(classification_report(all_labels, all_preds))
# Optional: Print raw outputs for debugging
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        outputs = model(input_ids, attention_mask)
        # Print raw outputs to inspect the values
        print("Model outputs:", outputs)
        preds = (torch.sigmoid(outputs.squeeze(1)) > threshold).float()
        print("Predictions:", preds)
Use Sentiment Model
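This standalone script redefines the same architecture, reloads the saved weights, and runs inference on new statements, printing the results with PrettyTable.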
#!/usr/bin/env python
import torch
from torch import nn
from transformers import BertTokenizer, BertModel
from prettytable import PrettyTable
# Define the Model with two fully connected layers (must match the training script)
class SentimentClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.fc1 = nn.Linear(self.bert.config.hidden_size, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        x = torch.relu(self.fc1(pooled_output))  # Add non-linearity
        output = self.fc2(x)
        return output  # Return raw logits
# Load the trained model
model = SentimentClassifier()
model.load_state_dict(torch.load('sentiment_model.pth'))
model.eval() # Set the model to evaluation mode
# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Example positive and negative statements
statements = [
"I absolutely love this movie!", # Positive
"This is the worst film I have ever seen.", # Negative
"The book was fantastic!", # Positive
"I really hated the acting.", # Negative
"Amazing story and great performances.", # Positive
"It was a complete waste of time.", # Negative
]
# Prepare a table using PrettyTable
table = PrettyTable()
table.field_names = ["Statement", "Prediction"]
# Run inference on each statement
threshold = 0.5 # Classification threshold
for statement in statements:
    # Tokenize and preprocess the input statement
    tokens = tokenizer(
        statement,
        padding='max_length',
        max_length=128,
        truncation=True,
        return_tensors='pt'
    )
    input_ids = tokens['input_ids']
    attention_mask = tokens['attention_mask']
    # Run the model and get predictions
    with torch.no_grad():
        outputs = model(input_ids, attention_mask)
        outputs = torch.sigmoid(outputs).squeeze(1)  # Apply sigmoid to logits
        prediction = (outputs > threshold).float().item()  # Apply threshold
    # Convert the prediction to "Positive" or "Negative"
    sentiment = "Positive" if prediction == 1.0 else "Negative"
    # Add the statement and its prediction to the table
    table.add_row([statement, sentiment])
# Print the table
print(table)
Output
+-----------------------------------------------+------------+
| Statement | Prediction |
+-----------------------------------------------+------------+
| I absolutely love this movie! | Positive |
| This is the worst film I have ever seen. | Negative |
| The book was fantastic! | Positive |
| I really hated the acting. | Negative |
| Amazing story and great performances. | Positive |
| It was a complete waste of time. | Negative |
+-----------------------------------------------+------------+