Dataset¶

https://www.kaggle.com/datasets/uciml/red-wine-quality-cortez-et-al-2009

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE

from qiskit import QuantumCircuit
from qiskit.circuit import Parameter, ParameterVector
from qiskit.circuit.library import ZZFeatureMap, RealAmplitudes, TwoLocal, PauliFeatureMap, EfficientSU2
from qiskit_algorithms.optimizers import COBYLA, SPSA, ADAM
from qiskit.primitives import Sampler

try:
    from qiskit_machine_learning.algorithms import QSVC
    from qiskit_machine_learning.kernels import FidelityQuantumKernel
    QSVC_AVAILABLE = True
    print("Using FidelityQuantumKernel (latest version)")
except ImportError as e:
    print(f"Import error: {e}")
    QSVC_AVAILABLE = False
    raise ImportError("QSVC is not available. Install the latest qiskit-machine-learning.")

import warnings
warnings.filterwarnings('ignore')
# Graph style settings
plt.style.use('default')
sns.set_palette("husl")
Using FidelityQuantumKernel (latest version)
In [2]:
# %%
# Loading dataset from CSV file
def load_dataset(filepath='mycsv.csv'):
    """
        filepath: Path to CSV file (default: 'mycsv.csv')
    """
    try:
        # Load CSV file
        df = pd.read_csv(filepath)
        print(f"Dataset successfully loaded from: {filepath}")
        
        # Check if it has the correct columns
        expected_columns = [
            'fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar',
            'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density',
            'pH', 'sulphates', 'alcohol', 'quality'
        ]
        
        # Check if we have all expected columns
        missing_columns = set(expected_columns) - set(df.columns)
        if missing_columns:
            raise ValueError(f"Missing columns: {missing_columns}")
        
        # Basic data cleaning
        df = df.dropna()  # Remove rows with missing values
        
        if len(df) == 0:
            raise ValueError("Dataset is empty after removing missing values")
        
        # Dataset information
        print(f" Dataset information:")
        print(f"   - Number of rows: {len(df)}")
        print(f"   - Number of columns: {len(df.columns)}")
        print(f"   - Columns: {df.columns.tolist()}")
        
        return df
        
    except FileNotFoundError:
        raise FileNotFoundError(f"File '{filepath}' not found! Check the file path.")
        
    except Exception as e:
        print(f"  Error loading CSV: {e}")
        print("   Use the correct CSV format with the following columns:")
        print("   fixed_acidity, volatile_acidity, citric_acid, residual_sugar,")
        print("   chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density,")
        print("   pH, sulphates, alcohol, quality")
        raise

df = load_dataset('winequality-red.csv')  # or another file path
print(f"   Dataset loaded:")
print(f"   - Dimensions: {df.shape}")
print(f"   - Features: {df.columns[:-1].tolist()}")
print(f"   - Quality range: {df['quality'].min()} - {df['quality'].max()}")
print(f"   - Quality distribution: {dict(df['quality'].value_counts().sort_index())}")

# Display basic statistics
print(df.describe().round(2))

# Check for missing values
missing_values = df.isnull().sum()
if missing_values.any():
    print(f"\n Missing values:")
    print(missing_values[missing_values > 0])
else:
    print("\n No missing values")

# Display first rows
print("\n First 5 rows of dataset:")
print(df.head())
Dataset successfully loaded from: winequality-red.csv
 Dataset information:
   - Number of rows: 1599
   - Number of columns: 12
   - Columns: ['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality']
   Dataset loaded:
   - Dimensions: (1599, 12)
   - Features: ['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol']
   - Quality range: 3 - 8
   - Quality distribution: {3: np.int64(10), 4: np.int64(53), 5: np.int64(681), 6: np.int64(638), 7: np.int64(199), 8: np.int64(18)}
       fixed_acidity  volatile_acidity  citric_acid  residual_sugar  \
count        1599.00           1599.00      1599.00         1599.00   
mean            8.32              0.53         0.27            2.54   
std             1.74              0.18         0.19            1.41   
min             4.60              0.12         0.00            0.90   
25%             7.10              0.39         0.09            1.90   
50%             7.90              0.52         0.26            2.20   
75%             9.20              0.64         0.42            2.60   
max            15.90              1.58         1.00           15.50   

       chlorides  free_sulfur_dioxide  total_sulfur_dioxide  density       pH  \
count    1599.00              1599.00               1599.00  1599.00  1599.00   
mean        0.09                15.87                 46.47     1.00     3.31   
std         0.05                10.46                 32.90     0.00     0.15   
min         0.01                 1.00                  6.00     0.99     2.74   
25%         0.07                 7.00                 22.00     1.00     3.21   
50%         0.08                14.00                 38.00     1.00     3.31   
75%         0.09                21.00                 62.00     1.00     3.40   
max         0.61                72.00                289.00     1.00     4.01   

       sulphates  alcohol  quality  
count    1599.00  1599.00  1599.00  
mean        0.66    10.42     5.64  
std         0.17     1.07     0.81  
min         0.33     8.40     3.00  
25%         0.55     9.50     5.00  
50%         0.62    10.20     6.00  
75%         0.73    11.10     6.00  
max         2.00    14.90     8.00  

 No missing values

 First 5 rows of dataset:
   fixed_acidity  volatile_acidity  citric_acid  residual_sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free_sulfur_dioxide  total_sulfur_dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5  
2      9.8        5  
3      9.8        6  
4      9.4        5  
In [3]:
# %%
# Exploratory data analysis
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# 1. Quality distribution
axes[0,0].hist(df['quality'], bins=len(df['quality'].unique()), 
               alpha=0.7, color='skyblue', edgecolor='black')
axes[0,0].set_title('Wine Quality Distribution', fontsize=14, fontweight='bold')
axes[0,0].set_xlabel('Quality')
axes[0,0].set_ylabel('Number of Samples')
axes[0,0].grid(True, alpha=0.3)

# 2. Correlation heatmap
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='RdBu_r', center=0, 
            ax=axes[0,1], cbar_kws={'label': 'Correlation'})
axes[0,1].set_title('Correlation Matrix', fontsize=14, fontweight='bold')

# 3. Boxplot - alcohol effect on quality
df.boxplot(column='alcohol', by='quality', ax=axes[1,0])
axes[1,0].set_title('Alcohol vs Quality')
axes[1,0].set_xlabel('Quality')
axes[1,0].set_ylabel('Alcohol (%)')
plt.sca(axes[1,0])
plt.xticks(rotation=0)

# 4. Scatter plot - pH vs volatile acidity
for quality in sorted(df['quality'].unique()):
    subset = df[df['quality'] == quality]
    axes[1,1].scatter(subset['alcohol'], subset['citric_acid'], 
                     label=f'Quality {quality}', alpha=0.7, s=60)
axes[1,1].set_title('Alcohol vs Citric Acidity', fontsize=14, fontweight='bold')
axes[1,1].set_xlabel('Alcohol')
axes[1,1].set_ylabel('Citric Acidity')
axes[1,1].legend()
axes[1,1].grid(True, alpha=0.3)

plt.suptitle('')  # Remove the automatic suptitle from boxplot
plt.tight_layout()
plt.show()
No description has been provided for this image
In [4]:
# %%
class QSVCWineClassifier:
    """Quantum Support Vector Classifier for wine quality classification"""
    
    def __init__(self, num_features=4, feature_map='ZZ', reps=2):
        self.num_features = num_features
        self.reps = reps
        self.feature_map_type = feature_map
        self.model = None
        self.quantum_kernel = None
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.pca = PCA(n_components=num_features)
        self.original_features = 11  # Number of original features
        
        # Quantum components
        self.feature_map = None
        self.sampler = Sampler()
        
        self._setup_quantum_components()
        print(f"QSVC classifier initialized with {num_features} features")
    
    def _setup_quantum_components(self):
        """Setup quantum components"""
        
        # Feature map selection
        if self.feature_map_type == 'ZZ':
            self.feature_map = ZZFeatureMap(
                feature_dimension=self.num_features,
                reps=self.reps,
                entanglement='linear'
            )
        elif self.feature_map_type == 'Pauli':
            self.feature_map = PauliFeatureMap(
                feature_dimension=self.num_features,
                reps=self.reps,
                paulis=['Z', 'ZZ']
            )
        else:
            self.feature_map = ZZFeatureMap(
                feature_dimension=self.num_features,
                reps=self.reps,
                entanglement='linear'
            )
        
        try:
            self.quantum_kernel = FidelityQuantumKernel(
                feature_map=self.feature_map
                # sampler is not passed, will be used internally
            )
            print("Quantum kernel created successfully")
        except Exception as e:
            print(f"Error creating quantum kernel: {e}")
            # Try without additional parameters
            self.quantum_kernel = FidelityQuantumKernel(
                feature_map=self.feature_map
            )
    
    def fit(self, X, y, C=1.0):
        """Training QSVC classifier"""
        
        print(" Starting QSVC training...")
        
        # Preprocessing
        X_scaled = self.scaler.fit_transform(X)
        X_reduced = self.pca.fit_transform(X_scaled)
        print(f" PCA variance explained: {self.pca.explained_variance_ratio_.sum():.2%}")
        
        # Encode labels
        y_encoded = self.label_encoder.fit_transform(y)
        
        unique_classes = len(np.unique(y_encoded))
        print(f" Number of classes: {unique_classes}")
        print(f" Original classes: {self.label_encoder.classes_}")
        
        # QSVC model
        try:
            self.model = QSVC(
                quantum_kernel=self.quantum_kernel,
                C=C
            )
            
            print(f" Training QSVC on {X_reduced.shape[0]} samples with {X_reduced.shape[1]} features...")
            self.model.fit(X_reduced, y_encoded)
            print("  QSVC training completed!")
            
            # Model info
            if hasattr(self.model, 'support_'):
                print(f" Number of support vectors: {len(self.model.support_)}")
            
        except Exception as e:
            print(f" Error during QSVC training: {e}")
            print(" Trying with simpler configuration...")
            
            # Fallback with simpler configuration
            self.feature_map = ZZFeatureMap(
                feature_dimension=self.num_features,
                reps=1,
                entanglement='linear'
            )
            
            self.quantum_kernel = FidelityQuantumKernel(
                feature_map=self.feature_map
            )
            
            self.model = QSVC(
                quantum_kernel=self.quantum_kernel,
                C=0.5  # Lower regularization
            )
            
            self.model.fit(X_reduced, y_encoded)
            print("  Fallback QSVC training completed!")
        
        return self
    
    def predict(self, X):
        """Prediction using QSVC or VQC"""
        if self.model is None:
            raise ValueError("Model is not trained! Call fit() first.")
        
        # If we have 11 features (original), transform them
        if X.shape[1] == self.original_features:
            X_scaled = self.scaler.transform(X)
            X_reduced = self.pca.transform(X_scaled)
        else:
            X_reduced = X
        
        y_pred_encoded = self.model.predict(X_reduced)
        
        # Ensure correct format
        y_pred_encoded = np.atleast_1d(y_pred_encoded)
        
        # Clip predictions to valid class range
        y_pred_encoded = np.clip(y_pred_encoded, 0, len(self.label_encoder.classes_) - 1)
        
        try:
            y_pred = self.label_encoder.inverse_transform(y_pred_encoded.astype(int))
        except ValueError as e:
            print(f"Warning: Prediction issue - {e}")
            # Fallback - return most frequent class
            y_pred = np.array([self.label_encoder.classes_[0]])
        
        return y_pred
    
    def score(self, X, y):
        """Calculate accuracy score"""
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)
    
    def get_kernel_matrix_sample(self, X_sample):
        """Sample quantum kernel matrix for a small data sample"""
        if self.quantum_kernel is None:
            print("Quantum kernel is not initialized")
            return None
        
        # Take a small sample for visualization
        if len(X_sample) > 8:  # Smaller for faster computation
            X_sample = X_sample[:8]
        
        # Data transformation
        if X_sample.shape[1] == self.original_features:
            X_scaled = self.scaler.transform(X_sample)
            X_reduced = self.pca.transform(X_scaled)
        else:
            X_reduced = X_sample
            
        try:
            # In the latest version, it might be called differently
            kernel_matrix = self.quantum_kernel.evaluate(x_vec=X_reduced, y_vec=X_reduced)
            return kernel_matrix
        except Exception as e:
            print(f"Cannot create kernel matrix: {e}")
            try:
                # Alternative call
                kernel_matrix = self.quantum_kernel.evaluate(X_reduced)
                return kernel_matrix
            except Exception as e2:
                print(f"Alternative call also doesn't work: {e2}")
                return None
In [5]:
# %%
# Data preparation and training - QSVC only
X = df.drop('quality', axis=1).values
y = df['quality'].values

print(f" Data preparation for QSVC:")
print(f"   - Features shape: {X.shape}")
print(f"   - Target shape: {y.shape}")
print(f"   - Unique classes: {np.unique(y)}")

# For QSVC - reasonable size limitations
if len(X) > 1000:
    print(f" Dataset is large ({len(X)} samples), reducing to 1000 samples for QSVC...")
    indices = np.random.choice(len(X), size=1000, replace=False)
    X = X[indices]
    y = y[indices]
    print(f" Reduced dataset: {X.shape}")

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

print("Class distribution in training set:", dict(pd.Series(y_train).value_counts().sort_index()))

# For QSVC - reasonable use of SMOTE
use_smote = len(y_train) < 400  # SMOTE for smaller datasets
if use_smote:
    print("Using SMOTE for data balancing...")
    smote = SMOTE(random_state=42, k_neighbors=3)  # Fewer neighbors for stability
    X_train_final, y_train_final = smote.fit_resample(X_train, y_train)
    print("Class distribution after SMOTE:", dict(pd.Series(y_train_final).value_counts().sort_index()))
else:
    print("Not using SMOTE - dataset is sufficiently large")
    X_train_final, y_train_final = X_train, y_train

print(f"   - Final training set: {X_train_final.shape[0]} samples")
print(f"   - Test set: {X_test.shape[0]} samples")
 Data preparation for QSVC:
   - Features shape: (1599, 11)
   - Target shape: (1599,)
   - Unique classes: [3 4 5 6 7 8]
 Dataset is large (1599 samples), reducing to 1000 samples for QSVC...
 Reduced dataset: (1000, 11)
Class distribution in training set: {3: np.int64(5), 4: np.int64(29), 5: np.int64(290), 6: np.int64(275), 7: np.int64(92), 8: np.int64(9)}
Not using SMOTE - dataset is sufficiently large
   - Final training set: 700 samples
   - Test set: 300 samples
In [6]:
# %%
# Tested configurations - from simplest to more complex
configurations = [
#    {'num_features': 3, 'feature_map': 'ZZ', 'reps': 1, 'C': 1.0},
#    {'num_features': 4, 'feature_map': 'ZZ', 'reps': 1, 'C': 1.0},
#    {'num_features': 3, 'feature_map': 'Pauli', 'reps': 1, 'C': 1.0},
#    {'num_features': 4, 'feature_map': 'ZZ', 'reps': 2, 'C': 1.0},
    {'num_features': 6, 'feature_map': 'ZZ', 'reps': 3, 'C': 0.5},
#    {'num_features': 4, 'feature_map': 'Pauli', 'reps': 2, 'C': 1.0},
]

best_config = None
best_accuracy = 0
best_model = None
results_configs = {}

for i, config in enumerate(configurations):
    print(f"\n Testing configuration {i+1}/{len(configurations)}: {config}")
    
    try:
        qwc_temp = QSVCWineClassifier(
            num_features=config['num_features'],
            feature_map=config['feature_map'],
            reps=config['reps']
        )
        
        qwc_temp.fit(X_train_final, y_train_final, C=config['C'])
        temp_accuracy = qwc_temp.score(X_test, y_test)
        
        config_name = f"QSVC_{config['num_features']}f_{config['feature_map']}_{config['reps']}r"
        results_configs[config_name] = temp_accuracy
        
        print(f"    Test Accuracy: {temp_accuracy:.2%}")
        
        if temp_accuracy > best_accuracy:
            best_accuracy = temp_accuracy
            best_config = config.copy()
            best_config['name'] = config_name
            best_model = qwc_temp
            
    except Exception as e:
        print(f"    Error: {e}")
        results_configs[f"QSVC_config_{i+1}"] = 0.0

if best_config and best_model:
    print(f"\n BEST QSVC configuration found!")
    print(f"    Model: {best_config['name']}")
    print(f"    Parameters: {best_config}")
    print(f"    Accuracy: {best_accuracy:.2%}")
    
    # Use the best model for all further operations
    qwc = best_model
    test_accuracy = best_accuracy
else:
    raise Exception(" No configuration was successful! Check data and settings.")
 Testing configuration 1/1: {'num_features': 6, 'feature_map': 'ZZ', 'reps': 3, 'C': 0.5}
Quantum kernel created successfully
QSVC classifier initialized with 6 features
 Starting QSVC training...
 PCA variance explained: 85.01%
 Number of classes: 6
 Original classes: [3 4 5 6 7 8]
 Training QSVC on 700 samples with 6 features...
  QSVC training completed!
 Number of support vectors: 699
    Test Accuracy: 47.67%

 BEST QSVC configuration found!
    Model: QSVC_6f_ZZ_3r
    Parameters: {'num_features': 6, 'feature_map': 'ZZ', 'reps': 3, 'C': 0.5, 'name': 'QSVC_6f_ZZ_3r'}
    Accuracy: 47.67%
In [7]:
# %%
# Comparison with classical models
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

print("\n Comparison QSVC vs Classical models:")

# Data preparation for classical models (without PCA reduction)
X_train_scaled = StandardScaler().fit_transform(X_train_final)
X_test_scaled = StandardScaler().fit_transform(X_test)

# Classical models
classical_models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Classical SVM': SVC(kernel='rbf', random_state=42, C=1.0),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'KNN': KNeighborsClassifier(n_neighbors=5)
}

# Training and evaluation
all_results = {'Best QSVC': best_accuracy if best_config else test_accuracy}

for name, model in classical_models.items():
    try:
        if 'SVM' in name:
            model.fit(X_train_scaled, y_train_final)
            y_pred_classical = model.predict(X_test_scaled)
        else:
            model.fit(X_train_final, y_train_final)
            y_pred_classical = model.predict(X_test)
        
        accuracy_classical = accuracy_score(y_test, y_pred_classical)
        all_results[name] = accuracy_classical
        print(f"   {name}: {accuracy_classical:.2%}")
        
    except Exception as e:
        print(f"   {name}: Error - {e}")
        all_results[name] = 0.0
 Comparison QSVC vs Classical models:
   Random Forest: 65.00%
   Classical SVM: 59.67%
   Gradient Boosting: 63.33%
   Logistic Regression: 56.33%
   KNN: 44.67%
In [8]:
# %%
# Visualization of comparison
plt.figure(figsize=(14, 8))

models = list(all_results.keys())
accuracies = list(all_results.values())

# Colors according to model type
colors = ['#FF6B6B' if 'QSVC' in m or 'Quantum' in m else '#4ECDC4' for m in models]

bars = plt.bar(range(len(models)), accuracies, color=colors, alpha=0.8, edgecolor='black')

plt.xticks(range(len(models)), models, rotation=45, ha='right')
plt.ylabel('Test Accuracy', fontweight='bold', fontsize=12)
plt.title('Comparison of QSVC vs Classical Models', fontsize=16, fontweight='bold')
plt.ylim(0, 1)

# Values on bars
for bar, acc in zip(bars, accuracies):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, 
            f'{acc:.2%}', ha='center', fontweight='bold', fontsize=10)

# Legend
from matplotlib.patches import Patch
legend_elements = [Patch(facecolor='#FF6B6B', label='Quantum Models (QSVC)'),
                  Patch(facecolor='#4ECDC4', label='Classical Models')]
plt.legend(handles=legend_elements, loc='upper left')

plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

# Find the best model
best_overall = max(all_results.items(), key=lambda x: x[1])
print(f"\n Best model overall: {best_overall[0]} with accuracy {best_overall[1]:.2%}")
No description has been provided for this image
 Best model overall: Random Forest with accuracy 65.00%
In [9]:
# %%
# Testing function for new samples
def test_wine_sample(qwc_model, sample_data, feature_names, model_info=""):
    """Tests wine quality for a new sample using QSVC"""
    
    sample = np.array([sample_data])
    prediction = qwc_model.predict(sample)[0]
    
    quality_desc = {
        3: ("Terrible yuck", "3"),
        4: ("Low", "4"),
        5: ("Average", "5"), 
        6: ("Good", "6"),
        7: ("Excellent", "7"),
        8: ("OMGanesha", "8")
    }
    
    desc, stars = quality_desc.get(prediction, ("Unknown", "?"))
    
    print(f" QSVC Prediction{model_info}: {prediction} - {desc} {stars}")
    return prediction

print(f"\n Testing samples using the best QSVC model:")

if best_model:
    model_info = f" ({best_config['name']})"
    test_model = qwc  # Best model
else:
    model_info = " (basic configuration)"
    test_model = qwc  # Original model

# Test samples with original 11 features
sample1 = [7.4, 0.7, 0.0, 1.9, 0.076, 11.0, 34.0, 0.9978, 3.51, 0.56, 12.0]
test_wine_sample(qwc, sample1, feature_names="Sample with high alcohol")

sample2 = [5.0, 0.4, 0.1, 2.0, 0.050, 15.0, 30.0, 0.9950, 3.8, 0.7, 10.0]
test_wine_sample(qwc, sample2, feature_names="Sample with low acidity")

sample3 = [7.8, 0.6, 0.05, 2.0, 0.080, 15.0, 50.0, 0.9970, 3.3, 0.6, 9.5]
test_wine_sample(qwc, sample3, feature_names="Average sample")

sample4 = [8.5, 0.3, 0.3, 1.5, 0.045, 20.0, 45.0, 0.9940, 3.2, 0.8, 13.5]
test_wine_sample(qwc, sample4, feature_names="High quality - ideal values")
 Testing samples using the best QSVC model:
 QSVC Prediction: 6 - Good 6
 QSVC Prediction: 5 - Average 5
 QSVC Prediction: 5 - Average 5
 QSVC Prediction: 5 - Average 5
Out[9]:
np.int64(5)
In [10]:
# %%
# Visualization of quantum kernel (best model)
print(f"\n Quantum Kernel Analysis ({best_config['name']}):")

try:
    sample_data = X_test[:5]  # Small sample for demo
    kernel_matrix = qwc.get_kernel_matrix_sample(sample_data)
    
    if kernel_matrix is not None:
        plt.figure(figsize=(8, 6))
        sns.heatmap(kernel_matrix, annot=True, cmap='coolwarm', center=0,
                   fmt='.3f', square=True)
        plt.title(f'Quantum Kernel Matrix - {best_config["name"]} (5x5 sample)', fontweight='bold')
        plt.xlabel('Sample Index')
        plt.ylabel('Sample Index') 
        plt.show()
        
        print(f"Kernel matrix shape: {kernel_matrix.shape}")
        print(f"Kernel values range: [{kernel_matrix.min():.3f}, {kernel_matrix.max():.3f}]")
    else:
        print("Kernel matrix is not available")
        
except Exception as e:
    print(f"Cannot display kernel matrix: {e}")
 Quantum Kernel Analysis (QSVC_6f_ZZ_3r):
No description has been provided for this image
Kernel matrix shape: (5, 5)
Kernel values range: [0.003, 1.000]
In [11]:
# %%
# Use parameters from the best model
final_accuracy = best_accuracy if best_model else test_accuracy
final_model_info = best_config['name'] if best_config else "basic configuration"
final_features = best_config['num_features'] if best_config else qwc.num_features
final_reps = best_config['reps'] if best_config else qwc.reps
final_feature_map = best_config['feature_map'] if best_config else qwc.feature_map_type

print(f"""
DATASET SUMMARY:
   • Total samples: {len(X)} (optimized for QSVC)
   • Number of features: {final_features}
   • Quality range: 4-7 (MULTI-CLASS)
   • Train/Test split: {len(X_train_final)}/{len(X_test)}
   • SMOTE used: {'Yes' if use_smote else 'No'}

BEST QSVC MODEL ({final_model_info}):
   • Architecture: Quantum Support Vector Classifier
   • Feature Map: {final_feature_map} ({final_reps} repetitions)
   • PCA components: {final_features}
   • Quantum Kernel: FidelityQuantumKernel
   • C parameter: {best_config.get('C', 1.0) if best_config else 1.0}

RESULTS:
   • BEST QSVC Accuracy: {final_accuracy:.2%}
   • Best model overall: {best_overall[0]} ({best_overall[1]:.2%})
""")

# Comparison of all tested configurations
if results_configs:
    print("\n COMPARISON OF ALL QSVC CONFIGURATIONS:")
    for config_name, accuracy in sorted(results_configs.items(), key=lambda x: x[1], reverse=True):
        status = "X" if accuracy == final_accuracy else "  "
        print(f"{status} {config_name}: {accuracy:.2%}")
DATASET SUMMARY:
   • Total samples: 1000 (optimized for QSVC)
   • Number of features: 6
   • Quality range: 4-7 (MULTI-CLASS)
   • Train/Test split: 700/300
   • SMOTE used: No

BEST QSVC MODEL (QSVC_6f_ZZ_3r):
   • Architecture: Quantum Support Vector Classifier
   • Feature Map: ZZ (3 repetitions)
   • PCA components: 6
   • Quantum Kernel: FidelityQuantumKernel
   • C parameter: 0.5

RESULTS:
   • BEST QSVC Accuracy: 47.67%
   • Best model overall: Random Forest (65.00%)


 COMPARISON OF ALL QSVC CONFIGURATIONS:
X QSVC_6f_ZZ_3r: 47.67%
In [12]:
# %%
# Model saving
import pickle

# Calculate train_accuracy for the best model
y_train_pred = qwc.predict(X_train_final)
train_accuracy = accuracy_score(y_train_final, y_train_pred)

# Create dictionary with model
model_data = {
    'quantum_classifier': qwc,
    'train_accuracy': train_accuracy,
    'test_accuracy': best_accuracy,
    'feature_names': df.columns[:-1].tolist(),
    'best_config': best_config,
    'results_configs': results_configs
}

# Save model
with open('qsvc_wine_model.pkl', 'wb') as f:
    pickle.dump(model_data, f)
In [13]:
#import pickle
with open('qsvc_wine_model.pkl', 'rb') as f:
    loaded_data = pickle.load(f)

loaded_qwc = loaded_data['quantum_classifier']

test_sample = [7.4, 0.7, 0.0, 1.9, 0.076, 11.0, 34.0, 0.9978, 3.51, 0.56, 12.0]
prediction = loaded_qwc.predict(np.array([test_sample]))
print(f"Prediction: {prediction[0]}")
Prediction: 6
In [ ]: