In order to do MDS in python we need to install the sklearnpackage (scikit-learn).

In [1]:
!pip install sklearn
Requirement already satisfied: sklearn in /Users/junaid/miniconda/lib/python3.8/site-packages (0.0)
Requirement already satisfied: scikit-learn in /Users/junaid/miniconda/lib/python3.8/site-packages (from sklearn) (0.24.2)
Requirement already satisfied: scipy>=0.19.1 in /Users/junaid/miniconda/lib/python3.8/site-packages (from scikit-learn->sklearn) (1.7.0)
Requirement already satisfied: joblib>=0.11 in /Users/junaid/miniconda/lib/python3.8/site-packages (from scikit-learn->sklearn) (1.0.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/junaid/miniconda/lib/python3.8/site-packages (from scikit-learn->sklearn) (2.2.0)
Requirement already satisfied: numpy>=1.13.3 in /Users/junaid/miniconda/lib/python3.8/site-packages (from scikit-learn->sklearn) (1.20.3)
In [2]:
import numpy as np

from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection

from sklearn.manifold import MDS
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

Let us load the Iris dataset.

In [3]:
from sklearn import datasets

iris_data = datasets.load_iris()
X = iris_data.data
In [4]:
iris_data
Out[4]:
{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
        [5.5, 4.2, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.2],
        [5. , 3.2, 1.2, 0.2],
        [5.5, 3.5, 1.3, 0.2],
        [4.9, 3.6, 1.4, 0.1],
        [4.4, 3. , 1.3, 0.2],
        [5.1, 3.4, 1.5, 0.2],
        [5. , 3.5, 1.3, 0.3],
        [4.5, 2.3, 1.3, 0.3],
        [4.4, 3.2, 1.3, 0.2],
        [5. , 3.5, 1.6, 0.6],
        [5.1, 3.8, 1.9, 0.4],
        [4.8, 3. , 1.4, 0.3],
        [5.1, 3.8, 1.6, 0.2],
        [4.6, 3.2, 1.4, 0.2],
        [5.3, 3.7, 1.5, 0.2],
        [5. , 3.3, 1.4, 0.2],
        [7. , 3.2, 4.7, 1.4],
        [6.4, 3.2, 4.5, 1.5],
        [6.9, 3.1, 4.9, 1.5],
        [5.5, 2.3, 4. , 1.3],
        [6.5, 2.8, 4.6, 1.5],
        [5.7, 2.8, 4.5, 1.3],
        [6.3, 3.3, 4.7, 1.6],
        [4.9, 2.4, 3.3, 1. ],
        [6.6, 2.9, 4.6, 1.3],
        [5.2, 2.7, 3.9, 1.4],
        [5. , 2. , 3.5, 1. ],
        [5.9, 3. , 4.2, 1.5],
        [6. , 2.2, 4. , 1. ],
        [6.1, 2.9, 4.7, 1.4],
        [5.6, 2.9, 3.6, 1.3],
        [6.7, 3.1, 4.4, 1.4],
        [5.6, 3. , 4.5, 1.5],
        [5.8, 2.7, 4.1, 1. ],
        [6.2, 2.2, 4.5, 1.5],
        [5.6, 2.5, 3.9, 1.1],
        [5.9, 3.2, 4.8, 1.8],
        [6.1, 2.8, 4. , 1.3],
        [6.3, 2.5, 4.9, 1.5],
        [6.1, 2.8, 4.7, 1.2],
        [6.4, 2.9, 4.3, 1.3],
        [6.6, 3. , 4.4, 1.4],
        [6.8, 2.8, 4.8, 1.4],
        [6.7, 3. , 5. , 1.7],
        [6. , 2.9, 4.5, 1.5],
        [5.7, 2.6, 3.5, 1. ],
        [5.5, 2.4, 3.8, 1.1],
        [5.5, 2.4, 3.7, 1. ],
        [5.8, 2.7, 3.9, 1.2],
        [6. , 2.7, 5.1, 1.6],
        [5.4, 3. , 4.5, 1.5],
        [6. , 3.4, 4.5, 1.6],
        [6.7, 3.1, 4.7, 1.5],
        [6.3, 2.3, 4.4, 1.3],
        [5.6, 3. , 4.1, 1.3],
        [5.5, 2.5, 4. , 1.3],
        [5.5, 2.6, 4.4, 1.2],
        [6.1, 3. , 4.6, 1.4],
        [5.8, 2.6, 4. , 1.2],
        [5. , 2.3, 3.3, 1. ],
        [5.6, 2.7, 4.2, 1.3],
        [5.7, 3. , 4.2, 1.2],
        [5.7, 2.9, 4.2, 1.3],
        [6.2, 2.9, 4.3, 1.3],
        [5.1, 2.5, 3. , 1.1],
        [5.7, 2.8, 4.1, 1.3],
        [6.3, 3.3, 6. , 2.5],
        [5.8, 2.7, 5.1, 1.9],
        [7.1, 3. , 5.9, 2.1],
        [6.3, 2.9, 5.6, 1.8],
        [6.5, 3. , 5.8, 2.2],
        [7.6, 3. , 6.6, 2.1],
        [4.9, 2.5, 4.5, 1.7],
        [7.3, 2.9, 6.3, 1.8],
        [6.7, 2.5, 5.8, 1.8],
        [7.2, 3.6, 6.1, 2.5],
        [6.5, 3.2, 5.1, 2. ],
        [6.4, 2.7, 5.3, 1.9],
        [6.8, 3. , 5.5, 2.1],
        [5.7, 2.5, 5. , 2. ],
        [5.8, 2.8, 5.1, 2.4],
        [6.4, 3.2, 5.3, 2.3],
        [6.5, 3. , 5.5, 1.8],
        [7.7, 3.8, 6.7, 2.2],
        [7.7, 2.6, 6.9, 2.3],
        [6. , 2.2, 5. , 1.5],
        [6.9, 3.2, 5.7, 2.3],
        [5.6, 2.8, 4.9, 2. ],
        [7.7, 2.8, 6.7, 2. ],
        [6.3, 2.7, 4.9, 1.8],
        [6.7, 3.3, 5.7, 2.1],
        [7.2, 3.2, 6. , 1.8],
        [6.2, 2.8, 4.8, 1.8],
        [6.1, 3. , 4.9, 1.8],
        [6.4, 2.8, 5.6, 2.1],
        [7.2, 3. , 5.8, 1.6],
        [7.4, 2.8, 6.1, 1.9],
        [7.9, 3.8, 6.4, 2. ],
        [6.4, 2.8, 5.6, 2.2],
        [6.3, 2.8, 5.1, 1.5],
        [6.1, 2.6, 5.6, 1.4],
        [7.7, 3. , 6.1, 2.3],
        [6.3, 3.4, 5.6, 2.4],
        [6.4, 3.1, 5.5, 1.8],
        [6. , 3. , 4.8, 1.8],
        [6.9, 3.1, 5.4, 2.1],
        [6.7, 3.1, 5.6, 2.4],
        [6.9, 3.1, 5.1, 2.3],
        [5.8, 2.7, 5.1, 1.9],
        [6.8, 3.2, 5.9, 2.3],
        [6.7, 3.3, 5.7, 2.5],
        [6.7, 3. , 5.2, 2.3],
        [6.3, 2.5, 5. , 1.9],
        [6.5, 3. , 5.2, 2. ],
        [6.2, 3.4, 5.4, 2.3],
        [5.9, 3. , 5.1, 1.8]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),
 'frame': None,
 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='<U10'),
 'DESCR': '.. _iris_dataset:\n\nIris plants dataset\n--------------------\n\n**Data Set Characteristics:**\n\n    :Number of Instances: 150 (50 in each of three classes)\n    :Number of Attributes: 4 numeric, predictive attributes and the class\n    :Attribute Information:\n        - sepal length in cm\n        - sepal width in cm\n        - petal length in cm\n        - petal width in cm\n        - class:\n                - Iris-Setosa\n                - Iris-Versicolour\n                - Iris-Virginica\n                \n    :Summary Statistics:\n\n    ============== ==== ==== ======= ===== ====================\n                    Min  Max   Mean    SD   Class Correlation\n    ============== ==== ==== ======= ===== ====================\n    sepal length:   4.3  7.9   5.84   0.83    0.7826\n    sepal width:    2.0  4.4   3.05   0.43   -0.4194\n    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\n    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)\n    ============== ==== ==== ======= ===== ====================\n\n    :Missing Attribute Values: None\n    :Class Distribution: 33.3% for each of 3 classes.\n    :Creator: R.A. Fisher\n    :Donor: Michael Marshall (MARSHALL%[email protected])\n    :Date: July, 1988\n\nThe famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\nfrom Fisher\'s paper. Note that it\'s the same as in R, but not as in the UCI\nMachine Learning Repository, which has two wrong data points.\n\nThis is perhaps the best known database to be found in the\npattern recognition literature.  Fisher\'s paper is a classic in the field and\nis referenced frequently to this day.  (See Duda & Hart, for example.)  The\ndata set contains 3 classes of 50 instances each, where each class refers to a\ntype of iris plant.  One class is linearly separable from the other 2; the\nlatter are NOT linearly separable from each other.\n\n.. topic:: References\n\n   - Fisher, R.A. "The use of multiple measurements in taxonomic problems"\n     Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to\n     Mathematical Statistics" (John Wiley, NY, 1950).\n   - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\n   - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System\n     Structure and Classification Rule for Recognition in Partially Exposed\n     Environments".  IEEE Transactions on Pattern Analysis and Machine\n     Intelligence, Vol. PAMI-2, No. 1, 67-71.\n   - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE Transactions\n     on Information Theory, May 1972, 431-433.\n   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al"s AUTOCLASS II\n     conceptual clustering system finds 3 classes in the data.\n   - Many, many more ...',
 'feature_names': ['sepal length (cm)',
  'sepal width (cm)',
  'petal length (cm)',
  'petal width (cm)'],
 'filename': '/Users/junaid/miniconda/lib/python3.8/site-packages/sklearn/datasets/data/iris.csv'}

The IRIS data set contains a 150 by 4 array of the three species of Iris (flower). Four featues were measured: length and width of sepals and petals. More info here

In [6]:
X.shape
Out[6]:
(150, 4)

Observe that some features may outweigh other features. If we want equal emphasis on each feature, we need to scale. This can be done via sklearn.preprocessing.StandardScaler or sklearn.preprocessing.MinMaxScaler. They perform two most common kind of scaling.

  • The StandardScaler scales so that the data has mean 0 and variance 1.

  • The MinMaxScaler scales so that the data has a specified minimum and maximum.

In [7]:
X_standard = StandardScaler().fit_transform(X)
In [8]:
X_standard
Out[8]:
array([[-9.00681170e-01,  1.01900435e+00, -1.34022653e+00,
        -1.31544430e+00],
       [-1.14301691e+00, -1.31979479e-01, -1.34022653e+00,
        -1.31544430e+00],
       [-1.38535265e+00,  3.28414053e-01, -1.39706395e+00,
        -1.31544430e+00],
       [-1.50652052e+00,  9.82172869e-02, -1.28338910e+00,
        -1.31544430e+00],
       [-1.02184904e+00,  1.24920112e+00, -1.34022653e+00,
        -1.31544430e+00],
       [-5.37177559e-01,  1.93979142e+00, -1.16971425e+00,
        -1.05217993e+00],
       [-1.50652052e+00,  7.88807586e-01, -1.34022653e+00,
        -1.18381211e+00],
       [-1.02184904e+00,  7.88807586e-01, -1.28338910e+00,
        -1.31544430e+00],
       [-1.74885626e+00, -3.62176246e-01, -1.34022653e+00,
        -1.31544430e+00],
       [-1.14301691e+00,  9.82172869e-02, -1.28338910e+00,
        -1.44707648e+00],
       [-5.37177559e-01,  1.47939788e+00, -1.28338910e+00,
        -1.31544430e+00],
       [-1.26418478e+00,  7.88807586e-01, -1.22655167e+00,
        -1.31544430e+00],
       [-1.26418478e+00, -1.31979479e-01, -1.34022653e+00,
        -1.44707648e+00],
       [-1.87002413e+00, -1.31979479e-01, -1.51073881e+00,
        -1.44707648e+00],
       [-5.25060772e-02,  2.16998818e+00, -1.45390138e+00,
        -1.31544430e+00],
       [-1.73673948e-01,  3.09077525e+00, -1.28338910e+00,
        -1.05217993e+00],
       [-5.37177559e-01,  1.93979142e+00, -1.39706395e+00,
        -1.05217993e+00],
       [-9.00681170e-01,  1.01900435e+00, -1.34022653e+00,
        -1.18381211e+00],
       [-1.73673948e-01,  1.70959465e+00, -1.16971425e+00,
        -1.18381211e+00],
       [-9.00681170e-01,  1.70959465e+00, -1.28338910e+00,
        -1.18381211e+00],
       [-5.37177559e-01,  7.88807586e-01, -1.16971425e+00,
        -1.31544430e+00],
       [-9.00681170e-01,  1.47939788e+00, -1.28338910e+00,
        -1.05217993e+00],
       [-1.50652052e+00,  1.24920112e+00, -1.56757623e+00,
        -1.31544430e+00],
       [-9.00681170e-01,  5.58610819e-01, -1.16971425e+00,
        -9.20547742e-01],
       [-1.26418478e+00,  7.88807586e-01, -1.05603939e+00,
        -1.31544430e+00],
       [-1.02184904e+00, -1.31979479e-01, -1.22655167e+00,
        -1.31544430e+00],
       [-1.02184904e+00,  7.88807586e-01, -1.22655167e+00,
        -1.05217993e+00],
       [-7.79513300e-01,  1.01900435e+00, -1.28338910e+00,
        -1.31544430e+00],
       [-7.79513300e-01,  7.88807586e-01, -1.34022653e+00,
        -1.31544430e+00],
       [-1.38535265e+00,  3.28414053e-01, -1.22655167e+00,
        -1.31544430e+00],
       [-1.26418478e+00,  9.82172869e-02, -1.22655167e+00,
        -1.31544430e+00],
       [-5.37177559e-01,  7.88807586e-01, -1.28338910e+00,
        -1.05217993e+00],
       [-7.79513300e-01,  2.40018495e+00, -1.28338910e+00,
        -1.44707648e+00],
       [-4.16009689e-01,  2.63038172e+00, -1.34022653e+00,
        -1.31544430e+00],
       [-1.14301691e+00,  9.82172869e-02, -1.28338910e+00,
        -1.31544430e+00],
       [-1.02184904e+00,  3.28414053e-01, -1.45390138e+00,
        -1.31544430e+00],
       [-4.16009689e-01,  1.01900435e+00, -1.39706395e+00,
        -1.31544430e+00],
       [-1.14301691e+00,  1.24920112e+00, -1.34022653e+00,
        -1.44707648e+00],
       [-1.74885626e+00, -1.31979479e-01, -1.39706395e+00,
        -1.31544430e+00],
       [-9.00681170e-01,  7.88807586e-01, -1.28338910e+00,
        -1.31544430e+00],
       [-1.02184904e+00,  1.01900435e+00, -1.39706395e+00,
        -1.18381211e+00],
       [-1.62768839e+00, -1.74335684e+00, -1.39706395e+00,
        -1.18381211e+00],
       [-1.74885626e+00,  3.28414053e-01, -1.39706395e+00,
        -1.31544430e+00],
       [-1.02184904e+00,  1.01900435e+00, -1.22655167e+00,
        -7.88915558e-01],
       [-9.00681170e-01,  1.70959465e+00, -1.05603939e+00,
        -1.05217993e+00],
       [-1.26418478e+00, -1.31979479e-01, -1.34022653e+00,
        -1.18381211e+00],
       [-9.00681170e-01,  1.70959465e+00, -1.22655167e+00,
        -1.31544430e+00],
       [-1.50652052e+00,  3.28414053e-01, -1.34022653e+00,
        -1.31544430e+00],
       [-6.58345429e-01,  1.47939788e+00, -1.28338910e+00,
        -1.31544430e+00],
       [-1.02184904e+00,  5.58610819e-01, -1.34022653e+00,
        -1.31544430e+00],
       [ 1.40150837e+00,  3.28414053e-01,  5.35408562e-01,
         2.64141916e-01],
       [ 6.74501145e-01,  3.28414053e-01,  4.21733708e-01,
         3.95774101e-01],
       [ 1.28034050e+00,  9.82172869e-02,  6.49083415e-01,
         3.95774101e-01],
       [-4.16009689e-01, -1.74335684e+00,  1.37546573e-01,
         1.32509732e-01],
       [ 7.95669016e-01, -5.92373012e-01,  4.78571135e-01,
         3.95774101e-01],
       [-1.73673948e-01, -5.92373012e-01,  4.21733708e-01,
         1.32509732e-01],
       [ 5.53333275e-01,  5.58610819e-01,  5.35408562e-01,
         5.27406285e-01],
       [-1.14301691e+00, -1.51316008e+00, -2.60315415e-01,
        -2.62386821e-01],
       [ 9.16836886e-01, -3.62176246e-01,  4.78571135e-01,
         1.32509732e-01],
       [-7.79513300e-01, -8.22569778e-01,  8.07091462e-02,
         2.64141916e-01],
       [-1.02184904e+00, -2.43394714e+00, -1.46640561e-01,
        -2.62386821e-01],
       [ 6.86617933e-02, -1.31979479e-01,  2.51221427e-01,
         3.95774101e-01],
       [ 1.89829664e-01, -1.97355361e+00,  1.37546573e-01,
        -2.62386821e-01],
       [ 3.10997534e-01, -3.62176246e-01,  5.35408562e-01,
         2.64141916e-01],
       [-2.94841818e-01, -3.62176246e-01, -8.98031345e-02,
         1.32509732e-01],
       [ 1.03800476e+00,  9.82172869e-02,  3.64896281e-01,
         2.64141916e-01],
       [-2.94841818e-01, -1.31979479e-01,  4.21733708e-01,
         3.95774101e-01],
       [-5.25060772e-02, -8.22569778e-01,  1.94384000e-01,
        -2.62386821e-01],
       [ 4.32165405e-01, -1.97355361e+00,  4.21733708e-01,
         3.95774101e-01],
       [-2.94841818e-01, -1.28296331e+00,  8.07091462e-02,
        -1.30754636e-01],
       [ 6.86617933e-02,  3.28414053e-01,  5.92245988e-01,
         7.90670654e-01],
       [ 3.10997534e-01, -5.92373012e-01,  1.37546573e-01,
         1.32509732e-01],
       [ 5.53333275e-01, -1.28296331e+00,  6.49083415e-01,
         3.95774101e-01],
       [ 3.10997534e-01, -5.92373012e-01,  5.35408562e-01,
         8.77547895e-04],
       [ 6.74501145e-01, -3.62176246e-01,  3.08058854e-01,
         1.32509732e-01],
       [ 9.16836886e-01, -1.31979479e-01,  3.64896281e-01,
         2.64141916e-01],
       [ 1.15917263e+00, -5.92373012e-01,  5.92245988e-01,
         2.64141916e-01],
       [ 1.03800476e+00, -1.31979479e-01,  7.05920842e-01,
         6.59038469e-01],
       [ 1.89829664e-01, -3.62176246e-01,  4.21733708e-01,
         3.95774101e-01],
       [-1.73673948e-01, -1.05276654e+00, -1.46640561e-01,
        -2.62386821e-01],
       [-4.16009689e-01, -1.51316008e+00,  2.38717193e-02,
        -1.30754636e-01],
       [-4.16009689e-01, -1.51316008e+00, -3.29657076e-02,
        -2.62386821e-01],
       [-5.25060772e-02, -8.22569778e-01,  8.07091462e-02,
         8.77547895e-04],
       [ 1.89829664e-01, -8.22569778e-01,  7.62758269e-01,
         5.27406285e-01],
       [-5.37177559e-01, -1.31979479e-01,  4.21733708e-01,
         3.95774101e-01],
       [ 1.89829664e-01,  7.88807586e-01,  4.21733708e-01,
         5.27406285e-01],
       [ 1.03800476e+00,  9.82172869e-02,  5.35408562e-01,
         3.95774101e-01],
       [ 5.53333275e-01, -1.74335684e+00,  3.64896281e-01,
         1.32509732e-01],
       [-2.94841818e-01, -1.31979479e-01,  1.94384000e-01,
         1.32509732e-01],
       [-4.16009689e-01, -1.28296331e+00,  1.37546573e-01,
         1.32509732e-01],
       [-4.16009689e-01, -1.05276654e+00,  3.64896281e-01,
         8.77547895e-04],
       [ 3.10997534e-01, -1.31979479e-01,  4.78571135e-01,
         2.64141916e-01],
       [-5.25060772e-02, -1.05276654e+00,  1.37546573e-01,
         8.77547895e-04],
       [-1.02184904e+00, -1.74335684e+00, -2.60315415e-01,
        -2.62386821e-01],
       [-2.94841818e-01, -8.22569778e-01,  2.51221427e-01,
         1.32509732e-01],
       [-1.73673948e-01, -1.31979479e-01,  2.51221427e-01,
         8.77547895e-04],
       [-1.73673948e-01, -3.62176246e-01,  2.51221427e-01,
         1.32509732e-01],
       [ 4.32165405e-01, -3.62176246e-01,  3.08058854e-01,
         1.32509732e-01],
       [-9.00681170e-01, -1.28296331e+00, -4.30827696e-01,
        -1.30754636e-01],
       [-1.73673948e-01, -5.92373012e-01,  1.94384000e-01,
         1.32509732e-01],
       [ 5.53333275e-01,  5.58610819e-01,  1.27429511e+00,
         1.71209594e+00],
       [-5.25060772e-02, -8.22569778e-01,  7.62758269e-01,
         9.22302838e-01],
       [ 1.52267624e+00, -1.31979479e-01,  1.21745768e+00,
         1.18556721e+00],
       [ 5.53333275e-01, -3.62176246e-01,  1.04694540e+00,
         7.90670654e-01],
       [ 7.95669016e-01, -1.31979479e-01,  1.16062026e+00,
         1.31719939e+00],
       [ 2.12851559e+00, -1.31979479e-01,  1.61531967e+00,
         1.18556721e+00],
       [-1.14301691e+00, -1.28296331e+00,  4.21733708e-01,
         6.59038469e-01],
       [ 1.76501198e+00, -3.62176246e-01,  1.44480739e+00,
         7.90670654e-01],
       [ 1.03800476e+00, -1.28296331e+00,  1.16062026e+00,
         7.90670654e-01],
       [ 1.64384411e+00,  1.24920112e+00,  1.33113254e+00,
         1.71209594e+00],
       [ 7.95669016e-01,  3.28414053e-01,  7.62758269e-01,
         1.05393502e+00],
       [ 6.74501145e-01, -8.22569778e-01,  8.76433123e-01,
         9.22302838e-01],
       [ 1.15917263e+00, -1.31979479e-01,  9.90107977e-01,
         1.18556721e+00],
       [-1.73673948e-01, -1.28296331e+00,  7.05920842e-01,
         1.05393502e+00],
       [-5.25060772e-02, -5.92373012e-01,  7.62758269e-01,
         1.58046376e+00],
       [ 6.74501145e-01,  3.28414053e-01,  8.76433123e-01,
         1.44883158e+00],
       [ 7.95669016e-01, -1.31979479e-01,  9.90107977e-01,
         7.90670654e-01],
       [ 2.24968346e+00,  1.70959465e+00,  1.67215710e+00,
         1.31719939e+00],
       [ 2.24968346e+00, -1.05276654e+00,  1.78583195e+00,
         1.44883158e+00],
       [ 1.89829664e-01, -1.97355361e+00,  7.05920842e-01,
         3.95774101e-01],
       [ 1.28034050e+00,  3.28414053e-01,  1.10378283e+00,
         1.44883158e+00],
       [-2.94841818e-01, -5.92373012e-01,  6.49083415e-01,
         1.05393502e+00],
       [ 2.24968346e+00, -5.92373012e-01,  1.67215710e+00,
         1.05393502e+00],
       [ 5.53333275e-01, -8.22569778e-01,  6.49083415e-01,
         7.90670654e-01],
       [ 1.03800476e+00,  5.58610819e-01,  1.10378283e+00,
         1.18556721e+00],
       [ 1.64384411e+00,  3.28414053e-01,  1.27429511e+00,
         7.90670654e-01],
       [ 4.32165405e-01, -5.92373012e-01,  5.92245988e-01,
         7.90670654e-01],
       [ 3.10997534e-01, -1.31979479e-01,  6.49083415e-01,
         7.90670654e-01],
       [ 6.74501145e-01, -5.92373012e-01,  1.04694540e+00,
         1.18556721e+00],
       [ 1.64384411e+00, -1.31979479e-01,  1.16062026e+00,
         5.27406285e-01],
       [ 1.88617985e+00, -5.92373012e-01,  1.33113254e+00,
         9.22302838e-01],
       [ 2.49201920e+00,  1.70959465e+00,  1.50164482e+00,
         1.05393502e+00],
       [ 6.74501145e-01, -5.92373012e-01,  1.04694540e+00,
         1.31719939e+00],
       [ 5.53333275e-01, -5.92373012e-01,  7.62758269e-01,
         3.95774101e-01],
       [ 3.10997534e-01, -1.05276654e+00,  1.04694540e+00,
         2.64141916e-01],
       [ 2.24968346e+00, -1.31979479e-01,  1.33113254e+00,
         1.44883158e+00],
       [ 5.53333275e-01,  7.88807586e-01,  1.04694540e+00,
         1.58046376e+00],
       [ 6.74501145e-01,  9.82172869e-02,  9.90107977e-01,
         7.90670654e-01],
       [ 1.89829664e-01, -1.31979479e-01,  5.92245988e-01,
         7.90670654e-01],
       [ 1.28034050e+00,  9.82172869e-02,  9.33270550e-01,
         1.18556721e+00],
       [ 1.03800476e+00,  9.82172869e-02,  1.04694540e+00,
         1.58046376e+00],
       [ 1.28034050e+00,  9.82172869e-02,  7.62758269e-01,
         1.44883158e+00],
       [-5.25060772e-02, -8.22569778e-01,  7.62758269e-01,
         9.22302838e-01],
       [ 1.15917263e+00,  3.28414053e-01,  1.21745768e+00,
         1.44883158e+00],
       [ 1.03800476e+00,  5.58610819e-01,  1.10378283e+00,
         1.71209594e+00],
       [ 1.03800476e+00, -1.31979479e-01,  8.19595696e-01,
         1.44883158e+00],
       [ 5.53333275e-01, -1.28296331e+00,  7.05920842e-01,
         9.22302838e-01],
       [ 7.95669016e-01, -1.31979479e-01,  8.19595696e-01,
         1.05393502e+00],
       [ 4.32165405e-01,  7.88807586e-01,  9.33270550e-01,
         1.44883158e+00],
       [ 6.86617933e-02, -1.31979479e-01,  7.62758269e-01,
         7.90670654e-01]])
In [9]:
X_0_1 = MinMaxScaler().fit_transform(X)
In [10]:
X_0_1
Out[10]:
array([[0.22222222, 0.625     , 0.06779661, 0.04166667],
       [0.16666667, 0.41666667, 0.06779661, 0.04166667],
       [0.11111111, 0.5       , 0.05084746, 0.04166667],
       [0.08333333, 0.45833333, 0.08474576, 0.04166667],
       [0.19444444, 0.66666667, 0.06779661, 0.04166667],
       [0.30555556, 0.79166667, 0.11864407, 0.125     ],
       [0.08333333, 0.58333333, 0.06779661, 0.08333333],
       [0.19444444, 0.58333333, 0.08474576, 0.04166667],
       [0.02777778, 0.375     , 0.06779661, 0.04166667],
       [0.16666667, 0.45833333, 0.08474576, 0.        ],
       [0.30555556, 0.70833333, 0.08474576, 0.04166667],
       [0.13888889, 0.58333333, 0.10169492, 0.04166667],
       [0.13888889, 0.41666667, 0.06779661, 0.        ],
       [0.        , 0.41666667, 0.01694915, 0.        ],
       [0.41666667, 0.83333333, 0.03389831, 0.04166667],
       [0.38888889, 1.        , 0.08474576, 0.125     ],
       [0.30555556, 0.79166667, 0.05084746, 0.125     ],
       [0.22222222, 0.625     , 0.06779661, 0.08333333],
       [0.38888889, 0.75      , 0.11864407, 0.08333333],
       [0.22222222, 0.75      , 0.08474576, 0.08333333],
       [0.30555556, 0.58333333, 0.11864407, 0.04166667],
       [0.22222222, 0.70833333, 0.08474576, 0.125     ],
       [0.08333333, 0.66666667, 0.        , 0.04166667],
       [0.22222222, 0.54166667, 0.11864407, 0.16666667],
       [0.13888889, 0.58333333, 0.15254237, 0.04166667],
       [0.19444444, 0.41666667, 0.10169492, 0.04166667],
       [0.19444444, 0.58333333, 0.10169492, 0.125     ],
       [0.25      , 0.625     , 0.08474576, 0.04166667],
       [0.25      , 0.58333333, 0.06779661, 0.04166667],
       [0.11111111, 0.5       , 0.10169492, 0.04166667],
       [0.13888889, 0.45833333, 0.10169492, 0.04166667],
       [0.30555556, 0.58333333, 0.08474576, 0.125     ],
       [0.25      , 0.875     , 0.08474576, 0.        ],
       [0.33333333, 0.91666667, 0.06779661, 0.04166667],
       [0.16666667, 0.45833333, 0.08474576, 0.04166667],
       [0.19444444, 0.5       , 0.03389831, 0.04166667],
       [0.33333333, 0.625     , 0.05084746, 0.04166667],
       [0.16666667, 0.66666667, 0.06779661, 0.        ],
       [0.02777778, 0.41666667, 0.05084746, 0.04166667],
       [0.22222222, 0.58333333, 0.08474576, 0.04166667],
       [0.19444444, 0.625     , 0.05084746, 0.08333333],
       [0.05555556, 0.125     , 0.05084746, 0.08333333],
       [0.02777778, 0.5       , 0.05084746, 0.04166667],
       [0.19444444, 0.625     , 0.10169492, 0.20833333],
       [0.22222222, 0.75      , 0.15254237, 0.125     ],
       [0.13888889, 0.41666667, 0.06779661, 0.08333333],
       [0.22222222, 0.75      , 0.10169492, 0.04166667],
       [0.08333333, 0.5       , 0.06779661, 0.04166667],
       [0.27777778, 0.70833333, 0.08474576, 0.04166667],
       [0.19444444, 0.54166667, 0.06779661, 0.04166667],
       [0.75      , 0.5       , 0.62711864, 0.54166667],
       [0.58333333, 0.5       , 0.59322034, 0.58333333],
       [0.72222222, 0.45833333, 0.66101695, 0.58333333],
       [0.33333333, 0.125     , 0.50847458, 0.5       ],
       [0.61111111, 0.33333333, 0.61016949, 0.58333333],
       [0.38888889, 0.33333333, 0.59322034, 0.5       ],
       [0.55555556, 0.54166667, 0.62711864, 0.625     ],
       [0.16666667, 0.16666667, 0.38983051, 0.375     ],
       [0.63888889, 0.375     , 0.61016949, 0.5       ],
       [0.25      , 0.29166667, 0.49152542, 0.54166667],
       [0.19444444, 0.        , 0.42372881, 0.375     ],
       [0.44444444, 0.41666667, 0.54237288, 0.58333333],
       [0.47222222, 0.08333333, 0.50847458, 0.375     ],
       [0.5       , 0.375     , 0.62711864, 0.54166667],
       [0.36111111, 0.375     , 0.44067797, 0.5       ],
       [0.66666667, 0.45833333, 0.57627119, 0.54166667],
       [0.36111111, 0.41666667, 0.59322034, 0.58333333],
       [0.41666667, 0.29166667, 0.52542373, 0.375     ],
       [0.52777778, 0.08333333, 0.59322034, 0.58333333],
       [0.36111111, 0.20833333, 0.49152542, 0.41666667],
       [0.44444444, 0.5       , 0.6440678 , 0.70833333],
       [0.5       , 0.33333333, 0.50847458, 0.5       ],
       [0.55555556, 0.20833333, 0.66101695, 0.58333333],
       [0.5       , 0.33333333, 0.62711864, 0.45833333],
       [0.58333333, 0.375     , 0.55932203, 0.5       ],
       [0.63888889, 0.41666667, 0.57627119, 0.54166667],
       [0.69444444, 0.33333333, 0.6440678 , 0.54166667],
       [0.66666667, 0.41666667, 0.6779661 , 0.66666667],
       [0.47222222, 0.375     , 0.59322034, 0.58333333],
       [0.38888889, 0.25      , 0.42372881, 0.375     ],
       [0.33333333, 0.16666667, 0.47457627, 0.41666667],
       [0.33333333, 0.16666667, 0.45762712, 0.375     ],
       [0.41666667, 0.29166667, 0.49152542, 0.45833333],
       [0.47222222, 0.29166667, 0.69491525, 0.625     ],
       [0.30555556, 0.41666667, 0.59322034, 0.58333333],
       [0.47222222, 0.58333333, 0.59322034, 0.625     ],
       [0.66666667, 0.45833333, 0.62711864, 0.58333333],
       [0.55555556, 0.125     , 0.57627119, 0.5       ],
       [0.36111111, 0.41666667, 0.52542373, 0.5       ],
       [0.33333333, 0.20833333, 0.50847458, 0.5       ],
       [0.33333333, 0.25      , 0.57627119, 0.45833333],
       [0.5       , 0.41666667, 0.61016949, 0.54166667],
       [0.41666667, 0.25      , 0.50847458, 0.45833333],
       [0.19444444, 0.125     , 0.38983051, 0.375     ],
       [0.36111111, 0.29166667, 0.54237288, 0.5       ],
       [0.38888889, 0.41666667, 0.54237288, 0.45833333],
       [0.38888889, 0.375     , 0.54237288, 0.5       ],
       [0.52777778, 0.375     , 0.55932203, 0.5       ],
       [0.22222222, 0.20833333, 0.33898305, 0.41666667],
       [0.38888889, 0.33333333, 0.52542373, 0.5       ],
       [0.55555556, 0.54166667, 0.84745763, 1.        ],
       [0.41666667, 0.29166667, 0.69491525, 0.75      ],
       [0.77777778, 0.41666667, 0.83050847, 0.83333333],
       [0.55555556, 0.375     , 0.77966102, 0.70833333],
       [0.61111111, 0.41666667, 0.81355932, 0.875     ],
       [0.91666667, 0.41666667, 0.94915254, 0.83333333],
       [0.16666667, 0.20833333, 0.59322034, 0.66666667],
       [0.83333333, 0.375     , 0.89830508, 0.70833333],
       [0.66666667, 0.20833333, 0.81355932, 0.70833333],
       [0.80555556, 0.66666667, 0.86440678, 1.        ],
       [0.61111111, 0.5       , 0.69491525, 0.79166667],
       [0.58333333, 0.29166667, 0.72881356, 0.75      ],
       [0.69444444, 0.41666667, 0.76271186, 0.83333333],
       [0.38888889, 0.20833333, 0.6779661 , 0.79166667],
       [0.41666667, 0.33333333, 0.69491525, 0.95833333],
       [0.58333333, 0.5       , 0.72881356, 0.91666667],
       [0.61111111, 0.41666667, 0.76271186, 0.70833333],
       [0.94444444, 0.75      , 0.96610169, 0.875     ],
       [0.94444444, 0.25      , 1.        , 0.91666667],
       [0.47222222, 0.08333333, 0.6779661 , 0.58333333],
       [0.72222222, 0.5       , 0.79661017, 0.91666667],
       [0.36111111, 0.33333333, 0.66101695, 0.79166667],
       [0.94444444, 0.33333333, 0.96610169, 0.79166667],
       [0.55555556, 0.29166667, 0.66101695, 0.70833333],
       [0.66666667, 0.54166667, 0.79661017, 0.83333333],
       [0.80555556, 0.5       , 0.84745763, 0.70833333],
       [0.52777778, 0.33333333, 0.6440678 , 0.70833333],
       [0.5       , 0.41666667, 0.66101695, 0.70833333],
       [0.58333333, 0.33333333, 0.77966102, 0.83333333],
       [0.80555556, 0.41666667, 0.81355932, 0.625     ],
       [0.86111111, 0.33333333, 0.86440678, 0.75      ],
       [1.        , 0.75      , 0.91525424, 0.79166667],
       [0.58333333, 0.33333333, 0.77966102, 0.875     ],
       [0.55555556, 0.33333333, 0.69491525, 0.58333333],
       [0.5       , 0.25      , 0.77966102, 0.54166667],
       [0.94444444, 0.41666667, 0.86440678, 0.91666667],
       [0.55555556, 0.58333333, 0.77966102, 0.95833333],
       [0.58333333, 0.45833333, 0.76271186, 0.70833333],
       [0.47222222, 0.41666667, 0.6440678 , 0.70833333],
       [0.72222222, 0.45833333, 0.74576271, 0.83333333],
       [0.66666667, 0.45833333, 0.77966102, 0.95833333],
       [0.72222222, 0.45833333, 0.69491525, 0.91666667],
       [0.41666667, 0.29166667, 0.69491525, 0.75      ],
       [0.69444444, 0.5       , 0.83050847, 0.91666667],
       [0.66666667, 0.54166667, 0.79661017, 1.        ],
       [0.66666667, 0.41666667, 0.71186441, 0.91666667],
       [0.55555556, 0.20833333, 0.6779661 , 0.75      ],
       [0.61111111, 0.41666667, 0.71186441, 0.79166667],
       [0.52777778, 0.58333333, 0.74576271, 0.91666667],
       [0.44444444, 0.41666667, 0.69491525, 0.70833333]])

If we want to visualize the distance matrix between the 150 flowers. We get this 150 by 150 matrix.

In [11]:
from sklearn.metrics import pairwise_distances
D = pairwise_distances(X_0_1)
D.shape

plt.imshow(D, zorder=2, cmap='Blues', interpolation='nearest')
plt.colorbar();
No description has been provided for this image
  • X_standard is that every column of X has been scaled to a gaussian with mean 0 and variance 1.

  • X_0_1 is that every column lies between 0 and 1.

In [12]:
MDS?
In [13]:
mds2d = MDS(2, random_state=0)
In [14]:
X_standard_2d = mds2d.fit_transform?
In [15]:
X_standard_2d = mds2d.fit_transform(X_standard)
In [16]:
X_0_1_2d = mds2d.fit_transform(X_0_1)
In [18]:
X_0_1_2d.shape
Out[18]:
(150, 2)
In [28]:
data.target?
In [29]:
np.unique?
In [19]:
colors = ['red', 'green', 'blue']

plt.rcParams['figure.figsize'] = [7,7]
plt.rc('font', size=14)

for i in np.unique(iris_data.target):
    subset = X_standard_2d[iris_data.target == i]
    
    x = [row[0] for row in subset]
    y = [row[1] for row in subset]
    
    plt.scatter(x,y,c=colors[i], label=iris_data.target_names[i])

plt.legend()
plt.show()
No description has been provided for this image
In [20]:
colors = ['red', 'green', 'blue']

plt.rcParams['figure.figsize'] = [7,7]
plt.rc('font', size=14)

for i in np.unique(iris_data.target):
    subset = X_0_1_2d[iris_data.target == i]
    
    x = [row[0] for row in subset]
    y = [row[1] for row in subset]
    
    plt.scatter(x,y,c=colors[i], label=iris_data.target_names[i])

plt.legend()
plt.show()
No description has been provided for this image

Observe that even though we have reduced the dimension from 4 to 2, still the points corresponding to the three distinct species can be separated.

Let us load another data set and do MDS on it.¶

In [21]:
digits_data = datasets.load_digits()
In [22]:
digits_data
Out[22]:
{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ..., 10.,  0.,  0.],
        [ 0.,  0.,  0., ..., 16.,  9.,  0.],
        ...,
        [ 0.,  0.,  1., ...,  6.,  0.,  0.],
        [ 0.,  0.,  2., ..., 12.,  0.,  0.],
        [ 0.,  0., 10., ..., 12.,  1.,  0.]]),
 'target': array([0, 1, 2, ..., 8, 9, 8]),
 'frame': None,
 'feature_names': ['pixel_0_0',
  'pixel_0_1',
  'pixel_0_2',
  'pixel_0_3',
  'pixel_0_4',
  'pixel_0_5',
  'pixel_0_6',
  'pixel_0_7',
  'pixel_1_0',
  'pixel_1_1',
  'pixel_1_2',
  'pixel_1_3',
  'pixel_1_4',
  'pixel_1_5',
  'pixel_1_6',
  'pixel_1_7',
  'pixel_2_0',
  'pixel_2_1',
  'pixel_2_2',
  'pixel_2_3',
  'pixel_2_4',
  'pixel_2_5',
  'pixel_2_6',
  'pixel_2_7',
  'pixel_3_0',
  'pixel_3_1',
  'pixel_3_2',
  'pixel_3_3',
  'pixel_3_4',
  'pixel_3_5',
  'pixel_3_6',
  'pixel_3_7',
  'pixel_4_0',
  'pixel_4_1',
  'pixel_4_2',
  'pixel_4_3',
  'pixel_4_4',
  'pixel_4_5',
  'pixel_4_6',
  'pixel_4_7',
  'pixel_5_0',
  'pixel_5_1',
  'pixel_5_2',
  'pixel_5_3',
  'pixel_5_4',
  'pixel_5_5',
  'pixel_5_6',
  'pixel_5_7',
  'pixel_6_0',
  'pixel_6_1',
  'pixel_6_2',
  'pixel_6_3',
  'pixel_6_4',
  'pixel_6_5',
  'pixel_6_6',
  'pixel_6_7',
  'pixel_7_0',
  'pixel_7_1',
  'pixel_7_2',
  'pixel_7_3',
  'pixel_7_4',
  'pixel_7_5',
  'pixel_7_6',
  'pixel_7_7'],
 'target_names': array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 'images': array([[[ 0.,  0.,  5., ...,  1.,  0.,  0.],
         [ 0.,  0., 13., ..., 15.,  5.,  0.],
         [ 0.,  3., 15., ..., 11.,  8.,  0.],
         ...,
         [ 0.,  4., 11., ..., 12.,  7.,  0.],
         [ 0.,  2., 14., ..., 12.,  0.,  0.],
         [ 0.,  0.,  6., ...,  0.,  0.,  0.]],
 
        [[ 0.,  0.,  0., ...,  5.,  0.,  0.],
         [ 0.,  0.,  0., ...,  9.,  0.,  0.],
         [ 0.,  0.,  3., ...,  6.,  0.,  0.],
         ...,
         [ 0.,  0.,  1., ...,  6.,  0.,  0.],
         [ 0.,  0.,  1., ...,  6.,  0.,  0.],
         [ 0.,  0.,  0., ..., 10.,  0.,  0.]],
 
        [[ 0.,  0.,  0., ..., 12.,  0.,  0.],
         [ 0.,  0.,  3., ..., 14.,  0.,  0.],
         [ 0.,  0.,  8., ..., 16.,  0.,  0.],
         ...,
         [ 0.,  9., 16., ...,  0.,  0.,  0.],
         [ 0.,  3., 13., ..., 11.,  5.,  0.],
         [ 0.,  0.,  0., ..., 16.,  9.,  0.]],
 
        ...,
 
        [[ 0.,  0.,  1., ...,  1.,  0.,  0.],
         [ 0.,  0., 13., ...,  2.,  1.,  0.],
         [ 0.,  0., 16., ..., 16.,  5.,  0.],
         ...,
         [ 0.,  0., 16., ..., 15.,  0.,  0.],
         [ 0.,  0., 15., ..., 16.,  0.,  0.],
         [ 0.,  0.,  2., ...,  6.,  0.,  0.]],
 
        [[ 0.,  0.,  2., ...,  0.,  0.,  0.],
         [ 0.,  0., 14., ..., 15.,  1.,  0.],
         [ 0.,  4., 16., ..., 16.,  7.,  0.],
         ...,
         [ 0.,  0.,  0., ..., 16.,  2.,  0.],
         [ 0.,  0.,  4., ..., 16.,  2.,  0.],
         [ 0.,  0.,  5., ..., 12.,  0.,  0.]],
 
        [[ 0.,  0., 10., ...,  1.,  0.,  0.],
         [ 0.,  2., 16., ...,  1.,  0.,  0.],
         [ 0.,  0., 15., ..., 15.,  0.,  0.],
         ...,
         [ 0.,  4., 16., ..., 16.,  6.,  0.],
         [ 0.,  8., 16., ..., 16.,  8.,  0.],
         [ 0.,  1.,  8., ..., 12.,  1.,  0.]]]),
 'DESCR': ".. _digits_dataset:\n\nOptical recognition of handwritten digits dataset\n--------------------------------------------------\n\n**Data Set Characteristics:**\n\n    :Number of Instances: 1797\n    :Number of Attributes: 64\n    :Attribute Information: 8x8 image of integer pixels in the range 0..16.\n    :Missing Attribute Values: None\n    :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)\n    :Date: July; 1998\n\nThis is a copy of the test set of the UCI ML hand-written digits datasets\nhttps://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits\n\nThe data set contains images of hand-written digits: 10 classes where\neach class refers to a digit.\n\nPreprocessing programs made available by NIST were used to extract\nnormalized bitmaps of handwritten digits from a preprinted form. From a\ntotal of 43 people, 30 contributed to the training set and different 13\nto the test set. 32x32 bitmaps are divided into nonoverlapping blocks of\n4x4 and the number of on pixels are counted in each block. This generates\nan input matrix of 8x8 where each element is an integer in the range\n0..16. This reduces dimensionality and gives invariance to small\ndistortions.\n\nFor info on NIST preprocessing routines, see M. D. Garris, J. L. Blue, G.\nT. Candela, D. L. Dimmick, J. Geist, P. J. Grother, S. A. Janet, and C.\nL. Wilson, NIST Form-Based Handprint Recognition System, NISTIR 5469,\n1994.\n\n.. topic:: References\n\n  - C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their\n    Applications to Handwritten Digit Recognition, MSc Thesis, Institute of\n    Graduate Studies in Science and Engineering, Bogazici University.\n  - E. Alpaydin, C. Kaynak (1998) Cascading Classifiers, Kybernetika.\n  - Ken Tang and Ponnuthurai N. Suganthan and Xi Yao and A. Kai Qin.\n    Linear dimensionalityreduction using relevance weighted LDA. School of\n    Electrical and Electronic Engineering Nanyang Technological University.\n    2005.\n  - Claudio Gentile. A New Approximate Maximal Margin Classification\n    Algorithm. NIPS. 2000.\n"}
In [23]:
Digits = digits_data.data
In [24]:
Digits.shape
Out[24]:
(1797, 64)

Observe that the digits data is a 64 dimensional data. (8 by 8 gray scale image of each digit). It is a handwritten data set. More infor here

For instance if we want to visualize an instance

In [27]:
n = 450
plt.figure(1, figsize=(3,3))
plt.imshow(digits_data.images[n], cmap = plt.cm.gray_r, interpolation='nearest')
plt.show()
No description has been provided for this image
In [28]:
Digits_standard = StandardScaler().fit_transform(Digits)
Digits_0_1 = MinMaxScaler().fit_transform(Digits)
In [29]:
mds2d_fast = MDS(n_components=2, max_iter=60, random_state=1)
In [30]:
Digits_standard_2d = mds2d_fast.fit_transform(Digits_standard)
Digits_0_1_2d = mds2d_fast.fit_transform(Digits_0_1)
In [31]:
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
plt.rcParams['figure.figsize'] = [7,7]
plt.rc('font', size=14)

for i in np.unique(digits_data.target):
    subset = Digits_standard_2d[digits_data.target == i]
    
    x = [row[0] for row in subset]
    y = [row[1] for row in subset]
    
    plt.scatter(x,y,c=colors[i], label=digits_data.target_names[i])

plt.legend()
plt.show()
No description has been provided for this image
In [32]:
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
plt.rcParams['figure.figsize'] = [7,7]
plt.rc('font', size=14)

for i in np.unique(digits_data.target):
    subset = Digits_0_1_2d[digits_data.target == i]
    
    x = [row[0] for row in subset]
    y = [row[1] for row in subset]
    
    plt.scatter(x,y,c=colors[i], label=digits_data.target_names[i])

plt.legend()
plt.show()
No description has been provided for this image

Observe that 2 dimensions may be too much for reducing from 64 dimensions. However note that in the 0-1 scale still the digits are quite separated.

In [33]:
Distance_matrix = np.array([[0, 1868, 1608, 1182, 877],
                            [1868, 0, 3438, 1361, 1053],
                            [1608, 3438, 0, 2374, 2485],
                            [1182, 1361, 2374, 0, 1105],
                            [877, 1053, 2485, 1105, 0]])
In [39]:
mds2d_distance = MDS(2, dissimilarity='precomputed', random_state=1)
In [90]:
Cities_2d = mds2d_distance.fit_transform?
In [35]:
Cities_2d = mds2d_distance.fit_transform(Distance_matrix)
In [40]:
Cities_2d
Out[40]:
array([[ -234.09114802,   392.97501419],
       [ 1384.57705369,  -541.23077699],
       [-1810.42134487,   722.39240171],
       [   41.99708432,  -758.11205459],
       [  617.93835487,   183.97541568]])
In [41]:
plt.rcParams['figure.figsize'] = [7,7]
plt.rc('font', size=14)
citynames = ['berlin', 'madrid', 'moscow', 'rome', 'paris']

x = list(Cities_2d[:,0])
y = list(Cities_2d[:,1])

colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
plt.rcParams['figure.figsize'] = [7,7]
plt.rc('font', size=14)

for i in range(5):
    
    a = x[i]
    b = y[i]
    
    plt.scatter(a,b,c=colors[i], label=citynames[i])

plt.legend()
plt.show()
No description has been provided for this image
In [42]:
Distance_mds = np.zeros((5,5))
In [43]:
Distance_mds[1,2]
Out[43]:
0.0
In [44]:
Cities_2d[1,1]
Out[44]:
-541.2307769905755
In [45]:
import math
In [46]:
for i in range(5):
    for j in range(5):
        Distance_mds[i,j] = math.floor(math.sqrt((Cities_2d[i,1]-Cities_2d[j,1])**2 + (Cities_2d[i,0]-Cities_2d[j,0])**2))
In [47]:
Distance_mds
Out[47]:
array([[   0., 1868., 1610., 1183.,  877.],
       [1868.,    0., 3435., 1359., 1055.],
       [1610., 3435.,    0., 2371., 2487.],
       [1183., 1359., 2371.,    0., 1104.],
       [ 877., 1055., 2487., 1104.,    0.]])
In [48]:
Distance_matrix
Out[48]:
array([[   0, 1868, 1608, 1182,  877],
       [1868,    0, 3438, 1361, 1053],
       [1608, 3438,    0, 2374, 2485],
       [1182, 1361, 2374,    0, 1105],
       [ 877, 1053, 2485, 1105,    0]])
In [ ]: