BDS 761: Data Science and Machine Learning I
Topic 6: Norms, Distances, & Statistics
This topic:¶
- Norms & Distances with Python
- Simple distance-based classification
- Clustering
- Statistics with Python
Reading:
- I2ALA Chapter 3 (Norms), Chapter 4 (Clustering)
- “Linear Algebra and Learning from Data”, Gilbert Strang, Wellesley-Cambridge Press 2019, I.11 (I.11 Norms of Vectors, Functions, and Matrices)
Metric¶
Basically, boil multiple numbers (vectors, matrices, time series,...) or entire functions down to a single number
Examples
- GDP of economy
- BMI (body-mass index)
- A statistic (mean, variance, ...)
- Vector norms...
Can always be criticized as discarding important information, but we need to use something
Euclidean Norm¶
$$ \|x\| = \sqrt{x_1^2+x_2^2+\dotsb+x_n^2} $$
"Length" of a vector (as opposed to length of the data structure, i.e. number of dimensions)
Also known as $\|x\|_2$, the "two-norm"
Example¶
$$\left\Vert \begin{pmatrix}2 \\ -1\\2 \end{pmatrix}\right\Vert = ?$$
Also use a built-in function to do this
Exercise: give Euclidean norm using dot products
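A quick check of both routes in numpy (a minimal sketch; `np.linalg.norm` is the built-in):

import numpy as np

x = np.array([2, -1, 2])
print(np.linalg.norm(x))      # built-in Euclidean norm: 3.0
print(np.sqrt(np.dot(x, x)))  # same value via a dot product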
Root-mean-square value (RMS)¶
$$ rms(x) = \sqrt{\frac{x_1^2+x_2^2+\dotsb+x_n^2}{n}} = \frac{1}{\sqrt{n}}\|x\| $$
root(mean(square(x)))
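A minimal sketch of the two equivalent computations:

import numpy as np

x = np.array([2, -1, 2])
print(np.sqrt(np.mean(x**2)))               # root(mean(square(x)))
print(np.linalg.norm(x) / np.sqrt(len(x)))  # ||x|| / sqrt(n), same value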
Exercise: give Euclidean norm of sum of vectors $x+y$ in terms of dot products
(use result of previous exercise and work only with vectors)
$$||x+y||_2 = \text{dot}(?,?) + \text{dot}(?,?) + ???$$
$$ \sqrt{(x_1+y_1)(x_1+y_1) + (x_2+y_2)(x_2+y_2) + \dotsb} = \;? $$
Chebyshev Inequality¶
If $x$ is a length-$n$ vector with $k$ entries satisfying $|x_i|\ge a$ for some constant $a>0$, then
$$\frac{k}{n} \le \left( \frac{rms(x)}{a}\right)^2$$
Basically bounds what fraction of the entries can be large relative to the RMS value
Derive by noting that $\Vert x\Vert^2 \ge ka^2$
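A quick numerical sanity check of the inequality (a sketch assuming standard normal samples and an arbitrary threshold $a=2$):

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal(1000)
a = 2.0

rms = np.sqrt(np.mean(x**2))
frac = np.mean(np.abs(x) >= a)   # k/n, fraction of large entries
print(frac, (rms / a)**2)        # frac never exceeds the bound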
Motivation for other types of norms¶
Consider the list of data for different "dimensions", for one person, product, etc.
Does it make sense to compute Euclidean distance to tell us how "big" the vector is?
Norms¶
A norm is a vector "length". Often denoted generally as $\Vert \mathbf x \Vert$.
Properties
- Non-negativity: $\Vert \mathbf x \Vert \geq 0$
- Zero upon equality: $\Vert \mathbf x \Vert = 0 \iff \mathbf x = \mathbf 0$
- Absolute scalability: $\Vert \alpha \mathbf x \Vert = |\alpha| \Vert \mathbf x \Vert$ for scalar $\alpha$
- Triangle Inequality: $\Vert \mathbf x + \mathbf z\Vert \leq \Vert \mathbf x\Vert + \Vert \mathbf z\Vert$
$p$-Norms¶
For any $n$-dimensional real or complex vector, i.e. $x \in \mathbb{R}^n$ or $\mathbb{C}^n$:
$$ \|x\|_p = \left(|x_1|^p+|x_2|^p+\dotsb+|x_n|^p\right)^{\frac{1}{p}} $$
$$ \|x\|_p = \left(\sum_{i=1}^n{|x_i|^p} \right)^{\frac{1}{p}} $$
Consider the norms we have looked at. What is $p$?
Famous "norms"¶
- $\ell_2$ norm
- $\ell_1$ norm
- $\ell_\infty$ norm
- $\ell_p$ norm
- "$\ell_0$" norm
Note we often lazily write these as e.g. "L2" norm
Exercise: Write out the $p$-norms for a vector $x$ for $p$ = 1, 2, 0, $\infty$... $ \|x\|_p = \left(\sum_{i=1}^n{|x_i|^p} \right)^{\frac{1}{p}} $
Exercise: test the conditions for each
import numpy as np

v = np.array([1, 3, 1, 4])
for p in range(1, 10):
    print(p, np.power(np.sum(np.power(np.abs(v), p)), 1/p))
1 9.0
2 5.196152422706632
3 4.530654896083492
4 4.290915128445443
5 4.175344598847825
6 4.110988070009078
7 4.0723049678331895
8 4.048006070825583
9 4.032310478684122
Exercise¶
What are the norms of $\vec{a} = \begin{bmatrix}1\\3\\1\\-4\end{bmatrix}$ and $\vec{b} = \begin{bmatrix}2\\0\\1\\-2\end{bmatrix}$?
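One way to check your answers with numpy's built-in vector norms (note numpy's `ord=0` follows the nonzero-count convention for the "$\ell_0$" norm):

import numpy as np

a = np.array([1, 3, 1, -4])
b = np.array([2, 0, 1, -2])
for p in [0, 1, 2, np.inf]:
    print(p, np.linalg.norm(a, ord=p), np.linalg.norm(b, ord=p))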
"Norm balls"¶
The set $\{x \mid \Vert x \Vert_p \le 1 \}$, where $ \|x\|_p = \left(\sum_{i=1}^n{|x_i|^p} \right)^{\frac{1}{p}} $
Note how $\Vert x \Vert_a \le \Vert x \Vert_b$ if $a\ge b$
Norms, Inner products, and angles¶
Inner product = length squared
$$v \cdot v = v^T v = \| v \|_2^2$$
Angle $\theta$ between two vectors
$$v \cdot w = v^T w = \| v \|_2\| w \|_2 \cos\theta$$
Cauchy-Schwarz Inequality $|v^T w| \le \| v \|\| w \|$¶
Triangle Inequality¶
$$\| v + w \| \le \| v \|+\| w \|$$
Derive by noting that $\| v + w \|^2 = \|v\|^2 + 2v^Tw + \|w\|^2$, then apply Cauchy-Schwarz to the cross term and recognize the square of $\|v\| + \|w\|$
$S$ inner product¶
$$\langle v,w\rangle_S = v^T S w$$
A valid inner product when $S$ is symmetric positive definite.
Minkowski metric¶
Proposed by Lorentz for 4-dimensional spacetime, $v = (x,y,z,t)^T$, $c$ is speed of light
$$\| v\|^2_M = x^2+y^2+z^2-c^2t^2$$
Is it a true norm?
II. Distances¶
Euclidean distance between two vectors $a$ and $b$ in $\mathbb{R}^{n}$:¶
$$d(\mathbf a,\mathbf b) = \sqrt{\sum_{i=1}^{n}(b_i-a_i)^2}$$
Again this may not make sense for various vectors. Many alternatives...
Norm versus Distance¶
What is the relationship?
Distance properties¶
A distance metric $d(\mathbf x,\mathbf y)$ must satisfy four particular conditions to be considered a metric:
- Non-negativity: $d(\mathbf x,\mathbf y) \geq 0$
- Zero upon equality: $d(\mathbf x,\mathbf y) = 0 \iff \mathbf x = \mathbf y$
- Commutativity of arguments: $d(\mathbf x,\mathbf y) = d(\mathbf y,\mathbf x)$
- Triangle Inequality: $d(\mathbf x,\mathbf z) \leq d(\mathbf x,\mathbf y) + d(\mathbf y,\mathbf z)$
Exercise¶
Write the Euclidean distance between two points entirely in terms of dot products.
What does this tell you about using dot products to compare vector similarity?
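One possible answer, checked numerically on arbitrary example vectors: $d(\mathbf a,\mathbf b)^2 = \mathbf a\cdot\mathbf a - 2\,\mathbf a\cdot\mathbf b + \mathbf b\cdot\mathbf b$.

import numpy as np

a = np.array([1., 2., 3.])
b = np.array([4., 0., 1.])

d2 = np.dot(a, a) - 2*np.dot(a, b) + np.dot(b, b)
print(np.sqrt(d2), np.linalg.norm(a - b))  # the two should agree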
Triangle Inequality¶
$\Vert \mathbf x + \mathbf z\Vert \leq \Vert \mathbf x\Vert + \Vert \mathbf z\Vert$
$d(\mathbf a,\mathbf c) \leq d(\mathbf a,\mathbf b) + d(\mathbf b,\mathbf c)$
Application: feature distances¶
Recommender system based on finding the most similar customers or the most similar movie properties
Question: what are the units of the distance here?
Application: rms prediction error¶
Time series prediction (stocks or temperature) compared to truth in retrospect
Manhattan or "Taxicab" Distance, also "Rectilinear distance"¶
Measures the relationship between points along right-angle (axis-aligned) paths, meaning that we sum the absolute values of the differences in vector coordinates.
This metric is sensitive to rotation.
$$d_{M}(a,b) = \sum_{i=1}^{n}|b_i-a_i|$$
...consider
- Non-negativity: $d(\mathbf x,\mathbf y) \geq 0$
- Zero upon equality: $d(\mathbf x,\mathbf y) = 0 \iff \mathbf x = \mathbf y$
- Commutativity of arguments: $d(\mathbf x,\mathbf y) = d(\mathbf y,\mathbf x)$
- Triangle Inequality: $d(\mathbf x,\mathbf z) \leq d(\mathbf x,\mathbf y) + d(\mathbf y,\mathbf z)$
Exercise¶
Does it fulfill the 4 conditions?
Chebyshev Distance¶
The Chebyshev distance, sometimes called the $L^{\infty}$ metric, between two vectors is simply the greatest of their differences along any coordinate dimension:
$$d_{\infty}(\mathbf a,\mathbf b) = \max_{i}{|(b_i-a_i)|}$$
...consider
- Non-negativity: $d(\mathbf x,\mathbf y) \geq 0$
- Zero upon equality: $d(\mathbf x,\mathbf y) = 0 \iff \mathbf x = \mathbf y$
- Commutativity of arguments: $d(\mathbf x,\mathbf y) = d(\mathbf y,\mathbf x)$
- Triangle Inequality: $d(\mathbf x,\mathbf z) \leq d(\mathbf x,\mathbf y) + d(\mathbf y,\mathbf z)$
Cosine Distance¶
High school geometry: $\mathbf a \cdot \mathbf b = \|\mathbf a\|_2\|\mathbf b\|_2 \cos\theta$
Only depends on angle between the vectors
$$d_{\cos}(\mathbf a,\mathbf b) = 1-\frac{\mathbf a \cdot \mathbf b}{\|\mathbf a\|\|\mathbf b\|} = 1 - \cos\theta$$
Not a true distance metric. Which property fails to hold? (Easy to guess based on the geometry.)
Exercise¶
Implement the metrics manually and compute distances between:
$ \begin{bmatrix} 1 \\ 2 \\ 3 \\ 4 \end{bmatrix}$ and $ \begin{bmatrix} 5 \\ 6 \\ 7 \\ 8 \end{bmatrix}$
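A minimal manual implementation of the four metrics discussed above, applied to these two vectors:

import numpy as np

a = np.array([1., 2., 3., 4.])
b = np.array([5., 6., 7., 8.])

def d_euclidean(a, b):
    return np.sqrt(np.sum((b - a)**2))

def d_manhattan(a, b):
    return np.sum(np.abs(b - a))

def d_chebyshev(a, b):
    return np.max(np.abs(b - a))

def d_cosine(a, b):
    return 1 - np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

for d in (d_euclidean, d_manhattan, d_chebyshev, d_cosine):
    print(d.__name__, d(a, b))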
III. Matrix Norms¶
Matrix norms: motivation¶
Same goal as with vector norms: determining if matrices are small or large, or if two matrices are similar or different
Numerical mathematics example: you use 32-bit precision numbers to compute the inverse of a matrix. How different from the true inverse is your numerical calculation? Suppose you used 64-bit precision instead? (FYI the errors can be much larger than $2^{-32}$ or $2^{-64}$)
# https://www.mathworks.com/help/matlab/ref/cond.html
import numpy as np

A1 = [[4.1, 2.8],
      [9.7, 6.6]]
print(np.linalg.inv(A1))

A2 = [[4.1,   2.8  ],
      [9.671, 6.608]]
print(np.linalg.inv(A2))
[[-66.  28.]
 [ 97. -41.]]
[[ 472.         -200.        ]
 [-690.78571429  292.85714286]]
Matrix norms¶
The set of matrices can be viewed as a vector space (contains origin, closed under addition and scalar multiplication), hence we can define a norm analogously for lengths and distances
Conditions of a norm
- $\|A\| \ge 0$, equal only if $A$ is the zero matrix
- $\|cA\| = |c| \|A\|$
- $\|A+B\| \le \|A\| + \|B\|$
- $\|AB\| \le \|A\|\|B\|$ ...new rule for matrix norms
Matrix norms: Frobenius Norm¶
- Frobenius Norm $\Vert A \Vert_F = \sqrt{\sigma_1^2+\dotsb+\sigma_r^2} = \sqrt{\sum_{ij}A_{ij}^2}$
Exercise: Compute $\|AB\|_F$
Orthogonal matrix¶
Doesn't change the L2 norm of a vector or the Frobenius norm of a matrix
$$\Vert Qx\Vert_2 = \Vert x\Vert_2$$
$$\Vert QB\Vert_F = \Vert B\Vert_F$$
Matrix norms from vector norms via vectorization¶
Just "vectorize" the matrix then apply a vector norm.
What do you get if you use the L2 vector norm for this?
Matrix norms from vector norms via matrix-vector product¶
Given a vector norm $\|v\|_\psi$, we can always make a matrix norm as follows
\begin{align} \|A\|_\psi &= \max_{x \ne 0}\dfrac{\Vert Ax \Vert_\psi}{\Vert x\Vert_\psi} \\ &= \max_{x \ne 0, \Vert x\Vert_\psi = 1}\Vert Ax \Vert_\psi \end{align}
What does that give?¶
- $\ell_2$ $\rightarrow$ $\|A\|_2 = \sigma_1$, largest singular value of $A$
- $\ell_1$ $\rightarrow$ $\|A\|_1 = \max_{column_i}\|column_i\|_1 $, largest absolute column sum
- $\ell_\infty$ $\rightarrow$ $\|A\|_\infty = \max_{row_i}\|row_i\|_1 $, largest absolute row sum
Spectral and related norms¶
- Spectral Norm $\Vert A \Vert_2 = \max \dfrac{\Vert Ax \Vert}{\Vert x\Vert} = \sigma_1$, a.k.a. $\ell_2$ norm
- Frobenius Norm $\Vert A \Vert_F = \sqrt{\sigma_1^2+\dotsb+\sigma_r^2} = \sqrt{\sum_{ij}A_{ij}^2}$
- Nuclear norm $\Vert A \Vert_N = \sigma_1+\dotsb+\sigma_r$, a.k.a. trace norm
Exercise: compute norms for $I$
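All three norms are available from `np.linalg.norm`; a sketch computing them for $I$ (and, for comparison, an arbitrary second matrix):

import numpy as np

for M in (np.eye(3), np.array([[3., 0.], [4., 5.]])):
    print('spectral :', np.linalg.norm(M, 2))      # largest singular value
    print('Frobenius:', np.linalg.norm(M, 'fro'))  # sqrt of sum of squared entries
    print('nuclear  :', np.linalg.norm(M, 'nuc'))  # sum of singular values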
Spectral Radius¶
$$|\lambda|_{\max} = \max_i |\lambda_i|$$
Useful metric that does not fulfill properties of norm
For every possible (true) norm, we have $\|A\| \ge |\lambda|_{\max}$
Useful fact: if $|\lambda|_{\max} <1$, $\|A^k\| \rightarrow 0$ for large $k$
Can you explain why this happens?
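A small sketch, using an arbitrary matrix with spectral radius below 1, showing the norms of successive powers shrinking toward zero:

import numpy as np

A = np.array([[0.5, 0.4],
              [0.1, 0.3]])
print('spectral radius:', np.max(np.abs(np.linalg.eigvals(A))))  # < 1 here

Ak = np.eye(2)
for k in range(1, 6):
    Ak = Ak @ A
    print(k, np.linalg.norm(Ak, 2))   # decays toward 0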
"Medical norm"¶
$$|||A|||_\infty = \max_{i,j}|A_{i,j}|$$
Not a true matrix norm unless scaled properly.
See Strang problem I.11 #12.
Condition number¶
Quantifies how much output changes relative to a change in input.
Low condition number - good - "well conditioned"
- a small change in inputs due to noise or numerical precision causes only a small change in output
Huge condition number - bad - "ill conditioned"
- even a tiny change in inputs can cause a drastic change in outputs; the result is unreliable
Condition number for solving linear system $Ax=b$¶
- View the function as $x = A^{-1}b$ where input = $b$ and output = $x$.
- Relative fractional error
$$ RFE = \dfrac{\left(\dfrac{\|A^{-1}(b+\delta) - A^{-1}b\|}{\|A^{-1}b\|}\right)} {\left(\dfrac{\|(b+\delta) - b\|}{\|b\|}\right)} = \dfrac{\left(\dfrac{\|A^{-1}\delta\|}{\|A^{-1}b\|}\right)} {\left(\dfrac{\|\delta\|}{\|b\|}\right)} = \dfrac{\|A^{-1}\delta\|}{\|A^{-1}b\|}\dfrac{\|b\|}{\|\delta\|} $$
Taking the max over $b$ and $\delta$ gives
\begin{align} C &= \max_{b,\delta} \dfrac{\|A^{-1}\delta\|}{\|A^{-1}b\|}\dfrac{\|b\|}{\|\delta\|} \\ &= \left(\max_{\delta}\dfrac{\|A^{-1}\delta\|}{\|\delta\|}\right) \left(\max_{b}\dfrac{\|b\|}{\|A^{-1}b\|} \right) \\ &= \|A^{-1}\|\|A\| \end{align}
First term is just definition of matrix norm. For second term, plug in $b = Ax$ and get norm definition also, noting that max will be the same optimizing over $x$ or over $Ax$ since $A$ is full-rank.
"Matrix Condition number"¶
Usually means condition number with L2 norm
Recall $\|A\|_2 = \sigma_\max(A)$
$$C \equiv \|A^{-1}\|_2\|A\|_2 = \dfrac{\sigma_\max(A)}{\sigma_\min(A)}$$
Suppose a matrix is singular? (what does this mean?)
- what are its singular values?
- what is its condition number?
# condition numbers of A1 and A2 from the example above
print(np.linalg.cond(A1), np.linalg.cond(A2))
1622.9993838565106 11560.27884206701
v1 = np.array([1.,2.])
A = np.array([v1,2.*v1])
print(A)
print('rank =',np.linalg.matrix_rank(A))
[[1. 2.]
 [2. 4.]]
rank = 1
An = A + 1e-10*np.random.rand(2,2)
print('rank =',np.linalg.matrix_rank(An))
print('cond =',np.linalg.cond(An))
rank = 2
cond = 282960980020.97266
Exercise¶
Test the prior matrices when solving a linear system $Ax=b$ and adding a small perturbation to $b$.
Change the matrix to make it well conditioned and repeat.
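A sketch of the experiment, using the ill-conditioned matrix A1 from above and the perturbation from the MATLAB `cond` example:

import numpy as np

A = np.array([[4.1, 2.8],
              [9.7, 6.6]])
b = np.array([4.1, 9.7])                   # exact solution is x = [1, 0]
print('cond(A):', np.linalg.cond(A))
print('x      :', np.linalg.solve(A, b))

b2 = b + np.array([0.01, 0.0])             # small perturbation of b
print('x_pert :', np.linalg.solve(A, b2))  # solution changes drastically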
IV. Distance-based Classification¶
Lab: simple classification with distances¶
Load and investigate the IRIS dataset from scikit.
Imagine we have the flower measurements for one of the flowers but don't know the flower type. We want to classify its type by finding the flower of known type which is most similar.
Do this by taking each flower and computing a distance metric between its measurements and that of every other flower. Take the type of the "nearest" flower as your estimate of the flower type.
Compute the accuracy of this technique based on how many flowers are classified correctly in this way.
Try using different distance metrics to compare flowers. Which makes most sense?
from sklearn import datasets
iris = datasets.load_iris()
dir(iris)
['DESCR', 'data', 'feature_names', 'filename', 'target', 'target_names']
(iris.data, iris.target)
(output: the full 150×4 array of measurements — rows like [5.1, 3.5, 1.4, 0.2] — and the length-150 array of class labels: 50 zeros, 50 ones, 50 twos)
iris.data[52]
array([6.9, 3.1, 4.9, 1.5])
The "1-Nearest-Neighbor" Algorithm¶
Example using two features. Dots represent the measurements for the flowers in the dataset. The background color shows the class of the nearest neighbor (as if each background point were the measurements of an unknown flower being classified).
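One possible leave-one-out implementation of the lab above, using the Euclidean distance (swap in other metrics to compare):

import numpy as np
from sklearn import datasets

iris = datasets.load_iris()
X, y = iris.data, iris.target

correct = 0
for i in range(len(X)):
    d = np.linalg.norm(X - X[i], axis=1)  # distance from flower i to all flowers
    d[i] = np.inf                         # exclude the flower itself
    correct += (y[np.argmin(d)] == y[i])  # does the nearest neighbor match?

print('1-NN accuracy:', correct / len(X))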
Hamming or "Rook" Distance¶
The Hamming distance can be used to compare nearly anything to anything else.
Defined as the number of positions at which two strings of equal length differ, e.g.:
$$d_{hamming}('bear', 'beat') = 1$$ $$d_{hamming}('cat', 'cog') = 2$$ $$d_{hamming}('01101010', '01011011') = 3$$
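A minimal implementation, sketched for equal-length strings:

def hamming(s, t):
    assert len(s) == len(t)               # defined only for equal lengths
    return sum(a != b for a, b in zip(s, t))

print(hamming('bear', 'beat'))            # 1
print(hamming('cat', 'cog'))              # 2
print(hamming('01101010', '01011011'))    # 3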
Edit distance¶
Similar to hamming distance, but also includes insertions and deletions, and so can compare strings of any length to each other.
$$d_{edit}('lead', 'gold') = 3$$ $$d_{edit}('monkey', 'monk') = 2$$ $$d_{edit}('lucas', 'mallori') = 6$$
- A kind of approximate matching method.
- Must use dynamic programming or memoization for efficiency
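A sketch of the classic dynamic-programming (Levenshtein) solution; `d[i][j]` is the edit distance between the first `i` characters of `s` and the first `j` characters of `t`:

def edit_distance(s, t):
    m, n = len(s), len(t)
    d = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        d[i][0] = i                           # delete everything
    for j in range(n + 1):
        d[0][j] = j                           # insert everything
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if s[i-1] == t[j-1] else 1
            d[i][j] = min(d[i-1][j] + 1,      # deletion
                          d[i][j-1] + 1,      # insertion
                          d[i-1][j-1] + cost) # substitution or match
    return d[m][n]

print(edit_distance('lead', 'gold'))      # 3
print(edit_distance('monkey', 'monk'))    # 2
print(edit_distance('lucas', 'mallori'))  # 6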
V. Clustering¶
Dimensionality Reduction¶
Describing data with less information
How do PCA and clustering lead to less information? Consider a dataset.
What are the benefits?
Clustering References¶
https://en.wikipedia.org/wiki/K-means_clustering
https://en.wikipedia.org/wiki/K-means%2B%2B - kmeans++
https://www.youtube.com/watch?v=IuRb3y8qKX4 - video with visualization of training progress
https://www.youtube.com/watch?v=cWSnFaSjgBU - more on visualization
http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
Marketing Motivation¶
You want to make a certain number of products for a large population of customers. We know a number of features describing each of the customers, and are able to target a product to a certain customer profile.
E.g.: different customers prefer cars which are fast, or get good gas mileage, or are cheap, or are luxurious and prestigious (i.e., expensive), or are big, or are little, etc. And in various combinations.
The customers vary widely, so we wish to make $k$ products which cover, as closely as possible, what as many customers as possible would like.
The better you can do this, the better your products will sell.
K-means clustering¶
- K clusters, centers are the means of cluster members.
- Each sample belongs to the cluster whose mean is nearest.
- Iteratively recompute membership and means (greedy optimization)
K-means algorithm¶
- Assign a number from 1 to K to each of the N data points randomly
- While cluster assignments keep changing:
    - For each of the K clusters: calculate the cluster centroid
    - For each of the N points: assign the point to the centroid it is closest to
Consider the possible ways to vary the details in this method. How many clusters? How is distance calculated?
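A bare-bones numpy sketch of the algorithm (assumes Euclidean distance and that no cluster goes empty during the iterations; library versions handle that case):

import numpy as np
from sklearn import datasets

def kmeans(X, k, n_iters=100, seed=0):
    rng = np.random.default_rng(seed)
    labels = rng.integers(0, k, size=len(X))        # random initial assignment
    for _ in range(n_iters):
        # centroid of each cluster (assumes every cluster is non-empty)
        centroids = np.array([X[labels == j].mean(axis=0) for j in range(k)])
        # reassign each point to its nearest centroid
        dists = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
        new_labels = np.argmin(dists, axis=1)
        if np.all(new_labels == labels):            # converged
            break
        labels = new_labels
    return labels, centroids

labels, centroids = kmeans(datasets.load_iris().data, 3)
print(centroids)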
k-Means as Greedy Optimization¶
The sum of all distances between members of a cluster gives a total length (a squared length can be viewed as a measure of "energy").
Within-Cluster-Variation: $WCV(C_k) = \dfrac{1}{|C_k|}\sum_{i, j \in C_k}d(x_{i},x_{j})$, where $d(x_{i},x_{j})$ is a distance metric of your choice.
k-means tries to minimize the net lengths over all clusters.
If you plotted this for each iteration, what would the plot look like?
Lab¶
Implement k-means clustering algorithm using scikit on IRIS, MNIST datasets.
Compute the net WCV for various options.
Look at result and WCV with different initializations.
Try varying the metric used in the clustering.
Vary k and plot WCV versus k.
Scikit hints¶
Hyperparameter k is input into constructor as usual
Clustering is done with the stereotypical "fit()" method
Results (cluster centers, cluster label for each point) are stored in attributes, as usual; ".inertia_" ~ WCV
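A minimal usage sketch on the IRIS data (parameter names follow the scikit-learn `KMeans` API):

from sklearn import datasets
from sklearn.cluster import KMeans

X = datasets.load_iris().data

km = KMeans(n_clusters=3, n_init=10, random_state=0)  # k goes in the constructor
km.fit(X)

print(km.cluster_centers_)  # one centroid per cluster
print(km.labels_[:10])      # cluster label for each point
print(km.inertia_)          # within-cluster sum of squares (~ net WCV)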
Variants of k-Means¶
It's often possible by being creative to come up with ways to define a distance for unusual data types.
For example: comparing books with distance defined as (inverse) number of common words. Both contain "the", "souffle", "france", etc.
But it still may not make sense to average the data. Why is this a problem? What might be done instead?
Statistics with Python¶
Random sampling¶
- rand() - uniform random numbers in [0,1]
- randn() - standard normal random numbers (zero mean, unit variance)
from numpy import random
printcols(dir(random))
(output: the contents of numpy.random — generators and distributions such as default_rng, rand, randn, randint, seed, shuffle, choice, normal, uniform, exponential, poisson, binomial, gamma, beta, multivariate_normal, ...)
The Normal Distribution¶
- Also known as Gaussian distribution
$$ f(x) = \frac{1}{\sqrt{2\pi}\sigma} e^{\frac{-(x-\mu)^2}{2\sigma^2}} \text{ for } -\infty < x < \infty $$
\begin{align} E(X) &= \mu \\ V(X) &= \sigma^2 \end{align}
import numpy as np
from matplotlib import pyplot as plt
def univariate_normal(x, mean, var):
    return (1. / np.sqrt(2 * np.pi * var)) * np.exp(-(x - mean)**2 / (2 * var))
x = np.linspace(-5,5,1000)
plt.plot(x,univariate_normal(x,1,2));
plt.show()
Exercise¶
Generate normal random samples with mean of 1 and variance of 1
Plot histogram versus the theoretical distribution
Try different numbers of samples
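One possible solution sketch, reusing `univariate_normal` from the cell above:

rng = np.random.default_rng(0)
samples = rng.normal(loc=1, scale=1, size=1000)  # mean 1, variance 1

plt.hist(samples, bins=30, density=True, alpha=0.5)  # empirical
x = np.linspace(-3, 5, 200)
plt.plot(x, univariate_normal(x, 1, 1))              # theoretical
plt.show()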
Statistics¶
Consider the relation between norms and simple statistical quantities
\begin{align} \text{Population mean} &= \mu = \frac{\sum_{i=1}^N x_i}{N} \\ \text{Sample mean} &= \bar{x} = \frac{\sum_{i=1}^n x_i}{n} \\ \text{Population variance} &= \sigma^2 = \frac{\sum_{i=1}^N (x_i - \mu)^2}{N} \\ \text{Sample variance} &= s^2 = \frac{\sum_{i=1}^n (x_i - \bar{x})^2}{n - 1} = \frac{\sum_{i=1}^n x_i^2 - \frac{1}{n}(\sum_{i=1}^n x_i)^2}{n - 1} \\ \text{Standard deviation} &= \sqrt{\text{Variance}} \end{align}
Exercise¶
Load a dataset from sklearn and compute mean and variance of a column
Standardize the column
Now compute mean and variance of the result
The Standard Normal Distribution $Z$¶
- If $X$ is a normal r.v. with $E(X) = \mu$ and $V(X) = \sigma^2$
$$ Z = \frac{X-\mu}{\sigma} $$
- $Z$ is a normal r.v. with $E(Z) = 0$ and $V(Z) = 1$
Standardizing data has two tasks¶
$$ Z = \frac{X-\mu}{\sigma} $$
- Remove the mean
- Scale by the standard deviation
Lab: Standardizing data¶
Standardize the columns of the Iris dataset using linear algebra.
Test it worked by computing the mean and norm of each column.
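One way to do it (a sketch using the population standard deviation, so the mean square of each standardized column comes out exactly 1):

import numpy as np
from sklearn import datasets

X = datasets.load_iris().data              # 150 x 4
n = len(X)

Xs = (X - X.mean(axis=0)) / X.std(axis=0)  # remove mean, scale by std

print(Xs.mean(axis=0))                     # ~0 for every column
print(np.linalg.norm(Xs, axis=0)**2 / n)   # 1 for every column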
Multivariate Gaussian (for $n$ dimensions)¶
$$ f(\mathbf x) = \frac{1}{ \sqrt{(2 \pi)^n |\boldsymbol\Sigma|}} \exp \left(- \frac{1}{2} (\mathbf x - \boldsymbol \mu)^T \boldsymbol\Sigma^{-1} (\mathbf x - \boldsymbol \mu) \right) \text{, for } \mathbf x \in \mathbb{R}^n $$
- Mean vector as centroid of distribution
- Covariance matrix describes spread - correlations between variables $\Sigma_{ij} = S_{\mathbf x_i \mathbf x_j}$
\begin{align} \text{Correlation Coefficient} &= r = \frac{ \sum_{i=1}^n (x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_{i=1}^n(x_i - \bar{x})^2}\sqrt{\sum_{i=1}^n(y_i - \bar{y})^2}} = \frac{S_{xy}}{\sqrt{S_{xx} S_{yy}}} \\ \text{Covariance} &= S_{xy} = \frac{1}{n-1}\sum_{i=1}^n (x_i - \bar{x})(y_i - \bar{y}) \end{align}
def multivariate_normal(x, n, mean, cov):
    return (1. / np.sqrt((2*np.pi)**n * np.linalg.det(cov))
            * np.exp(-1/2 * (x - mean).T @ np.linalg.inv(cov) @ (x - mean)))

mean = np.array([35, 70])
cov = 100*np.array([[1, .5],
                    [.5, 1]])

pic = np.zeros((100, 100))
for x1 in np.arange(0, 100):
    for x2 in np.arange(0, 100):
        x = np.array([x1, x2])
        pic[x1, x2] = multivariate_normal(x, 2, mean, cov)
plt.contour(pic);
Tricky Exercise¶
Consider a $m \times 3$ matrix $\mathbf A$ with columns $\mathbf x, \mathbf y$, and $\mathbf z$ (each a vector of data).
How would you efficiently standardize the three columns to make $\bar{\mathbf A}$?
What are the elements of $\bar{\mathbf A}^T \bar{\mathbf A}$?
Exercise:¶
Assume you have a vector containing samples. Write the following in terms of norms and dot products:
- mean
- variance
- Correlation coefficient
- Covariance
So what does this tell you about comparing things using distances versus dot products versus statistics?
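A sketch expressing these quantities with norms and dot products, on arbitrary random samples:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = 0.5*x + rng.normal(size=100)
n = len(x)
ones = np.ones(n)

mean_x = np.dot(ones, x) / n              # mean as a dot product
xc = x - mean_x                           # centered samples
yc = y - np.dot(ones, y) / n
var_x = np.dot(xc, xc) / (n - 1)          # sample variance
cov_xy = np.dot(xc, yc) / (n - 1)         # covariance
r = np.dot(xc, yc) / (np.linalg.norm(xc) * np.linalg.norm(yc))  # correlation

print(mean_x, var_x, cov_xy, r)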