-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
130 lines (101 loc) · 3.99 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
Author: Muhammad Ahmed
VANILLA KMEANS
"""
import numpy as np
import matplotlib.pyplot as plt
# Synthetic data: two 2-D Gaussian point clouds, one sample per row.
n1 = 1000  # samples per cloud that go into the training slices ([:n1])
n2 = 100   # additional samples per cloud beyond the training slices
n_total = n1 + n2

# Cloud 1: axis-aligned ellipse centred at (0, 0); y is half as spread as x.
x = np.random.randn(n_total, 1)
y = 0.5 * np.random.randn(n_total, 1)

# Cloud 2 (before rotation): flat ellipse drawn around (1, -2).
x2 = 1 + np.random.randn(n_total, 1)
y2 = -2 + 0.2 * np.random.randn(n_total, 1)

# Counter-clockwise 2-D rotation matrix for the angle theta.
theta = np.pi/4
cos_t, sin_t = np.cos(theta), np.sin(theta)
A = np.array([[cos_t, -1*sin_t],
              [sin_t, cos_t]])

# Rotate cloud 2 about the origin (so its centre moves with the rotation):
# [x3, y3]^T = A @ [x2, y2]^T, written out per coordinate.
(a00, a01), (a10, a11) = A
x3 = a00*x2 + a01*y2
y3 = a10*x2 + a11*y2
# Figure with a single axes on which both point clouds are drawn.
f, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(12, 6), sharey=True)

# Marker settings shared by both clouds: white outline for contrast against
# the dark fill colours, and an enlarged marker size for visibility.
marker_style = dict(edgecolor='white', s=50)

# Cloud 1 as dark-green circles, cloud 2 (rotated) as navy triangles.
ax1.scatter(x, y, color='darkgreen', marker='o', **marker_style)
ax1.scatter(x3, y3, color='navy', marker='^', **marker_style)

# Title and axis labels.
ax1.set_title('Data Cluster', fontsize=14)
ax1.set_xlabel('X-axis', fontsize=12)
ax1.set_ylabel('Y-axis', fontsize=12)

# Thin dashed grid on major and minor ticks, then tighten the layout.
ax1.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tight_layout()
# Training set for K-means: the first n1 samples of each cloud,
# one (x, y) point per row.
X1 = np.hstack((x3[:n1], y3[:n1]))  # rotated cloud
X2 = np.hstack((x[:n1], y[:n1]))    # cloud centred at the origin
Y = np.vstack((X1, X2))             # X1 rows first, then X2 rows

# Ground-truth labels, one per row of Y: 1 for the X1 rows, 2 for the X2 rows.
# The two label columns are stacked vertically (not side by side) so that Z
# has the same number of rows as Y and lines up with it row for row.
Z = np.vstack((np.ones((n1, 1)), 2 * np.ones((n1, 1))))

# Test data: every sample that was left out of the training slices above
# (rows n1 onwards), again one (x, y) point per row.
x1test = np.hstack((x3[n1:], y3[n1:]))
x2test = np.hstack((x[n1:], y[n1:]))
# K-means initialisation: seed each centroid with a randomly chosen sample
# (taken from the first 100 rows) of one of the two point clouds.

# Centroid 1 starts on a sample of the rotated cloud (x3, y3).
random_val_1 = np.random.randint(100)
g1x, g1y = x3[random_val_1], y3[random_val_1]
g1 = np.array([g1x[0], g1y[0]])  # flat (x, y) pair

# Centroid 2 starts on a sample of the cloud centred at the origin (x, y).
random_val_2 = np.random.randint(100)
g2x, g2y = x[random_val_2], y[random_val_2]
g2 = np.array([g2x[0], g2y[0]])  # flat (x, y) pair

# Mark both starting centroids with large stars (red for g1, blue for g2)
# and show the legend entries attached to them.
for guess, fmt, tag in ((g1, 'r*', 'Initial guess 1'), (g2, 'b*', 'Initial guess 2')):
    ax1.plot(guess[0], guess[1], fmt, markersize=15, label=tag)
ax1.legend(loc='upper right')
# K-means refinement on the combined training set Y (one (x, y) point per row).
# Each pass has two steps:
#   1. assignment - every row of Y joins the cluster whose centroid (g1 or g2)
#      is nearer in Euclidean distance; ties go to cluster 2;
#   2. update     - each centroid moves to the mean of its cluster.
# The loop ends when a pass leaves both centroids exactly unchanged (the
# assignments can no longer change either), or after max_iterations passes.
max_iterations = 100  # Adjust iteration according to your need
for _ in range(max_iterations):
    # Assignment step, done for all rows at once instead of growing the
    # clusters one row at a time.
    dist_to_g1 = np.linalg.norm(Y - g1, axis=1)
    dist_to_g2 = np.linalg.norm(Y - g2, axis=1)
    nearer_g1 = dist_to_g1 < dist_to_g2
    cluster1 = Y[nearer_g1]
    cluster2 = Y[~nearer_g1]

    # Remember where the centroids were so convergence can be detected.
    previous_g1, previous_g2 = g1, g2

    # Update step. A cluster that received no points cannot be averaged, so
    # its centroid is re-seeded from a random sample of its original cloud.
    if cluster1.size > 0:
        g1 = cluster1.mean(axis=0)
    else:
        random_val_1 = np.random.randint(100)
        g1x = x3[random_val_1]
        g1y = y3[random_val_1]
        g1 = np.array([g1x, g1y]).flatten()
    if cluster2.size > 0:
        g2 = cluster2.mean(axis=0)
    else:
        random_val_2 = np.random.randint(100)
        g2x = x[random_val_2]
        g2y = y[random_val_2]
        g2 = np.array([g2x, g2y]).flatten()

    # Trace the centroid positions on the existing scatter plot
    # (red dot for g1, blue dot for g2).
    ax1.plot(g1[0], g1[1], 'ro', markersize=7)
    ax1.plot(g2[0], g2[1], 'bo', markersize=7)
    # update on the same plot
    plt.draw()

    # Converged: another pass would reproduce the same clusters and centroids.
    if np.array_equal(g1, previous_g1) and np.array_equal(g2, previous_g2):
        break

plt.show()  # Show the final plot