• 首页 首页 icon
  • 工具库 工具库 icon
    • IP查询 IP查询 icon
  • 内容库 内容库 icon
    • 快讯库 快讯库 icon
    • 精品库 精品库 icon
    • 问答库 问答库 icon
  • 更多 更多 icon
    • 服务条款 服务条款 icon

python实现Kmeans++算法

武飞扬头像
就是木子呀
帮助1

K-Means++ 是一种用于初始化 K-Means 聚类的方法,它的目的是通过选择合理的初始聚类中心来提升 K-Means 聚类的效果。

K-Means算法的基本流程是:

  1. 随机选择K个初始聚类中心
  2. 对于每个数据点,计算它到每个聚类中心的距离,并将其分配到距离最近的聚类中心
  3. 对于每个聚类,计算所有数据点的平均值,并将其作为新的聚类中心
  4. 重复步骤2和3直到聚类中心不再改变或达到最大迭代次数

K-Means++ 算法的基本思路是:

  1. 随机选择一个数据点作为第一个聚类中心
  2. 对于每个数据点,计算它到最近聚类中心的距离,并将距离存储在一个列表中
  3. 将距离列表归一化为概率分布(距离越远的数据点被选中的概率越大;标准的 K-Means++ 以距离的平方作为权重),并按该分布随机选择一个数据点作为新的聚类中心
  4. 重复步骤2和3,直到选择了K个聚类中心

下面是一段 Python 代码示例:

  1.  
    import random
  2.  
    import math
  3.  
    class KMeansPlusPlus:
        """K-Means clustering whose initial centroids are chosen by k-means++.

        The first centroid is a uniformly random data point.  Each further
        centroid is a data point drawn with probability proportional to its
        squared distance from the nearest centroid chosen so far, which tends
        to spread the seeds out.  The seeds are then refined by ``KMeans``.
        """

        def __init__(self, data_points, K, max_iterations):
            """Store the clustering inputs.

            Args:
                data_points: Sequence of points exposing ``coordinates`` and
                    ``distance_to_nearest_centroid(centroids)``.
                K: Number of clusters to produce.
                max_iterations: Upper bound on K-Means refinement rounds.
            """
            self.data_points = data_points
            self.K = K
            self.max_iterations = max_iterations
            self.random = random.Random()

        def cluster(self):
            """Seed K centroids with k-means++ and run K-Means on them.

            Returns:
                Whatever ``KMeans.cluster()`` returns for the seeded centroids.

            Raises:
                ValueError: If there are no data points or ``K`` is below 1.
            """
            if not self.data_points:
                raise ValueError("data_points must not be empty")
            if self.K < 1:
                raise ValueError("K must be at least 1")

            # choice() always yields a valid element; randint(0, len(seq)) is
            # inclusive on both ends and can index one past the end.
            first = self.random.choice(self.data_points)
            # Copy the coordinates so later centroid updates never alias the
            # data point's own coordinate list.
            centroids = [Centroid(list(first.coordinates))]

            # Select remaining centroids
            for _ in range(1, self.K):
                # k-means++ weights each point by its squared distance to the
                # nearest already-chosen centroid.
                weights = [
                    dp.distance_to_nearest_centroid(centroids) ** 2
                    for dp in self.data_points
                ]
                if sum(weights) > 0:
                    chosen = self.random.choices(
                        self.data_points, weights=weights, k=1
                    )[0]
                else:
                    # Every point coincides with a chosen centroid, so there
                    # is no distribution to sample; fall back to uniform.
                    chosen = self.random.choice(self.data_points)
                centroids.append(Centroid(list(chosen.coordinates)))

            # Run K-Means algorithm
            kmeans = KMeans(self.data_points, centroids, self.max_iterations)
            return kmeans.cluster()
  35.  
     
  36.  
    class DataPoint:
        """A point in n-dimensional space that can be assigned to a centroid."""

        def __init__(self, coordinates):
            """Store the point's coordinates.

            Args:
                coordinates: Sequence of numbers, one per dimension.
            """
            self.coordinates = coordinates
            # Stays None until something assigns this point to a centroid.
            self.centroid = None

        def distance_to_nearest_centroid(self, centroids):
            """Return the smallest Euclidean distance to any of *centroids*.

            Args:
                centroids: Iterable of objects exposing ``coordinates``.

            Returns:
                The minimum distance, or ``float("inf")`` if *centroids* is
                empty.
            """
            return min(
                (self.euclidean_distance(c.coordinates) for c in centroids),
                default=float("inf"),
            )

        def euclidean_distance(self, coordinates):
            """Return the Euclidean distance from this point to *coordinates*.

            Args:
                coordinates: Sequence of numbers with the same length as
                    ``self.coordinates``.

            Raises:
                ValueError: If the two coordinate sequences differ in length.
            """
            if len(coordinates) != len(self.coordinates):
                raise ValueError(
                    "coordinates must have the same number of dimensions"
                )
            # Accumulate over every dimension; assigning instead of summing
            # would leave only the last dimension's contribution.
            squared = sum(
                (a - b) ** 2 for a, b in zip(self.coordinates, coordinates)
            )
            return math.sqrt(squared)
  54.  
     
  55.  
    class Centroid:
        """A cluster centre together with the data points assigned to it."""

        def __init__(self, coordinates):
            """Create a centroid at *coordinates* with no assigned points.

            Args:
                coordinates: Sequence of numbers, one per dimension.
            """
            self.coordinates = coordinates
            self.data_points = []

        def update_coordinates(self):
            """Move the centroid to the mean of its assigned data points.

            Each item in ``self.data_points`` must expose ``coordinates``.
            If no points are assigned, the coordinates are left unchanged,
            since the mean of an empty cluster is undefined.
            """
            if not self.data_points:
                return
            count = len(self.data_points)
            # Sum each dimension across all assigned points before dividing;
            # the dimensionality is taken from the centroid itself.
            totals = [
                sum(point.coordinates[i] for point in self.data_points)
                for i in range(len(self.coordinates))
            ]
            self.coordinates = [total / count for total in totals]
  69.  
     
  70.  
    class KMeans:
        """Lloyd's K-Means iteration over a given set of starting centroids."""

        def __init__(self, data_points, centroids, max_iterations):
            """Store the clustering inputs.

            Args:
                data_points: Iterable of points exposing ``coordinates``,
                    ``centroid`` and ``euclidean_distance(coordinates)``.
                centroids: List of centroids exposing ``coordinates``,
                    ``data_points`` and ``update_coordinates()``.
                max_iterations: Upper bound on assignment/update rounds.
            """
            self.data_points = data_points
            self.centroids = centroids
            self.max_iterations = max_iterations

        def cluster(self):
            """Assign points to their nearest centroid and update the centroids.

            Repeats until no centroid's coordinates change or
            ``max_iterations`` rounds have run.

            Returns:
                ``self.centroids``, updated in place.

            Raises:
                ValueError: If there are data points but no centroids.
            """
            if self.data_points and not self.centroids:
                raise ValueError("at least one centroid is required")

            for _ in range(self.max_iterations):
                # Clear data points belonging to each centroid
                for centroid in self.centroids:
                    centroid.data_points.clear()

                # Assign each data point to nearest centroid; min() keeps the
                # first centroid on ties.
                for data_point in self.data_points:
                    nearest_centroid = min(
                        self.centroids,
                        key=lambda c: data_point.euclidean_distance(
                            c.coordinates
                        ),
                    )
                    nearest_centroid.data_points.append(data_point)
                    data_point.centroid = nearest_centroid

                # Snapshot so convergence can be detected after the update.
                previous = [list(c.coordinates) for c in self.centroids]

                # Update centroid coordinates
                for centroid in self.centroids:
                    centroid.update_coordinates()

                # Unchanged centroids mean the next round would reproduce the
                # same assignment, so further iterations are pointless.
                current = [list(c.coordinates) for c in self.centroids]
                if current == previous:
                    break

            return self.centroids

这篇好文章是转载于:学新通技术网

  • 版权申明: 本站部分内容来自互联网,仅供学习及演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,请提供相关证据及您的身份证明,我们将在收到邮件后48小时内删除。
  • 本站站名: 学新通技术网
  • 本文地址: /boutique/detail/tanhiaacak
系列文章
更多 icon
同类精品
更多 icon
继续加载