KMEANS Clustering Algorithm Implementation
The k-means algorithm addresses the clustering problem in pattern recognition. Below is a C/C++ implementation of the algorithm; the program source code follows.
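For reference, the two steps the program alternates between (assigning each pattern to its nearest center, then recomputing each center as the mean of its members) can be written, in the program's notation, as

$$c(p) = \arg\min_{j} \sum_{i=1}^{d} \big(\mathrm{Center}_{j,i} - \mathrm{Pattern}_{p,i}\big)^2, \qquad \mathrm{Center}_{j,i} = \frac{1}{|S_j|} \sum_{p \in S_j} \mathrm{Pattern}_{p,i},$$

where d is the vector dimension and S_j is the set of patterns currently assigned to cluster j; the iteration stops when no center changes.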
/*****************************************************************************
                                   KMEANS
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>
// FUNCTION PROTOTYPES
// DEFINES
#define SUCCESS 1
#define FAILURE 0
#define TRUE 1
#define FALSE 0
#define MAXVECTDIM 20
#define MAXPATTERN 20
#define MAXCLUSTER 10
// Format a double as a string (uses the non-standard fcvt(), available on
// older DOS/Windows compilers and as a deprecated POSIX function).
char *f2a(double x, int width){
   static char cbuf[64];      // static so the returned pointer stays valid
   char *cp;
   int i,k;
   int d,s;
   cp=fcvt(x,width,&d,&s);    // digits in cp, decimal position in d, sign in s
   if (s) {
      strcpy(cbuf,"-");
   }
   else {
      strcpy(cbuf," ");
   } /* endif */
   if (d>0) {                 // digits before the decimal point
      for (i=0; i<d; i++) {
         cbuf[i+1]=cp[i];
      } /* endfor */
      cbuf[d+1]=0;
      cp+=d;
      strcat(cbuf,".");
      strcat(cbuf,cp);
   } else {
      if (d==0) {             // value of the form 0.xxxx
         strcat(cbuf,".");
         strcat(cbuf,cp);
      }
      else {                  // leading zeros after the decimal point
         k=-d;
         strcat(cbuf,".");
         for (i=0; i<k; i++) {
            strcat(cbuf,"0");
         } /* endfor */
         strcat(cbuf,cp);
      } /* endif */
   } /* endif */
   cp=&cbuf[0];
   return cp;
}
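fcvt() is not part of standard C/C++. On a compiler that lacks it, a minimal sketch of a replacement based on snprintf might look like the following (f2a_portable is a hypothetical name, not part of the original listing; it approximates f2a's output, including the leading space for non-negative values):

// Hypothetical snprintf-based replacement for f2a() (assumes C99/C++11 or later).
// Formats x with 'width' digits after the decimal point; the space flag reserves
// the sign position for non-negative values, as f2a() does.
char *f2a_portable(double x, int width){
   static char buf[64];                         // static, like the fixed f2a(), so the pointer stays valid
   snprintf(buf, sizeof(buf), "% .*f", width, x);
   return buf;
}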
// ***** Defined structures & classes *****
struct aCluster {
   double Center[MAXVECTDIM];
   int    Member[MAXPATTERN];   // indices of the vectors belonging to this cluster
   int    NumMembers;
};
struct aVector {
   double Center[MAXVECTDIM];
   int    Size;
};
class System {
private:
   double   Pattern[MAXPATTERN][MAXVECTDIM]; // the input pattern vectors
   aCluster Cluster[MAXCLUSTER];
   int      NumPatterns;              // Number of patterns
   int      SizeVector;               // Number of dimensions in vector
   int      NumClusters;              // Number of clusters
   void   DistributeSamples();        // Step 2 of K-means algorithm
   int    CalcNewClustCenters();      // Step 3 of K-means algorithm
   double EucNorm(int, int);          // Calc Euclidean norm of vector difference
   int    FindClosestCluster(int);    // ret indx of clust closest to pattern whose index is arg
public:
   int  LoadPatterns(char *fname);    // Get pattern data to be clustered
   void InitClusters();               // Step 1 of K-means algorithm
   void RunKMeans();                  // Overall control of K-means process
   void ShowClusters();               // Show results on screen
   void SaveClusters(char *fname);    // Save results to file
   void ShowCenters();
};
void System::ShowCenters(){
   int i;
   printf("Cluster centers:\n");
   for (i=0; i<NumClusters; i++) {    // (display assumes 2-D patterns)
      printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
   } /* endfor */
   printf("\n");
}
int System::LoadPatterns(char *fname){
   FILE *InFilePtr;
   int i,j;
   double x;
   if((InFilePtr = fopen(fname, "r")) == NULL)
      return FAILURE;
   fscanf(InFilePtr, "%d", &NumPatterns);    // Read # of patterns
   fscanf(InFilePtr, "%d", &SizeVector);     // Read dimension of vector
   fscanf(InFilePtr, "%d", &NumClusters);    // Read # of clusters for K-Means
   for (i=0; i<NumPatterns; i++) {           // For each vector
      for (j=0; j<SizeVector; j++) {         // create a pattern
         fscanf(InFilePtr,"%lg",&x);         // consisting of all elements
         Pattern[i][j]=x;
      } /* endfor */
   } /* endfor */
   fclose(InFilePtr);
   printf("Input patterns:\n");
   for (i=0; i<NumPatterns; i++) {           // (display assumes 2-D patterns)
      printf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]);
   } /* endfor */
   printf("\n--------------------\n");
   return SUCCESS;
}
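From the fscanf calls above, LoadPatterns() expects the file to begin with the number of patterns, the vector dimension, and the number of clusters, followed by the pattern values, all whitespace-separated. A hypothetical example file (the numbers are purely illustrative) describing 4 two-dimensional patterns to be grouped into 2 clusters:

4 2 2
1.0 1.0
1.5 2.0
8.0 8.5
9.0 8.0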
//***************************************************************************
// InitClusters                                                             *
// Arbitrarily assign a vector to each of the K clusters                    *
// We choose the first K vectors to do this                                 *
//***************************************************************************
void System::InitClusters(){
   int i,j;
   printf("Initial cluster centers:\n");
   for (i=0; i<NumClusters; i++) {
      Cluster[i].Member[0]=i;
      for (j=0; j<SizeVector; j++) {
         Cluster[i].Center[j]=Pattern[i][j];
      } /* endfor */
   } /* endfor */
   for (i=0; i<NumClusters; i++) {
      printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
   } /* endfor */
   printf("\n");
}
void System::RunKMeans(){
   int converged;
   int pass;
   pass=1;
   converged=FALSE;
   while (converged==FALSE) {
      printf("PASS=%d\n",pass++);
      DistributeSamples();               // Step 2
      converged=CalcNewClustCenters();   // Step 3; returns TRUE once no center moves
      ShowCenters();
   } /* endwhile */
}
double System::EucNorm(int p, int c){    // Calc squared Euclidean norm of the difference
   double dist,x;                        // between pattern vector p and cluster center c
   int i;
   char zout[128];
   strcpy(zout,"d=sqrt(");
   printf("The distance from pattern %d to cluster %d is calculated as:\n",p,c);
   dist=0;
   for (i=0; i<SizeVector; i++){
      x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
      strcat(zout,f2a(x,4));
      if (i==0)                          // (the printed expression assumes 2-D vectors)
         strcat(zout,"+");
      dist += x;
   } /* endfor */
   printf("%s)\n",zout);
   return dist;                          // note: squared distance (no sqrt taken here)
}
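Note that EucNorm() returns the squared distance

$$d^2(p,c) = \sum_{i=1}^{d} \big(\mathrm{Center}_{c,i} - \mathrm{Pattern}_{p,i}\big)^2$$

without taking the square root; FindClosestCluster() below prints sqrt(d) for display, and minimizing the squared distance selects the same cluster as minimizing the distance itself.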
int System::FindClosestCluster(int pat){
   int i, ClustID;
   double MinDist, d;
   MinDist =9.9e+99;
   ClustID=-1;
   for (i=0; i<NumClusters; i++) {
      d=EucNorm(pat,i);
      printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
      if (d<MinDist) {
         MinDist=d;
         ClustID=i;
      } /* endif */
   } /* endfor */
   if (ClustID<0) {
      printf("ERROR: no closest cluster found\n");
      exit(1);
   } /* endif */
   return ClustID;
}
void System::DistributeSamples(){
   int i,pat,Clustid,MemberIndex;
   // Clear membership list for all current clusters
   for (i=0; i<NumClusters; i++){
      Cluster[i].NumMembers=0;
   }
   for (pat=0; pat<NumPatterns; pat++) {
      // Find cluster center to which the pattern is closest
      Clustid= FindClosestCluster(pat);
      printf("pattern %d assigned to cluster %d\n\n",pat,Clustid);
      // Post this pattern to the cluster
      MemberIndex=Cluster[Clustid].NumMembers;
      Cluster[Clustid].Member[MemberIndex]=pat;
      Cluster[Clustid].NumMembers++;
   } /* endfor */
}
int System::CalcNewClustCenters(){
   int ConvFlag,VectID,i,j,k;
   double tmp[MAXVECTDIM];
   char xs[255];
   char ys[255];
   char nc1[20];
   char nc2[20];
   ConvFlag=TRUE;
   printf("The new cluster centers are now calculated as:\n");
   for (i=0; i<NumClusters; i++) {              // for each cluster
      itoa(Cluster[i].NumMembers,nc1,10);       // itoa() is non-standard but common on DOS/Windows compilers
      itoa(i,nc2,10);
      strcpy(xs,"Cluster Center");
      strcat(xs,nc2);
      strcat(xs,"(1/");
      strcpy(ys,"(1/");
      strcat(xs,nc1);
      strcat(ys,nc1);
      strcat(xs,")(");
      strcat(ys,")(");
      for (j=0; j<SizeVector; j++) {            // clear workspace
         tmp[j]=0.0;
      } /* endfor */
      for (j=0; j<Cluster[i].NumMembers; j++) { // traverse member vectors
         VectID=Cluster[i].Member[j];
         for (k=0; k<SizeVector; k++) {         // traverse elements of vector
            tmp[k] += Pattern[VectID][k];       // add (member) pattern element into temp
            if (k==0) {                         // (the printed expressions assume 2-D vectors)
               strcat(xs,f2a(Pattern[VectID][k],3));
            } else {
               strcat(ys,f2a(Pattern[VectID][k],3));
            } /* endif */
         } /* endfor */
         if (j<Cluster[i].NumMembers-1) {
            strcat(xs,"+");
            strcat(ys,"+");
         }
         else {
            strcat(xs,")");
            strcat(ys,")");
         }
      } /* endfor */
      for (k=0; k<SizeVector; k++) {            // traverse elements of vector
         tmp[k]=tmp[k]/Cluster[i].NumMembers;   // assumes the cluster is non-empty
         if (tmp[k] != Cluster[i].Center[k])
            ConvFlag=FALSE;                     // a center moved, so not converged yet
         Cluster[i].Center[k]=tmp[k];
      } /* endfor */
      printf("%s,\n",xs);
      printf("%s\n",ys);
   } /* endfor */
   return ConvFlag;
}
void System::ShowClusters(){
   int cl;
   for (cl=0; cl<NumClusters; cl++) {
      printf("\nCLUSTER %d ==>[%f,%f]\n",cl,Cluster[cl].Center[0],Cluster[cl].Center[1]);
   } /* endfor */
}
void System::SaveClusters(char *fname){
   // not implemented in this version
}
int main(int argc, char *argv[]) {
   System kmeans;
   if (argc<2) {
      printf("USAGE: KMEANS PATTERN_FILE\n");
      exit(0);
   }
   if (kmeans.LoadPatterns(argv[1])==FAILURE) {
      printf("UNABLE TO READ PATTERN_FILE: %s\n",argv[1]);
      exit(0);
   }
   kmeans.InitClusters();
   kmeans.RunKMeans();
   kmeans.ShowClusters();
   return 0;
}
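The listing was evidently written for an older DOS/Windows compiler (conio.h, fcvt, itoa). Assuming such a compiler, or after substituting portable equivalents (for example the hypothetical f2a_portable() sketched above and a similar snprintf-based replacement for itoa), a typical build and run might look like

cl kmeans.cpp
kmeans patterns.txt

where patterns.txt follows the input format shown after LoadPatterns().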
Source: 中国视觉网
KMEANS Clustering Program Based on MATLAB
The program and data are as follows:

% K-means main script
k=4;
x =[ 1.2126 2.1338 0.5115 0.2044
-0.9316 0.7634 0.0125 -0.2752
-2.9593 0.1813 -0.8833 0.8505
3.1104 -2.5393 -0.0588 0.1808
-3.1141 -0.1244 -0.6811 0.9891
-3.2008 0.0024 -1.2901 0.9748
-1.0777 1.1438 0.1996 0.0139
-2.7213 -0.1909 0.1184 0.1013
-1.1467 1.3820 0.1427 -0.2239
1.1497 1.9414 -0.3035 0.3464
2.6993 -2.2556 0.1637 -0.0139
-3.0311 0.1417 0.0888 0.1791
-2.8403 -0.1809 -0.0965 0.0817
1.0118 2.0372 0.1638 -0.0349
-0.8968 1.0260 -0.1013 0.2369
1.1112 1.8802 -0.0291 -0.1506
1.1907 2.2041 -0.1060 0.2167
-1.0114 0.8029 -0.1317 0.0153
-3.1715 0.1041 -0.3338 0.0321
0.9718 1.9634 0.0305 -0.3259
-1.0377 0.8889 -0.2834 0.2301
-0.8989 1.0185 -0.0289 0.0213
-2.9815 -0.4798 0.2245 0.3085
-0.8576 0.9231 -0.2752 -0.0091
-3.1356 0.0026 -1.2138 0.7733
3.4470 -2.2418 0.2014 -0.1556
2.9143 -1.7951 0.1992 -0.2146
3.4961 -2.4969 -0.0121 0.1315
-2.9341 -0.1071 -0.7712 0.8911
-2.8105 -0.0884 -0.0287 -0.1279
3.1006 -2.0677 -0.2002 -0.1303
0.8209 2.1724 0.1548 0.3516
-2.8500 0.3196 0.1359 -0.1179
-2.8679 0.1365 -0.5702 0.7626
-2.8245 -0.1312 0.0881 -0.1305
-0.8322 1.3014 -0.3837 0.2400
-2.6063 0.1431 0.1880 0.0487
-3.1341 -0.0854 -0.0359 -0.2080
0.6893 2.0854 -0.3250 -0.1007
1.0894 1.7271 -0.0176 0.6553
-2.9851 -0.0113 0.0666 -0.0802
1.0371 2.2724 0.1044 0.3982
-2.8032 -0.2737 -0.7391 1.0277
-2.6856 0.0619 -1.1066 1.0485
-2.9445 -0.1602 -0.0019 0.0093
1.2004 2.1302 -0.1650 0.3413
3.2505 -1.9279 0.4462 -0.2405
-1.2080 0.8222 0.1671 0.1576
-2.8274 0.1515 -0.9636 1.0675
2.8190 -1.8626 0.2702 0.0026
1.0507 1.7776 -0.1421 0.0999
-2.8946 0.1446 -0.1645 0.3071
-1.0105 1.0973 0.0241 0.1628
-2.9138 -0.3404 0.0627 0.1286
-3.0646 -0.0008 0.3819 -0.1541
1.2531 1.9830 -0.0774 0.2413
1.1486 2.0440 -0.0582 -0.0650
-3.1401 -0.1447 -0.6580 0.9562
-2.9591 0.1598 -0.6581 1.1937
-2.9219 -0.3637 -0.1538 -0.2085
2.8948 -2.2745 0.2332 -0.0312
-3.2972 -0.0219 -0.0288 -0.1436
-1.2737 0.7648 0.0643 0.0858
-1.0690 0.8108 -0.2723 0.3231
-0.5908 0.7508 -0.5456 0.0190
0.5808 2.0573 -0.1658 0.1709
2.8227 -2.2461 0.2255 -0.3684
0.6174 1.7654 -0.3999 0.4125
3.2587 -1.9310 0.2021 0.0800
1.0999 1.8852 -0.0475 -0.0585
-2.7395 0.2585 -0.8441 0.9987
-1.2223 1.0542 -0.2480 -0.2795
-2.9212 -0.0605 -0.0259 0.2591
3.1598 -2.2631 0.1746 0.1485
0.8476 1.8760 -0.2894 -0.0354
2.9205 -2.2418 0.4137 -0.2499
2.7656 -2.1768 0.0719 -0.1848
-0.8698 1.0249 -0.2084 -0.0008
-1.1444 0.7787 -0.4958 0.3676
-1.0711 1.0450 -0.0477 -0.4030
0.5350 1.8110 -0.0377 0.1622
0.9076 1.8845 -0.1121 0.5700
-2.7887 -0.2119 0.0566 0.0120
-1.2567 0.9274 0.1104 0.1581
-2.9946 -0.2086 -0.8169 0.6662
1.0536 1.9818 -0.0631 0.2581
-2.8465 -0.2222 0.2745 0.1997
-2.8516 0.1649 -0.7566 0.8616
-3.2470 0.0770 0.1173 -0.1092
-2.9322 -0.0631 -0.0062 -0.0511
-2.7919 0.0438 -0.1935 -0.5023
0.9894 1.9475 -0.0146 -0.0390
-2.9659 -0.1300 0.1144 0.3410
-2.7322 -0.0427 -1.0758 0.9718
-1.4852 0.8592 -0.0503 -0.1373
2.8845 -2.1465 -0.0533 -0.1044
-3.1470 0.0536 0.1073 0.3323
2.9423 -2.1572 0.0505 0.1180
-3.0683 0.3434 -0.6563 0.8960
1.3215 2.0951 -0.1557 0.3994
-0.7681 1.2075 -0.2781 0.2372
-0.6964 1.2360 -0.3342 0.1662
-0.6382 0.8204 -0.2587 0.3344
-3.0233 -0.1496 -0.2607 -0.0400
-0.8952 0.9872 0.0019 0.3138
-0.8172 0.6814 -0.0691 0.1009
-3.3032 0.0571 -0.0243 -0.1405
0.7810 1.9013 -0.3996 0.7374
-0.9030 0.8646 -0.1498 0.1112
-0.8461 0.9261 -0.1295 -0.0727
2.8182 -2.0818 -0.1430 -0.0547
2.9295 -2.3846 -0.0244 -0.1400
1.0587 2.2227 -0.1250 0.0957
3.0755 -1.7365 -0.0511 0.1500
-1.3076 0.8791 -0.3720 0.0331
-2.8252 -0.0366 -0.6790 0.7374
-2.6551 -0.1875 0.3222 0.0483
-2.9659 -0.1585 0.4013 -0.1402
-3.2859 -0.1546 0.0104 -0.1781
-0.6679 1.1999 0.1396 -0.3195
-1.0205 1.2226 0.1850 0.0050
-3.0091 -0.0186 -0.9111 0.9663
-3.0339 0.1377 -0.9662 1.0664
0.8952 1.9594 -0.3221 0.3579
-2.8481 0.1963 -0.1428 0.0382
1.0796 2.1353 -0.0792 0.6491
-0.8732 0.8985 -0.0049 0.0068
1.0620 2.1478 -0.1275 0.3553
3.4509 -1.9975 0.1285 -0.1575
-3.2280 -0.0640 -1.1513 0.8235
-0.6654 0.9402 0.0577 -0.0175
-3.2100 0.2762 -0.1053 0.0626
3.0793 -2.0043 0.2948 0.0411
1.3596 1.9481 -0.0167 0.3958
-3.1267 0.1801 0.2228 0.1179
-0.7979 0.9892 -0.2673 0.4734
2.5580 -1.7623 -0.1049 -0.0521
-0.9172 1.0621 -0.0826 0.1501
-0.7817 1.1658 0.1922 0.0803
3.1747 -2.1442 0.1472 -0.3411
2.8476 -1.8056 -0.0680 0.1536
-0.6175 1.4349 -0.1970 -0.1085
0.7308 1.9656 0.2602 0.2801
-1.0310 1.0553 -0.2928 -0.1647
-2.9251 -0.2095 0.0582 -0.1813
-0.9827 1.2720 -0.2225 0.2563
-1.0830 1.1158 -0.0405 -0.1181
-2.8744 0.0195 -0.3811 0.1455
3.1663 -1.9241 0.0455 0.1684
-1.0734 0.7681 -0.4725 -0.1976];
[n,d] = size(x);
bn = max(1,round(n/k*rand));  % the first random index falls within the first 1/k of the data (max() guards against an index of 0)
nc = [x(bn,:); x(2*bn,:); x(3*bn,:); x(4*bn,:)];  % initial cluster centers, one per cluster (k=4 here)
[cid,nr,centers] = kmeans(x,k,nc)  % call the kmeans function defined below
hold on  % keep all four classes on the same axes
for i = 1:n
if cid(i)==1
plot(x(i,1),x(i,2),'r*')  % plot class 1 in red
elseif cid(i)==2
plot(x(i,1),x(i,2),'b*')  % plot class 2 in blue
elseif cid(i)==3
plot(x(i,1),x(i,2),'g*')  % plot class 3 in green
elseif cid(i)==4
plot(x(i,1),x(i,2),'k*')  % plot class 4 in black
end
end
strt = 'Red * = class 1; blue * = class 2; green * = class 3; black * = class 4';
text(-4,-3.6,strt);

The kmeans function is as follows:

% BasicKMeans.m -- the k-means routine called above
function [cid,nr,centers] = kmeans(x,k,nc)
[n,d] = size(x);
% cid holds the cluster assignment of each point
cid = zeros(1,n);
% Make this different to get the loop started.
oldcid = ones(1,n);
% The number in each cluster.
nr = zeros(1,k);
% Set up maximum number of iterations.
maxgn= 100;
iter = 1;
while iter < maxgn
% compute the squared distance from each point to every cluster center
for i = 1:n
dist = sum((repmat(x(i,:),k,1)-nc).^2,2);
[m,ind] = min(dist);  % index of the nearest center
cid(i) = ind;         % store the current assignment in cid
end
for i = 1:k
% use the mean of each cluster's members as its new center
ind = find(cid==i);
nc(i,:) = mean(x(ind,:),1);  % ',1' keeps the row mean correct when a cluster has only one member
% count the number of points in each cluster
nr(i) = length(ind);
end
iter = iter + 1;
end
% Now check each observation to see if the error can be minimized some more.
% Loop through all points.
maxiter = 2;
iter = 1;
move = 1;
while iter < maxiter && move ~= 0
move = 0;
% re-examine every point, looking for a better assignment
for i = 1:n
dist = sum((repmat(x(i,:),k,1)-nc).^2,2);
r = cid(i);  % cluster the point currently belongs to
dadj = nr./(nr+1).*dist';  % squared distances weighted by n_j/(n_j+1)
[m,ind] = min(dadj);  % cluster whose weighted distance is smallest
if ind ~= r  % if it differs from the current cluster, move the point
cid(i) = ind;  % record the new assignment
ic = find(cid == ind);  % recompute the center of the cluster that gained the point
nc(ind,:) = mean(x(ic,:),1);
move = 1;
end
end
iter = iter+1;
end
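% Note on the weighting above: adding a point x to a cluster with n members and
% center c increases the within-cluster sum of squares by n/(n+1)*||x - c||^2,
% which is what dadj = nr./(nr+1).*dist' evaluates for each candidate cluster
% (for the point's current cluster this is only an approximation); a point is
% moved whenever another cluster offers a smaller weighted squared distance.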
centers = nc;
if move == 0
disp('No points were moved after the initial clustering procedure.')
else
disp('Some points were moved after the initial clustering procedure.')
end

[Figure: clustering result plot]
Source: 中国AI创业研发俱乐部
Help requested!
If my input is a color image, how can I use this algorithm to cluster the colors in the image?