diff --git a/StatisticalLearning/DataMining_EM/Client.java b/StatisticalLearning/DataMining_EM/Client.java index 044ebdb..d7f7b11 100644 --- a/StatisticalLearning/DataMining_EM/Client.java +++ b/StatisticalLearning/DataMining_EM/Client.java @@ -1,13 +1,15 @@ -package DataMining_EM; + /** - * EM期望最大化算法场景调用类 + * EM鏈熸湜鏈澶у寲绠楁硶鍦烘櫙璋冪敤绫 * @author lyq * */ public class Client { + public static void main(String[] args){ - String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt"; + + String filePath = "D:\\Eclipse_Workstation\\DataMining_EM\\src\\input.txt"; EMTool tool = new EMTool(filePath); tool.readDataFile(); diff --git a/StatisticalLearning/DataMining_EM/DrawPoints.java b/StatisticalLearning/DataMining_EM/DrawPoints.java new file mode 100644 index 0000000..96fffa1 --- /dev/null +++ b/StatisticalLearning/DataMining_EM/DrawPoints.java @@ -0,0 +1,177 @@ + +import java.awt.BasicStroke; +import java.awt.BorderLayout; +import java.awt.Canvas; +import java.awt.Color; +import java.awt.Graphics; +import java.awt.Graphics2D; +import java.awt.Image; +import java.util.ArrayList; + +import javax.swing.JFrame; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.WindowConstants; + +/** + * + * @author 妯婁繆褰 + * @Time 2014-1-1 + * @modify mindcont + * @date 2016-3-26 + * @since 閰嶅悎 EM 绠楁硶杩涜缁樺埗鍧愭爣骞舵墦鐐 + * + */ +public class DrawPoints extends JFrame { + + private static final long serialVersionUID = 1L; + private Image iBuffer; + + // 妗嗘灦璧风偣鍧愭爣銆佸楂 + private final int FRAME_X = 50; + private final int FRAME_Y = 50; + private final int FRAME_WIDTH = 500; + private final int FRAME_HEIGHT = 500; + + // 鍘熺偣鍧愭爣 + private final int Origin_X = FRAME_X + 40; + private final int Origin_Y = FRAME_Y + FRAME_HEIGHT - 30; + + // X杞淬乊杞寸粓鐐瑰潗鏍 + private final int XAxis_X = FRAME_X + FRAME_WIDTH - 30; + private final int XAxis_Y = Origin_Y; + private final int YAxis_X = Origin_X; + private final int YAxis_Y = FRAME_Y + 30; + + //鍧愭爣杞撮棿闅 + private final int INTERVAL = 20; + + + // 淇濆瓨Point瀵硅薄鐨刋 Y 鍧愭爣 + private int[] Coordinate_X = new int [50]; + private int[] Coordinate_Y = new int [50]; + + + public DrawPoints(ArrayList points) { + super("EM Demo"); + this.setDefaultCloseOperation(EXIT_ON_CLOSE); + this.setBounds(300, 100, 600, 650); + + // 娣诲姞鎺у埗鍒版鏋跺寳閮ㄥ尯 + JPanel topPanel = new JPanel(); + this.add(topPanel, BorderLayout.NORTH); + + // 鏂囨湰妗 + topPanel.add(new JLabel("EM Demo", JLabel.CENTER)); + + //鍧愭爣鐐规暟鎹垪琛ㄤ腑璇诲彇 X杞 Y杞寸殑鍧愭爣鍊 鍒嗗埆璧嬪 缁 Coordinate_X Coordinate_Y + for(int i=0;i YAxis_Y; i -= INTERVAL, j += 20) { + g.drawString(j + "", Origin_X - 20, i + 3); + } + g.drawString("Y杞", YAxis_X - 5, YAxis_Y - 5); + + // 鐢荤綉鏍肩嚎 + g.setColor(Color.BLACK); + // 妯嚎 + for (int i = Origin_Y - INTERVAL; i > YAxis_Y; i -= INTERVAL) { + g.drawLine(Origin_X, i, Origin_X + 21 * INTERVAL, i); + } + // 绔栫嚎 + for (int i = Origin_X + INTERVAL; i < XAxis_X; i += INTERVAL) { + g.drawLine(i, Origin_Y, i, Origin_Y - 21 * INTERVAL); + + } + + //璁剧疆鐢荤瑪棰滆壊涓虹豢鑹 + g.setColor(Color.green); + g2D.setStroke(new BasicStroke(Float.parseFloat("5.0f"))); + //鐢诲嚭 绨囩偣 + g.drawOval(Origin_X+Coordinate_X[0], Origin_Y -Coordinate_Y[0], 5, 5); + g.drawOval(Origin_X+Coordinate_X[1], Origin_Y -Coordinate_Y[1], 5, 5); + + //璁剧疆鐢荤瑪棰滆壊 涓虹孩鑹 + g.setColor(Color.red); + g2D.setStroke(new BasicStroke(Float.parseFloat("5.0f"))); + //鐢诲叾浣欏悇鐐 + for (int i = 2; i < Coordinate_X.length ;i++) { + g.drawLine(Origin_X+ Coordinate_X[i], + Origin_Y - Coordinate_Y[i], + Origin_X+ Coordinate_X[i], + Origin_Y - Coordinate_Y[i]); + } + + + } + + // 鍙岀紦鍐叉妧鏈В鍐冲浘鍍忔樉绀洪棶棰 + public void update(Graphics g) { + if (iBuffer == null) { + iBuffer = createImage(this.getSize().width, + this.getSize().height); + + } + Graphics gBuffer = iBuffer.getGraphics(); + gBuffer.setColor(getBackground()); + gBuffer.fillRect(0, 0, this.getSize().width, this.getSize().height); + paint(gBuffer); + gBuffer.dispose(); + g.drawImage(iBuffer, 0, 0, this); + } + } + + +} + diff --git a/StatisticalLearning/DataMining_EM/EMTool.java b/StatisticalLearning/DataMining_EM/EMTool.java index 4014bc2..ba6158a 100644 --- a/StatisticalLearning/DataMining_EM/EMTool.java +++ b/StatisticalLearning/DataMining_EM/EMTool.java @@ -1,5 +1,3 @@ -package DataMining_EM; - import java.io.BufferedReader; import java.io.File; import java.io.FileReader; @@ -8,32 +6,37 @@ import java.util.ArrayList; /** - * EM最大期望算法工具类 + * EM鏈澶ф湡鏈涚畻娉曞伐鍏风被 * * @author lyq + * @modify mindcont + * @date 2016-3-26 + * @since 鏂板 鍧愭爣绯 鎵撶偣 * */ public class EMTool { - // 测试数据文件地址 + // 娴嬭瘯鏁版嵁鏂囦欢鍦板潃 private String dataFilePath; - // 测试坐标点数据 + // 娴嬭瘯鍧愭爣鐐规暟鎹 private String[][] data; - // 测试坐标点数据列表 + // 娴嬭瘯鍧愭爣鐐规暟鎹垪琛 private ArrayList pointArray; - // 目标C1点 + // 鐩爣C1鐐 private Point p1; - // 目标C2点 + // 鐩爣C2鐐 private Point p2; - + public EMTool(String dataFilePath) { this.dataFilePath = dataFilePath; pointArray = new ArrayList<>(); } /** - * 从文件中读取数据 + * 浠庢枃浠朵腑璇诲彇鏁版嵁 */ public void readDataFile() { + + File file = new File(dataFilePath); ArrayList dataArray = new ArrayList(); @@ -53,64 +56,65 @@ public void readDataFile() { data = new String[dataArray.size()][]; dataArray.toArray(data); - // 开始时默认取头2个点作为2个簇中心 + // 寮濮嬫椂榛樿鍙栧ご2涓偣浣滀负2涓皣涓績 p1 = new Point(Integer.parseInt(data[0][0]), Integer.parseInt(data[0][1])); p2 = new Point(Integer.parseInt(data[1][0]), Integer.parseInt(data[1][1])); - Point p; for (String[] array : data) { - // 将数据转换为对象加入列表方便计算 + // 灏嗘暟鎹浆鎹负瀵硅薄鍔犲叆鍒楄〃鏂逛究璁$畻 p = new Point(Integer.parseInt(array[0]), Integer.parseInt(array[1])); pointArray.add(p); } + } + /** - * 计算坐标点对于2个簇中心点的隶属度 + * 璁$畻鍧愭爣鐐瑰浜2涓皣涓績鐐圭殑闅跺睘搴 * * @param p - * 待测试坐标点 + * 寰呮祴璇曞潗鏍囩偣 */ private void computeMemberShip(Point p) { - // p点距离第一个簇中心点的距离 + // p鐐硅窛绂荤涓涓皣涓績鐐圭殑璺濈 double distance1 = 0; - // p距离第二个中心点的距离 + // p璺濈绗簩涓腑蹇冪偣鐨勮窛绂 double distance2 = 0; - // 用欧式距离计算 + // 鐢ㄦ寮忚窛绂昏绠 distance1 = Math.pow(p.getX() - p1.getX(), 2) + Math.pow(p.getY() - p1.getY(), 2); distance2 = Math.pow(p.getX() - p2.getX(), 2) + Math.pow(p.getY() - p2.getY(), 2); - // 计算对于p1点的隶属度,与距离成反比关系,距离靠近越小,隶属度越大,所以要用大的distance2另外的距离来表示 - p.setMemberShip1(distance2 / (distance1 + distance2)); - // 计算对于p2点的隶属度 - p.setMemberShip2(distance1 / (distance1 + distance2)); + // 璁$畻瀵逛簬p1鐐圭殑闅跺睘搴︼紝涓庤窛绂绘垚鍙嶆瘮鍏崇郴锛岃窛绂婚潬杩戣秺灏忥紝闅跺睘搴﹁秺澶э紝鎵浠ヨ鐢ㄥぇ鐨刣istance2鍙﹀鐨勮窛绂绘潵琛ㄧず + p.setMemberShip1((int) (distance2 / (distance1 + distance2))); + // 璁$畻瀵逛簬p2鐐圭殑闅跺睘搴 + p.setMemberShip2((int) (distance1 / (distance1 + distance2))); } /** - * 执行期望最大化步骤 + * 鎵ц鏈熸湜鏈澶у寲姝ラ */ public void exceptMaxStep() { - // 新的优化过的簇中心点 - double p1X = 0; - double p1Y = 0; - double p2X = 0; - double p2Y = 0; - double temp1 = 0; - double temp2 = 0; - // 误差值 - double errorValue1 = 0; - double errorValue2 = 0; - // 上次更新的簇点坐标 + // 鏂扮殑浼樺寲杩囩殑绨囦腑蹇冪偣 + int p1X = 0; + int p1Y = 0; + int p2X = 0; + int p2Y = 0; + int temp1 = 0; + int temp2 = 0; + // 璇樊鍊 + int errorValue1 = 0; + int errorValue2 = 0; + // 涓婃鏇存柊鐨勭皣鐐瑰潗鏍 Point lastP1 = null; Point lastP2 = null; - // 当开始计算的时候,或是中心点的误差值超过1的时候都需要再次迭代计算 + // 褰撳紑濮嬭绠楃殑鏃跺欙紝鎴栨槸涓績鐐圭殑璇樊鍊艰秴杩1鐨勬椂鍊欓兘闇瑕佸啀娆¤凯浠h绠 while (lastP1 == null || errorValue1 > 1.0 || errorValue2 > 1.0) { for (Point p : pointArray) { computeMemberShip(p); @@ -126,7 +130,7 @@ public void exceptMaxStep() { lastP1 = new Point(p1.getX(), p1.getY()); lastP2 = new Point(p2.getX(), p2.getY()); - // 套公式计算新的簇中心点坐标,最最大化处理 + // 濂楀叕寮忚绠楁柊鐨勭皣涓績鐐瑰潗鏍,鏈鏈澶у寲澶勭悊 p1.setX(p1X / temp1); p1.setY(p1Y / temp1); p2.setX(p2X / temp2); @@ -139,8 +143,10 @@ public void exceptMaxStep() { } System.out.println(MessageFormat.format( - "簇中心节点p1({0}, {1}), p2({2}, {3})", p1.getX(), p1.getY(), + "绨囦腑蹇冭妭鐐筽1({0}, {1}), p2({2}, {3})", p1.getX(), p1.getY(), p2.getX(), p2.getY())); + + new DrawPoints(pointArray);//璋冪敤DrawPoints绫 缁樺埗鍧愭爣绯诲苟鎵撶偣 } - + } diff --git a/StatisticalLearning/DataMining_EM/Point.java b/StatisticalLearning/DataMining_EM/Point.java index d4f3ae6..59f3bc9 100644 --- a/StatisticalLearning/DataMining_EM/Point.java +++ b/StatisticalLearning/DataMining_EM/Point.java @@ -1,55 +1,55 @@ -package DataMining_EM; /** - * 坐标点类 + * 鍧愭爣鐐圭被 * * @author lyq * */ public class Point { - // 坐标点横坐标 - private double x; - // 坐标点纵坐标 - private double y; - // 坐标点对于P1的隶属度 - private double memberShip1; - // 坐标点对于P2的隶属度 - private double memberShip2; - - public Point(double d, double e) { + + // 鍧愭爣鐐规í鍧愭爣 + private int x; + // 鍧愭爣鐐圭旱鍧愭爣 + private int y; + // 鍧愭爣鐐瑰浜嶱1鐨勯毝灞炲害 + private int memberShip1; + // 鍧愭爣鐐瑰浜嶱2鐨勯毝灞炲害 + private int memberShip2; + + public Point(int d, int e) { this.x = d; this.y = e; } - public double getX() { + public int getX() { return x; } - public void setX(double x) { + public void setX(int x) { this.x = x; } - public double getY() { + public int getY() { return y; } - public void setY(double y) { + public void setY(int y) { this.y = y; } - public double getMemberShip1() { + public int getMemberShip1() { return memberShip1; } - public void setMemberShip1(double memberShip1) { + public void setMemberShip1(int memberShip1) { this.memberShip1 = memberShip1; } - public double getMemberShip2() { + public int getMemberShip2() { return memberShip2; } - public void setMemberShip2(double memberShip2) { + public void setMemberShip2(int memberShip2) { this.memberShip2 = memberShip2; } diff --git a/StatisticalLearning/DataMining_EM/input.txt b/StatisticalLearning/DataMining_EM/input.txt index bd72197..5d4a249 100644 --- a/StatisticalLearning/DataMining_EM/input.txt +++ b/StatisticalLearning/DataMining_EM/input.txt @@ -1,6 +1,6 @@ -3 3 -4 10 -9 6 -14 8 -18 11 -21 7 \ No newline at end of file +230 230 +240 100 +250 250 +270 290 +190 105 +280 350 \ No newline at end of file