Fuzzy Clustering using the Perl Data Language

Hello,

Here is a new version of my clustering program using the Perl Data Language.

Cheers,

Lino


#!/usr/bin/perl
use warnings;
use strict;

use PDL;

# fcm: fuzzy c-means implementation in Perl
# usage: $fcm [number_of_clusters] [fuzzification_factor]
# [max_iter] [tolerace]
# returns: prototypes, partition_matrix
#

#
# reading data
#

my ( @data, @tmp, $number_of_patterns, $max_row_number, $max_column_number );

while (defined(my $line = )) {
chomp ($line);
@tmp = split /\s+/, $line;
push @data, [ @tmp ];
}

$number_of_patterns = @data;

my $patterns = pdl(@data);

#
# assigning other variables
#
my $number_of_clusters = shift @ARGV;
my $fuzzification_factor = shift @ARGV;
my $max_iter = shift @ARGV;
my $tolerance = shift @ARGV;

unless (defined($number_of_clusters)) {
$number_of_clusters ||= 2;
}
unless (defined($fuzzification_factor)) {
$fuzzification_factor ||= 2.0;
}
unless (defined($max_iter)) { $max_iter ||= 40; }
unless (defined($tolerance)) { $tolerance ||= 0.00001; }

$number_of_clusters = abs($number_of_clusters);
$fuzzification_factor = abs($fuzzification_factor);
$max_iter = abs($max_iter);
$tolerance = abs($tolerance);

#
# initializing partition matrices
#
my $previous_partition_matrix;
my $current_partition_matrix =
initialize_partition_matrix($number_of_clusters, $number_of_patterns);

#
# output variables
#
my $prototypes;
my $performance_index;

#
# fuzzy c means implementation
#
$max_row_number = $number_of_patterns – 1;
$max_column_number = $number_of_clusters – 1;
my $iter = 0;
while (1) {
# computing each prototype
my $temporal_partition_matrix = $current_partition_matrix ** $fuzzification_factor;
my $temp_prototypes = mv( $temporal_partition_matrix x $patterns,1,0) / sumover($temporal_partition_matrix);
$prototypes = mv($temp_prototypes,1,0);

# copying partition matrix
$previous_partition_matrix = $current_partition_matrix->copy;

# updating the partition matrix
my $dist = zeroes $number_of_patterns, $number_of_clusters;
for my $i (0..$max_row_number){
for my $j (0..$max_column_number){
my $temp_distance = distance($patterns->slice(“:,$i”), $prototypes->slice(“:,$j”), \&euclidean );
$dist->set($i, $j, $temp_distance);
}
}

my $temp_variable = $dist ** (-2/($fuzzification_factor – 1));
$current_partition_matrix = $temp_variable / sumover(mv($temp_variable,1,0));

#
# Performance Index calculation
#
$temporal_partition_matrix = $current_partition_matrix ** $fuzzification_factor;
$performance_index = sum($temporal_partition_matrix * ( $dist ** 2 ));

# checking stop conditions
my $diff_partition_matrix = $current_partition_matrix – $previous_partition_matrix;
$iter++;
if ( ($diff_partition_matrix->max $max_iter) ) {
last;
}
print “iter = $iter\n”;
}

print “=======================================\n”;
print “clustering completed\n”;
print “performance index = $performance_index\n”;
print “prototypes = \n”;
print $prototypes;
print “current partition matrix = \n”;
print $current_partition_matrix;

# ================================
# initialize_partition_matrix
# partition_matrix =
# initialize_partition_matrix(
# num_clusters, num_patterns)
# ================================
sub initialize_partition_matrix {
my ($partition_matrix, $column_sum);

$partition_matrix = random($_[1],$_[0]);
$column_sum = sumover (mv($partition_matrix, 1, 0));#sum over columns
$partition_matrix /= $column_sum;

return $partition_matrix;
}

# ====================================
# compute distance between two vectors
# dist = distance( vector1, vector2, /&type_of_distance )
# ====================================
sub distance{
my ($vector1, $vector2, $type_of_distance) = @_;
my ($r) = $vector1 – $vector2;
$type_of_distance->($r);
}

sub manhattan{ sum(abs($_[0]));}
sub euclidean{ sum(sqrt($_[0] ** 2) );}
sub tschebyschev{ max(abs($_[0])); }

__DATA__
4.0 4.0
4.0 5.0
5.0 4.0
5.5 6.0
5.0 5.0
4.5 4.5
5.0 5.5
5.5 5.0
5.0 4.5
4.5 5.0
9.5 9.0
9.0 9.5
8.0 8.0
7.0 8.0
8.0 7.0
8.5 7.0
7.0 8.5
7.0 7.0
7.5 7.0
6.5 8.0
8.0 6.5
6.5 7.0
10.0 10.0
10.0 9.0
10.0 9.0
9.5 10.0
8.0 10.0
9.5 9.5
9.0 9.0
9.0 10.0

Advertisements
Explore posts in the same categories: Free and Open Source Software, Perl

%d bloggers like this: