Hello,

Here is a new version of my clustering program using the Perl Data Language.

Cheers,

Lino

#!/usr/bin/perl

use warnings;

use strict;

use PDL;

# fcm: fuzzy c-means implementation in Perl

# usage: $fcm [number_of_clusters] [fuzzification_factor]

# [max_iter] [tolerace]

# returns: prototypes, partition_matrix

#

#

# reading data

#

my ( @data, @tmp, $number_of_patterns, $max_row_number, $max_column_number );

while (defined(my $line = )) {

chomp ($line);

@tmp = split /\s+/, $line;

push @data, [ @tmp ];

}

$number_of_patterns = @data;

my $patterns = pdl(@data);

#

# assigning other variables

#

my $number_of_clusters = shift @ARGV;

my $fuzzification_factor = shift @ARGV;

my $max_iter = shift @ARGV;

my $tolerance = shift @ARGV;

unless (defined($number_of_clusters)) {

$number_of_clusters ||= 2;

}

unless (defined($fuzzification_factor)) {

$fuzzification_factor ||= 2.0;

}

unless (defined($max_iter)) { $max_iter ||= 40; }

unless (defined($tolerance)) { $tolerance ||= 0.00001; }

$number_of_clusters = abs($number_of_clusters);

$fuzzification_factor = abs($fuzzification_factor);

$max_iter = abs($max_iter);

$tolerance = abs($tolerance);

#

# initializing partition matrices

#

my $previous_partition_matrix;

my $current_partition_matrix =

initialize_partition_matrix($number_of_clusters, $number_of_patterns);

#

# output variables

#

my $prototypes;

my $performance_index;

#

# fuzzy c means implementation

#

$max_row_number = $number_of_patterns – 1;

$max_column_number = $number_of_clusters – 1;

my $iter = 0;

while (1) {

# computing each prototype

my $temporal_partition_matrix = $current_partition_matrix ** $fuzzification_factor;

my $temp_prototypes = mv( $temporal_partition_matrix x $patterns,1,0) / sumover($temporal_partition_matrix);

$prototypes = mv($temp_prototypes,1,0);

# copying partition matrix

$previous_partition_matrix = $current_partition_matrix->copy;

# updating the partition matrix

my $dist = zeroes $number_of_patterns, $number_of_clusters;

for my $i (0..$max_row_number){

for my $j (0..$max_column_number){

my $temp_distance = distance($patterns->slice(“:,$i”), $prototypes->slice(“:,$j”), \&euclidean );

$dist->set($i, $j, $temp_distance);

}

}

my $temp_variable = $dist ** (-2/($fuzzification_factor – 1));

$current_partition_matrix = $temp_variable / sumover(mv($temp_variable,1,0));

#

# Performance Index calculation

#

$temporal_partition_matrix = $current_partition_matrix ** $fuzzification_factor;

$performance_index = sum($temporal_partition_matrix * ( $dist ** 2 ));

# checking stop conditions

my $diff_partition_matrix = $current_partition_matrix – $previous_partition_matrix;

$iter++;

if ( ($diff_partition_matrix->max $max_iter) ) {

last;

}

print “iter = $iter\n”;

}

print “=======================================\n”;

print “clustering completed\n”;

print “performance index = $performance_index\n”;

print “prototypes = \n”;

print $prototypes;

print “current partition matrix = \n”;

print $current_partition_matrix;

# ================================

# initialize_partition_matrix

# partition_matrix =

# initialize_partition_matrix(

# num_clusters, num_patterns)

# ================================

sub initialize_partition_matrix {

my ($partition_matrix, $column_sum);

$partition_matrix = random($_[1],$_[0]);

$column_sum = sumover (mv($partition_matrix, 1, 0));#sum over columns

$partition_matrix /= $column_sum;

return $partition_matrix;

}

# ====================================

# compute distance between two vectors

# dist = distance( vector1, vector2, /&type_of_distance )

# ====================================

sub distance{

my ($vector1, $vector2, $type_of_distance) = @_;

my ($r) = $vector1 – $vector2;

$type_of_distance->($r);

}

sub manhattan{ sum(abs($_[0]));}

sub euclidean{ sum(sqrt($_[0] ** 2) );}

sub tschebyschev{ max(abs($_[0])); }

__DATA__

4.0 4.0

4.0 5.0

5.0 4.0

5.5 6.0

5.0 5.0

4.5 4.5

5.0 5.5

5.5 5.0

5.0 4.5

4.5 5.0

9.5 9.0

9.0 9.5

8.0 8.0

7.0 8.0

8.0 7.0

8.5 7.0

7.0 8.5

7.0 7.0

7.5 7.0

6.5 8.0

8.0 6.5

6.5 7.0

10.0 10.0

10.0 9.0

10.0 9.0

9.5 10.0

8.0 10.0

9.5 9.5

9.0 9.0

9.0 10.0

## Recent Comments