線形クラス分類用トレーニングデータ生成

対象関数、データ範囲、ノイズはYAMLで読み込む。対象関数の次元とかノイズを抽象化して、イケてる感じに書けないかしら。

DataFactory.pl

use strict;
use warnings;
#use Data::Dumper;

# Entry point: generate 10000 samples from the factory configured by the YAML
# file named on the command line, printing one "x1, x2, label" line per
# emitted sample.
#
# Usage: perl DataFactory.pl CONFIG.yaml
my $config_path = $ARGV[0];
die "Usage: $0 CONFIG.yaml\n" unless defined $config_path;

# Direct method call instead of the indirect-object form "new DataFactory(...)",
# whose parsing depends on heuristics.
my $factory = DataFactory->new($config_path);

for my $i (1 .. 10000) {
  my $data = $factory->data();

  # Only samples labelled 0 are printed; samples labelled 1 are discarded.
  # NOTE(review): this filter looks like it may be left over from plotting a
  # single class -- confirm whether both classes should be emitted.
  next unless $data->[2] == 0;

  print join(", ", @$data), "\n";
}

##################################
# Training data generation class
#
# An instance is the hash loaded from a YAML configuration file, blessed into
# this package. The keys read by this class are:
#   grad, intercept  - slope and intercept of the boundary line
#   minx, maxx       - sampling range for the first coordinate (x1)
#   miny, maxy       - sampling range for the second coordinate (x2)
#   deviation        - passed to MyRandom::normal as the noise deviation
{
  package DataFactory;
  use YAML;
  use Carp ();
  use Scalar::Util ();

  # new($file)
  #   $file - path of the YAML configuration file.
  # Returns the loaded configuration hash blessed into the invoking class.
  # Croaks when the file name is missing, when the document is not a mapping,
  # or when any of the keys used by data() is absent, so that configuration
  # mistakes are reported here instead of surfacing as "uninitialized value"
  # warnings (and silently wrong samples) later on.
  sub new{
    my ($this, $stuff) = @_;

    Carp::croak("DataFactory->new: a YAML file name is required")
      unless defined $stuff;

    my $obj = YAML::LoadFile($stuff);

    Carp::croak("DataFactory->new: '$stuff' must contain a YAML mapping")
      unless (Scalar::Util::reftype($obj) || '') eq 'HASH';

    # The key list lives inside the sub on purpose: a file-scoped lexical
    # would only be initialised when this block is reached at run time,
    # which is after the main script has already used the class.
    my @missing = grep { !defined $obj->{$_} }
                  qw(grad intercept minx maxx miny maxy deviation);
    Carp::croak("DataFactory->new: '$stuff' is missing key(s): @missing")
      if @missing;

    return bless $obj, $this;
  }

  # data()
  # Draws one sample and returns an array reference [x1, x2, label].
  #   x1    - MyRandom::uniform() scaled into the minx..maxx range
  #   x2    - MyRandom::uniform() scaled into the miny..maxy range
  #   label - 1 when x2 lies above the noisy boundary
  #           grad * x1 + intercept + noise, otherwise 0
  sub data{
    my ($this) = @_;

    my $x1 = MyRandom::uniform() * ($this->{maxx} - $this->{minx}) + $this->{minx};
    my $x2 = MyRandom::uniform() * ($this->{maxy} - $this->{miny}) + $this->{miny};

    # Boundary value at x1, perturbed by noise centred on 0.
    my $y = $this->{grad} * $x1 + $this->{intercept} + MyRandom::normal(0.0, $this->{deviation});

    return [$x1, $x2, ($x2 > $y) ? 1 : 0];
  }
}

##################################
# Pseudo-random number generation
#
# Thin wrappers around the function interface of Math::Random::MT.
{
  package MyRandom;
  use Math::Random::MT qw(rand srand);
  use Math::Trig;

  # Seed the generator at compile time. As a plain run-time statement the
  # srand(1) call would only execute when control reaches this block, i.e.
  # after the main script at the top of the file has already drawn all of its
  # samples, so the fixed seed would have no effect on the output.
  BEGIN { Math::Random::MT::srand(1); }

  # uniform()
  # Returns the next value from Math::Random::MT::rand() called without
  # arguments.
  sub uniform{
    return Math::Random::MT::rand();
  }

  # normal($mean, $deviation)
  # Returns one value computed with the Box-Muller transform: a standard
  # normal variate scaled by $deviation and shifted by $mean.
  sub normal{
    my ($mean, $deviation) = @_;

    # rand() may return exactly 0, and log(0) is a fatal error in Perl.
    # 1 - rand() lies in (0, 1] and is distributed the same way, so the
    # logarithm is always defined.
    my $u1 = 1.0 - Math::Random::MT::rand();
    my $u2 = Math::Random::MT::rand();

    return $deviation * sqrt(-2.0 * log($u1)) * cos(2.0 * pi * $u2) + $mean;
  }
}

DataFactory.yaml

# Configuration read by DataFactory->new via YAML::LoadFile.
# The boundary used for labelling is: grad * x1 + intercept + noise.

# Slope of the boundary line.
grad : -2.0
# Intercept of the boundary line.
intercept : 10.0
# Lower and upper bound of the range x1 is drawn from.
minx : 0.0
maxx : 5.0
# Lower and upper bound of the range x2 is drawn from.
miny : 0.0
maxy : 10.0
# Deviation passed to MyRandom::normal (with mean 0.0) for the noise term.
deviation : 0.5