線形クラス分類用トレーニングデータ生成
対象関数、データ範囲、ノイズはYAMLで読み込む。対象関数の次元とかノイズを抽象化して、イケてる感じに書けないかしら。
DataFactory.pl
use strict;
use warnings;

# Generates training data for a linear two-class classifier.
#
# Usage: perl DataFactory.pl CONFIG.yaml
#
# The YAML file supplies the decision line (grad, intercept), the sampling
# rectangle (minx, maxx, miny, maxy) and the standard deviation of the
# Gaussian noise added to the line (deviation).
#
# NOTE: the packages are defined *before* the main script on purpose.
# MyRandom seeds its generator with a run-time statement, so that statement
# has to execute before the first sample is drawn.

##################################
# Training data generation class
{
    package DataFactory;
    use YAML;

    # Constructor.
    #   $class - class name
    #   $stuff - path of the YAML configuration file
    # Returns the loaded configuration hash blessed into $class.
    # Dies when the file does not contain a mapping or lacks a required key.
    sub new {
        my ($class, $stuff) = @_;

        my $obj = YAML::LoadFile($stuff);
        die "$stuff: top-level YAML node must be a mapping\n"
            unless ref $obj eq 'HASH';

        # Fail early instead of silently treating a missing value as 0.
        my @missing = grep { !defined $obj->{$_} }
            qw(grad intercept minx maxx miny maxy deviation);
        die "$stuff: missing configuration key(s): @missing\n" if @missing;

        return bless $obj, $class;
    }

    # Draws one labelled sample.
    # Returns an array ref [$x1, $x2, $label]: ($x1, $x2) is uniform over
    # the configured rectangle, and $label is 1 when $x2 lies above the
    # noisy line grad * $x1 + intercept + N(0, deviation), otherwise 0.
    sub data {
        my ($this) = @_;

        my $x1 = $this->{minx}
            + MyRandom::uniform() * ($this->{maxx} - $this->{minx});
        my $x2 = $this->{miny}
            + MyRandom::uniform() * ($this->{maxy} - $this->{miny});

        my $boundary = $this->{grad} * $x1 + $this->{intercept}
            + MyRandom::normal(0.0, $this->{deviation});

        return [$x1, $x2, ($x2 > $boundary) ? 1 : 0];
    }
}

##################################
# Pseudo-random number generation
{
    package MyRandom;
    use Math::Random::MT qw(rand srand);
    use Math::Trig;

    # Fixed seed so that every run produces the same data set. This is an
    # ordinary run-time statement: it must run before any call to uniform()
    # or normal(), otherwise Math::Random::MT seeds itself automatically.
    Math::Random::MT::srand(1);

    # Returns a Mersenne Twister random number in [0, 1).
    sub uniform {
        return Math::Random::MT::rand();
    }

    # Returns a normally distributed random number (Box-Muller transform).
    #   $mean      - mean of the distribution
    #   $deviation - standard deviation of the distribution
    sub normal {
        my ($mean, $deviation) = @_;

        # rand() is in [0, 1); flip it to (0, 1] because log(0) is fatal.
        my $u1 = 1.0 - Math::Random::MT::rand();
        my $u2 = Math::Random::MT::rand();

        return $deviation * sqrt(-2.0 * log($u1)) * cos(2.0 * pi * $u2)
            + $mean;
    }
}

##################################
# Main script
my ($config_path) = @ARGV;
die "Usage: $0 CONFIG.yaml\n" unless defined $config_path;

my $factory = DataFactory->new($config_path);

for (1 .. 10000) {
    my $data = $factory->data();

    # Only the samples labelled 0 are written out, as "x1, x2, label".
    if ($data->[2] == 0) {
        print join(", ", @{$data}), "\n";
    }
}
DataFactory.yaml
grad : -2.0
intercept : 10.0
minx : 0.0
maxx : 5.0
miny : 0.0
maxy : 10.0
deviation : 0.5