diff --git a/.rspec b/.rspec index 8c18f1a..333d91e 100644 --- a/.rspec +++ b/.rspec @@ -1,2 +1,3 @@ --format documentation --color +--order rand diff --git a/lib/weka/classifiers/evaluation.rb b/lib/weka/classifiers/evaluation.rb index 7e8e9c1..f8cfa2d 100644 --- a/lib/weka/classifiers/evaluation.rb +++ b/lib/weka/classifiers/evaluation.rb @@ -11,6 +11,7 @@ class Evaluation alias summary to_summary_string alias class_details to_class_details_string + alias confusion_matrix to_matrix_string alias instance_count num_instances alias correct_count correct diff --git a/lib/weka/core/attribute.rb b/lib/weka/core/attribute.rb index 2f6e383..93dfaa5 100644 --- a/lib/weka/core/attribute.rb +++ b/lib/weka/core/attribute.rb @@ -1,12 +1,73 @@ +require 'weka/concerns/persistent' + module Weka module Core java_import 'weka.core.Attribute' class Attribute + include Weka::Concerns::Persistent + + TYPES = %i(numeric nominal string date).freeze + + class << self + def new_numeric(name) + new(name.to_s) + end + + def new_nominal(name, values) + new(name.to_s, Array(values).map(&:to_s)) + end + + def new_date(name, format) + new(name.to_s, format.to_s) + end + + ## + # Creates a new Attribute instance of type string. + # + # The java class defines the same constructor: + # Attribute(java.lang.String, java.util.List) + # for nominal and string attributes and handles the type internally + # based on the second argument. + # + # In Java you would write following code to create a string Attribute: + # Attribute attribute = new Attribute("name", (FastVector) null); + # + # When we use a similar approach in JRuby: + # attribute = Attribute.new('name', nil) + # then a Java::JavaLang::NullPointerException is thrown. 
+ # + # Thus, we use reflection here and call the constructor explicitly, see + # https://github.com/jruby/jruby/wiki/CallingJavaFromJRuby#constructors + # + # The object returned from Java constructor only has class + # Java::JavaObject so we need to cast it to the proper class + # + # See also: + # https://stackoverflow.com/questions/1792495/casting-objects-in-jruby + def new_string(name) + constructor = Attribute.java_class.declared_constructor( + java.lang.String, + java.util.List + ) + + constructor.new_instance(name.to_s, nil).to_java(Attribute) + end + end + def values enumerate_values.to_a end + ## + # Returns the string representation of the attribute's type. + # Overwrites the weka.core.Attribute type Java method, which returns an + # integer representation of the type based on the defined type constants. + def type + self.class.type_to_string(self) + end + + ## # The order of the if statements is important here, because a date is also # a numeric. def internal_value_of(value) @@ -14,10 +75,8 @@ def internal_value_of(value) return Float::NAN if [nil, '?'].include?(value) return parse_date(value.to_s) if date? return value.to_f if numeric? - return index_of_value(value.to_s) if nominal? + return index_of_value(value.to_s) if nominal? || string? end end - - Weka::Core::Attribute.__persistent__ = true end end diff --git a/lib/weka/core/dense_instance.rb b/lib/weka/core/dense_instance.rb index 8d9d6ad..2332b23 100644 --- a/lib/weka/core/dense_instance.rb +++ b/lib/weka/core/dense_instance.rb @@ -61,7 +61,7 @@ def value_from(value, index) format_date(value, attribute.date_format) elsif attribute.numeric? value - elsif attribute.nominal? + elsif attribute.nominal? || attribute.string? 
attribute.value(value) end end diff --git a/lib/weka/core/instances.rb b/lib/weka/core/instances.rb index 747e0a3..f098c51 100644 --- a/lib/weka/core/instances.rb +++ b/lib/weka/core/instances.rb @@ -62,9 +62,29 @@ def add_attributes(&block) self end - alias with_attributes add_attributes - alias instances_count num_instances - alias attributes_count num_attributes + alias with_attributes add_attributes + alias instances_count num_instances + alias attributes_count num_attributes + alias has_string_attribute? check_for_string_attributes + + ## Check if the instances has any attribute of the given type + # @param [String, Symbol, Integer] type type of the attribute to check + # String and Symbol argument are converted to corresponding type + # defined in Weka::Core::Attribute + # + # @example Passing String + # instances.has_attribute_type?('string') + # instances.has_attribute_type?('String') + # + # @example Passing Symbol + # instances.has_attribute_type?(:String) + # + # @example Passing Integer + # instances.has_attribute_type?(Attribute::STRING) + def has_attribute_type?(type) + type = map_attribute_type(type) unless type.is_a?(Integer) + check_for_attribute_type(type) + end def each if block_given? 
@@ -120,25 +140,25 @@ def to_c45(file) end def numeric(name, class_attribute: false) - attribute = Attribute.new(name.to_s) + attribute = Attribute.new_numeric(name) add_attribute(attribute) self.class_attribute = name if class_attribute end def nominal(name, values:, class_attribute: false) - attribute = Attribute.new(name.to_s, Array(values).map(&:to_s)) + attribute = Attribute.new_nominal(name, values) add_attribute(attribute) self.class_attribute = name if class_attribute end def string(name, class_attribute: false) - attribute = Attribute.new(name.to_s, []) + attribute = Attribute.new_string(name) add_attribute(attribute) self.class_attribute = name if class_attribute end def date(name, format: 'yyyy-MM-dd HH:mm', class_attribute: false) - attribute = Attribute.new(name.to_s, format) + attribute = Attribute.new_date(name, format) add_attribute(attribute) self.class_attribute = name if class_attribute end @@ -226,9 +246,26 @@ def instance_from(instance_or_values, weight:) instance_or_values else data = internal_values_of(instance_or_values) + + # string attribute has unlimited range of possible values. + # Check the return index, if it is -1 then add the value to + # the attribute before creating the instance + data.map!.with_index do |value, index| + if value == -1 && attribute(index).string? 
+ attribute(index).add_string_value(instance_or_values[index].to_s) + else + value + end + end + DenseInstance.new(data, weight: weight) end end + + def map_attribute_type(type) + return -1 unless Attribute::TYPES.include?(type.downcase.to_sym) + Attribute.const_get(type.upcase) + end end Java::WekaCore::Instances.__persistent__ = true diff --git a/spec/classifiers/evaluation_spec.rb b/spec/classifiers/evaluation_spec.rb index 2bb9f54..601b053 100644 --- a/spec/classifiers/evaluation_spec.rb +++ b/spec/classifiers/evaluation_spec.rb @@ -27,7 +27,8 @@ unclassified_count: :unclassified, unclassified_percentage: :pct_unclassified, weighted_f_measure: :weighted_fmeasure, - cumulative_margin_distribution: :toCumulativeMarginDistributionString + cumulative_margin_distribution: :toCumulativeMarginDistributionString, + confusion_matrix: :to_matrix_string }.each do |alias_method, method| it "defines the alias ##{alias_method} for ##{method}" do expect(subject.method(method)).to eq subject.method(alias_method) diff --git a/spec/core/attribute_spec.rb b/spec/core/attribute_spec.rb index 719b816..53ac527 100644 --- a/spec/core/attribute_spec.rb +++ b/spec/core/attribute_spec.rb @@ -1,12 +1,63 @@ require 'spec_helper' describe Weka::Core::Attribute do - let(:values) { %w(yes no) } - subject { Weka::Core::Attribute.new('class', values) } + let(:values) { %w(true false) } + let(:name) { 'name' } + let(:format) { 'yyyy-MM-dd HH:mm' } + + subject { Weka::Core::Attribute.new(name, values) } it { is_expected.to respond_to :values } it { is_expected.to respond_to :internal_value_of } + describe '.new_numeric' do + subject { Weka::Core::Attribute.new_numeric(name) } + + it 'returns a numeric Attribute' do + expect(subject.numeric?).to be true + end + + it 'returns an Attribute with the given name' do + expect(subject.name).to eq name + end + end + + describe '.new_nominal' do + subject { Weka::Core::Attribute.new_nominal(name, values) } + + it 'returns a nominal Attribute' do + 
expect(subject.nominal?).to be true + end + + it 'returns an Attribute with the given name' do + expect(subject.name).to eq name + end + end + + describe '.new_date' do + subject { Weka::Core::Attribute.new_date(name, format) } + + it 'returns a date Attribute' do + expect(subject.date?).to be true + end + + it 'returns an Attribute with the given name' do + expect(subject.name).to eq name + end + end + + describe '.new_string' do + subject { Weka::Core::Attribute.new_string(name) } + + it 'returns a string Attribute' do + expect(subject.string?).to be true + end + + it 'returns an Attribute with the given name' do + expect(subject.name).to eq name + end + end + describe '#values' do it 'returns an array of the values' do expect(subject.values).to eq values @@ -14,85 +65,171 @@ end describe '#internal_value_of' do - context 'a numeric attribute' do - let(:attribute) { Weka::Core::Attribute.new('numeric attribute') } + context 'for a numeric attribute' do + subject { Weka::Core::Attribute.new_numeric(name) } it 'returns the value as a float' do - expect(attribute.internal_value_of(3.5)).to eq 3.5 + expect(subject.internal_value_of(3.5)).to eq 3.5 end it 'returns the value as a float if given as string' do - expect(attribute.internal_value_of('3.5')).to eq 3.5 + expect(subject.internal_value_of('3.5')).to eq 3.5 end it 'returns NaN if the given value is Float::NAN' do - expect(attribute.internal_value_of(Float::NAN)).to be Float::NAN + expect(subject.internal_value_of(Float::NAN)).to be Float::NAN end it 'returns NaN if the given value is nil' do - expect(attribute.internal_value_of(nil)).to be Float::NAN + expect(subject.internal_value_of(nil)).to be Float::NAN end it 'returns NaN if the given value is "?"' do - expect(attribute.internal_value_of('?')).to be Float::NAN + expect(subject.internal_value_of('?')).to be Float::NAN end end - context 'a nominal attribute' do - let(:attribute) { Weka::Core::Attribute.new('class', %w(true false)) } + context 'for a nominal 
attribute' do + subject { Weka::Core::Attribute.new_nominal(name, values) } it 'returns the correct internal index' do - expect(attribute.internal_value_of('true')).to eq 0 - expect(attribute.internal_value_of('false')).to eq 1 + expect(subject.internal_value_of('true')).to eq 0 + expect(subject.internal_value_of('false')).to eq 1 end it 'returns the correct internal index as given as a non-String' do - expect(attribute.internal_value_of(true)).to eq 0 - expect(attribute.internal_value_of(false)).to eq 1 + expect(subject.internal_value_of(true)).to eq 0 + expect(subject.internal_value_of(false)).to eq 1 - expect(attribute.internal_value_of(:true)).to eq 0 - expect(attribute.internal_value_of(:false)).to eq 1 + expect(subject.internal_value_of(:true)).to eq 0 + expect(subject.internal_value_of(:false)).to eq 1 end it 'returns NaN if the given value is Float::NAN' do - expect(attribute.internal_value_of(Float::NAN)).to be Float::NAN + expect(subject.internal_value_of(Float::NAN)).to be Float::NAN end it 'returns NaN if the given value is nil' do - expect(attribute.internal_value_of(nil)).to be Float::NAN + expect(subject.internal_value_of(nil)).to be Float::NAN end it 'returns NaN if the given value is "?"' do - expect(attribute.internal_value_of('?')).to be Float::NAN + expect(subject.internal_value_of('?')).to be Float::NAN end end - context 'a data attribute' do - let(:attribute) { Weka::Core::Attribute.new('date', 'yyyy-MM-dd HH:mm') } + context 'for a data attribute' do let(:datetime) { '2015-12-24 11:11' } let(:unix_timestamp) { 1_450_955_460_000.0 } + subject { Weka::Core::Attribute.new_date(name, format) } + before do - allow(attribute) + allow(subject) .to receive(:parse_date) .with(datetime) .and_return(unix_timestamp) end it 'returns the right date timestamp value' do - expect(attribute.internal_value_of(datetime)).to eq unix_timestamp + expect(subject.internal_value_of(datetime)).to eq unix_timestamp end it 'returns NaN if the given value is Float::NAN' 
do - expect(attribute.internal_value_of(Float::NAN)).to be Float::NAN + expect(subject.internal_value_of(Float::NAN)).to be Float::NAN end it 'returns NaN if the given value is nil' do - expect(attribute.internal_value_of(nil)).to be Float::NAN + expect(subject.internal_value_of(nil)).to be Float::NAN end it 'returns NaN if the given value is "?"' do - expect(attribute.internal_value_of('?')).to be Float::NAN + expect(subject.internal_value_of('?')).to be Float::NAN + end + end + + context 'for a string attribute' do + let(:string_values) { %w(first_string second_string) } + let(:phantom_string_value) { 'i_do_not_exist' } + + subject do + attribute = Weka::Core::Attribute.new_string(name) + string_values.each { |value| attribute.add_string_value(value) } + attribute + end + + it 'returns the correct internal index' do + expect(subject.internal_value_of(string_values[0])).to eq 0 + expect(subject.internal_value_of(string_values[1])).to eq 1 + end + + it 'returns -1 as internal index for non-existent string values' do + expect(subject.internal_value_of(phantom_string_value)).to eq(-1) + end + + it 'returns the correct internal index as given as a non-String' do + expect(subject.internal_value_of(:first_string)).to eq 0 + expect(subject.internal_value_of(:second_string)).to eq 1 + end + + it 'returns -1 as internal index for non-existent non-String values' do + expect(subject.internal_value_of(:phantom_string_value)).to eq(-1) + end + + it 'returns NaN if the given value is Float::NAN' do + expect(subject.internal_value_of(Float::NAN)).to be Float::NAN + end + + it 'returns NaN if the given value is nil' do + expect(subject.internal_value_of(nil)).to be Float::NAN + end + + it 'returns NaN if the given value is "?"' do + expect(subject.internal_value_of('?')).to be Float::NAN + end + end + end + + describe '#type' do + context 'for a numeric attribute' do + subject { Weka::Core::Attribute.new_numeric(name) } + + it 'returns "numeric"' do + expect(subject.type).to eq 
'numeric' + end + end + + context 'for a nominal attribute' do + subject { Weka::Core::Attribute.new_nominal(name, values) } + + it 'returns "nominal"' do + expect(subject.type).to eq 'nominal' + end + end + + context 'for a string attribute' do + subject { Weka::Core::Attribute.new_string(name) } + + it 'returns "string"' do + expect(subject.type).to eq 'string' + end + end + + context 'for a data attribute' do + let(:datetime) { '2015-12-24 11:11' } + let(:unix_timestamp) { 1_450_955_460_000.0 } + + subject { Weka::Core::Attribute.new_date(name, format) } + + before do + allow(subject) + .to receive(:parse_date) + .with(datetime) + .and_return(unix_timestamp) + end + + it 'returns "date"' do + expect(subject.type).to eq 'date' end end end diff --git a/spec/core/dense_instance_spec.rb b/spec/core/dense_instance_spec.rb index 966b362..995194d 100644 --- a/spec/core/dense_instance_spec.rb +++ b/spec/core/dense_instance_spec.rb @@ -26,7 +26,7 @@ describe 'instantiation' do describe 'with an Integer value' do - it 'creates a instance with only missing values' do + it 'creates an instance with only missing values' do values = Weka::Core::DenseInstance.new(2).values expect(values).to eq ['?', '?'] end @@ -65,6 +65,29 @@ expect(subject.to_a).to eq values end end + + context 'with string attribute' do + let(:values) do + ['overcast', + 'Wind increasing. A few clouds from time to time. High 32F. 
Winds WNW at 20 to 30 mph.', + 15.0, + 40.0, + 'TRUE', + 'no' + ] + end + + subject do + instances = load_instances('weather.string.arff') + instances.add_instance(values) + instances.class_attribute = :play + instances.instances.last + end + + it 'returns an Array with the values of the instance' do + expect(subject.to_a).to eq values + end + end end describe '#attributes' do diff --git a/spec/core/instances_spec.rb b/spec/core/instances_spec.rb index 5d7639b..35e4e0b 100644 --- a/spec/core/instances_spec.rb +++ b/spec/core/instances_spec.rb @@ -37,13 +37,14 @@ let(:instances) { described_class.new } { - numeric: :add_numeric_attribute, - string: :add_string_attribute, - nominal: :add_nominal_attribute, - date: :add_date_attribute, - add_attributes: :with_attributes, - instances_count: :num_instances, - attributes_count: :num_attributes + numeric: :add_numeric_attribute, + string: :add_string_attribute, + nominal: :add_nominal_attribute, + date: :add_date_attribute, + add_attributes: :with_attributes, + instances_count: :num_instances, + attributes_count: :num_attributes, + has_string_attribute?: :check_for_string_attributes }.each do |method, alias_method| it "defines the alias ##{alias_method} for ##{method}" do expect(instances.method(method)).to eq instances.method(alias_method) @@ -168,11 +169,18 @@ end end - xdescribe '#string' do + describe '#string' do it 'can be used to add a string attribute' do instances.string(name) expect(instances.attributes.first).to be_string end + + context 'with the class_attribute option' do + it 'defines the attribute as class attribute' do + instances.string(name, class_attribute: true) + expect(instances.class_attribute.name).to eq name + end + end end describe '#nominal' do @@ -211,7 +219,7 @@ end end - xdescribe '#string' do + describe '#string' do it 'can be used to add a string attribute' do instances.string(:attribute_name) expect(instances.attributes.first).to be_string @@ -486,7 +494,7 @@ let(:filter) { 
double('filter') } before { allow(filter).to receive(:filter).and_return(subject) } - it 'calls the given filter‘s #filter method' do + it 'calls the given filter’s #filter method' do expect(filter).to receive(:filter).once.with(subject) subject.apply_filter(filter) end @@ -496,7 +504,7 @@ let(:filter) { double('filter') } before { allow(filter).to receive(:filter).and_return(subject) } - it 'calls the given filters‘ #filter methods' do + it 'calls the given filters’s #filter methods' do expect(filter).to receive(:filter).twice.with(subject) subject.apply_filters(filter, filter) end @@ -545,4 +553,102 @@ end end end + + describe '#has_string_attribute?' do + context 'if no string attribute exists' do + it 'returns false' do + expect(subject.has_string_attribute?).to be false + end + end + + context 'if dataset has string attribute' do + subject { load_instances('weather.string.arff') } + + it 'returns true' do + expect(subject.has_string_attribute?).to be true + end + end + end + + describe '#has_attribute_type?' 
do + subject { load_instances('weather.string.arff') } + let(:type) { 'nominal' } + + it 'calls the underlying Java method .check_for_attribute_type' do + expect(subject) + .to receive(:check_for_attribute_type) + .with(subject.send(:map_attribute_type, type)) + + subject.has_attribute_type?(type) + end + + context 'when given String argument' do + Weka::Core::Attribute::TYPES.map(&:to_s).each do |type| + if type == 'date' + it 'returns false if the attribute type does not exist' do + expect(subject.has_attribute_type?(type)).to be false + end + else + it 'returns true if the attribute type exists' do + expect(subject.has_attribute_type?(type)).to be true + end + end + end + + it 'handles attribute type in uppercase' do + expect(subject.has_attribute_type?('STRING')).to be true + end + + it 'returns false for undefined attribute type' do + expect(subject.has_attribute_type?('I_DO_NOT_EXIST')).to be false + end + end + + context 'when given Symbol argument' do + Weka::Core::Attribute::TYPES.each do |type| + if type == :date + it 'returns false if the attribute type does not exist' do + expect(subject.has_attribute_type?(type)).to be false + end + else + it 'returns true if the attribute type exists' do + expect(subject.has_attribute_type?(type)).to be true + end + end + end + + it 'handles attribute type in uppercase' do + expect(subject.has_attribute_type?(:STRING)).to be true + end + + it 'returns false for undefined attribute type' do + expect(subject.has_attribute_type?(:I_DO_NOT_EXIST)).to be false + end + end + + context 'when given Integer argument' do + attribute_types = [ + Weka::Core::Attribute::NUMERIC, + Weka::Core::Attribute::NOMINAL, + Weka::Core::Attribute::STRING, + Weka::Core::Attribute::DATE + ] + + attribute_types.each do |type| + if type == Weka::Core::Attribute::DATE + it 'returns false if the attribute type does not exist' do + expect(subject.has_attribute_type?(type)).to be false + end + else + it 'returns true if the attribute type exists' do 
+ expect(subject.has_attribute_type?(type)).to be true + end + end + end + + it 'returns false for undefined attribute type' do + expect(subject.has_attribute_type?(1000)).to be false + end + end + end end diff --git a/spec/support/resources/weather.string.arff b/spec/support/resources/weather.string.arff new file mode 100644 index 0000000..1dac96a --- /dev/null +++ b/spec/support/resources/weather.string.arff @@ -0,0 +1,25 @@ +@relation weather + +@attribute outlook {sunny, overcast, rainy} +@attribute description string +@attribute temperature real +@attribute humidity real +@attribute windy {TRUE, FALSE} +@attribute play {yes, no} + +@data +sunny,'Sunny. High near 40F. Winds WNW at 15 to 25 mph.',85,85,FALSE,no +sunny,'Sunny skies. High 39F. Winds WNW at 10 to 15 mph.',80,90,FALSE,no +overcast,'Partly cloudy. Low 27F. Winds NW at 5 to 10 mph.',83,86,FALSE,yes +rainy,'Clear skies. Low 27F. Winds WNW at 10 to 15 mph.',70,96,FALSE,yes +rainy,'Cloudy and damp with rain in the morning...then becoming partly cloudy. High 58F. Winds SW at 10 to 15 mph. Chance of rain 100%.',68,80,FALSE,yes +rainy,'Cloudy early with showers for the afternoon hours. High near 45F. Winds NW at 5 to 10 mph. Chance of rain 50%.',65,70,FALSE,no +overcast,'Cloudy. High 41F. Winds SE at 5 to 10 mph.',64,65,TRUE,yes +sunny,'Sunny skies. High 39F. Winds WNW at 10 to 15 mph.',72,95,FALSE,no +sunny,'Except for a few afternoon clouds, mainly sunny. High 34F. Winds WNW at 5 to 10 mph.',69,70,FALSE,yes +rainy,'Rain showers early mixing with snow showers later in the day. Temps nearly steady in the upper 30s. Winds WNW at 5 to 10 mph. Chance of precip 40%. Snow accumulations less than one inch.',75,80,FALSE,yes +overcast,'Partly cloudy skies in the morning will give way to cloudy skies during the afternoon. High 46F. Winds NW at 25 to 30 mph.',75,70,TRUE,yes +overcast,'Partly cloudy. Low 27F. Winds NW at 5 to 10 mph.',72,90,TRUE,yes +overcast,'Considerable clouds early. Some decrease in clouds late. 
Low 34F. Winds light and variable',81,75,FALSE,yes +rainy,'Overcast with rain showers at times. Low 36F. Winds light and variable. Chance of rain 60%.',71,91,TRUE,no +