Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions lib/ruby-statistics/statistical_test/chi_squared_test.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
module RubyStatistics
module StatisticalTest
class ChiSquaredTest

require 'matrix'

def self.chi_statistic(expected, observed)
# If the expected is a number, we assume that all expected observations
# have the same probability to occur, hence we expect to see the same number
Expand Down Expand Up @@ -37,6 +40,62 @@ def self.goodness_of_fit(alpha, expected, observed)
alternative: p_value <= alpha,
confidence_level: 1 - alpha }
end

# The following three methods perform a chi-squared test of independence on contingency
# tables (short: ct) of the form
#
#      A   B
#   X  20  18
#   Y   7  35
#
# They have been tested using 2x2 and 3x3 tables. Tables are represented as Matrix objects.
#
# Runs a chi-squared test of independence on a contingency table.
#
# alpha           - significance level the p-value is compared against.
# observed_matrix - Matrix holding the observed counts.
#
# Returns a Hash with the keys :chi_score, :df, :probability, :p_value,
# :alpha, :null, :alternative, :confidence_level and :expected (the Matrix
# of expected counts derived from the observed ones).
def self.test_of_independence(alpha, observed_matrix)
  expected_matrix = calculate_expected_matrix(observed_matrix)

  # Degrees of freedom: (number of rows - 1) * (number of columns - 1).
  rows_less_one = observed_matrix.row_size - 1
  columns_less_one = observed_matrix.column_size - 1
  df = rows_less_one * columns_less_one

  chi_score = chi_statistic_matrix(observed_matrix, expected_matrix)
  cumulative = Distribution::ChiSquared.new(df).cumulative_function(chi_score)
  # The p-value is the upper tail of the distribution beyond the chi score.
  p_value = 1.0 - cumulative

  statistics = { chi_score: chi_score, df: df, probability: cumulative, p_value: p_value }
  statistics.merge(
    alpha: alpha,
    null: alpha < p_value,
    alternative: p_value <= alpha,
    confidence_level: 1 - alpha,
    expected: expected_matrix
  )
end

# Builds the Matrix of expected counts for a contingency table of observed counts.
#
# Each expected cell is (row total * column total) / grand total. The totals
# are converted to Rational first, so every cell of the result is a Rational.
#
# observed_matrix - Matrix holding the observed counts.
#
# Returns a new Matrix with the same number of rows and columns as the input.
def self.calculate_expected_matrix(observed_matrix)
  row_totals = observed_matrix.row_vectors.map { |vector| vector.to_a.sum.to_r }
  column_totals = observed_matrix.column_vectors.map { |vector| vector.to_a.sum.to_r }
  grand_total = row_totals.sum

  # One output row per row total, one cell per column total.
  expected_rows = row_totals.map do |row_total|
    column_totals.map { |column_total| (row_total * column_total) / grand_total }
  end

  Matrix.rows(expected_rows)
end

# Computes the chi-squared statistic for two matrices of equal dimensions:
# the sum over all cells of (observed - expected)**2 / expected.
#
# observed_matrix - Matrix holding the observed counts.
# expected_matrix - Matrix holding the expected counts for the same cells.
#
# Returns the statistic as a Float (0.0 when the observed matrix has no cells).
def self.chi_statistic_matrix(observed_matrix, expected_matrix)
  # Folded with plain `+` on purpose: the terms are added one by one in
  # iteration order, starting from a Float zero.
  observed_matrix.each_with_index.inject(0.0) do |total, (observed_value, row, col)|
    expected_value = expected_matrix[row, col]
    total + (observed_value - expected_value)**2 / expected_value
  end
end

# Keep chi_statistic_matrix out of the public API; in the code shown here it
# is only called by test_of_independence.
private_class_method :chi_statistic_matrix

end
end
end
1 change: 1 addition & 0 deletions ruby-statistics.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,5 @@ Gem::Specification.new do |spec|
spec.add_development_dependency "grb", '~> 0.4.1', '>= 0.4.1'
spec.add_development_dependency 'byebug', '>= 9.1.0'
spec.add_development_dependency 'pry'
# ChiSquaredTest requires 'matrix' when the library itself is loaded, so it
# must be a runtime dependency: since Ruby 3.1 matrix ships as a bundled gem
# rather than a default gem, and Bundler only makes declared gems loadable.
spec.add_dependency 'matrix'
end
38 changes: 38 additions & 0 deletions spec/ruby-statistics/statistical_test/chi_squared_test_spec.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'spec_helper'
require 'matrix'

describe RubyStatistics::StatisticalTest::ChiSquaredTest do
describe '.chi_statistic' do
Expand Down Expand Up @@ -82,4 +83,41 @@
expect(result[:alternative]).to be false
end
end

describe '.calculate_expected_matrix' do
  # The observed table has 3 rows and 2 columns. Row totals are 52080, 45752
  # and 40311; column totals are 778 and 137365; the grand total is 138143.
  # Each expected cell is (row total * column total) / grand total and is
  # compared as an exact Rational, so deviations smaller than 1 are caught.
  it 'calculate expected values for a 3x2 contingency table of observed values' do
    observed = Matrix[[388, 51692], [119, 45633], [271, 40040]]
    grand_total = 138143

    result = described_class.calculate_expected_matrix(observed)

    expect(result).to eq(
      Matrix[
        [Rational(40518240, grand_total), Rational(7153969200, grand_total)],
        [Rational(35595056, grand_total), Rational(6284723480, grand_total)],
        [Rational(31361958, grand_total), Rational(5537320515, grand_total)]
      ]
    )
  end
end

# End-to-end check of the test of independence on a single contingency table.
describe '.test_of_independence' do

it 'calculate test of independence for a 2*3 contingency table' do

# The literal below has 3 rows and 2 columns of observed counts.
observed = Matrix[[388,51692],[119,45633],[271,40040]]
alpha = 0.05
# Declared up front so the value assigned inside the block stays visible below.
result = {}

expect do
result = described_class.test_of_independence(alpha, observed)
end.not_to raise_error

# NOTE(review): the reference value 114.36 is not derived in this file —
# confirm it against an independent calculation.
expect(result[:chi_score].round(4)).to eq(114.3600)
# NOTE(review): exact comparison with 0.0 presumably relies on the cumulative
# probability rounding to 1.0 in floating point — confirm.
expect(result[:p_value]).to eq(0.0)
# (3 rows - 1) * (2 columns - 1) = 2 degrees of freedom.
expect(result[:df]).to eq(2)
# With a p-value of 0.0 and alpha 0.05, p_value <= alpha holds, so the
# alternative flag is true and the null flag is false.
expect(result[:null]).to be false
expect(result[:alternative]).to be true

end

end


end