OSDN Git Service

Applied r247 changes for the test cases (spliting hte file).
[shogi-server/shogi-server.git] / mk_rate
diff --git a/mk_rate b/mk_rate
index bc6186d..ae8e21c 100755 (executable)
--- a/mk_rate
+++ b/mk_rate
@@ -1,7 +1,7 @@
 #!/usr/bin/ruby
 ## $Id$
 
-## Copyright (C) 2006 Daigo Moriwaki <daigo at debian dot org>
+## Copyright (C) 2006-2008 Daigo Moriwaki <daigo at debian dot org>
 ##
 ## This program is free software; you can redistribute it and/or modify
 ## it under the terms of the GNU General Public License as published by
 #
 # Sample:
 #   $ ./mk_rate . > players.yaml
+#   $ ./mk_rate . && ./mk_rate . > players.yaml
 #
 # The conditions that games and players are rated as following:
 #   * Rated games, which were played by both rated players.
 #   * Rated players, who logged in the server with a name followed by a trip:
 #     "name,trip".
-#   * (Rated) players, who played more than $GAMES_LIMIT [ten] (rated) games. 
+#   * (Rated) players, who played more than $GAMES_LIMIT [15] (rated) games. 
 #
 #
 # PREREQUIRE
 # ==========
 #
-#   Ruby bindings for the GNU Scientific Library (GSL) is required.
-#   You can download it from  http://rb-gsl.rubyforge.org/
-#   Or, if you use Debian, 
-#     $ sudo aptitude install libgsl-ruby1.8
+# Sample Commands to isntall prerequires will work for Debian.
+#
+# * Rubygems
+#   $ sudo aptitude install rubygems
+#
+# * Ruby bindings for the GNU Scientific Library (GSL)
+#   $ sudo aptitude install libgsl-ruby1.8
+#   Or, download it from  http://rb-gsl.rubyforge.org/ .
+#
+# * RGL: Ruby Graph Library
+#   $ sudo gem install rgl
+#   Or, download it from http://rubyforge.org/projects/rgl/ .
 #
 
 require 'yaml'
 require 'time'
+require 'getoptlong'
 require 'gsl'
+require 'rubygems'
+require 'rgl/adjacency'
+require 'rgl/connected_components'
 
 #################################################
 # Constants
 #
 
 # Count out players who play less games than $GAMES_LIMIT
-$GAMES_LIMIT = $DEBUG ? 0 : 10
+$GAMES_LIMIT = $DEBUG ? 0 : 15
 WIN_MARK  = "win"
 LOSS_MARK = "lose"
 DRAW_MARK = "draw"
@@ -123,7 +136,7 @@ class Rating
     @record = Record.new
     @n = win_loss_matrix
     case @n
-    when GSL::Matrix
+    when GSL::Matrix, GSL::Matrix::Int
       @size = @n.size1
     when ::Matrix
       @size = @n.row_size
@@ -199,9 +212,9 @@ class Rating
   end
 
   ##
-  # The initial value of the rate, which is of very importance for Newton method.
-  # This is based on my huristics; the higher the win probablity of a player is, 
-  # the greater points he takes.
+  # The initial value of the rate, which is of very importance for Newton
+  # method.  This is based on my huristics; the higher the win probablity of
+  # a player is, the greater points he takes.
   #
   def initial_rate
     possibility = 
@@ -270,7 +283,8 @@ class Rating
       $stderr.puts "f: %s -> %f" % [f.to_a.inspect, f.nrm2] if $DEBUG
 
       # GSL::Linalg::LU.solve or GSL::Linalg::HH.solve would be available instead.
-      a = GSL::Linalg::SV.solve(j, f)
+      #a = GSL::Linalg::HH.solve(j, f)
+      a, = GSL::MultiFit::linear(j, f)
       a = self.class.average(a)
       # $stderr.puts "a: %s -> %f" % [a.to_a.inspect, a.nrm2] if $DEBUG
       
@@ -338,40 +352,207 @@ class Rating
   end
 end
 
-
-
 #################################################
-# Main methods
+# Encapsulate a pair of keys and win loss matrix.
+#   - keys is an array of player IDs; [gps+123, foo+234, ...]
+#   - matrix holds games # where player i (row index) beats player j (column index).
+#     The row and column indexes match with the keys.
 #
+# This object should be immutable. If an internal state is being modified, a
+# new object is always returned.
+#
+class WinLossMatrix
+
+  ###############
+  # Class methods
+  #  
 
-def mk_win_loss_matrix(players)
-  keys = players.keys.sort.reject do |k|
-    players[k].values.inject(0) {|sum, v| sum + v[0] + v[1]} < $GAMES_LIMIT
+  def self.mk_matrix(players)
+    keys = players.keys.sort
+    size = keys.size
+    matrix =
+      GSL::Matrix[*
+      ((0...size).collect do |k|
+        p1 = keys[k]
+        p1_hash = players[p1]
+        ((0...size).collect do |j|
+          if k == j
+            0
+          else
+            p2 = keys[j]
+            v = p1_hash[p2] || Vector[0,0]
+            v[0]
+          end
+        end)
+      end)]
+    return WinLossMatrix.new(keys, matrix)
   end
 
-  size = keys.size
+  def self.mk_win_loss_matrix(players)
+    obj = mk_matrix(players)
+    return obj.filter
+  end
 
-  matrix =
-    GSL::Matrix[*
-    ((0...size).collect do |k|
-    ((0...size).collect do |j|
-      if k == j
-        0
-      else
-        v = players[keys[k]][keys[j]]
-        v[0]
+  ##################
+  # Instance methods
+  #
+
+  # an array of player IDs; [gps+123, foo+234, ...]
+  attr_reader :keys
+
+  # matrix holds games # where player i (row index) beats player j (column index).
+  # The row and column indexes match with the keys.
+  attr_reader :matrix
+
+  def initialize(keys, matrix)
+    @keys   = keys
+    @matrix = matrix
+  end
+
+  ##
+  # Returns the size of the keys/matrix
+  #
+  def size
+    if @keys
+      @keys.size
+    else
+      nil
+    end
+  end
+
+  ##
+  # Removes a delete_index'th player and returns a new object.
+  #
+  def delete_row(delete_index)
+    copied_cols = []
+    (0...size).each do |i|
+      next if i == delete_index
+      row = @matrix.row(i).clone
+      row.delete_at(delete_index)
+      copied_cols << row
+    end
+    if copied_cols.size == 0
+      new_matrix = GSL::Matrix.new
+    else
+      new_matrix = GSL::Matrix[*copied_cols]
+    end
+    new_keys = @keys.clone
+    new_keys.delete_at(delete_index)
+    return WinLossMatrix.new(new_keys, new_matrix)
+  end
+
+  ##
+  # Removes players in a rows; [1,3,5]
+  #
+  def delete_rows(rows)
+    obj = self
+    rows.sort.reverse.each do |index|
+      obj = obj.delete_row(index)
+    end
+    obj
+  end
+
+  ##
+  # Removes players who do not pass a criteria to be rated, and returns a
+  # new object.
+  # 
+  def filter
+    $stderr.puts @keys.inspect if $DEBUG
+    $stderr.puts @matrix.inspect if $DEBUG
+    delete = []  
+    (0...size).each do |i|
+      row = @matrix.row(i)
+      col = @matrix.col(i)
+      win  = row.sum
+      loss = col.sum
+      if win < 1 || loss < 1 || win + loss < $GAMES_LIMIT
+        delete << i
       end
-    end)
-    end)]
-  
-  return matrix, keys
+    end
+
+    # The recursion ends if there is nothing to delete
+    return self if delete.empty?
+
+    new_obj = delete_rows(delete)
+    new_obj.filter
+  end
+
+  ##
+  # Cuts self into connecting groups such as each player in a group has at least
+  # one game with other players in the group. Returns them as an array.
+  #
+  def connected_subsets
+    g = RGL::AdjacencyGraph.new
+    (0...size).each do |k|
+      (0...size).each do |i|
+        next if k == i
+        if @matrix[k,i] > 0
+          g.add_edge(k,i)
+        end
+      end
+    end
+
+    subsets = []
+    g.each_connected_component do |c|
+      new_keys = []      
+      c.each do |v|
+        new_keys << keys[v.to_s.to_i]
+      end
+      subsets << new_keys
+    end
+
+    subsets = subsets.sort {|a,b| b.size <=> a.size}
+
+    result = subsets.collect do |keys|
+      matrix =
+        GSL::Matrix[*
+        ((0...keys.size).collect do |k|
+          p1 = @keys.index(keys[k])
+          ((0...keys.size).collect do |j|
+            if k == j
+              0
+            else
+              p2 = @keys.index(keys[j])
+              @matrix[p1,p2]
+            end
+          end)
+        end)]
+      WinLossMatrix.new(keys, matrix)
+    end
+
+    return result
+  end
+
+  def to_s
+    "size : #{@keys.size}" + "\n" +
+    @keys.inspect + "\n" + 
+    @matrix.inspect
+  end
+
+end
+
+
+#################################################
+# Main methods
+#
+
+# Half-life effect
+# After NHAFE_LIFE days value will get half.
+# 0.693 is constant, where exp(0.693) ~ 0.5
+def half_life(days)
+  if days < $options["half-life-ignore"]
+    return 1.0
+  else
+    Math::exp(-0.693/$options["half-life"]*(days-$options["half-life-ignore"]))
+  end
 end
 
-def _add_win_loss(winner, loser)
+def _add_win_loss(winner, loser, time)
+  how_long_days = (Time.now - time)/(3600*24)
   $players[winner] ||= Hash.new { GSL::Vector[0,0] }
   $players[loser]  ||= Hash.new { GSL::Vector[0,0] }
-  $players[winner][loser] += GSL::Vector[1,0]
-  $players[loser][winner] += GSL::Vector[0,1]
+  $players[winner][loser] += GSL::Vector[1.0*half_life(how_long_days),0]
+  $players[loser][winner] += GSL::Vector[0,1.0*half_life(how_long_days)]
 end
 
 def _add_time(player, time)
@@ -380,9 +561,9 @@ end
 
 def add(black_mark, black_name, white_name, white_mark, time)
   if black_mark == WIN_MARK && white_mark == LOSS_MARK
-    _add_win_loss(black_name, white_name)
+    _add_win_loss(black_name, white_name, time)
   elsif black_mark == LOSS_MARK && white_mark == WIN_MARK
-    _add_win_loss(white_name, black_name)
+    _add_win_loss(white_name, black_name, time)
   elsif black_mark == DRAW_MARK && white_mark == DRAW_MARK
     return
   else
@@ -406,7 +587,8 @@ def grep(file)
   if /^N\-(.*)$/ =~ str then white_name = $1.strip end
 
   if /^'summary:(.*)$/ =~ str
-    dummy, p1, p2 = $1.split(":").map {|a| a.strip}    
+    state, p1, p2 = $1.split(":").map {|a| a.strip}    
+    return if state == "abnormal"
     p1_name, p1_mark = p1.split(" ")
     p2_name, p2_mark = p2.split(" ")
     if p1_name == black_name
@@ -416,7 +598,7 @@ def grep(file)
       black_name, black_mark = p2_name, p2_mark
       white_name, white_mark = p1_name, p1_mark
     else
-      raise "Never reach!: #{black} #{white} #{p1} #{p2}"
+      raise "Never reach!: #{black} #{white} #{p3} #{p2}"
     end
   end
   if /^'\$END_TIME:(.*)$/ =~ str
@@ -439,30 +621,116 @@ USAGE: #{$0} dir [...]
   exit 1
 end
 
+def validate(yaml)
+  yaml["players"].each do |group_key, group|
+    group.each do |player_key, player|
+      rate = player['rate']
+      next unless rate
+      if rate > 10000 || rate < -10000
+        return false
+      end
+    end
+  end
+  return true
+end
+
+def usage(io)
+    io.puts <<EOF
+USAGE: #{$0} [options] DIR..
+  DIR                where CSA files are looked up recursively
+OPTOINS:
+  --half-life        n [days] (default 60)
+  --half-life-ignore m [days] (default  7)
+                     after m days, half-life effect works
+  --help             show this message
+EOF
+end
+
 def main
-  usage if ARGV.empty?
+  $options = Hash::new
+  parser = GetoptLong.new(
+    ["--half-life",          GetoptLong::REQUIRED_ARGUMENT],
+    ["--half-life-ignore",   GetoptLong::REQUIRED_ARGUMENT],
+    ["--help", "-h",         GetoptLong::NO_ARGUMENT])
+  parser.quiet = true
+  begin
+    parser.each_option do |name, arg|
+      name.sub!(/^--/, '')
+      $options[name] = arg.dup
+    end
+  rescue
+    usage($stderr)
+    raise parser.error_message
+  end
+  if $options["help"]
+    usage($stdout) 
+    exit 0
+  end
+  $options["half-life"] ||= 60
+  $options["half-life"] = $options["half-life"].to_i
+  $options["half-life-ignore"] ||= 7
+  $options["half-life-ignore"] = $options["half-life-ignore"].to_i
+
   while dir = ARGV.shift do
     Dir.glob( File.join(dir, "**", "*.csa") ) {|f| grep(f)}
   end
 
-  win_loss_matrix, keys = mk_win_loss_matrix($players)
-  $stderr.puts keys.inspect if $DEBUG
-  $stderr.puts win_loss_matrix.inspect if $DEBUG
-  rating = Rating.new(win_loss_matrix)
-  rating.rating
-  rating.average!(Rating::AVERAGE_RATE)
-  rating.integer!
-
-  yaml = {}
-  keys.each_with_index do |p, i| # player_id, index#
-    win_loss = $players[p].values.inject(GSL::Vector[0,0]) {|sum, v| sum + v}
-    win = win_loss_matrix
-    yaml[p] = 
-      { 'name' => p.split("+")[0],
-        'rate' => rating.rate[i],
-        'last_modified' => $players_time[p].dup,
-        'win'  => win_loss[0],
-        'loss' => win_loss[1]}
+  yaml = {} 
+  yaml["players"] = {}
+  rating_group = 0
+  if $players.size > 0
+    obj = WinLossMatrix::mk_win_loss_matrix($players)
+    obj.connected_subsets.each do |win_loss_matrix|
+      yaml["players"][rating_group] = {}
+
+      rating = Rating.new(win_loss_matrix.matrix)
+      rating.rating
+      rating.average!(Rating::AVERAGE_RATE)
+      rating.integer!
+
+      win_loss_matrix.keys.each_with_index do |p, i| # player_id, index#
+        win  = win_loss_matrix.matrix.row(i).sum
+        loss = win_loss_matrix.matrix.col(i).sum
+
+        yaml["players"][rating_group][p] = 
+          { 'name' => p.split("+")[0],
+            'rating_group' => rating_group,
+            'rate' => rating.rate[i],
+            'last_modified' => $players_time[p].dup,
+            'win'  => win,
+            'loss' => loss}
+      end
+      rating_group += 1
+    end
+  end
+  rating_group -= 1
+  non_rated_group = 999 # large enough
+  yaml["players"][non_rated_group] = {}
+  $players.each_key do |id|
+    # skip players who have already been rated
+    found = false
+    (0..rating_group).each do |i|
+       found = true if yaml["players"][i][id]
+       break if found
+    end
+    next if found
+
+    v = GSL::Vector[0, 0]
+    $players[id].each_value {|value| v += value}
+    next if v[0] < 1 && v[1] < 1
+
+    yaml["players"][non_rated_group][id] =
+      { 'name' => id.split("+")[0],
+        'rating_group' => non_rated_group,
+        'rate' => 0,
+        'last_modified' => $players_time[id].dup,
+        'win'  => v[0],
+        'loss' => v[1]}
+  end
+  unless validate(yaml)
+    $stderr.puts "Aborted. It did not result in valid ratings."
+    $stderr.puts yaml.to_yaml if $DEBUG
+    exit 10
   end
   puts yaml.to_yaml
 end