When there were too few games to rate players (i.e. no

[shogi-server/shogi-server.git] / mk_rate
diff --git a/mk_rate b/mk_rate

index cef725f..396d237 100755 (executable)
--- a/mk_rate
+++ b/mk_rate
@@ -30,41 +30,96 @@
  #     "name,trip".
  #   * (Rated) players, who played more than $GAMES_LIMIT [ten] (rated) games. 
  #
+#
+# PREREQUISITES
+# =============
+#
+# The sample commands below install the prerequisites on Debian.
+#
+# * Rubygems
+#   $ sudo aptitude install rubygems
+#
+# * Ruby bindings for the GNU Scientific Library (GSL)
+#   $ sudo aptitude install libgsl-ruby1.8
+#   Or, download it from  http://rb-gsl.rubyforge.org/ .
+#
+# * RGL: Ruby Graph Library
+#   $ sudo gem install rgl
+#   Or, download it from http://rubyforge.org/projects/rgl/ .
+#
  
  require 'yaml'
  require 'time'
  require 'gsl'
+require 'rubygems'
+require 'rgl/adjacency'
+require 'rgl/connected_components'
  
  #################################################
  # Constants
  #
+
+# Players who have played fewer than $GAMES_LIMIT games are not rated
  $GAMES_LIMIT = $DEBUG ? 0 : 10
  WIN_MARK  = "win"
  LOSS_MARK = "lose"
+DRAW_MARK = "draw"
  
+# Holds players
  $players = Hash.new
+# Holds the time of the last game each player played
  $players_time = Hash.new { Time.at(0) }
  
  
  #################################################
+# Keeps the value of the lowest key
+#
+class Record
+  def initialize
+    @lowest = []
+  end
+
+  def set(key, value)
+    if @lowest.empty? || key < @lowest[0]
+      @lowest = [key, value]
+    end
+  end
+
+  def get
+    if @lowest.empty?
+      nil
+    else
+      @lowest[1]
+    end
+  end
+end
+
+#################################################
  # Calculates rates of every player from a Win Loss GSL::Matrix
  #
  class Rating
    include Math
  
-  # The model of the win possibility is 1/(1 + 10^(-d/400))
-  # The equation in this class is 1/(1 + e^(-Kd))
-  # So, K should be like this.
+  # The model of the win possibility is 1/(1 + 10^(-d/400)).
+  # The equation in this class is 1/(1 + e^(-Kd)).
+  # So, K should be calculated like this.
    K = Math.log(10.0) / 400.0
    
    # Convergence limit to stop Newton method.
    ERROR_LIMIT = 1.0e-3
+  # Stop the Newton method after this many iterations.
+  COUNT_MAX = 500
  
    # Average rate among the players
    AVERAGE_RATE = 1000
+
    
    ###############
    # Class methods
+  #  
+  
+  ##
+  # Calculates the average of the vector.
    #
    def Rating.average(vector, mean=0.0)
      sum = Array(vector).inject(0.0) {|sum, n| sum + n}
@@ -76,17 +131,17 @@ class Rating
    # Instance methods
    #
    def initialize(win_loss_matrix)
+    @record = Record.new
      @n = win_loss_matrix
      case @n
-    when GSL::Matrix
+    when GSL::Matrix, GSL::Matrix::Int
        @size = @n.size1
      when ::Matrix
        @size = @n.row_size
      else
        raise ArgumentError
      end
-    # 0 is the initial value
-    @rate = initial_rate
+    initial_rate
    end
    attr_reader :rate, :n
  
@@ -122,20 +177,19 @@ class Rating
    end
  
    ##
-  #         / f0/R0 f0/R1 f0/R2 ... \
-  # fk/Rj = | f1/R0 f1/R1 f1/R2 ... |
-  #         \ f2/R0 f2/R1 f2/R2 ... /
-  def d_funk(k,j)
+  #           / f0/R0 f0/R1 f0/R2 ... \
+  # dfk/dRj = | f1/R0 f1/R1 f1/R2 ... |
+  #           \ f2/R0 f2/R1 f2/R2 ... /
+  def d_func(k,j)
      sum = 0.0
      if k == j
        each_player do |i|
          next if i == k
          sum += win_rate(i,k) * win_rate(k,i) * (@n[k,i] + @n[i,k])
-        sum *= -2.0
        end
+      sum *= -2.0
      else # k != j
-      sum = win_rate(j,k) * win_rate(k,j) * (@n[k,j] + @n[j,k])
-      sum *= 2.0
+      sum = 2.0 * win_rate(j,k) * win_rate(k,j) * (@n[k,j] + @n[j,k])
      end
      sum
    end
@@ -149,7 +203,7 @@ class Rating
      GSL::Matrix[*
        (0...@size).collect do |k|
          (0...@size).collect do |j|
-          d_funk(k,j)
+          d_func(k,j)
          end
        end
      ]
@@ -157,76 +211,100 @@ class Rating
  
    ##
    # The initial value of the rate, which is very important for the Newton method.
-  # This is based on my huristics. 
+  # This is based on my heuristics; the higher a player's win probability is,
+  # the more points he gets.
    #
    def initial_rate
      possibility = 
        player_vector do |k|
-        v = GSL::Vector[0.0, 0.0]
+        v = GSL::Vector[0, 0]
          each_player do |i|
            next if k == i
            v += GSL::Vector[@n[k,i], @n[i,k]]
          end
-        v[0] + v[1] == 0 ? 0.001 : v[0] / (v[0] + v[1])
+        v.nrm2 < 1 ? 0 : v[0] / (v[0] + v[1])
        end
      rank = possibility.sort_index
-    player_vector do |k|
-      K*500 * (rank[k]+1) / (@size)
+    @rate = player_vector do |k|
+      K*500 * (rank[k]+1) / @size
      end
+    average!
    end
  
    ##
-  # Main method to calculate ratings.
+  # Resets @rate so that the higher the current win probability of a player is,
+  # the more points he gets.
+  #
+  def initial_rate2
+    @rate = @record.get || @rate
+    rank = @rate.sort_index
+    @rate = player_vector do |k|
+      K*@count*1.5 * (rank[k]+1) / @size
+    end
+    average!
+  end
+
+  # mu is the damping parameter of the Deaccelerated (damped) Newton method
+  def deaccelrate(mu, old_rate, a, old_f_nrm2)
+    @rate = old_rate - a * mu
+    if func_vector.nrm2 < (1 - mu / 4.0 ) * old_f_nrm2 then
+      return
+    end
+    if mu < 1e-4
+      @record.set(func_vector.nrm2, @rate)
+      initial_rate2
+      return
+    end
+    $stderr.puts "mu: %f " % [mu] if $DEBUG
+    deaccelrate(mu*0.5, old_rate, a, old_f_nrm2)
+  end
+
+  ##
+  # Main process to calculate ratings.
    #
    def rating
      # Counter to stop the process.
      # Calculation in the Newton method may fall into an infinite loop
-    count = 0
-    # Mu parameter in Deaccelerated Newton method
-    mu = 1
+    @count = 0
  
      # Main loop
      begin
        # Solve the equation: 
        #   J*a=f
        #   @rate_(n+1) = @rate_(n) - a
+      #
+      # f.nrm2 should approach zero.
        f = func_vector
        j = j_matrix
  
-      # f.nrm2 should approach to zero.
+      # $stderr.puts "j: %s" % [j.inspect] if $DEBUG
        $stderr.puts "f: %s -> %f" % [f.to_a.inspect, f.nrm2] if $DEBUG
  
-      # LU is not available because J may not be a normal matrix.
-      # a = GSL::Linalg::LU.solve(j, f)
+      # GSL::Linalg::LU.solve or GSL::Linalg::HH.solve would be available instead.
        a = GSL::Linalg::SV.solve(j, f)
        a = self.class.average(a)
+      # $stderr.puts "a: %s -> %f" % [a.to_a.inspect, a.nrm2] if $DEBUG
        
        # Deaccelerated Newton method
-      if mu == 1
-        old_rate = GSL::Vector.alloc(@rate)
-        old_f    = GSL::Vector.alloc(f)
-        @rate = old_rate - a * mu
-      end
-      if func_vector.nrm2 < (1.0 - mu / 4.0) * old_f.nrm2
-        mu = 1
-        break
-      else
-        mu *= 0.5
-        @rate = old_rate - a * mu
-      end
+      # GSL::Vector object should be immutable.
+      old_rate   = @rate
+      old_f      = f
+      old_f_nrm2 = old_f.nrm2
+      deaccelrate(1.0, old_rate, a, old_f_nrm2)
+      @record.set(func_vector.nrm2, @rate)
  
        $stderr.printf "|error| : %5.2e\n", a.nrm2 if $DEBUG
  
-      count += 1
-      if count > 300
+      @count += 1
+      if @count > COUNT_MAX
          $stderr.puts "Values seem to oscillate. Stopped the process."
-        $stderr.puts "f: %s -> %f" % [f.to_a.inspect, f.nrm2]
+        $stderr.puts "f: %s -> %f" % [func_vector.to_a.inspect, func_vector.nrm2]
          break
        end
  
      end while (a.nrm2 > ERROR_LIMIT * @rate.nrm2)
-    #end while ( !(0..0.01).include?(func_vector.nrm2) )
      
+    @rate = @record.get
      $stderr.puts "resolved f: %s -> %f" %
        [func_vector.to_a.inspect, func_vector.nrm2] if $DEBUG
  
@@ -235,6 +313,9 @@ class Rating
      self
    end
  
+  ##
+  # Make the values of @rate finite.
+  #
    def finite!
      @rate = @rate.collect do |a|
        if a.infinite?
@@ -245,10 +326,16 @@ class Rating
      end
    end
  
+  ##
+  # Flatten the values of @rate.
+  #
    def average!(mean=0.0)
      @rate = self.class.average(@rate, mean)
    end
  
+  ##
+  # Make the values of @rate integer.
+  #
    def integer!
      @rate = @rate.collect do |a|
        if a.finite?
@@ -262,40 +349,203 @@ class Rating
    end
  end
  
-
-
  #################################################
-# Main methods
+# Encapsulates a pair of keys and a win-loss matrix.
+#   - keys is an array of player IDs; [gps+123, foo+234, ...]
+#   - matrix holds the number of games in which player i (row index) beat
+#     player j (column index). The row and column indexes match the keys.
+#
+# This object should be immutable. If an internal state is being modified, a
+# new object is always returned.
  #
+class WinLossMatrix
+
+  ###############
+  # Class methods
+  #  
+
+  def self.mk_matrix(players)
+    keys = players.keys.sort
+    size = keys.size
+    matrix =
+      Matrix[*
+      ((0...size).collect do |k|
+        p1 = keys[k]
+        p1_hash = players[p1]
+        ((0...size).collect do |j|
+          if k == j
+            0
+          else
+            p2 = keys[j]
+            v = p1_hash[p2] || Vector[0,0]
+            v[0]
+          end
+        end)
+      end)]
+    return WinLossMatrix.new(keys, matrix)
+  end
  
-def mk_win_loss_matrix(players)
-  keys = players.keys.sort.reject do |k|
-    players[k].values.inject(0) {|sum, v| sum + v[0] + v[1]} < $GAMES_LIMIT
+  def self.mk_win_loss_matrix(players)
+    obj = mk_matrix(players)
+    return obj.filter
    end
  
-  size = keys.size
+  ##################
+  # Instance methods
+  #
  
-  matrix =
-    GSL::Matrix[*
-    ((0...size).collect do |k|
-    ((0...size).collect do |j|
-      if k == j
-        0
-      else
-        v = players[keys[k]][keys[j]]
-        v[0]
+  # an array of player IDs; [gps+123, foo+234, ...]
+  attr_reader :keys
+
+  # matrix holds the number of games in which player i (row index) beat
+  # player j (column index). The row and column indexes match the keys.
+  attr_reader :matrix
+
+  def initialize(keys, matrix)
+    @keys   = keys
+    @matrix = matrix
+  end
+
+  ##
+  # Returns the size of the keys/matrix
+  #
+  def size
+    if @keys
+      @keys.size
+    else
+      nil
+    end
+  end
+
+  ##
+  # Removes the delete_index'th player and returns a new object.
+  #
+  def delete_row(delete_index)
+    copied_cols = []
+    (0...size).each do |i|
+      next if i == delete_index
+      row = @matrix.get_row(i)  # get_row returns a copy of the row
+      row.delete_at(delete_index)
+      copied_cols << row
+    end
+    new_matrix = Matrix[*copied_cols]
+    new_keys = @keys.clone
+    new_keys.delete_at(delete_index)
+    return WinLossMatrix.new(new_keys, new_matrix)
+  end
+
+  ##
+  # Removes the players at the given row indexes, e.g. [1,3,5], and returns the resulting object.
+  #
+  def delete_rows(rows)
+    obj = self
+    rows.sort.reverse.each do |index|
+      obj = obj.delete_row(index)
+    end
+    obj
+  end
+
+  ##
+  # Removes players who do not meet the criteria to be rated, and returns the resulting object.
+  # 
+  def filter
+    $stderr.puts @keys.inspect if $DEBUG
+    $stderr.puts @matrix.inspect if $DEBUG
+    delete = []  
+    (0...size).each do |i|
+      row = @matrix.row(i)
+      col = @matrix.col(i)
+      win  = row.sum
+      loss = col.sum
+      if win < 1 || loss < 1 || win + loss < $GAMES_LIMIT
+        delete << i
        end
-    end)
-    end)]
-  
-  return matrix, keys
+    end
+
+    # The recursion ends if there is nothing to delete
+    return self if delete.empty?
+
+    new_obj = delete_rows(delete)
+    new_obj.filter
+  end
+
+  ##
+  # Splits self into connected groups such that each player in a group has at least
+  # one game with another player in the group. Returns them as an array.
+  #
+  def connected_subsets
+    g = RGL::AdjacencyGraph.new
+    (0...size).each do |k|
+      (0...size).each do |i|
+        next if k == i
+        if @matrix[k,i] > 0
+          g.add_edge(k,i)
+        end
+      end
+    end
+
+    subsets = []
+    g.each_connected_component do |c|
+      new_keys = []      
+      c.each do |v|
+        new_keys << keys[v.to_s.to_i]
+      end
+      subsets << new_keys
+    end
+
+    subsets = subsets.sort {|a,b| b.size <=> a.size}
+
+    result = subsets.collect do |keys|
+      matrix =
+        Matrix[*
+        ((0...keys.size).collect do |k|
+          p1 = @keys.index(keys[k])
+          ((0...keys.size).collect do |j|
+            if k == j
+              0
+            else
+              p2 = @keys.index(keys[j])
+              @matrix[p1][p2]
+            end
+          end)
+        end)]
+      WinLossMatrix.new(keys, matrix)
+    end
+
+    return result
+  end
+
+  def to_s
+    "size : #{@keys.size}" + "\n" +
+    @keys.inspect + "\n" + 
+    @matrix.inspect
+  end
+
+end
+
+
+#################################################
+# Main methods
+#
+
+# Half-life effect
+# Once a game is more than 7 days old, its weight halves every NHALF_LIFE days.
+# 0.693 is a constant (~ ln 2), where exp(-0.693) ~ 0.5
+NHALF_LIFE=60
+def half_life(days)
+  if days < 7
+    return 1.0
+  else
+    Math::exp(-0.693/NHALF_LIFE*(days-7))
+  end
  end
  
-def _add_win_loss(winner, loser)
+def _add_win_loss(winner, loser, time)
+  how_long_days = (Time.now - time)/(3600*24)
    $players[winner] ||= Hash.new { GSL::Vector[0,0] }
    $players[loser]  ||= Hash.new { GSL::Vector[0,0] }
-  $players[winner][loser] += GSL::Vector[1,0]
-  $players[loser][winner] += GSL::Vector[0,1]
+  $players[winner][loser] += GSL::Vector[1.0*half_life(how_long_days),0]
+  $players[loser][winner] += GSL::Vector[0,1.0*half_life(how_long_days)]
  end
  
  def _add_time(player, time)
@@ -304,9 +554,11 @@ end
  
  def add(black_mark, black_name, white_name, white_mark, time)
    if black_mark == WIN_MARK && white_mark == LOSS_MARK
-    _add_win_loss(black_name, white_name)
+    _add_win_loss(black_name, white_name, time)
    elsif black_mark == LOSS_MARK && white_mark == WIN_MARK
-    _add_win_loss(white_name, black_name)
+    _add_win_loss(white_name, black_name, time)
+  elsif black_mark == DRAW_MARK && white_mark == DRAW_MARK
+    return
    else
      raise "Never reached!"
    end
@@ -314,6 +566,13 @@ def add(black_mark, black_name, white_name, white_mark, time)
    _add_time(white_name, time)
  end
  
+def identify_id(id)
+  if /@NORATE\+/ =~ id # the player having @NORATE in the name should not be rated
+    return nil
+  end
+  id.gsub(/@.*?\+/,"+")
+end
+
  def grep(file)
    str = File.open(file).read
  
@@ -321,7 +580,8 @@ def grep(file)
    if /^N\-(.*)$/ =~ str then white_name = $1.strip end
  
    if /^'summary:(.*)$/ =~ str
-    dummy, p1, p2 = $1.split(":").map {|a| a.strip}    
+    state, p1, p2 = $1.split(":").map {|a| a.strip}    
+    return if state == "abnormal"
      p1_name, p1_mark = p1.split(" ")
      p2_name, p2_mark = p2.split(" ")
      if p1_name == black_name
@@ -331,7 +591,7 @@ def grep(file)
        black_name, black_mark = p2_name, p2_mark
        white_name, white_mark = p1_name, p1_mark
      else
-      raise "Never reach!: #{black} #{white} #{p1} #{p2}"
+      raise "Never reach!: #{black_name} #{white_name} #{p1} #{p2}"
      end
    end
    if /^'\$END_TIME:(.*)$/ =~ str
@@ -339,7 +599,11 @@ def grep(file)
    end
    if /^'rating:(.*)$/ =~ str
      black_id, white_id = $1.split(":").map {|a| a.strip}
-    add(black_mark, black_id, white_id, white_mark, time)
+    black_id = identify_id(black_id)
+    white_id = identify_id(white_id)
+    if black_id && white_id && (black_id != white_id)
+      add(black_mark, black_id, white_id, white_mark, time)
+    end
    end
  end
  
@@ -356,24 +620,33 @@ def main
      Dir.glob( File.join(dir, "**", "*.csa") ) {|f| grep(f)}
    end
  
-  win_loss_matrix, keys = mk_win_loss_matrix($players)
-  $stderr.puts keys.inspect if $DEBUG
-  $stderr.puts win_loss_matrix.inspect if $DEBUG
-  rating = Rating.new(win_loss_matrix)
-  rating.rating
-  rating.average!(Rating::AVERAGE_RATE)
-  rating.integer!
-
-  yaml = {}
-  keys.each_with_index do |p, i| # player_id, index#
-    win_loss = $players[p].values.inject(GSL::Vector[0,0]) {|sum, v| sum + v}
-    win = win_loss_matrix
-    yaml[p] = 
-      { 'name' => p.split("+")[0],
-        'rate' => rating.rate[i],
-        'last_modified' => $players_time[p].dup,
-        'win'  => win_loss[0],
-        'loss' => win_loss[1]}
+  yaml = {} 
+  yaml["players"] = {}
+  if $players.size > 0
+    obj = WinLossMatrix::mk_win_loss_matrix($players)
+    rating_group = 0
+    obj.connected_subsets.each do |win_loss_matrix|
+      yaml["players"][rating_group] = {}
+
+      rating = Rating.new(win_loss_matrix.matrix)
+      rating.rating
+      rating.average!(Rating::AVERAGE_RATE)
+      rating.integer!
+
+      win_loss_matrix.keys.each_with_index do |p, i| # player_id, index#
+        win  = win_loss_matrix.matrix.row(i).sum
+        loss = win_loss_matrix.matrix.col(i).sum
+
+        yaml["players"][rating_group][p] = 
+          { 'name' => p.split("+")[0],
+            'rating_group' => rating_group,
+            'rate' => rating.rate[i],
+            'last_modified' => $players_time[p].dup,
+            'win'  => win,
+            'loss' => loss}
+      end
+      rating_group += 1
+    end
    end
    puts yaml.to_yaml
  end