mk_rate

   1 #!/usr/bin/ruby
   2 ## $Id$
   3
   4 ## Copyright (C) 2006 Daigo Moriwaki <daigo at debian dot org>
   5 ##
   6 ## This program is free software; you can redistribute it and/or modify
   7 ## it under the terms of the GNU General Public License as published by
   8 ## the Free Software Foundation; either version 2 of the License, or
   9 ## (at your option) any later version.
  10 ##
  11 ## This program is distributed in the hope that it will be useful,
  12 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 ## GNU General Public License for more details.
  15 ##
  16 ## You should have received a copy of the GNU General Public License
  17 ## along with this program; if not, write to the Free Software
  18 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19
  20 #
# This calculates the rating score of every player from CSA files, and outputs
# a yaml file (players.yaml) that Shogi Server can read.
  23 #
  24 # Sample:
  25 #   $ ./mk_rate . > players.yaml
  26 #
  27 # The conditions that games and players are rated as following:
  28 #   * Rated games, which were played by both rated players.
  29 #   * Rated players, who logged in the server with a name followed by a trip:
  30 #     "name,trip".
  31 #   * (Rated) players, who played more than $GAMES_LIMIT [15] (rated) games.
  32 #
  33 #
  34 # PREREQUIRE
  35 # ==========
  36 #
# The sample commands below install the prerequisites on Debian.
  38 #
  39 # * Rubygems
  40 #   $ sudo aptitude install rubygems
  41 #
  42 # * Ruby bindings for the GNU Scientific Library (GSL)
  43 #   $ sudo aptitude install libgsl-ruby1.8
  44 #   Or, download it from  http://rb-gsl.rubyforge.org/ .
  45 #
  46 # * RGL: Ruby Graph Library
  47 #   $ sudo gem install rgl
  48 #   Or, download it from http://rubyforge.org/projects/rgl/ .
  49 #
  50
  51 require 'yaml'
  52 require 'time'
  53 require 'gsl'
  54 require 'rubygems'
  55 require 'rgl/adjacency'
  56 require 'rgl/connected_components'
  57
  58 #################################################
  59 # Constants
  60 #
  61
  62 # Count out players who play less games than $GAMES_LIMIT
  63 $GAMES_LIMIT = $DEBUG ? 0 : 15
  64 WIN_MARK  = "win"
  65 LOSS_MARK = "lose"
  66 DRAW_MARK = "draw"
  67
  68 # Holds players
  69 $players = Hash.new
  70 # Holds the last time when a player gamed
  71 $players_time = Hash.new { Time.at(0) }
  72
  73
  74 #################################################
  75 # Keeps the value of the lowest key
  76 #
  77 class Record
  78   def initialize
  79     @lowest = []
  80   end
  81
  82   def set(key, value)
  83     if @lowest.empty? || key < @lowest[0]
  84       @lowest = [key, value]
  85     end
  86   end
  87
  88   def get
  89     if @lowest.empty?
  90       nil
  91     else
  92       @lowest[1]
  93     end
  94   end
  95 end
  96
  97 #################################################
  98 # Calculates rates of every player from a Win Loss GSL::Matrix
  99 #
 100 class Rating
 101   include Math
 102
 103   # The model of the win possibility is 1/(1 + 10^(-d/400)).
 104   # The equation in this class is 1/(1 + e^(-Kd)).
 105   # So, K should be calculated like this.
 106   K = Math.log(10.0) / 400.0
 107
 108   # Convergence limit to stop Newton method.
 109   ERROR_LIMIT = 1.0e-3
 110   # Stop Newton method after this iterations.
 111   COUNT_MAX = 500
 112
 113   # Average rate among the players
 114   AVERAGE_RATE = 1000
 115
 116
 117   ###############
 118   # Class methods
 119   #
 120
 121   ##
 122   # Calcurates the average of the vector.
 123   #
 124   def Rating.average(vector, mean=0.0)
 125     sum = Array(vector).inject(0.0) {|sum, n| sum + n}
 126     vector -= GSL::Vector[*Array.new(vector.size, sum/vector.size - mean)]
 127     vector
 128   end
 129
 130   ##################
 131   # Instance methods
 132   #
 133   def initialize(win_loss_matrix)
 134     @record = Record.new
 135     @n = win_loss_matrix
 136     case @n
 137     when GSL::Matrix, GSL::Matrix::Int
 138       @size = @n.size1
 139     when ::Matrix
 140       @size = @n.row_size
 141     else
 142       raise ArgumentError
 143     end
 144     initial_rate
 145   end
 146   attr_reader :rate, :n
 147
 148   def player_vector
 149     GSL::Vector[*
 150       (0...@size).collect {|k| yield k}
 151     ]
 152   end
 153
 154   def each_player
 155     (0...@size).each {|k| yield k}
 156   end
 157
 158   ##
 159   # The possibility that the player k will beet the player i.
 160   #
 161   def win_rate(k,i)
 162     1.0/(1.0 + exp(@rate[i]-@rate[k]))
 163   end
 164
 165   ##
 166   # Most possible equation
 167   #
 168   def func_vector
 169     player_vector do|k|
 170       sum = 0.0
 171       each_player do |i|
 172         next if i == k
 173         sum += @n[k,i] * win_rate(i,k) - @n[i,k] * win_rate(k,i)
 174       end
 175       sum * 2.0
 176     end
 177   end
 178
 179   ##
 180   #           / f0/R0 f0/R1 f0/R2 ... \
 181   # dfk/dRj = | f1/R0 f1/R1 f1/R2 ... |
 182   #           \ f2/R0 f2/R1 f2/R2 ... /
 183   def d_func(k,j)
 184     sum = 0.0
 185     if k == j
 186       each_player do |i|
 187         next if i == k
 188         sum += win_rate(i,k) * win_rate(k,i) * (@n[k,i] + @n[i,k])
 189       end
 190       sum *= -2.0
 191     else # k != j
 192       sum = 2.0 * win_rate(j,k) * win_rate(k,j) * (@n[k,j] + @n[j,k])
 193     end
 194     sum
 195   end
 196
 197   ##
 198   # Jacobi matrix of the func().
 199   #   m00 m01
 200   #   m10 m11
 201   #
 202   def j_matrix
 203     GSL::Matrix[*
 204       (0...@size).collect do |k|
 205         (0...@size).collect do |j|
 206           d_func(k,j)
 207         end
 208       end
 209     ]
 210   end
 211
 212   ##
 213   # The initial value of the rate, which is of very importance for Newton
 214   # method.  This is based on my huristics; the higher the win probablity of
 215   # a player is, the greater points he takes.
 216   #
 217   def initial_rate
 218     possibility =
 219       player_vector do |k|
 220         v = GSL::Vector[0, 0]
 221         each_player do |i|
 222           next if k == i
 223           v += GSL::Vector[@n[k,i], @n[i,k]]
 224         end
 225         v.nrm2 < 1 ? 0 : v[0] / (v[0] + v[1])
 226       end
 227     rank = possibility.sort_index
 228     @rate = player_vector do |k|
 229       K*500 * (rank[k]+1) / @size
 230     end
 231     average!
 232   end
 233
 234   ##
 235   # Resets @rate as the higher the current win probablity of a player is,
 236   # the greater points he takes.
 237   #
 238   def initial_rate2
 239     @rate = @record.get || @rate
 240     rank = @rate.sort_index
 241     @rate = player_vector do |k|
 242       K*@count*1.5 * (rank[k]+1) / @size
 243     end
 244     average!
 245   end
 246
 247   # mu is the deaccelrating parameter in Deaccelerated Newton method
 248   def deaccelrate(mu, old_rate, a, old_f_nrm2)
 249     @rate = old_rate - a * mu
 250     if func_vector.nrm2 < (1 - mu / 4.0 ) * old_f_nrm2 then
 251       return
 252     end
 253     if mu < 1e-4
 254       @record.set(func_vector.nrm2, @rate)
 255       initial_rate2
 256       return
 257     end
 258     $stderr.puts "mu: %f " % [mu] if $DEBUG
 259     deaccelrate(mu*0.5, old_rate, a, old_f_nrm2)
 260   end
 261
 262   ##
 263   # Main process to calculate ratings.
 264   #
 265   def rating
 266     # Counter to stop the process.
 267     # Calulation in Newton method may fall in an infinite loop
 268     @count = 0
 269
 270     # Main loop
 271     begin
 272       # Solve the equation:
 273       #   J*a=f
 274       #   @rate_(n+1) = @rate_(n) - a
 275       #
 276       # f.nrm2 should approach to zero.
 277       f = func_vector
 278       j = j_matrix
 279
 280       # $stderr.puts "j: %s" % [j.inspect] if $DEBUG
 281       $stderr.puts "f: %s -> %f" % [f.to_a.inspect, f.nrm2] if $DEBUG
 282
 283       # GSL::Linalg::LU.solve or GSL::Linalg::HH.solve would be available instead.
 284       a = GSL::Linalg::SV.solve(j, f)
 285       a = self.class.average(a)
 286       # $stderr.puts "a: %s -> %f" % [a.to_a.inspect, a.nrm2] if $DEBUG
 287
 288       # Deaccelerated Newton method
 289       # GSL::Vector object should be immutable.
 290       old_rate   = @rate
 291       old_f      = f
 292       old_f_nrm2 = old_f.nrm2
 293       deaccelrate(1.0, old_rate, a, old_f_nrm2)
 294       @record.set(func_vector.nrm2, @rate)
 295
 296       $stderr.printf "|error| : %5.2e\n", a.nrm2 if $DEBUG
 297
 298       @count += 1
 299       if @count > COUNT_MAX
 300         $stderr.puts "Values seem to oscillate. Stopped the process."
 301         $stderr.puts "f: %s -> %f" % [func_vector.to_a.inspect, func_vector.nrm2]
 302         break
 303       end
 304
 305     end while (a.nrm2 > ERROR_LIMIT * @rate.nrm2)
 306
 307     @rate = @record.get
 308     $stderr.puts "resolved f: %s -> %f" %
 309       [func_vector.to_a.inspect, func_vector.nrm2] if $DEBUG
 310
 311     @rate *= 1.0/K
 312     finite!
 313     self
 314   end
 315
 316   ##
 317   # Make the values of @rate finite.
 318   #
 319   def finite!
 320     @rate = @rate.collect do |a|
 321       if a.infinite?
 322         a.infinite? * AVERAGE_RATE * 100
 323       else
 324         a
 325       end
 326     end
 327   end
 328
 329   ##
 330   # Flatten the values of @rate.
 331   #
 332   def average!(mean=0.0)
 333     @rate = self.class.average(@rate, mean)
 334   end
 335
 336   ##
 337   # Make the values of @rate integer.
 338   #
 339   def integer!
 340     @rate = @rate.collect do |a|
 341       if a.finite?
 342         a.to_i
 343       elsif a.nan?
 344         0
 345       elsif a.infinite?
 346         a.infinite? * AVERAGE_RATE * 100
 347       end
 348     end
 349   end
 350 end
 351
 352 #################################################
 353 # Encapsulate a pair of keys and win loss matrix.
 354 #   - keys is an array of player IDs; [gps+123, foo+234, ...]
 355 #   - matrix holds games # where player i (row index) beats player j (column index).
 356 #     The row and column indexes match with the keys.
 357 #
 358 # This object should be immutable. If an internal state is being modified, a
 359 # new object is always returned.
 360 #
 361 class WinLossMatrix
 362
 363   ###############
 364   # Class methods
 365   #
 366
 367   def self.mk_matrix(players)
 368     keys = players.keys.sort
 369     size = keys.size
 370     matrix =
 371       GSL::Matrix[*
 372       ((0...size).collect do |k|
 373         p1 = keys[k]
 374         p1_hash = players[p1]
 375         ((0...size).collect do |j|
 376           if k == j
 377             0
 378           else
 379             p2 = keys[j]
 380             v = p1_hash[p2] || Vector[0,0]
 381             v[0]
 382           end
 383         end)
 384       end)]
 385     return WinLossMatrix.new(keys, matrix)
 386   end
 387
 388   def self.mk_win_loss_matrix(players)
 389     obj = mk_matrix(players)
 390     return obj.filter
 391   end
 392
 393   ##################
 394   # Instance methods
 395   #
 396
 397   # an array of player IDs; [gps+123, foo+234, ...]
 398   attr_reader :keys
 399
 400   # matrix holds games # where player i (row index) beats player j (column index).
 401   # The row and column indexes match with the keys.
 402   attr_reader :matrix
 403
 404   def initialize(keys, matrix)
 405     @keys   = keys
 406     @matrix = matrix
 407   end
 408
 409   ##
 410   # Returns the size of the keys/matrix
 411   #
 412   def size
 413     if @keys
 414       @keys.size
 415     else
 416       nil
 417     end
 418   end
 419
 420   ##
 421   # Removes a delete_index'th player and returns a new object.
 422   #
 423   def delete_row(delete_index)
 424     copied_cols = []
 425     (0...size).each do |i|
 426       next if i == delete_index
 427       row = @matrix.row(i).clone
 428       row.delete_at(delete_index)
 429       copied_cols << row
 430     end
 431     if copied_cols.size == 0
 432       new_matrix = GSL::Matrix.new
 433     else
 434       new_matrix = GSL::Matrix[*copied_cols]
 435     end
 436     new_keys = @keys.clone
 437     new_keys.delete_at(delete_index)
 438     return WinLossMatrix.new(new_keys, new_matrix)
 439   end
 440
 441   ##
 442   # Removes players in a rows; [1,3,5]
 443   #
 444   def delete_rows(rows)
 445     obj = self
 446     rows.sort.reverse.each do |index|
 447       obj = obj.delete_row(index)
 448     end
 449     obj
 450   end
 451
 452   ##
 453   # Removes players who do not pass a criteria to be rated, and returns a
 454   # new object.
 455   #
 456   def filter
 457     $stderr.puts @keys.inspect if $DEBUG
 458     $stderr.puts @matrix.inspect if $DEBUG
 459     delete = []
 460     (0...size).each do |i|
 461       row = @matrix.row(i)
 462       col = @matrix.col(i)
 463       win  = row.sum
 464       loss = col.sum
 465       if win < 1 || loss < 1 || win + loss < $GAMES_LIMIT
 466         delete << i
 467       end
 468     end
 469
 470     # The recursion ends if there is nothing to delete
 471     return self if delete.empty?
 472
 473     new_obj = delete_rows(delete)
 474     new_obj.filter
 475   end
 476
 477   ##
 478   # Cuts self into connecting groups such as each player in a group has at least
 479   # one game with other players in the group. Returns them as an array.
 480   #
 481   def connected_subsets
 482     g = RGL::AdjacencyGraph.new
 483     (0...size).each do |k|
 484       (0...size).each do |i|
 485         next if k == i
 486         if @matrix[k,i] > 0
 487           g.add_edge(k,i)
 488         end
 489       end
 490     end
 491
 492     subsets = []
 493     g.each_connected_component do |c|
 494       new_keys = []
 495       c.each do |v|
 496         new_keys << keys[v.to_s.to_i]
 497       end
 498       subsets << new_keys
 499     end
 500
 501     subsets = subsets.sort {|a,b| b.size <=> a.size}
 502
 503     result = subsets.collect do |keys|
 504       matrix =
 505         GSL::Matrix[*
 506         ((0...keys.size).collect do |k|
 507           p1 = @keys.index(keys[k])
 508           ((0...keys.size).collect do |j|
 509             if k == j
 510               0
 511             else
 512               p2 = @keys.index(keys[j])
 513               @matrix[p1,p2]
 514             end
 515           end)
 516         end)]
 517       WinLossMatrix.new(keys, matrix)
 518     end
 519
 520     return result
 521   end
 522
 523   def to_s
 524     "size : #{@keys.size}" + "\n" +
 525     @keys.inspect + "\n" +
 526     @matrix.inspect
 527   end
 528
 529 end
 530
 531
 532 #################################################
 533 # Main methods
 534 #
 535
 536 # Half-life effect
 537 # After NHAFE_LIFE days value will get half.
 538 # 0.693 is constant, where exp(0.693) ~ 0.5
 539 NHALF_LIFE=60
 540 def half_life(days)
 541   if days < 7
 542     return 1.0
 543   else
 544     Math::exp(-0.693/NHALF_LIFE*(days-7))
 545   end
 546 end
 547
 548 def _add_win_loss(winner, loser, time)
 549   how_long_days = (Time.now - time)/(3600*24)
 550   $players[winner] ||= Hash.new { GSL::Vector[0,0] }
 551   $players[loser]  ||= Hash.new { GSL::Vector[0,0] }
 552   $players[winner][loser] += GSL::Vector[1.0*half_life(how_long_days),0]
 553   $players[loser][winner] += GSL::Vector[0,1.0*half_life(how_long_days)]
 554 end
 555
 556 def _add_time(player, time)
 557   $players_time[player] = time if $players_time[player] < time
 558 end
 559
 560 def add(black_mark, black_name, white_name, white_mark, time)
 561   if black_mark == WIN_MARK && white_mark == LOSS_MARK
 562     _add_win_loss(black_name, white_name, time)
 563   elsif black_mark == LOSS_MARK && white_mark == WIN_MARK
 564     _add_win_loss(white_name, black_name, time)
 565   elsif black_mark == DRAW_MARK && white_mark == DRAW_MARK
 566     return
 567   else
 568     raise "Never reached!"
 569   end
 570   _add_time(black_name, time)
 571   _add_time(white_name, time)
 572 end
 573
 574 def identify_id(id)
 575   if /@NORATE\+/ =~ id # the player having @NORATE in the name should not be rated
 576     return nil
 577   end
 578   id.gsub(/@.*?\+/,"+")
 579 end
 580
 581 def grep(file)
 582   str = File.open(file).read
 583
 584   if /^N\+(.*)$/ =~ str then black_name = $1.strip end
 585   if /^N\-(.*)$/ =~ str then white_name = $1.strip end
 586
 587   if /^'summary:(.*)$/ =~ str
 588     state, p1, p2 = $1.split(":").map {|a| a.strip}
 589     return if state == "abnormal"
 590     p1_name, p1_mark = p1.split(" ")
 591     p2_name, p2_mark = p2.split(" ")
 592     if p1_name == black_name
 593       black_name, black_mark = p1_name, p1_mark
 594       white_name, white_mark = p2_name, p2_mark
 595     elsif p2_name == black_name
 596       black_name, black_mark = p2_name, p2_mark
 597       white_name, white_mark = p1_name, p1_mark
 598     else
 599       raise "Never reach!: #{black} #{white} #{p3} #{p2}"
 600     end
 601   end
 602   if /^'\$END_TIME:(.*)$/ =~ str
 603     time = Time.parse($1.strip)
 604   end
 605   if /^'rating:(.*)$/ =~ str
 606     black_id, white_id = $1.split(":").map {|a| a.strip}
 607     black_id = identify_id(black_id)
 608     white_id = identify_id(white_id)
 609     if black_id && white_id && (black_id != white_id)
 610       add(black_mark, black_id, white_id, white_mark, time)
 611     end
 612   end
 613 end
 614
 615 def usage
 616   $stderr.puts <<-EOF
 617 USAGE: #{$0} dir [...]
 618   EOF
 619   exit 1
 620 end
 621
 622 def main
 623   usage if ARGV.empty?
 624   while dir = ARGV.shift do
 625     Dir.glob( File.join(dir, "**", "*.csa") ) {|f| grep(f)}
 626   end
 627
 628   yaml = {}
 629   yaml["players"] = {}
 630   rating_group = 0
 631   if $players.size > 0
 632     obj = WinLossMatrix::mk_win_loss_matrix($players)
 633     obj.connected_subsets.each do |win_loss_matrix|
 634       yaml["players"][rating_group] = {}
 635
 636       rating = Rating.new(win_loss_matrix.matrix)
 637       rating.rating
 638       rating.average!(Rating::AVERAGE_RATE)
 639       rating.integer!
 640
 641       win_loss_matrix.keys.each_with_index do |p, i| # player_id, index#
 642         win  = win_loss_matrix.matrix.row(i).sum
 643         loss = win_loss_matrix.matrix.col(i).sum
 644
 645         yaml["players"][rating_group][p] =
 646           { 'name' => p.split("+")[0],
 647             'rating_group' => rating_group,
 648             'rate' => rating.rate[i],
 649             'last_modified' => $players_time[p].dup,
 650             'win'  => win,
 651             'loss' => loss}
 652       end
 653       rating_group += 1
 654     end
 655   end
 656   rating_group -= 1
 657   non_rated_group = 999 # large enough
 658   yaml["players"][non_rated_group] = {}
 659   $players.each_key do |id|
 660     # skip players who have already been rated
 661     found = false
 662     (0..rating_group).each do |i|
 663        found = true if yaml["players"][i][id]
 664        break if found
 665     end
 666     next if found
 667
 668     v = GSL::Vector[0, 0]
 669     $players[id].each_value {|value| v += value}
 670     next if v[0] < 1 && v[1] < 1
 671
 672     yaml["players"][non_rated_group][id] =
 673       { 'name' => id.split("+")[0],
 674         'rating_group' => non_rated_group,
 675         'rate' => 0,
 676         'last_modified' => $players_time[id].dup,
 677         'win'  => v[0],
 678         'loss' => v[1]}
 679   end
 680   puts yaml.to_yaml
 681 end
 682
 683 if __FILE__ == $0
 684   main
 685 end
 686
 687 # vim: ts=2 sw=2 sts=0