mk_rate

   1 #!/usr/bin/ruby
   2 ## $Id$
   3
   4 ## Copyright (C) 2006-2008 Daigo Moriwaki <daigo at debian dot org>
   5 ##
   6 ## This program is free software; you can redistribute it and/or modify
   7 ## it under the terms of the GNU General Public License as published by
   8 ## the Free Software Foundation; either version 2 of the License, or
   9 ## (at your option) any later version.
  10 ##
  11 ## This program is distributed in the hope that it will be useful,
  12 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 ## GNU General Public License for more details.
  15 ##
  16 ## You should have received a copy of the GNU General Public License
  17 ## along with this program; if not, write to the Free Software
  18 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19
  20 #
  21 # This calculates rating scores of every players from CSA files, and outputs a
  22 # yaml file (players.yaml) that Shogi Server can read.
  23 #
  24 # Sample:
  25 #   $ ./mk_rate . > players.yaml
  26 #   $ ./mk_rate . && ./mk_rate . > players.yaml
  27 #
  28 # The conditions that games and players are rated as following:
  29 #   * Rated games, which were played by both rated players.
  30 #   * Rated players, who logged in the server with a name followed by a trip:
  31 #     "name,trip".
  32 #   * (Rated) players, who played more than $GAMES_LIMIT [15] (rated) games.
  33 #
  34 #
  35 # PREREQUIRE
  36 # ==========
  37 #
  38 # Sample Commands to isntall prerequires will work for Debian.
  39 #
  40 # * Rubygems
  41 #   $ sudo aptitude install rubygems
  42 #
  43 # * Ruby bindings for the GNU Scientific Library (GSL)
  44 #   $ sudo aptitude install libgsl-ruby1.8
  45 #   Or, download it from  http://rb-gsl.rubyforge.org/ .
  46 #
  47 # * RGL: Ruby Graph Library
  48 #   $ sudo gem install rgl
  49 #   Or, download it from http://rubyforge.org/projects/rgl/ .
  50 #
  51
  52 require 'yaml'
  53 require 'time'
  54 require 'gsl'
  55 require 'rubygems'
  56 require 'rgl/adjacency'
  57 require 'rgl/connected_components'
  58
  59 #################################################
  60 # Constants
  61 #
  62
  63 # Count out players who play less games than $GAMES_LIMIT
  64 $GAMES_LIMIT = $DEBUG ? 0 : 15
  65 WIN_MARK  = "win"
  66 LOSS_MARK = "lose"
  67 DRAW_MARK = "draw"
  68
  69 # Holds players
  70 $players = Hash.new
  71 # Holds the last time when a player gamed
  72 $players_time = Hash.new { Time.at(0) }
  73
  74
  75 #################################################
  76 # Keeps the value of the lowest key
  77 #
  78 class Record
  79   def initialize
  80     @lowest = []
  81   end
  82
  83   def set(key, value)
  84     if @lowest.empty? || key < @lowest[0]
  85       @lowest = [key, value]
  86     end
  87   end
  88
  89   def get
  90     if @lowest.empty?
  91       nil
  92     else
  93       @lowest[1]
  94     end
  95   end
  96 end
  97
  98 #################################################
  99 # Calculates rates of every player from a Win Loss GSL::Matrix
 100 #
 101 class Rating
 102   include Math
 103
 104   # The model of the win possibility is 1/(1 + 10^(-d/400)).
 105   # The equation in this class is 1/(1 + e^(-Kd)).
 106   # So, K should be calculated like this.
 107   K = Math.log(10.0) / 400.0
 108
 109   # Convergence limit to stop Newton method.
 110   ERROR_LIMIT = 1.0e-3
 111   # Stop Newton method after this iterations.
 112   COUNT_MAX = 500
 113
 114   # Average rate among the players
 115   AVERAGE_RATE = 1000
 116
 117
 118   ###############
 119   # Class methods
 120   #
 121
 122   ##
 123   # Calcurates the average of the vector.
 124   #
 125   def Rating.average(vector, mean=0.0)
 126     sum = Array(vector).inject(0.0) {|sum, n| sum + n}
 127     vector -= GSL::Vector[*Array.new(vector.size, sum/vector.size - mean)]
 128     vector
 129   end
 130
 131   ##################
 132   # Instance methods
 133   #
 134   def initialize(win_loss_matrix)
 135     @record = Record.new
 136     @n = win_loss_matrix
 137     case @n
 138     when GSL::Matrix, GSL::Matrix::Int
 139       @size = @n.size1
 140     when ::Matrix
 141       @size = @n.row_size
 142     else
 143       raise ArgumentError
 144     end
 145     initial_rate
 146   end
 147   attr_reader :rate, :n
 148
 149   def player_vector
 150     GSL::Vector[*
 151       (0...@size).collect {|k| yield k}
 152     ]
 153   end
 154
 155   def each_player
 156     (0...@size).each {|k| yield k}
 157   end
 158
 159   ##
 160   # The possibility that the player k will beet the player i.
 161   #
 162   def win_rate(k,i)
 163     1.0/(1.0 + exp(@rate[i]-@rate[k]))
 164   end
 165
 166   ##
 167   # Most possible equation
 168   #
 169   def func_vector
 170     player_vector do|k|
 171       sum = 0.0
 172       each_player do |i|
 173         next if i == k
 174         sum += @n[k,i] * win_rate(i,k) - @n[i,k] * win_rate(k,i)
 175       end
 176       sum * 2.0
 177     end
 178   end
 179
 180   ##
 181   #           / f0/R0 f0/R1 f0/R2 ... \
 182   # dfk/dRj = | f1/R0 f1/R1 f1/R2 ... |
 183   #           \ f2/R0 f2/R1 f2/R2 ... /
 184   def d_func(k,j)
 185     sum = 0.0
 186     if k == j
 187       each_player do |i|
 188         next if i == k
 189         sum += win_rate(i,k) * win_rate(k,i) * (@n[k,i] + @n[i,k])
 190       end
 191       sum *= -2.0
 192     else # k != j
 193       sum = 2.0 * win_rate(j,k) * win_rate(k,j) * (@n[k,j] + @n[j,k])
 194     end
 195     sum
 196   end
 197
 198   ##
 199   # Jacobi matrix of the func().
 200   #   m00 m01
 201   #   m10 m11
 202   #
 203   def j_matrix
 204     GSL::Matrix[*
 205       (0...@size).collect do |k|
 206         (0...@size).collect do |j|
 207           d_func(k,j)
 208         end
 209       end
 210     ]
 211   end
 212
 213   ##
 214   # The initial value of the rate, which is of very importance for Newton
 215   # method.  This is based on my huristics; the higher the win probablity of
 216   # a player is, the greater points he takes.
 217   #
 218   def initial_rate
 219     possibility =
 220       player_vector do |k|
 221         v = GSL::Vector[0, 0]
 222         each_player do |i|
 223           next if k == i
 224           v += GSL::Vector[@n[k,i], @n[i,k]]
 225         end
 226         v.nrm2 < 1 ? 0 : v[0] / (v[0] + v[1])
 227       end
 228     rank = possibility.sort_index
 229     @rate = player_vector do |k|
 230       K*500 * (rank[k]+1) / @size
 231     end
 232     average!
 233   end
 234
 235   ##
 236   # Resets @rate as the higher the current win probablity of a player is,
 237   # the greater points he takes.
 238   #
 239   def initial_rate2
 240     @rate = @record.get || @rate
 241     rank = @rate.sort_index
 242     @rate = player_vector do |k|
 243       K*@count*1.5 * (rank[k]+1) / @size
 244     end
 245     average!
 246   end
 247
 248   # mu is the deaccelrating parameter in Deaccelerated Newton method
 249   def deaccelrate(mu, old_rate, a, old_f_nrm2)
 250     @rate = old_rate - a * mu
 251     if func_vector.nrm2 < (1 - mu / 4.0 ) * old_f_nrm2 then
 252       return
 253     end
 254     if mu < 1e-4
 255       @record.set(func_vector.nrm2, @rate)
 256       initial_rate2
 257       return
 258     end
 259     $stderr.puts "mu: %f " % [mu] if $DEBUG
 260     deaccelrate(mu*0.5, old_rate, a, old_f_nrm2)
 261   end
 262
 263   ##
 264   # Main process to calculate ratings.
 265   #
 266   def rating
 267     # Counter to stop the process.
 268     # Calulation in Newton method may fall in an infinite loop
 269     @count = 0
 270
 271     # Main loop
 272     begin
 273       # Solve the equation:
 274       #   J*a=f
 275       #   @rate_(n+1) = @rate_(n) - a
 276       #
 277       # f.nrm2 should approach to zero.
 278       f = func_vector
 279       j = j_matrix
 280
 281       # $stderr.puts "j: %s" % [j.inspect] if $DEBUG
 282       $stderr.puts "f: %s -> %f" % [f.to_a.inspect, f.nrm2] if $DEBUG
 283
 284       # GSL::Linalg::LU.solve or GSL::Linalg::HH.solve would be available instead.
 285       a = GSL::Linalg::SV.solve(j, f)
 286       a = self.class.average(a)
 287       # $stderr.puts "a: %s -> %f" % [a.to_a.inspect, a.nrm2] if $DEBUG
 288
 289       # Deaccelerated Newton method
 290       # GSL::Vector object should be immutable.
 291       old_rate   = @rate
 292       old_f      = f
 293       old_f_nrm2 = old_f.nrm2
 294       deaccelrate(1.0, old_rate, a, old_f_nrm2)
 295       @record.set(func_vector.nrm2, @rate)
 296
 297       $stderr.printf "|error| : %5.2e\n", a.nrm2 if $DEBUG
 298
 299       @count += 1
 300       if @count > COUNT_MAX
 301         $stderr.puts "Values seem to oscillate. Stopped the process."
 302         $stderr.puts "f: %s -> %f" % [func_vector.to_a.inspect, func_vector.nrm2]
 303         break
 304       end
 305
 306     end while (a.nrm2 > ERROR_LIMIT * @rate.nrm2)
 307
 308     @rate = @record.get
 309     $stderr.puts "resolved f: %s -> %f" %
 310       [func_vector.to_a.inspect, func_vector.nrm2] if $DEBUG
 311
 312     @rate *= 1.0/K
 313     finite!
 314     self
 315   end
 316
 317   ##
 318   # Make the values of @rate finite.
 319   #
 320   def finite!
 321     @rate = @rate.collect do |a|
 322       if a.infinite?
 323         a.infinite? * AVERAGE_RATE * 100
 324       else
 325         a
 326       end
 327     end
 328   end
 329
 330   ##
 331   # Flatten the values of @rate.
 332   #
 333   def average!(mean=0.0)
 334     @rate = self.class.average(@rate, mean)
 335   end
 336
 337   ##
 338   # Make the values of @rate integer.
 339   #
 340   def integer!
 341     @rate = @rate.collect do |a|
 342       if a.finite?
 343         a.to_i
 344       elsif a.nan?
 345         0
 346       elsif a.infinite?
 347         a.infinite? * AVERAGE_RATE * 100
 348       end
 349     end
 350   end
 351 end
 352
 353 #################################################
 354 # Encapsulate a pair of keys and win loss matrix.
 355 #   - keys is an array of player IDs; [gps+123, foo+234, ...]
 356 #   - matrix holds games # where player i (row index) beats player j (column index).
 357 #     The row and column indexes match with the keys.
 358 #
 359 # This object should be immutable. If an internal state is being modified, a
 360 # new object is always returned.
 361 #
 362 class WinLossMatrix
 363
 364   ###############
 365   # Class methods
 366   #
 367
 368   def self.mk_matrix(players)
 369     keys = players.keys.sort
 370     size = keys.size
 371     matrix =
 372       GSL::Matrix[*
 373       ((0...size).collect do |k|
 374         p1 = keys[k]
 375         p1_hash = players[p1]
 376         ((0...size).collect do |j|
 377           if k == j
 378             0
 379           else
 380             p2 = keys[j]
 381             v = p1_hash[p2] || Vector[0,0]
 382             v[0]
 383           end
 384         end)
 385       end)]
 386     return WinLossMatrix.new(keys, matrix)
 387   end
 388
 389   def self.mk_win_loss_matrix(players)
 390     obj = mk_matrix(players)
 391     return obj.filter
 392   end
 393
 394   ##################
 395   # Instance methods
 396   #
 397
 398   # an array of player IDs; [gps+123, foo+234, ...]
 399   attr_reader :keys
 400
 401   # matrix holds games # where player i (row index) beats player j (column index).
 402   # The row and column indexes match with the keys.
 403   attr_reader :matrix
 404
 405   def initialize(keys, matrix)
 406     @keys   = keys
 407     @matrix = matrix
 408   end
 409
 410   ##
 411   # Returns the size of the keys/matrix
 412   #
 413   def size
 414     if @keys
 415       @keys.size
 416     else
 417       nil
 418     end
 419   end
 420
 421   ##
 422   # Removes a delete_index'th player and returns a new object.
 423   #
 424   def delete_row(delete_index)
 425     copied_cols = []
 426     (0...size).each do |i|
 427       next if i == delete_index
 428       row = @matrix.row(i).clone
 429       row.delete_at(delete_index)
 430       copied_cols << row
 431     end
 432     if copied_cols.size == 0
 433       new_matrix = GSL::Matrix.new
 434     else
 435       new_matrix = GSL::Matrix[*copied_cols]
 436     end
 437     new_keys = @keys.clone
 438     new_keys.delete_at(delete_index)
 439     return WinLossMatrix.new(new_keys, new_matrix)
 440   end
 441
 442   ##
 443   # Removes players in a rows; [1,3,5]
 444   #
 445   def delete_rows(rows)
 446     obj = self
 447     rows.sort.reverse.each do |index|
 448       obj = obj.delete_row(index)
 449     end
 450     obj
 451   end
 452
 453   ##
 454   # Removes players who do not pass a criteria to be rated, and returns a
 455   # new object.
 456   #
 457   def filter
 458     $stderr.puts @keys.inspect if $DEBUG
 459     $stderr.puts @matrix.inspect if $DEBUG
 460     delete = []
 461     (0...size).each do |i|
 462       row = @matrix.row(i)
 463       col = @matrix.col(i)
 464       win  = row.sum
 465       loss = col.sum
 466       if win < 1 || loss < 1 || win + loss < $GAMES_LIMIT
 467         delete << i
 468       end
 469     end
 470
 471     # The recursion ends if there is nothing to delete
 472     return self if delete.empty?
 473
 474     new_obj = delete_rows(delete)
 475     new_obj.filter
 476   end
 477
 478   ##
 479   # Cuts self into connecting groups such as each player in a group has at least
 480   # one game with other players in the group. Returns them as an array.
 481   #
 482   def connected_subsets
 483     g = RGL::AdjacencyGraph.new
 484     (0...size).each do |k|
 485       (0...size).each do |i|
 486         next if k == i
 487         if @matrix[k,i] > 0
 488           g.add_edge(k,i)
 489         end
 490       end
 491     end
 492
 493     subsets = []
 494     g.each_connected_component do |c|
 495       new_keys = []
 496       c.each do |v|
 497         new_keys << keys[v.to_s.to_i]
 498       end
 499       subsets << new_keys
 500     end
 501
 502     subsets = subsets.sort {|a,b| b.size <=> a.size}
 503
 504     result = subsets.collect do |keys|
 505       matrix =
 506         GSL::Matrix[*
 507         ((0...keys.size).collect do |k|
 508           p1 = @keys.index(keys[k])
 509           ((0...keys.size).collect do |j|
 510             if k == j
 511               0
 512             else
 513               p2 = @keys.index(keys[j])
 514               @matrix[p1,p2]
 515             end
 516           end)
 517         end)]
 518       WinLossMatrix.new(keys, matrix)
 519     end
 520
 521     return result
 522   end
 523
 524   def to_s
 525     "size : #{@keys.size}" + "\n" +
 526     @keys.inspect + "\n" +
 527     @matrix.inspect
 528   end
 529
 530 end
 531
 532
 533 #################################################
 534 # Main methods
 535 #
 536
 537 # Half-life effect
 538 # After NHAFE_LIFE days value will get half.
 539 # 0.693 is constant, where exp(0.693) ~ 0.5
 540 NHALF_LIFE=60
 541 def half_life(days)
 542   if days < 7
 543     return 1.0
 544   else
 545     Math::exp(-0.693/NHALF_LIFE*(days-7))
 546   end
 547 end
 548
 549 def _add_win_loss(winner, loser, time)
 550   how_long_days = (Time.now - time)/(3600*24)
 551   $players[winner] ||= Hash.new { GSL::Vector[0,0] }
 552   $players[loser]  ||= Hash.new { GSL::Vector[0,0] }
 553   $players[winner][loser] += GSL::Vector[1.0*half_life(how_long_days),0]
 554   $players[loser][winner] += GSL::Vector[0,1.0*half_life(how_long_days)]
 555 end
 556
 557 def _add_time(player, time)
 558   $players_time[player] = time if $players_time[player] < time
 559 end
 560
 561 def add(black_mark, black_name, white_name, white_mark, time)
 562   if black_mark == WIN_MARK && white_mark == LOSS_MARK
 563     _add_win_loss(black_name, white_name, time)
 564   elsif black_mark == LOSS_MARK && white_mark == WIN_MARK
 565     _add_win_loss(white_name, black_name, time)
 566   elsif black_mark == DRAW_MARK && white_mark == DRAW_MARK
 567     return
 568   else
 569     raise "Never reached!"
 570   end
 571   _add_time(black_name, time)
 572   _add_time(white_name, time)
 573 end
 574
 575 def identify_id(id)
 576   if /@NORATE\+/ =~ id # the player having @NORATE in the name should not be rated
 577     return nil
 578   end
 579   id.gsub(/@.*?\+/,"+")
 580 end
 581
 582 def grep(file)
 583   str = File.open(file).read
 584
 585   if /^N\+(.*)$/ =~ str then black_name = $1.strip end
 586   if /^N\-(.*)$/ =~ str then white_name = $1.strip end
 587
 588   if /^'summary:(.*)$/ =~ str
 589     state, p1, p2 = $1.split(":").map {|a| a.strip}
 590     return if state == "abnormal"
 591     p1_name, p1_mark = p1.split(" ")
 592     p2_name, p2_mark = p2.split(" ")
 593     if p1_name == black_name
 594       black_name, black_mark = p1_name, p1_mark
 595       white_name, white_mark = p2_name, p2_mark
 596     elsif p2_name == black_name
 597       black_name, black_mark = p2_name, p2_mark
 598       white_name, white_mark = p1_name, p1_mark
 599     else
 600       raise "Never reach!: #{black} #{white} #{p3} #{p2}"
 601     end
 602   end
 603   if /^'\$END_TIME:(.*)$/ =~ str
 604     time = Time.parse($1.strip)
 605   end
 606   if /^'rating:(.*)$/ =~ str
 607     black_id, white_id = $1.split(":").map {|a| a.strip}
 608     black_id = identify_id(black_id)
 609     white_id = identify_id(white_id)
 610     if black_id && white_id && (black_id != white_id)
 611       add(black_mark, black_id, white_id, white_mark, time)
 612     end
 613   end
 614 end
 615
 616 def usage
 617   $stderr.puts <<-EOF
 618 USAGE: #{$0} dir [...]
 619   EOF
 620   exit 1
 621 end
 622
 623 def validate(yaml)
 624   yaml["players"].each do |group_key, group|
 625     group.each do |player_key, player|
 626       rate = player['rate']
 627       next unless rate
 628       if rate > 10000 || rate < -10000
 629         return false
 630       end
 631     end
 632   end
 633   return true
 634 end
 635
 636 def main
 637   usage if ARGV.empty?
 638   while dir = ARGV.shift do
 639     Dir.glob( File.join(dir, "**", "*.csa") ) {|f| grep(f)}
 640   end
 641
 642   yaml = {}
 643   yaml["players"] = {}
 644   rating_group = 0
 645   if $players.size > 0
 646     obj = WinLossMatrix::mk_win_loss_matrix($players)
 647     obj.connected_subsets.each do |win_loss_matrix|
 648       yaml["players"][rating_group] = {}
 649
 650       rating = Rating.new(win_loss_matrix.matrix)
 651       rating.rating
 652       rating.average!(Rating::AVERAGE_RATE)
 653       rating.integer!
 654
 655       win_loss_matrix.keys.each_with_index do |p, i| # player_id, index#
 656         win  = win_loss_matrix.matrix.row(i).sum
 657         loss = win_loss_matrix.matrix.col(i).sum
 658
 659         yaml["players"][rating_group][p] =
 660           { 'name' => p.split("+")[0],
 661             'rating_group' => rating_group,
 662             'rate' => rating.rate[i],
 663             'last_modified' => $players_time[p].dup,
 664             'win'  => win,
 665             'loss' => loss}
 666       end
 667       rating_group += 1
 668     end
 669   end
 670   rating_group -= 1
 671   non_rated_group = 999 # large enough
 672   yaml["players"][non_rated_group] = {}
 673   $players.each_key do |id|
 674     # skip players who have already been rated
 675     found = false
 676     (0..rating_group).each do |i|
 677        found = true if yaml["players"][i][id]
 678        break if found
 679     end
 680     next if found
 681
 682     v = GSL::Vector[0, 0]
 683     $players[id].each_value {|value| v += value}
 684     next if v[0] < 1 && v[1] < 1
 685
 686     yaml["players"][non_rated_group][id] =
 687       { 'name' => id.split("+")[0],
 688         'rating_group' => non_rated_group,
 689         'rate' => 0,
 690         'last_modified' => $players_time[id].dup,
 691         'win'  => v[0],
 692         'loss' => v[1]}
 693   end
 694   unless validate(yaml)
 695     $stderr.puts "Aborted. It did not result in valid ratings."
 696     exit 10
 697   end
 698   puts yaml.to_yaml
 699 end
 700
 701 if __FILE__ == $0
 702   main
 703 end
 704
 705 # vim: ts=2 sw=2 sts=0