OSDN Git Service

Add a script that merges the Google Analytics log with the OTP log.
author: hylom <hylom@users.sourceforge.jp>
Thu, 12 Mar 2009 10:31:02 +0000 (19:31 +0900)
committer: hylom <hylom@users.sourceforge.jp>
Thu, 12 Mar 2009 10:31:02 +0000 (19:31 +0900)
merge_csv.py [new file with mode: 0755]

diff --git a/merge_csv.py b/merge_csv.py
new file mode 100755 (executable)
index 0000000..b4afb9d
--- /dev/null
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Merge a Google Analytics CSV export with an OTP article CSV.
+
+Usage: merge_csv.py <ga file> <otp file>
+
+Every GA row is written to stdout as one comma-joined line.  When an OTP
+title occurs inside a GA page title, that OTP row's date and tags columns
+are appended to the GA row.  OTP rows matching no GA row go to stderr.
+
+NOTE: rows are split on every ","; quoted fields containing commas are
+not handled.
+"""
+
+import os, sys
+import codecs
+
+
+def load_ga(path):
+    """Read the GA file and return (ga_dict, ga_titles).
+
+    ga_dict maps each page title to the row's remaining columns and
+    ga_titles lists the titles in file order.  Columns in the file:
+    Page Title,Pageviews,Unique Pageviews,Avg. Time on Page,Bounce Rate,% Exit,$ Index
+    """
+    ga_dict = {}
+    ga_titles = []
+    with codecs.open(path, "r", "utf_8") as ga_file:
+        for row in ga_file:
+            row = row.strip()
+            if not row:
+                # A blank line has no title; skip it.
+                continue
+            items = row.split(",")
+            title = items.pop(0)
+            ga_dict[title] = items
+            ga_titles.append(title)
+    return ga_dict, ga_titles
+
+
+def merge_otp(path, ga_dict, ga_titles):
+    """Append OTP date and tags to the GA rows they match.
+
+    Matched entries are moved out of ga_dict into the returned dict, so
+    afterwards ga_dict holds only the GA rows that no OTP row matched.
+    OTP columns:
+    0   1     2      3   4        5    6
+    url,title,editor,PVs,comments,date,tags
+    """
+    updated_dict = {}
+    with codecs.open(path, "r", "utf_8") as otp_file:
+        for row in otp_file:
+            row = row.strip()
+            if not row:
+                continue
+            items = row.split(",")
+            if len(items) < 7 or not items[1]:
+                # Date/tags columns are missing, or the title is empty
+                # (an empty string is a substring of every GA title).
+                sys.stderr.write("! malformed OTP row: %s\n" % row)
+                continue
+            for ga_title in ga_titles:
+                # Skip GA rows already claimed by an earlier OTP row:
+                # they were popped, and popping again raises KeyError.
+                if ga_title in ga_dict and ga_title.find(items[1]) != -1:
+                    ga_info = ga_dict.pop(ga_title)
+                    ga_info.append(items[5])
+                    ga_info.append(items[6])
+                    updated_dict[ga_title] = ga_info
+                    break
+            else:
+                sys.stderr.write("! %s - %s\n" % (items[1], items[5]))
+    return updated_dict
+
+
+def write_rows(rows):
+    """Write each title and its columns to stdout as one comma-joined line."""
+    for title in rows:
+        # Join everything with bare commas so no field gains a stray space.
+        sys.stdout.write(",".join([title] + rows[title]) + "\n")
+
+
+def main():
+    """Parse the two path arguments, merge the files and print the result."""
+    # Wrap the standard streams so unicode text is encoded as UTF-8.
+    sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
+    sys.stderr = codecs.getwriter('utf_8')(sys.stderr)
+
+    try:
+        ga_data_path = sys.argv[1]
+        otp_data_path = sys.argv[2]
+    except IndexError:
+        sys.exit(sys.argv[0] + " <ga file> <otp file>")
+
+    ga_dict, ga_titles = load_ga(ga_data_path)
+    updated_dict = merge_otp(otp_data_path, ga_dict, ga_titles)
+
+    # Matched rows first, then the GA rows that no OTP row matched.
+    write_rows(updated_dict)
+    write_rows(ga_dict)
+
+
+if __name__ == "__main__":
+    main()