gwit/twittertools.py

   1 #-*- coding: utf-8 -*-
   2
   3 '''Useful functions for twitter
   4 '''
   5
   6 ################################################################################
   7 #
   8 # Copyright (c) 2010 University of Tsukuba Linux User Group
   9 #
  10 # This file is part of "gwit".
  11 #
  12 # "gwit" is free software: you can redistribute it and/or modify
  13 # it under the terms of the GNU General Public License as published by
  14 # the Free Software Foundation, either version 3 of the License, or
  15 # (at your option) any later version.
  16 #
  17 # "gwit" is distributed in the hope that it will be useful,
  18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 # GNU General Public License for more details.
  21 #
  22 # You should have received a copy of the GNU General Public License
  23 # along with "gwit".  If not, see <http://www.gnu.org/licenses/>.
  24 #
  25 ################################################################################
  26
  27
import calendar
import datetime
import htmlentitydefs
import re
import threading
  31
  32 class TwitterTools(object):
  33     _urlpattern = u'''(?P<url>https?://[^\s　]*)'''
  34     _userpattern = u'''@(?P<user>\w+)'''
  35     _hashpattern = u'''#(?P<hashtag>\w+)'''
  36
  37     reurl = re.compile(_urlpattern)
  38     reuser = re.compile(_userpattern)
  39     rehash = re.compile(_hashpattern)
  40     reentity = re.compile("&([A-Za-z]+);")
  41     reamp = re.compile("&(?P<after>((?P<name>[A-Za-z]+);)?[^&]*)")
  42
  43     @classmethod
  44     def get_footer(cls, status):
  45         time = cls.get_time_hms(status.created_at)
  46         ago = cls.get_time_ago(status.created_at)
  47
  48         if "source" in status.keys():
  49             source = status.source_name
  50             footer = u"[%s] %s via %s" % (
  51                 time, ago, source)
  52         else:
  53             fotter = "DirectMessage?"
  54
  55         return footer
  56
  57     ## Status
  58     # Text decoration
  59     @classmethod
  60     def get_decoration_text(cls, status):
  61         text = status.text
  62
  63         # replace hashtags
  64         for h in cls.get_hashtags(status):
  65             text = text.replace(
  66                 "#" + h,'<span foreground="#666666">#%s</span>' % h)
  67
  68         # replace URL
  69         if not status.entities:
  70             text = cls.reurl.sub(
  71                 '<span foreground="#0000FF" underline="single">\g<url></span>', text)
  72         else:
  73             urls = status.entities.urls
  74             if status.entities.get("media", None):
  75                 urls.extend(status.entities.media)
  76             for i in urls:
  77                 url = i.display_url if i.expanded_url else i.url
  78                 text = text.replace(
  79                     i.url,'<span foreground="#0000FF" underline="single">%s</span>' % url)
  80
  81         return text
  82
  83     @classmethod
  84     def get_urls_from_text(cls, text):
  85         url_iter = cls.reurl.finditer(text)
  86         # shorten_url, display_url
  87         return [(i.group('url'), i.group('url')) for i in url_iter]
  88
  89     @classmethod
  90     def get_urls(cls, status):
  91         if cls.isretweet(status):
  92             status = status.retweeted_status
  93
  94         if status.entities:
  95             return [(i.url, i.display_url if i.expanded_url else i.url)
  96                     for i in status.entities.urls]
  97         else:
  98             return cls.get_urls_from_text(status.text)
  99
 100     @classmethod
 101     def get_media_urls(cls, status):
 102         if cls.isretweet(status):
 103             status = status.retweeted_status
 104
 105         if status.entities and status.entities.get("media", None):
 106             return [(i.url, i.display_url if i.expanded_url else i.url)
 107                     for i in status.entities.media]
 108         else:
 109             return []
 110
 111     # User
 112     @classmethod
 113     def get_user_mentions(cls, status):
 114         if status.entities:
 115             return [i.screen_name for i in status.entities.user_mentions]
 116         else:
 117             match = cls.reuser.finditer(status.text)
 118             return [i.group('user') for i in match]
 119
 120     # Hashtags
 121     @classmethod
 122     def get_hashtags(cls, status):
 123         if cls.isretweet(status):
 124             status = status.retweeted_status
 125
 126         if status.entities:
 127             return [i.text for i in status.entities.hashtags]
 128         else:
 129             match = cls.rehash.finditer(status.text)
 130             return [i.group('hashtag') for i in match]
 131
 132     # source
 133     @staticmethod
 134     def get_source_name(source):
 135         if source == "web":
 136             return u"web"
 137         else:
 138             i = source.find(">")
 139             if i != -1:
 140                 return unicode(source[i + 1:-4])
 141             else:
 142                 return unicode(source)
 143
 144     ## Datetime
 145     @staticmethod
 146     def get_datetime(timestr):
 147         # Sample
 148         # Wed Nov 18 18:54:12 +0000 2009
 149         format = "%m %d %H:%M:%S +0000 %Y"
 150         m = {
 151             'Jan' : 1, 'Feb' : 2, 'Mar' : 3,
 152             'Apr' : 4, 'May' : 5, 'Jun' : 6,
 153             'Jul' : 7, 'Aug' : 8, 'Sep' : 9,
 154             'Oct' : 10, 'Nov' : 11, 'Dec' : 12
 155             }
 156
 157         t = "%02d %s" % (m[timestr[4:7]], timestr[8:])
 158         dt = datetime.datetime.strptime(t, format)
 159         offset = time.altzone if time.daylight else time.timezone
 160         dt -= datetime.timedelta(seconds = offset)
 161         return dt
 162
 163     @staticmethod
 164     def get_time_hms(dt):
 165         return dt.strftime("%H:%M:%S")
 166
 167     @staticmethod
 168     def get_time_ago(dt):
 169         now = datetime.datetime.now()
 170
 171         if now < dt:
 172             return "Just now!"
 173
 174         ago = now - dt
 175         hours = ago.seconds / 3600
 176         minutes = ago.seconds / 60
 177
 178         if ago.days:
 179             if ago.days == 1:
 180                 return "1 day ago"
 181             else:
 182                 return "%d days ago" % ago.days
 183         elif hours:
 184             if hours == 1:
 185                 return "1 hour ago"
 186             else:
 187                 return "%d hours ago" % hours
 188         elif minutes:
 189             if minutes == 1:
 190                 return "1 minute ago"
 191             else:
 192                 return "%d minutes ago" % minutes
 193         elif ago.seconds:
 194             if ago.seconds == 1:
 195                 return "1 second ago"
 196             else:
 197                 return "%d seconds ago" % ago.seconds
 198         else:
 199             return "Just now!"
 200
 201     ## Retweet
 202     @staticmethod
 203     def isretweet(status):
 204         return bool(status.get("retweeted_status"))
 205
 206     ## Lists
 207     @staticmethod
 208     def get_listed_count(api, ret = None):
 209         listed = 0
 210         cursor = -1
 211
 212         while True:
 213             lists = api.lists_memberships(cursor = cursor)
 214             cursor = int(lists["next_cursor"])
 215             listed += len(lists["lists"])
 216             if cursor <= 0:
 217                 break
 218
 219         if ret != None: ret = listed
 220
 221         return listed
 222
 223     @staticmethod
 224     def listed_count_background(api, ret):
 225         th = threading.Thread(target = listed_count, args = (api, ret))
 226         th.isDaemon()
 227         th.start()
 228
 229     # Replace & -> &amp;
 230     @classmethod
 231     def replace_amp(cls, string):
 232         amp = string.find('&')
 233         if amp == -1:
 234             return string
 235
 236         entity_match = cls.reamp.finditer(string)
 237
 238         for m in entity_match:
 239             if m.group("name") not in ["gt", "lt", "amp"]:
 240                 # cannot use htmlentitydefs cheeb(ry...
 241                 string = string.replace(m.group(), m.expand("&amp;%s" % m.group("after")))
 242
 243         return string
 244
 245     @classmethod
 246     def replace_htmlentity(cls, string):
 247         amp = string.find('&')
 248         if amp == -1:
 249             return string
 250
 251         entity_match = cls.reentity.findall(string)
 252
 253         for name in entity_match:
 254             if name in htmlentitydefs.name2codepoint:
 255                 c = htmlentitydefs.name2codepoint[name]
 256                 string = string.replace("&%s;" % name, unichr(c))
 257
 258         return string