gwit/twittertools.py

   1 #-*- coding: utf-8 -*-
   2
   3 '''Useful functions for twitter
   4 '''
   5
   6 ################################################################################
   7 #
   8 # Copyright (c) 2010 University of Tsukuba Linux User Group
   9 #
  10 # This file is part of "gwit".
  11 #
  12 # "gwit" is free software: you can redistribute it and/or modify
  13 # it under the terms of the GNU General Public License as published by
  14 # the Free Software Foundation, either version 3 of the License, or
  15 # (at your option) any later version.
  16 #
  17 # "gwit" is distributed in the hope that it will be useful,
  18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 # GNU General Public License for more details.
  21 #
  22 # You should have received a copy of the GNU General Public License
  23 # along with "gwit".  If not, see <http://www.gnu.org/licenses/>.
  24 #
  25 ################################################################################
  26
  27
  28 import re
  29 import datetime
  30 import htmlentitydefs
  31 import bitly
  32
  33 class TwitterTools(object):
  34     _urlpattern = u'''(?P<url>https?://[^\s　]*)'''
  35     _userpattern = u'''@(?P<user>\w+)'''
  36     _hashpattern = u'''#(?P<hashtag>\w+)'''
  37
  38     reurl = re.compile(_urlpattern)
  39     reuser = re.compile(_userpattern)
  40     rehash = re.compile(_hashpattern)
  41     reentity = re.compile("&([A-Za-z]+);")
  42     reamp = re.compile("&(?P<after>((?P<name>[A-Za-z]+);)?[^&]*)")
  43
  44     @classmethod
  45     def get_footer(cls, status):
  46         time = cls.get_time_hms(status.created_at)
  47         ago = cls.get_time_ago(status.created_at)
  48
  49         if "source" in status.keys():
  50             source = status.source_name
  51             footer = u"[%s] %s via %s" % (
  52                 time, ago, source)
  53         else:
  54             fotter = "DirectMessage?"
  55
  56         return footer
  57
  58     ## Status
  59     # Text decoration
  60     @classmethod
  61     def get_decoration_text(cls, status):
  62         text = status.text
  63
  64         # replace hashtags
  65         for h in cls.get_hashtags(status):
  66             text = text.replace(
  67                 "#" + h,'<span foreground="#666666">#%s</span>' % h)
  68
  69         # replace URL
  70         if not status.entities:
  71             text = cls.reurl.sub(
  72                 '<span foreground="#0000FF" underline="single">\g<url></span>', text)
  73         else:
  74             urls = status.entities.urls
  75             if status.entities.get("media", None):
  76                 urls.extend(status.entities.media)
  77             for i in urls:
  78                 url = i.display_url if i.expanded_url else i.url
  79                 text = text.replace(
  80                     i.url,'<span foreground="#0000FF" underline="single">%s</span>' % url)
  81
  82         return text
  83
  84     @classmethod
  85     def get_urls_from_text(cls, text):
  86         url_iter = cls.reurl.finditer(text)
  87         # shorten_url, display_url
  88         return [(i.group('url'), i.group('url')) for i in url_iter]
  89
  90     @classmethod
  91     def get_urls(cls, status):
  92         if cls.isretweet(status):
  93             status = status.retweeted_status
  94
  95         if status.entities:
  96             return [(i.url, i.display_url if i.expanded_url else i.url)
  97                     for i in status.entities.urls]
  98         else:
  99             return cls.get_urls_from_text(status.text)
 100
 101     @classmethod
 102     def get_media_urls(cls, status):
 103         if cls.isretweet(status):
 104             status = status.retweeted_status
 105
 106         if status.entities and status.entities.get("media", None):
 107             return [(i.url, i.display_url if i.expanded_url else i.url)
 108                     for i in status.entities.media]
 109         else:
 110             return []
 111
 112     # User
 113     @classmethod
 114     def get_user_mentions(cls, status):
 115         if status.entities:
 116             return [i.screen_name for i in status.entities.user_mentions]
 117         else:
 118             match = cls.reuser.finditer(status.text)
 119             return [i.group('user') for i in match]
 120
 121     # Hashtags
 122     @classmethod
 123     def get_hashtags(cls, status):
 124         if cls.isretweet(status):
 125             status = status.retweeted_status
 126
 127         if status.entities:
 128             return [i.text for i in status.entities.hashtags]
 129         else:
 130             match = cls.rehash.finditer(status.text)
 131             return [i.group('hashtag') for i in match]
 132
 133     # source
 134     @staticmethod
 135     def get_source_name(source):
 136         if source == "web":
 137             return u"web"
 138         else:
 139             i = source.find(">")
 140             if i != -1:
 141                 return unicode(source[i + 1:-4])
 142             else:
 143                 return unicode(source)
 144
 145     ## Datetime
 146     @staticmethod
 147     def get_datetime(timestr):
 148         # Sample
 149         # Wed Nov 18 18:54:12 +0000 2009
 150         format = "%m %d %H:%M:%S +0000 %Y"
 151         m = {
 152             'Jan' : 1, 'Feb' : 2, 'Mar' : 3,
 153             'Apr' : 4, 'May' : 5, 'Jun' : 6,
 154             'Jul' : 7, 'Aug' : 8, 'Sep' : 9,
 155             'Oct' : 10, 'Nov' : 11, 'Dec' : 12
 156             }
 157
 158         t = "%02d %s" % (m[timestr[4:7]], timestr[8:])
 159         dt = datetime.datetime.strptime(t, format)
 160         offset = time.altzone if time.daylight else time.timezone
 161         dt -= datetime.timedelta(seconds = offset)
 162         return dt
 163
 164     @staticmethod
 165     def get_time_hms(dt):
 166         return dt.strftime("%H:%M:%S")
 167
 168     @staticmethod
 169     def get_time_ago(dt):
 170         now = datetime.datetime.now()
 171
 172         if now < dt:
 173             return "Just now!"
 174
 175         ago = now - dt
 176         hours = ago.seconds / 3600
 177         minutes = ago.seconds / 60
 178
 179         if ago.days:
 180             if ago.days == 1:
 181                 return "1 day ago"
 182             else:
 183                 return "%d days ago" % ago.days
 184         elif hours:
 185             if hours == 1:
 186                 return "1 hour ago"
 187             else:
 188                 return "%d hours ago" % hours
 189         elif minutes:
 190             if minutes == 1:
 191                 return "1 minute ago"
 192             else:
 193                 return "%d minutes ago" % minutes
 194         elif ago.seconds:
 195             if ago.seconds == 1:
 196                 return "1 second ago"
 197             else:
 198                 return "%d seconds ago" % ago.seconds
 199         else:
 200             return "Just now!"
 201
 202     ## Retweet
 203     @staticmethod
 204     def isretweet(status):
 205         return bool(status.get("retweeted_status"))
 206
 207     ## Lists
 208     @staticmethod
 209     def get_listed_count(api, ret = None):
 210         listed = 0
 211         cursor = -1
 212
 213         while True:
 214             lists = api.lists_memberships(cursor = cursor)
 215             cursor = int(lists["next_cursor"])
 216             listed += len(lists["lists"])
 217             if cursor <= 0:
 218                 break
 219
 220         if ret != None: ret = listed
 221
 222         return listed
 223
 224     @staticmethod
 225     def listed_count_background(api, ret):
 226         th = threading.Thread(target = listed_count, args = (api, ret))
 227         th.isDaemon()
 228         th.start()
 229
 230     # Replace & -> &amp;
 231     @classmethod
 232     def replace_amp(cls, string):
 233         amp = string.find('&')
 234         if amp == -1:
 235             return string
 236
 237         entity_match = cls.reamp.finditer(string)
 238
 239         for m in entity_match:
 240             if m.group("name") not in ["gt", "lt", "amp"]:
 241                 # cannot use htmlentitydefs cheeb(ry...
 242                 string = string.replace(m.group(), m.expand("&amp;%s" % m.group("after")))
 243
 244         return string
 245
 246     @classmethod
 247     def replace_htmlentity(cls, string):
 248         amp = string.find('&')
 249         if amp == -1:
 250             return string
 251
 252         entity_match = cls.reentity.findall(string)
 253
 254         for name in entity_match:
 255             if name in htmlentitydefs.name2codepoint:
 256                 c = htmlentitydefs.name2codepoint[name]
 257                 string = string.replace("&%s;" % name, unichr(c))
 258
 259         return string
 260
 261     @classmethod
 262     def is_bitly_url(cls, urls):
 263         if url.startswith(("http://bit.ly", "http://j.mp")):
 264             return bitly.Bitly.expand(url)[0]
 265         else:
 266             return False
 267
 268     @classmethod
 269     def url_shorten(cls, text):
 270         urls = TwitterTools.get_urls_from_text(text)
 271         for longurl in urls:
 272             shorturl = bitly.Bitly.shorten(longurl)
 273             text = text.replace(longurl, shorturl)
 274
 275         return text
 276