3 '''Useful functions for twitter
6 ################################################################################
8 # Copyright (c) 2010 University of Tsukuba Linux User Group
10 # This file is part of "gwit".
12 # "gwit" is free software: you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation, either version 3 of the License, or
15 # (at your option) any later version.
17 # "gwit" is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with "gwit". If not, see <http://www.gnu.org/licenses/>.
25 ################################################################################
32 class TwitterTools(object):
# Regex source strings used to pick URLs, @mentions and #hashtags out of
# status text. Each one exposes a single named group (url / user / hashtag).
# A URL is "http://" or "https://" followed by everything up to the first
# character excluded by the [^...] class (whitespace, plus the extra
# character listed there).
33 _urlpattern = u'''(?P<url>https?://[^\s ]*)'''
# "@" followed by one or more word characters.
34 _userpattern = u'''@(?P<user>\w+)'''
# "#" followed by one or more word characters.
35 _hashpattern = u'''#(?P<hashtag>\w+)'''
# Patterns compiled once as class attributes; the methods of this class
# reach them through cls.reurl, cls.reuser, cls.rehash, cls.reentity and
# cls.reamp.
37 reurl = re.compile(_urlpattern)
38 reuser = re.compile(_userpattern)
39 rehash = re.compile(_hashpattern)
# "&name;" where name is purely alphabetic; the only group captures name.
40 reentity = re.compile("&([A-Za-z]+);")
# "&" plus everything up to (not including) the next "&". Group "after" is
# that trailing text; group "name" is set only when the text starts with an
# alphabetic name immediately followed by ";".
41 reamp = re.compile("&(?P<after>((?P<name>[A-Za-z]+);)?[^&]*)")
# Build the footer text for a status from its creation time and its source.
# NOTE(review): this listing is missing lines inside the function (the
# format arguments, the branch header before the last assignment and the
# return are not visible), so only the visible statements are described.
44 def get_footer(cls, status):
# Clock time ("HH:MM:SS") and relative age, both from status.created_at.
# The local name "time" hides the time module inside this function only.
45 time = cls.get_time_hms(status.created_at)
46 ago = cls.get_time_ago(status.created_at)
# Statuses that carry a "source" key get a "[..] .. via .." footer;
# presumably filled with time, ago and source -- the argument tuple is not
# visible here, confirm.
48 if "source" in status.keys():
49 source = status.source_name
50 footer = u"[%s] %s via %s" % (
# NOTE(review): "fotter" looks like a typo for "footer". As written this
# binds a different name from the one assigned above, so "footer" would be
# left unset on this path -- confirm against the full file and fix.
53 fotter = "DirectMessage?"
# Produce the status text decorated with <span ...> markup: hashtags in grey
# (#666666), URLs in blue (#0000FF) and underlined.
# NOTE(review): several lines of this function are missing from the listing
# (where "text" is first assigned, the call heads of the replace/sub
# expressions, the loop over urls and the return). Comments below describe
# only the visible fragments.
60 def get_decoration_text(cls, status):
# One replacement per hashtag reported by get_hashtags().
64 for h in cls.get_hashtags(status):
# Arguments of a replacement call whose first line is not visible: the
# literal "#tag" is swapped for a grey span containing "#tag".
66 "#" + h,'<span foreground="#666666">#%s</span>' % h)
# Without entity data, URLs are decorated by pattern: \g<url> re-inserts the
# text matched by the "url" named group (presumably via cls.reurl.sub --
# the call head is not visible, confirm).
69 if not status.entities:
71 '<span foreground="#0000FF" underline="single">\g<url></span>', text)
# With entity data, use the URL entities, plus media entities when present.
# NOTE(review): "urls" is the same list object as status.entities.urls, so
# extend() appends the media entries to the status's own list; calling this
# twice on one status would append them twice -- confirm whether a copy was
# intended.
73 urls = status.entities.urls
74 if status.entities.get("media", None):
75 urls.extend(status.entities.media)
# "i" is one URL entity (the loop header is not visible). Show display_url
# when expanded_url is truthy, otherwise fall back to the raw url.
77 url = i.display_url if i.expanded_url else i.url
# The raw i.url in the text is swapped for a blue underlined span showing
# the chosen display form.
79 i.url,'<span foreground="#0000FF" underline="single">%s</span>' % url)
# Extract every URL matched by cls.reurl from plain text.
# Returns a list of 2-tuples; both elements are the same matched string,
# because plain text carries no separate shortened/display forms.
84 def get_urls_from_text(cls, text):
85 url_iter = cls.reurl.finditer(text)
# Tuple layout mirrors get_urls(): (shortened url, display url).
86 # shorten_url, display_url
87 return [(i.group('url'), i.group('url')) for i in url_iter]
# Return the URLs of a status as a list of (url, display form) tuples.
# NOTE(review): the condition that selects between the two returns below is
# not visible in this listing; presumably it tests status.entities, as
# get_media_urls() does -- confirm.
90 def get_urls(cls, status):
# For a retweet, work on the embedded original status instead.
91 if cls.isretweet(status):
92 status = status.retweeted_status
# Entity path: second element is display_url when expanded_url is truthy,
# otherwise the raw url.
95 return [(i.url, i.display_url if i.expanded_url else i.url)
96 for i in status.entities.urls]
# Fallback path: scan the raw text with the URL regex.
98 return cls.get_urls_from_text(status.text)
# Return the media URLs of a status as a list of (url, display form) tuples.
# NOTE(review): what is returned when there are no media entities is not
# visible in this listing.
101 def get_media_urls(cls, status):
# For a retweet, work on the embedded original status instead.
102 if cls.isretweet(status):
103 status = status.retweeted_status
# Only consult media entities when entities exist and "media" is non-empty.
105 if status.entities and status.entities.get("media", None):
# Second element is display_url when expanded_url is truthy, else the raw url.
106 return [(i.url, i.display_url if i.expanded_url else i.url)
107 for i in status.entities.media]
# Return the screen names mentioned in a status, as a list of strings.
# NOTE(review): no retweet unwrapping is visible here, unlike get_urls() and
# get_hashtags(); one line between the def and the first return is missing
# from this listing (presumably the status.entities check) -- confirm.
113 def get_user_mentions(cls, status):
# Entity path: take the names straight from the mention entities.
115 return [i.screen_name for i in status.entities.user_mentions]
# Fallback path: scan the raw text for "@name" with cls.reuser.
117 match = cls.reuser.finditer(status.text)
118 return [i.group('user') for i in match]
# Return the hashtags of a status (without the leading "#"), as a list.
# NOTE(review): the condition choosing between the two returns is not
# visible in this listing; presumably a status.entities check -- confirm.
122 def get_hashtags(cls, status):
# For a retweet, work on the embedded original status instead.
123 if cls.isretweet(status):
124 status = status.retweeted_status
# Entity path: use the text of each hashtag entity.
127 return [i.text for i in status.entities.hashtags]
# Fallback path: scan the raw text for "#tag" with cls.rehash.
129 match = cls.rehash.finditer(status.text)
130 return [i.group('hashtag') for i in match]
# Return the client name held in a status "source" value, as unicode.
# NOTE(review): how "i" is computed and which condition picks each return
# are not visible in this listing.
134 def get_source_name(source):
# Keep the text after index i and drop the last four characters --
# presumably the inside of an "<a ...>name</a>" anchor (4 == len("</a>")),
# confirm.
140 return unicode(source[i + 1:-4])
# Otherwise the source value is returned unchanged apart from the
# unicode() conversion.
142 return unicode(source)
# Parse a timestamp string of the form shown below into a naive datetime
# and shift it by the local UTC offset.
# NOTE(review): the dict header/closer around the month table and the return
# statement are missing from this listing.
146 def get_datetime(timestr):
# Sample input:
148 # Wed Nov 18 18:54:12 +0000 2009
# Parse format applied after the month name has been turned into a number.
# "+0000" is matched literally, so any other offset in the input would make
# strptime raise ValueError.
149 format = "%m %d %H:%M:%S +0000 %Y"
# English month abbreviation -> month number (entries of the table "m").
151 'Jan' : 1, 'Feb' : 2, 'Mar' : 3,
152 'Apr' : 4, 'May' : 5, 'Jun' : 6,
153 'Jul' : 7, 'Aug' : 8, 'Sep' : 9,
154 'Oct' : 10, 'Nov' : 11, 'Dec' : 12
# timestr[4:7] is the month abbreviation and timestr[8:] everything from the
# day of month onward; the leading weekday (timestr[0:3]) is discarded.
157 t = "%02d %s" % (m[timestr[4:7]], timestr[8:])
158 dt = datetime.datetime.strptime(t, format)
# Seconds west of UTC for the local zone; subtracting it moves the parsed
# value from UTC to local wall-clock time.
# NOTE(review): time.daylight is documented as nonzero whenever a DST zone is
# *defined*, not only while DST is in effect, so altzone may be applied
# outside the DST period -- confirm (time.localtime().tm_isdst is the usual
# test).
159 offset = time.altzone if time.daylight else time.timezone
160 dt -= datetime.timedelta(seconds = offset)
# Format a datetime (anything with strftime) as a 24-hour "HH:MM:SS" string.
164 def get_time_hms(dt):
165 return dt.strftime("%H:%M:%S")
# Describe how long ago "dt" was, as an English phrase such as
# "3 hours ago" or "1 minute ago".
# NOTE(review): most of the branching is missing from this listing (where
# "ago" is computed, every if/elif header, and the singular "day"/"hour"
# returns), so the comments below cover only the visible statements.
168 def get_time_ago(dt):
# Naive local "now"; dt is presumably naive local time as well -- confirm.
169 now = datetime.datetime.now()
# "ago" is presumably the timedelta now - dt (assignment not visible).
# timedelta.seconds holds only the sub-day remainder (0..86399); whole days
# are in ago.days. Under Python 2, which this file targets (it uses
# unicode/unichr), "/" on ints floors, so both values are whole numbers.
175 hours = ago.seconds / 3600
# Total whole minutes within the sub-day remainder, not minutes past the hour.
176 minutes = ago.seconds / 60
# Coarsest non-zero unit wins; the visible returns go days, hours, minutes,
# seconds, with separate singular forms for exactly one minute / one second.
182 return "%d days ago" % ago.days
187 return "%d hours ago" % hours
190 return "1 minute ago"
192 return "%d minutes ago" % minutes
195 return "1 second ago"
197 return "%d seconds ago" % ago.seconds
# Return True when the status has a truthy "retweeted_status" entry, False
# otherwise (missing key, None or empty value). "status" must provide a
# dict-style get().
203 def isretweet(status):
204 return bool(status.get("retweeted_status"))
# Count the lists reported by api.lists_memberships(), following its
# cursor-based paging.
# NOTE(review): the initialisation of cursor/listed, the loop header and the
# return are missing from this listing.
208 def get_listed_count(api, ret = None):
# Fetch one page, remember where the next page starts, and add this page's
# number of lists to the running total.
213 lists = api.lists_memberships(cursor = cursor)
214 cursor = int(lists["next_cursor"])
215 listed += len(lists["lists"])
# NOTE(review): this only rebinds the local name "ret"; the object the caller
# passed in is not modified, so the count cannot reach the caller this way.
# If "ret" is meant as an out-parameter it would need in-place mutation --
# confirm intent. ("is not None" is the idiomatic comparison.)
219 if ret != None: ret = listed
# Run the listed-count computation on a separate thread.
# NOTE(review): whether the thread is started or returned is not visible in
# this listing.
224 def listed_count_background(api, ret):
# NOTE(review): the target is the bare name "listed_count", but the function
# visible in this class is "get_listed_count"; unless "listed_count" is
# defined elsewhere this raises NameError when called -- confirm.
225 th = threading.Thread(target = listed_count, args = (api, ret))
# Rewrite "&" occurrences in a string that are not one of the entities
# &gt; &lt; &amp;.
# NOTE(review): the early-exit branch after find() and the return are
# missing from this listing.
231 def replace_amp(cls, string):
# Position of the first "&" (-1 when absent); presumably used for a quick
# exit when the string has none -- the check is not visible, confirm.
232 amp = string.find('&')
# Each match is "&" plus the text up to the next "&" (see cls.reamp).
236 entity_match = cls.reamp.finditer(string)
238 for m in entity_match:
# Group "name" is None when the "&" does not start a "&name;" entity, so
# bare ampersands and unknown entity names both take this branch.
239 if m.group("name") not in ["gt", "lt", "amp"]:
240 # cannot use htmlentitydefs cheeb(ry...
# NOTE(review): as shown, the replacement is "&" + group("after"), which is
# the same text as m.group() unless expand() rewrites a backslash escape in
# it, making this a no-op. Possibly an escaped ampersand ("&amp;") in the
# template was lost when this listing was produced -- confirm against the
# real file. str.replace also rewrites every occurrence of the matched
# text, not just this match.
241 string = string.replace(m.group(), m.expand("&%s" % m.group("after")))
# Replace named HTML entities ("&name;") in a string with the characters
# they stand for.
# NOTE(review): the early-exit branch after find() and the return are not
# visible in this listing.
246 def replace_htmlentity(cls, string):
# Position of the first "&" (-1 when absent); presumably used for a quick
# exit when the string has none -- the check is not visible, confirm.
247 amp = string.find('&')
# All alphabetic entity names that occur as "&name;" in the string.
251 entity_match = cls.reentity.findall(string)
253 for name in entity_match:
# Names unknown to htmlentitydefs are left untouched in the string.
254 if name in htmlentitydefs.name2codepoint:
255 c = htmlentitydefs.name2codepoint[name]
# Swap every "&name;" for the single character with that code point
# (Python 2 unichr).
256 string = string.replace("&%s;" % name, unichr(c))