'''Useful functions for Twitter.'''
################################################################################
#
# Copyright (c) 2010 University of Tsukuba Linux User Group
#
# This file is part of "gwit".
#
# "gwit" is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# "gwit" is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with "gwit". If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
class TwitterTools(object):
    '''Namespace of helpers for rendering and inspecting Twitter statuses.

    Every method is a classmethod or staticmethod; the class is never
    instantiated.  Status arguments are accessed both as mappings
    (``status.get(...)``, ``status.keys()``) and through attributes
    (``status.text``, ``status.entities.urls``), so they must support both.

    NOTE(review): the reviewed copy of this class had lost its indentation
    and a number of short lines (decorators, ``else:`` branches, ``return``
    statements).  Those were reconstructed; every reconstructed spot that
    involves a guess is marked with NOTE(review) below.
    '''

    # Backslashes are doubled so the literals contain no invalid escape
    # sequences while staying unicode on Python 2.
    # NOTE(review): the second character inside [^...] of the URL pattern is
    # redundant with \s if it is an ASCII space; it may originally have been
    # an ideographic space (U+3000) -- confirm against the repository copy.
    _urlpattern = u'(?P<url>https?://[^\\s ]*)'
    _userpattern = u'@(?P<user>\\w+)'
    _hashpattern = u'#(?P<hashtag>\\w+)'

    reurl = re.compile(_urlpattern)
    reuser = re.compile(_userpattern)
    rehash = re.compile(_hashpattern)
    # "&name;" -- a syntactically complete entity reference.
    reentity = re.compile("&([A-Za-z]+);")
    # An ampersand, an optional "name;" directly after it, and everything up
    # to (not including) the next ampersand.
    reamp = re.compile("&(?P<after>((?P<name>[A-Za-z]+);)?[^&]*)")

    @classmethod
    def get_footer(cls, status):
        '''Return the footer line shown under a status.

        The footer is ``[HH:MM:SS] <relative time> via <source name>`` when
        the status has a "source" key, and ``"DirectMessage?"`` otherwise.
        '''
        # Named "posted" rather than "time" so the time module is not shadowed.
        posted = cls.get_time_hms(status.created_at)
        ago = cls.get_time_ago(status.created_at)

        if "source" in status.keys():
            # NOTE(review): the argument tuple was missing from the reviewed
            # copy; the order follows the three placeholders.
            footer = u"[%s] %s via %s" % (posted, ago, status.source_name)
        else:
            # Was assigned to the misspelled name "fotter", which left
            # "footer" unbound on this path.
            footer = "DirectMessage?"

        return footer

    @classmethod
    def get_decoration_text(cls, status):
        '''Return ``status.text`` with hashtags and URLs wrapped in markup.

        Hashtags are wrapped in a grey ``<span>``; URLs in a blue underlined
        ``<span>``.  When the status carries entities, each short URL is
        replaced by its display form; otherwise URLs are found by regex.
        '''
        text = status.text

        for tag in cls.get_hashtags(status):
            text = text.replace(
                "#" + tag, '<span foreground="#666666">#%s</span>' % tag)

        if not status.entities:
            return cls.reurl.sub(
                '<span foreground="#0000FF" underline="single">'
                '\\g<url></span>', text)

        # Copy before extending: extending status.entities.urls in place
        # would append the media entries again on every call.
        links = list(status.entities.urls)
        if status.entities.get("media", None):
            links.extend(status.entities.media)

        for link in links:
            shown = link.display_url if link.expanded_url else link.url
            text = text.replace(
                link.url,
                '<span foreground="#0000FF" underline="single">%s</span>'
                % shown)

        return text

    @classmethod
    def get_urls_from_text(cls, text):
        '''Return ``(short_url, display_url)`` pairs for every URL in *text*.

        Without entity data both members of each pair are the matched URL.
        '''
        return [(m.group('url'), m.group('url'))
                for m in cls.reurl.finditer(text)]

    @classmethod
    def get_urls(cls, status):
        '''Return ``(short_url, display_url)`` pairs for a status.

        A retweet is resolved to the retweeted status first.  Entity data is
        used when present, otherwise the text is scanned by regex.
        '''
        if cls.isretweet(status):
            status = status.retweeted_status

        if status.entities:
            return [(i.url, i.display_url if i.expanded_url else i.url)
                    for i in status.entities.urls]
        return cls.get_urls_from_text(status.text)

    @classmethod
    def get_media_urls(cls, status):
        '''Return ``(short_url, display_url)`` pairs for attached media.

        A retweet is resolved to the retweeted status first.
        '''
        if cls.isretweet(status):
            status = status.retweeted_status

        if status.entities and status.entities.get("media", None):
            return [(i.url, i.display_url if i.expanded_url else i.url)
                    for i in status.entities.media]
        # NOTE(review): the fall-through was missing from the reviewed copy;
        # an empty list keeps both iteration and truth-testing safe.
        return []

    @classmethod
    def get_user_mentions(cls, status):
        '''Return the screen names mentioned in a status (without "@").

        Unlike get_hashtags(), a retweet is not resolved to the retweeted
        status here.
        '''
        if status.entities:
            return [i.screen_name for i in status.entities.user_mentions]
        return [m.group('user') for m in cls.reuser.finditer(status.text)]

    @classmethod
    def get_hashtags(cls, status):
        '''Return the hashtags of a status (without "#").

        A retweet is resolved to the retweeted status first.
        '''
        if cls.isretweet(status):
            status = status.retweeted_status

        if status.entities:
            return [i.text for i in status.entities.hashtags]
        return [m.group('hashtag') for m in cls.rehash.finditer(status.text)]

    @staticmethod
    def get_source_name(source):
        '''Return the client name from a status "source" field as text.

        When *source* contains ">", the text after the first ">" minus the
        last four characters is returned (for ``<a ...>Name</a>`` that is
        ``Name``); otherwise *source* itself is returned.

        NOTE(review): the lines before the slicing were missing from the
        reviewed copy; the lookup of ">" is reconstructed.
        '''
        i = source.find(">")
        if i != -1:
            # u"%s" % x is the Python 2/3 neutral spelling of unicode(x).
            return u"%s" % source[i + 1:-4]
        return u"%s" % source

    @staticmethod
    def get_datetime(timestr):
        '''Convert a Twitter timestamp into a naive local datetime.

        *timestr* looks like ``Wed Nov 18 18:54:12 +0000 2009`` and is always
        UTC.  Raises KeyError for an unknown month abbreviation and
        ValueError when the rest of the string does not match the format.
        '''
        import calendar

        # The month is mapped by hand so parsing does not depend on the
        # process locale, as %b would.
        months = {
            'Jan': 1, 'Feb': 2, 'Mar': 3,
            'Apr': 4, 'May': 5, 'Jun': 6,
            'Jul': 7, 'Aug': 8, 'Sep': 9,
            'Oct': 10, 'Nov': 11, 'Dec': 12,
        }

        stamp = "%02d %s" % (months[timestr[4:7]], timestr[8:])
        utc = datetime.datetime.strptime(stamp, "%m %d %H:%M:%S +0000 %Y")

        # Let the platform choose the UTC offset valid at that instant.
        # time.daylight only tells whether the zone defines DST at all, so
        # selecting time.altzone from it shifts winter times by an hour.
        return datetime.datetime.fromtimestamp(
            calendar.timegm(utc.timetuple()))

    @staticmethod
    def get_time_hms(dt):
        '''Return *dt* formatted as ``HH:MM:SS``.'''
        return dt.strftime("%H:%M:%S")

    @staticmethod
    def get_time_ago(dt):
        '''Return how long ago the naive local datetime *dt* was, in words.

        The largest non-zero unit among days, hours, minutes and seconds is
        reported, e.g. ``"3 days ago"`` or ``"1 minute ago"``.

        NOTE(review): the guard for future timestamps, the "1 day ago" and
        "1 hour ago" singular forms and the "Just now!" text were missing
        from the reviewed copy and are reconstructed -- confirm the wording.
        '''
        now = datetime.datetime.now()
        if now < dt:
            return "Just now!"

        ago = now - dt
        # Floor division: with "/" Python 3 yields floats, so any non-zero
        # number of seconds would count as a (fractional) hour.
        hours = ago.seconds // 3600
        minutes = ago.seconds // 60

        if ago.days:
            if ago.days == 1:
                return "1 day ago"
            return "%d days ago" % ago.days
        if hours:
            if hours == 1:
                return "1 hour ago"
            return "%d hours ago" % hours
        if minutes:
            if minutes == 1:
                return "1 minute ago"
            return "%d minutes ago" % minutes
        if ago.seconds:
            if ago.seconds == 1:
                return "1 second ago"
            return "%d seconds ago" % ago.seconds
        return "Just now!"

    @staticmethod
    def isretweet(status):
        '''Return True when *status* has a non-empty "retweeted_status".'''
        return bool(status.get("retweeted_status"))

    @staticmethod
    def get_listed_count(api, ret=None):
        '''Return the number of lists reported by ``api.lists_memberships``.

        Pages are fetched until the API reports ``next_cursor`` 0.

        *ret* is accepted for compatibility and is not used: the earlier
        ``if ret != None: ret = listed`` only rebound the local name and
        could never reach the caller.

        NOTE(review): the initial cursor (-1) and the stop condition were
        missing from the reviewed copy and follow Twitter's cursor
        convention -- confirm.
        '''
        listed = 0
        cursor = -1

        while True:
            page = api.lists_memberships(cursor=cursor)
            cursor = int(page["next_cursor"])
            listed += len(page["lists"])
            if cursor == 0:
                break

        return listed

    @staticmethod
    def listed_count_background(api, ret):
        '''Run get_listed_count(api, ret) on a background thread.

        The result is not delivered anywhere at present, see
        get_listed_count() for why *ret* has no effect.
        '''
        # The target used to be the undefined name "listed_count".
        th = threading.Thread(
            target=TwitterTools.get_listed_count, args=(api, ret))
        # NOTE(review): thread setup after construction was missing from the
        # reviewed copy; daemon mode is assumed so the thread cannot keep the
        # application alive on exit -- confirm.
        th.daemon = True
        th.start()

    @classmethod
    def replace_amp(cls, string):
        '''Escape every "&" that does not start ``&gt;``, ``&lt;`` or ``&amp;``.

        Only those three entities are left alone; any other ampersand,
        including one that starts another named entity, becomes ``&amp;``.
        '''
        if string.find('&') == -1:
            return string

        def escape(match):
            if match.group("name") in ("gt", "lt", "amp"):
                return match.group()
            # Plain concatenation: the matched text is user data and must
            # not be interpreted as a regex replacement template.
            return "&amp;" + match.group("after")

        # One pass over the string, so text that has already been escaped
        # is never matched and escaped a second time.
        return cls.reamp.sub(escape, string)

    @classmethod
    def replace_htmlentity(cls, string):
        '''Replace known named HTML entities in *string* by their characters.

        Names that are not in the standard entity table are left untouched.
        '''
        if string.find('&') == -1:
            return string

        try:
            from htmlentitydefs import name2codepoint  # Python 2
            to_char = unichr
        except ImportError:
            from html.entities import name2codepoint  # Python 3
            to_char = chr

        def decode(match):
            name = match.group(1)
            if name in name2codepoint:
                return to_char(name2codepoint[name])
            return match.group()

        # One pass, so the output of one replacement ("&amp;lt;" -> "&lt;")
        # is not decoded again by a later one.
        return cls.reentity.sub(decode, string)

    @classmethod
    def is_bitly_url(cls, urls):
        '''Return the expanded form of a bit.ly / j.mp URL, else None.

        Despite its plural name, *urls* is a single URL string.

        NOTE(review): the result for other URLs was missing from the
        reviewed copy; None is assumed -- confirm against callers.
        '''
        # The body used to read the undefined name "url".
        url = urls
        if url.startswith(("http://bit.ly", "http://j.mp")):
            return bitly.Bitly.expand(url)[0]
        return None

    @classmethod
    def url_shorten(cls, text):
        '''Return *text* with every URL replaced by its bit.ly short form.

        NOTE(review): the loop header and the return statement were missing
        from the reviewed copy and are reconstructed.
        '''
        for longurl, _display in cls.get_urls_from_text(text):
            shorturl = bitly.Bitly.shorten(longurl)
            text = text.replace(longurl, shorturl)

        return text