Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)
Paste
Pasted as Python by self ( 17 years ago )
#!/usr/bin/python
import os
import sys
import json
import time
import urllib
import tinyurl
import twitter
import subprocess
from random import randrange
from optparse import OptionParser
from pyseo.utils import getBrowser
from pyseo.dataminer.google import NewsFeedParser
from pyseo.dataminer.generic import getStoryFromHTML
# URL fetched by getTrends(); the response body is parsed there as JSON.
trends_url = "http://search.twitter.com/trends.json"
def getTrends():
    """Fetch the current trends and return their names as a list.

    The document at `trends_url' is downloaded through a pyseo browser
    and decoded as JSON; the `name' of every entry found under its
    `trends' key is collected, in the order the entries appear.
    """
    response = getBrowser().open(trends_url)
    payload = json.loads(response.read())
    return [entry['name'] for entry in payload['trends']]
def getNewsStories(kwds):
    """Return the news story URLs found for every keyword in `kwds'.

    Each keyword is searched as an exact (double-quoted) phrase through
    NewsFeedParser.  URLs reported for more than one keyword are only
    returned once; the order of the resulting list is unspecified.
    """
    unique_urls = set()
    for keyword in kwds:
        feed = NewsFeedParser('"%s"' % keyword)
        unique_urls.update(feed.getStoriesURLs())
    return list(unique_urls)
# Directory holding one file of story URLs per trending topic.
_STORIES_DIR = '/home/pyseo/stories'

# Maximum length of a single tweet, in characters.
_TWEET_LIMIT = 140


def _saveTrendStories():
    """Fetch story URLs for every current trend and store them on disk.

    One file per trend is written to `_STORIES_DIR', named after the trend
    with spaces replaced by underscores, holding one URL per line.
    """
    # we ignore the channel trends by ignoring trends with an '#'
    trends = [trend for trend in getTrends() if '#' not in trend]
    for trend in trends:
        # getNewsStories() iterates over its argument, so a bare string
        # would be searched character by character; wrap it in a list.
        story_urls = getNewsStories([trend])
        # Trend names come from the network: keep path separators out of
        # the file name so a trend cannot escape the stories directory.
        filename = trend.replace(" ", "_").replace(os.sep, "_")
        # Drop everything from the "&usg=" query parameter onwards, then
        # decode the URL.  (The marker previously read "&usg;=", which
        # looks like an HTML-entity artifact and never matches a URL.)
        cleaned = [urllib.unquote(url.split("&usg=")[0])
                   for url in story_urls]
        with open(os.path.join(_STORIES_DIR, filename), 'w') as f:
            f.write("\n".join(cleaned))


def _composeTweet(story, url, topic):
    """Build the text of one tweet from a story, its URL and its topic.

    The tweet is the first sentence of `story', trimmed so that the
    result stays within `_TWEET_LIMIT', followed by either a shortened
    link to `url' or a plain period, and randomly prefixed or suffixed
    with `topic'.
    """
    # 2/10 posts include an URL in the tweet; the others end with a
    # period.  The link is only shortened when it is actually used.
    if randrange(0, 10) in (1, 9):
        link = tinyurl.create_one(url)
    else:
        link = "."
    # Reserve room for the link, the topic and the separating spaces;
    # clamp at zero so a very long topic cannot yield a negative bound.
    room = max(0, _TWEET_LIMIT - len(link) - len(topic) - 3)
    excerpt = story[:room].split(".")[0]
    excerpt += " %s" % link
    # once every ~3 posts prefix the excerpt with the trend name
    if randrange(0, 3) == 1:
        excerpt = topic + " " + excerpt
    # once every ~3 posts postfix the excerpt with the trend name
    elif randrange(0, 3) == 1:
        excerpt += " " + topic
    return excerpt


def _tweetStories(username, password):
    """Tweet an excerpt of every story stored under `_STORIES_DIR'.

    Each file is read as a list of URLs (one per line) belonging to the
    topic named by the file.  URLs are consumed from the end of the list;
    after every post the remaining URLs are written back to the file so
    that an interrupted run resumes without reposting.
    """
    twit = twitter.Api(username=username, password=password)
    for root, dirs, files in os.walk(_STORIES_DIR):
        for name in files:
            topic = name.replace("_", " ")
            absfile = os.path.join(root, name)
            print(absfile)
            with open(absfile, "r") as f:
                # Strip the line endings and skip blank lines so the URLs
                # are clean when fetched and when written back.
                urls = [line.strip() for line in f if line.strip()]
            while urls:
                url = urls.pop()
                print(url)
                story = getStoryFromHTML(url)
                excerpt = _composeTweet(story, url, topic)
                print("\n" + excerpt + "\n")
                twit.PostUpdates(excerpt)
                # Persist progress before sleeping: an interruption during
                # the pause must not cause this URL to be posted again.
                with open(absfile, "w") as f:
                    f.write("\n".join(urls))
                # pause for 33 to 166 seconds between posts
                time.sleep(randrange(33, 167))


def main():
    """Parse the command line and run the selected mode.

    With --trends the story URLs of the current trends are fetched and
    saved; with both --username and --password the saved stories are
    tweeted; otherwise the help text is printed.
    """
    parser = OptionParser(usage="""
This program can be run in two different modes:
- News story url fetcher (--trends)
- Automated tweets (--username & --password)
Note that the automated tweeting mode takes a very long time to complete,
it will tweet every two to five minutes until it's stories urls are
exausted. You should run this in a screen session.
TODO: process locking in automated tweeting mode
%prog [options] [query string]""")
    parser.add_option("--username", "-u", action="store", type="string",
                      dest="username", help="specify the author's username")
    parser.add_option("--password", "-p", action="store", type="string",
                      dest="password", help="specify the author's password")
    parser.add_option("--trends", action="store_true", dest="trends",
                      help="retrieve stories URLs by trend")
    opts, args = parser.parse_args()
    if opts.trends:
        _saveTrendStories()
    elif opts.username and opts.password:
        _tweetStories(opts.username, opts.password)
    else:
        parser.print_help()
if __name__ == "__main__":
    # Script entry point: Ctrl-C stops the run with a short notice and a
    # non-zero exit status instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("User interrupted")
        raise SystemExit(1)
Revise this Paste