# Paste code

#!/usr/bin/python

import os
import sys
import json
import time
import urllib
import tinyurl
import twitter
import subprocess

from random import randrange
from optparse import OptionParser

from pyseo.utils import getBrowser
from pyseo.dataminer.google import NewsFeedParser
from pyseo.dataminer.generic import getStoryFromHTML

# Twitter search endpoint that getTrends() fetches and parses as JSON
trends_url = "http://search.twitter.com/trends.json"

def getTrends():
    """Fetch `trends_url' and return the `name' of every listed trend.

    The response body is decoded as JSON; the names are taken, in order,
    from the entries under its top-level `trends' key.
    """
    response = getBrowser().open(trends_url)
    payload = json.loads(response.read())

    return [entry['name'] for entry in payload['trends']]

def getNewsStories(kwds):
    """Collect the story URLs found for each keyword in the `kwds' list.

    Every keyword is wrapped in double quotes before being handed to
    NewsFeedParser. Duplicate URLs are dropped, so the order of the
    returned list is not meaningful.
    """
    unique = set()

    for keyword in kwds:
        feed = NewsFeedParser('"%s"' % keyword)
        unique.update(feed.getStoriesURLs())

    return list(unique)

def _saveTrendStories(stories_dir):
    """Fetch story URLs for every trend and save them under `stories_dir'.

    One file is written per trend, named after the trend with spaces
    replaced by underscores, holding one URL per line.
    """
    # we ignore the channel trends by ignoring trends with an '#'
    trends = [trend for trend in getTrends() if '#' not in trend]

    for trend in trends:
        # getNewsStories() iterates over its argument: hand it a list, a bare
        # string would be searched one character at a time
        stories = getNewsStories([trend])

        # drop everything from the "&usg=" query parameter onwards and decode
        # the percent-escapes of what is left
        urls = [urllib.unquote(url.split("&usg=")[0]) for url in stories]

        filename = trend.replace(" ", "_")
        with open(os.path.join(stories_dir, filename), 'w') as f:
            f.write("\n".join(urls))


def _composeTweet(story, link, topic):
    """Return the tweet text: first sentence of `story', `link', maybe `topic'.
    """
    # reserve room for the link, the trend name and the separators, then cut
    # the text at the first full stop
    excerpt = story[:140 - len(link) - len(topic) - 3].split(".")[0]
    excerpt += " %s" % link

    # once every ~3 posts prefix the excerpt with the trend name
    if randrange(0, 3) == 1:
        excerpt = topic + " " + excerpt

    # otherwise once every ~3 posts postfix the excerpt with the trend name
    elif randrange(0, 3) == 1:
        excerpt += " " + topic

    return excerpt


def _tweetSavedStories(stories_dir, username, password):
    """Tweet an excerpt of every story URL saved under `stories_dir'.

    Each file found is treated as a trend (underscores in its name become
    spaces) holding one URL per line. URLs are consumed from the end of the
    file, and the file is rewritten after every post so an interrupted run
    resumes where it stopped.
    """
    # the credentials never change, one API object serves the whole run
    twit = twitter.Api(username=username, password=password)

    for root, dirs, files in os.walk(stories_dir):
        for name in files:
            topic = name.replace("_", " ")

            absfile = os.path.join(root, name)
            print(absfile)

            # strip the line endings and skip blank lines, otherwise every
            # rewrite of the file would add empty entries to it
            with open(absfile, "r") as f:
                urls = [line.strip() for line in f if line.strip()]

            while urls:
                url = urls.pop()
                print(url)
                story = getStoryFromHTML(url)

                # 2/10 posts include an URL in the tweet, only those need
                # the round trip to the URL shortener
                if randrange(0, 10) in (1, 9):
                    link = tinyurl.create_one(url)
                else:
                    link = "."

                excerpt = _composeTweet(story, link, topic)

                print("\n" + excerpt + "\n")

                twit.PostUpdates(excerpt)

                # record the remaining URLs before sleeping, so interrupting
                # the wait does not make the next run post this story again
                with open(absfile, "w") as f:
                    f.write("\n".join(urls))

                # post every 33 to 166 seconds for what it's worth...
                time.sleep(randrange(33, 167))


def main():
    """Parse the command line and run the selected mode.

    With --trends, story URLs are fetched for the current trends and saved
    to disk. With both --username and --password, the saved stories are
    tweeted. Otherwise the help text is printed.
    """
    stories_dir = '/home/pyseo/stories'

    parser = OptionParser(usage="""
    This program can be run in two different modes:

        - News story url fetcher (--trends)
        - Automated tweets (--username & --password)

    Note that the automated tweeting mode takes a very long time to complete,
    it will tweet every two to five minutes until it's stories urls are
    exausted. You should run this in a screen session.

                        TODO: process locking in automated tweeting mode
        
    %prog [options] [query string]""")
    parser.add_option("--username", "-u", action="store", type="string",
                      dest="username", help="specify the author's username")
    parser.add_option("--password", "-p", action="store", type="string",
                      dest="password", help="specify the author's password")
    parser.add_option("--trends", action="store_true", dest="trends",
                      help="retrieve stories URLs by trend")

    opts, args = parser.parse_args()

    if opts.trends:
        _saveTrendStories(stories_dir)
    elif opts.username and opts.password:
        _tweetSavedStories(stories_dir, opts.username, opts.password)
    else:
        parser.print_help()


if __name__ == "__main__":
    # script entry point: a Ctrl-C is reported and turned into exit status 1
    try:
        main()
    except KeyboardInterrupt:
        print("User interrupted")
        raise SystemExit(1)