Welcome, guest! Login / Register - Why register?
Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)

Paste

Pasted as Python by aspidites ( 17 years ago )
#! /usr/bin/env python

""" Copyright (C) 2009 Edwin Marshall <[email protected]>

    This file is part of "Project 339".

    "Project 339" is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    "Project 339" is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with "Project 339".  If not, see <http://www.gnu.org/licenses/>. 
"""

from os.path import exists, splitext # Do I still need splitext?
from os import makedirs
import sys
import pickle
import sqlite3
from urllib2 import urlopen
from lxml.html import parse
from PyQt4.QtCore import QCoreApplication, QSettings, QDir

reload(sys)
sys.setdefaultencoding('utf-8')

try:
    import win32api
    WINDOWS = True
except ImportError:
    WINDOWS = False

class Database(object):
    """ P339's database backend.
        
        This class is responsible for the creation of, and editing of P339's
        database. 
    """
    def __init__(self):
        """Connects to database, creating a new one if it isn"t found"""

        settings = QSettings()
        if WINDOWS:
            self.settingsDir = QDir(os.path.dirname(os.path.abspath(__file__)))
        else:
            self.settingsDir = QDir(settings.fileName())
            self.settingsDir.cdUp()

        self.statePath = str(self.settingsDir.path() + "/state.pkl")
        self.dbPath = str(self.settingsDir.path() + "/p339.db")
        print WINDOWS
        print self.settingsDir.path()
        print self.dbPath
        if not exists(self.dbPath):
            self.conn = sqlite3.connect(self.dbPath)
            self.createTables()
        else:
            self.conn = sqlite3.connect(self.dbPath)

    def createTables(self):
        """ Poplulats database with empty tables.
        
        """
        # Set up tables that rarely change.
        self.conn.execute("""CREATE TABLE versions 
                             (versionName TEXT PRIMARY KEY COLLATE NOCASE)""")

        self.conn.execute("""CREATE TABLE attributes 
                             (attributeName TEXT PRIMARY KEY COLLATE NOCASE, 
                              attributeIcon TEXT)""")

        self.conn.execute("""CREATE TABLE types 
                             (typeName TEXT PRIMARY KEY COLLATE NOCASE, 
                              typeIcon TEXT)""")

        self.conn.execute("""CREATE TABLE subtypes 
                             (subtypeName TEXT PRIMARY KEY COLLATE NOCASE, 
                              subtypeIcon TEXT)""")

        self.conn.execute("""CREATE TABLE levels 
                           (levelValue NUMERIC PRIMARY KEY,
                            levelIcon TEXT)""")

        # Set up tables that will have varied data
        self.conn.execute("""CREATE TABLE sets (
                  setImage TEXT,
                  setPrefix TEXT COLLATE NOCASE,
                  setName TEXT PRIMARY KEY COLLATE NOCASE,
                  version TEXT COLLATE NOCASE)""")
        self.conn.commit()

        # DON'T FORGET TO RESEARCH MULTIPLE FOREIGN KEYS
        self.conn.execute("""CREATE TABLE cards (
                  cardImage TEXT,
                  cardPrefix TEXT COLLATE NOCASE,
                  number TEXT,
                  cardName TEXT COLLATE NOCASE,
                  type TEXT COLLATE NOCASE,
                  subtypes TEXT COLLATE NOCASE,
                  lore TEXT COLLATE NOCASE,
                  attribute TEXT COLLATE NOCASE,
                  level NUMERIC,
                  attack NUMERIC,
                  defense NUMERIC,
                  PRIMARY KEY (cardPrefix, number))""")
        self.conn.commit() 

    def getSetInfo(self, version, withImages=2, setIndex=0,
                     baseUrl="http://yugioh.wikia.com"):
        setListPage = "/wiki/%s_Set_Prefixes" % version 
        page = parse(baseUrl + setListPage).getroot()
        setAnchors = page.cssselect("+ul li a")
            
        for index in range(setIndex, len(setAnchors)):
            if None is not setAnchors[index].get("title") and 
               len(setAnchors[index].get("title")) < 5:
                if "redlink" in setAnchors[index+1].get("href"):
                    yield {"info": setAnchors[index].text_content(),
                           "status": "missing", "index": index}
                    continue
                else:
                    # Append set prefix, name, version, and url.
                    set = [
                           u"Unavailable",
                           unicode(setAnchors[index].text_content()),
                           unicode(setAnchors[index+1].get("title")),
                          ]
                    if "TCG" in setListPage:
                        set.append(u"TCG")
                    elif "OCG" in setListPage:
                        set.append(u"OCG")
                    set.append(setAnchors[index+1].get("href"))

                    setUrl = set[4]
                    page = parse(baseUrl + setUrl).getroot()
                    suffix = ""

                    for a in page.cssselect("p i b a"):
                        set[4] = a.get("href")
                        if "TCG" in setListPage:
                            suffix = "TCG-EN"
                        elif "OCG" in setListPage:
                            suffix = "OCG-EN"
                        if suffix in set[4] and "List" in set[4] and 
                           "redlink" not in set[4]:
                            break

                    if suffix not in set[4] or "redlink" in set[4] or 
                        "List" not in set[4]:
                        yield {"info": set, "status": "missing card list", 
                               "index": index}
                        continue
                    # Retreive set image from set page.
                    for img in page.cssselect("div a.image img"):
                        if "EN" in img.get("src") or "NA" in img.get("src"): 
                            if withImages == 1:
                                setDir = "%s/images/%s" %(self.settingsDir.path(), 
                                                          set[1].lower())
                                if not exists(setDir):
                                    makedirs(setDir)
                                image = "%s/%s.%s" %(setDir, set[1].lower(), 
                                                     img.get("src")[-3:])
                                buffer = urlopen(img.get("src")).read()

                                open(image, "wb").write(buffer)
                            elif withImages == 2:
                                image = img.get("src")
                            set[0] = image

                yield {"info": set, "status": "complete", "index": index}
    
    def getCardInfo(self, set, withImages=2, card_index=0,
                      baseUrl="http://yugioh.wikia.com"):
        """ Generator function that retrieves card info from wiki and returns
        it as a dictionary with info, status, and index keys.

        """
        # Start a new card list.
        listPage = set[4] 
        page = parse(baseUrl + listPage).getroot()
        # Retrieve caord prefixes, numbers and urls.
        cardAnchors = page.cssselect("table.sortable a")
        missingCards = []
        [IMAGE, PREFIX, NUMBER, NAME, TYPE, SUBTYPES, LORE, ATTRIBUTE,
         LEVEL, ATK, DEF] = range(11)

        for cIndex in range(card_index, len(cardAnchors)):
            if set[1] in cardAnchors[cIndex].text_content():
                card = [u"N/A" for value in range(11)]
                card[IMAGE] = u"Unavailable"
                card[PREFIX] = unicode(cardAnchors[cIndex].text_content(). 
                                                             split("-")[0]. 
                                                             strip())
                card[NUMBER] = unicode(cardAnchors[cIndex].text_content(). 
                                                             split("-")[1]. 
                                                             strip("EN"))
                card[NAME] = unicode(cardAnchors[cIndex+1].text_content()). 
                                                             strip()

                cardPage = cardAnchors[cIndex].get("href")
                page = parse(baseUrl + cardPage).getroot()
                rawData = []
                
                for table in page.cssselect("table[border=1]"):
                    rawData = unicode(table.text_content()).split("
")
                    break

                for index in range(len(rawData)):
                    rawData[index] = rawData[index].strip()
                    if rawData[index] == "Attribute:":
                        card[ATTRIBUTE] = rawData[index+1].strip()
                    elif rawData[index] == "Type:":
                        card[TYPE] = rawData[index+1].split(" Cards")[0]. 
                                                       strip()
                        card[SUBTYPES] = u"Normal"
                    elif rawData[index] == "Types:":
                        card[TYPE] = rawData[index+1].split("/")[0].strip()
                        for subtype in rawData[index+1].split("/")[1:]:
                            card[SUBTYPES] += "/%s" % str(subtype)
                        card[SUBTYPES] = card[SUBTYPES].split(u"N/A/")[1]
                    elif rawData[index] == "Property:":
                        card[SUBTYPES] = rawData[index+1].strip()
                    elif rawData[index] == "Level:":
                        card[LEVEL] = rawData[index+1].strip("Level ")
                    elif rawData[index] == "ATK/DEF:":
                        card[ATK] = rawData[index+1].split("/")[0].strip()
                        card[DEF] = rawData[index+1].split("/")[1].strip()
                    elif rawData[index] == "Lore(s):":
                        card[LORE] = rawData[index+1].split(u"vxa0u2022xa0d"
                                                             u"xa0u2022xa0e"
                                                             u"xa0")[1]. 
                                                       split("English:")[1]. 
                                                       split("TCG")[0]. 
                                                       strip()
                if card[NAME] == u"N/A":
                    yield {"info": card, "status": "missing", "index": cIndex}
                    continue

                for img in page.cssselect("td center a.image img"): 
                    if "EN" in img.get("src") or "NA" in img.get("src"):
                        if withImages == 1:
                            setDir = "%s/images/%s" %(self.settingsDir.path(), set[1].lower())
                            if not exists(setDir):
                                makedirs(setDir)
                            image = "%s/%s-%s.%s" %(setDir, card[1].lower(), 
                                                    card[2], 
                                                    img.get("src")[-3:])
                            buffer = urlopen(img.get("src")).read()
                            open(image, "wb").write(buffer)
                        elif withImages == 2:
                            image = img.get("src")
                        card[IMAGE] = image

                yield {"info": card, "status": "exists", "index": cIndex}

    def fillTables(self, withImages=2):
        """ Fills the database with data gathered from the wiki.

            fiilTables optionally takes one option which determines how to save
            images as follows:
                0 - Don't save images at all.
                1 - Save images locally
                2 - Only load images on demand.
            
            **TODO**
            - look into returning lists to add in updating exising records.
            - implement updating exisintg records (ie, for images)
            - implement error handling to deal with 404, 405, etc errors.
            - clean up layout of code, deciding on a consistant indentation
              scheme.
        """
        if exists(self.statePath):
            state = pickle.load(open(self.statePath, "rb"))
            versions = state[0]
            missingSets = state[1]
            missingCardLists = state[2]
            missingCards = state[3]
            indexes = state[4]
        else:
            versions = ["TCG", "OCG"]
            missingSets = []
            missingCardLists = []
            missingCards = []
            indexes = {"version": 0, "set": 0, "card": 0}
            state = [versions, missingSets, missingCardLists, 
                     missingCards, indexes]

        self.conn.isolation_level = "EXCLUSIVE"

        for index in range(indexes["version"], len(versions)):
            for set in self.getSetInfo(versions[index], withImages, indexes["set"]):
                indexes["set"] = set["index"]
                if set["status"] == "complete":
                    for card in self.getCardInfo(set["info"], withImages,
                                                   indexes["card"]):
                        indexes["card"] = card["index"]
                        if card["status"] == "exists":
                            self.conn.execute("""INSERT OR IGNORE INTO types
                                                 VALUES (?, "Unavailable")""",
                                                 [card["info"][4]])
                            self.conn.execute("""INSERT OR IGNORE INTO subtypes
                                                 VALUES (?, "Unavailable")""",
                                                 [card["info"][5]])
                            self.conn.execute("""INSERT OR IGNORE INTO
                                                 attributes
                                                 VALUES (?, "Unavailable")""",
                                                 [card["info"][7]])
                            self.conn.execute("""INSERT OR IGNORE INTO levels
                                                 VALUES (?, "Unavailable")""",
                                                 [card["info"][8]])
                            self.conn.execute("""INSERT OR IGNORE INTO cards
                                                 VALUES (?, ?, ?, ?, ?, ?, ?,
                                                 ?, ?, ?, ?)""", card["info"])
                            self.conn.commit()
                            print "%s-%s: %s added to database." 
                                   % (card["info"][1], card["info"][2],
                                     card["info"][3])
                        elif card["status"] == "missing" and 
                             card["info"] not in missingCards:
                                 missingCards.append(card["info"])
                        pickle.dump(state, open(self.statePath, "wb")) 
                        
                    indexes["card"] = 0
                    del set["info"][4]
                    self.conn.execute("""INSERT OR IGNORE INTO versions
                                         VALUES (?)""", [set["info"][3]])
                    self.conn.execute("""INSERT OR IGNORE INTO sets 
                                      VALUES (?, ?, ?, ?)""", set["info"])
                    self.conn.commit()
                    print "%s: %s added to database." 
                           %(set["info"][3], set["info"][2])
                elif set["status"] == "missing" and set["info"] not in missingSets:
                    missingSets.append(set["info"])
                elif set["status"] == "missing card list" and 
                     set["info"] not in missingCardLists:
                        missingCardLists.append(set["info"]) 
                pickle.dump(state, open(self.statePath, "wb"))
            indexes["set"] = 0


        self.conn.isolation_level = "None"

if __name__ == "__main__":
    app = QCoreApplication(sys.argv)
    app.setOrganizationName("Project 339")
    app.setOrganizationDomain("p339.tuxfamily.org")
    app.setApplicationName("P339 Database")
    Database().fillTables(1)

 

Revise this Paste

Your Name: Code Language: