Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)
Paste
Pasted as Python by aspidites ( 17 years ago )
#! /usr/bin/env python
""" Copyright (C) 2009 Edwin Marshall <[email protected]>
This file is part of "Project 339".
"Project 339" is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
"Project 339" is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with "Project 339". If not, see <http://www.gnu.org/licenses/>.
"""
from os.path import exists, splitext # Do I still need splitext?
from os import makedirs
import sys
import pickle
import sqlite3
from urllib2 import urlopen
from lxml.html import parse
from PyQt4.QtCore import QCoreApplication, QSettings, QDir
reload(sys)
sys.setdefaultencoding('utf-8')
try:
import win32api
WINDOWS = True
except ImportError:
WINDOWS = False
class Database(object):
""" P339's database backend.
This class is responsible for the creation of, and editing of P339's
database.
"""
def __init__(self):
"""Connects to database, creating a new one if it isn"t found"""
settings = QSettings()
if WINDOWS:
self.settingsDir = QDir(os.path.dirname(os.path.abspath(__file__)))
else:
self.settingsDir = QDir(settings.fileName())
self.settingsDir.cdUp()
self.statePath = str(self.settingsDir.path() + "/state.pkl")
self.dbPath = str(self.settingsDir.path() + "/p339.db")
print WINDOWS
print self.settingsDir.path()
print self.dbPath
if not exists(self.dbPath):
self.conn = sqlite3.connect(self.dbPath)
self.createTables()
else:
self.conn = sqlite3.connect(self.dbPath)
def createTables(self):
""" Poplulats database with empty tables.
"""
# Set up tables that rarely change.
self.conn.execute("""CREATE TABLE versions
(versionName TEXT PRIMARY KEY COLLATE NOCASE)""")
self.conn.execute("""CREATE TABLE attributes
(attributeName TEXT PRIMARY KEY COLLATE NOCASE,
attributeIcon TEXT)""")
self.conn.execute("""CREATE TABLE types
(typeName TEXT PRIMARY KEY COLLATE NOCASE,
typeIcon TEXT)""")
self.conn.execute("""CREATE TABLE subtypes
(subtypeName TEXT PRIMARY KEY COLLATE NOCASE,
subtypeIcon TEXT)""")
self.conn.execute("""CREATE TABLE levels
(levelValue NUMERIC PRIMARY KEY,
levelIcon TEXT)""")
# Set up tables that will have varied data
self.conn.execute("""CREATE TABLE sets (
setImage TEXT,
setPrefix TEXT COLLATE NOCASE,
setName TEXT PRIMARY KEY COLLATE NOCASE,
version TEXT COLLATE NOCASE)""")
self.conn.commit()
# DON'T FORGET TO RESEARCH MULTIPLE FOREIGN KEYS
self.conn.execute("""CREATE TABLE cards (
cardImage TEXT,
cardPrefix TEXT COLLATE NOCASE,
number TEXT,
cardName TEXT COLLATE NOCASE,
type TEXT COLLATE NOCASE,
subtypes TEXT COLLATE NOCASE,
lore TEXT COLLATE NOCASE,
attribute TEXT COLLATE NOCASE,
level NUMERIC,
attack NUMERIC,
defense NUMERIC,
PRIMARY KEY (cardPrefix, number))""")
self.conn.commit()
def getSetInfo(self, version, withImages=2, setIndex=0,
baseUrl="http://yugioh.wikia.com"):
setListPage = "/wiki/%s_Set_Prefixes" % version
page = parse(baseUrl + setListPage).getroot()
setAnchors = page.cssselect("+ul li a")
for index in range(setIndex, len(setAnchors)):
if None is not setAnchors[index].get("title") and
len(setAnchors[index].get("title")) < 5:
if "redlink" in setAnchors[index+1].get("href"):
yield {"info": setAnchors[index].text_content(),
"status": "missing", "index": index}
continue
else:
# Append set prefix, name, version, and url.
set = [
u"Unavailable",
unicode(setAnchors[index].text_content()),
unicode(setAnchors[index+1].get("title")),
]
if "TCG" in setListPage:
set.append(u"TCG")
elif "OCG" in setListPage:
set.append(u"OCG")
set.append(setAnchors[index+1].get("href"))
setUrl = set[4]
page = parse(baseUrl + setUrl).getroot()
suffix = ""
for a in page.cssselect("p i b a"):
set[4] = a.get("href")
if "TCG" in setListPage:
suffix = "TCG-EN"
elif "OCG" in setListPage:
suffix = "OCG-EN"
if suffix in set[4] and "List" in set[4] and
"redlink" not in set[4]:
break
if suffix not in set[4] or "redlink" in set[4] or
"List" not in set[4]:
yield {"info": set, "status": "missing card list",
"index": index}
continue
# Retreive set image from set page.
for img in page.cssselect("div a.image img"):
if "EN" in img.get("src") or "NA" in img.get("src"):
if withImages == 1:
setDir = "%s/images/%s" %(self.settingsDir.path(),
set[1].lower())
if not exists(setDir):
makedirs(setDir)
image = "%s/%s.%s" %(setDir, set[1].lower(),
img.get("src")[-3:])
buffer = urlopen(img.get("src")).read()
open(image, "wb").write(buffer)
elif withImages == 2:
image = img.get("src")
set[0] = image
yield {"info": set, "status": "complete", "index": index}
def getCardInfo(self, set, withImages=2, card_index=0,
baseUrl="http://yugioh.wikia.com"):
""" Generator function that retrieves card info from wiki and returns
it as a dictionary with info, status, and index keys.
"""
# Start a new card list.
listPage = set[4]
page = parse(baseUrl + listPage).getroot()
# Retrieve caord prefixes, numbers and urls.
cardAnchors = page.cssselect("table.sortable a")
missingCards = []
[IMAGE, PREFIX, NUMBER, NAME, TYPE, SUBTYPES, LORE, ATTRIBUTE,
LEVEL, ATK, DEF] = range(11)
for cIndex in range(card_index, len(cardAnchors)):
if set[1] in cardAnchors[cIndex].text_content():
card = [u"N/A" for value in range(11)]
card[IMAGE] = u"Unavailable"
card[PREFIX] = unicode(cardAnchors[cIndex].text_content().
split("-")[0].
strip())
card[NUMBER] = unicode(cardAnchors[cIndex].text_content().
split("-")[1].
strip("EN"))
card[NAME] = unicode(cardAnchors[cIndex+1].text_content()).
strip()
cardPage = cardAnchors[cIndex].get("href")
page = parse(baseUrl + cardPage).getroot()
rawData = []
for table in page.cssselect("table[border=1]"):
rawData = unicode(table.text_content()).split("
")
break
for index in range(len(rawData)):
rawData[index] = rawData[index].strip()
if rawData[index] == "Attribute:":
card[ATTRIBUTE] = rawData[index+1].strip()
elif rawData[index] == "Type:":
card[TYPE] = rawData[index+1].split(" Cards")[0].
strip()
card[SUBTYPES] = u"Normal"
elif rawData[index] == "Types:":
card[TYPE] = rawData[index+1].split("/")[0].strip()
for subtype in rawData[index+1].split("/")[1:]:
card[SUBTYPES] += "/%s" % str(subtype)
card[SUBTYPES] = card[SUBTYPES].split(u"N/A/")[1]
elif rawData[index] == "Property:":
card[SUBTYPES] = rawData[index+1].strip()
elif rawData[index] == "Level:":
card[LEVEL] = rawData[index+1].strip("Level ")
elif rawData[index] == "ATK/DEF:":
card[ATK] = rawData[index+1].split("/")[0].strip()
card[DEF] = rawData[index+1].split("/")[1].strip()
elif rawData[index] == "Lore(s):":
card[LORE] = rawData[index+1].split(u"vxa0u2022xa0d"
u"xa0u2022xa0e"
u"xa0")[1].
split("English:")[1].
split("TCG")[0].
strip()
if card[NAME] == u"N/A":
yield {"info": card, "status": "missing", "index": cIndex}
continue
for img in page.cssselect("td center a.image img"):
if "EN" in img.get("src") or "NA" in img.get("src"):
if withImages == 1:
setDir = "%s/images/%s" %(self.settingsDir.path(), set[1].lower())
if not exists(setDir):
makedirs(setDir)
image = "%s/%s-%s.%s" %(setDir, card[1].lower(),
card[2],
img.get("src")[-3:])
buffer = urlopen(img.get("src")).read()
open(image, "wb").write(buffer)
elif withImages == 2:
image = img.get("src")
card[IMAGE] = image
yield {"info": card, "status": "exists", "index": cIndex}
def fillTables(self, withImages=2):
""" Fills the database with data gathered from the wiki.
fiilTables optionally takes one option which determines how to save
images as follows:
0 - Don't save images at all.
1 - Save images locally
2 - Only load images on demand.
**TODO**
- look into returning lists to add in updating exising records.
- implement updating exisintg records (ie, for images)
- implement error handling to deal with 404, 405, etc errors.
- clean up layout of code, deciding on a consistant indentation
scheme.
"""
if exists(self.statePath):
state = pickle.load(open(self.statePath, "rb"))
versions = state[0]
missingSets = state[1]
missingCardLists = state[2]
missingCards = state[3]
indexes = state[4]
else:
versions = ["TCG", "OCG"]
missingSets = []
missingCardLists = []
missingCards = []
indexes = {"version": 0, "set": 0, "card": 0}
state = [versions, missingSets, missingCardLists,
missingCards, indexes]
self.conn.isolation_level = "EXCLUSIVE"
for index in range(indexes["version"], len(versions)):
for set in self.getSetInfo(versions[index], withImages, indexes["set"]):
indexes["set"] = set["index"]
if set["status"] == "complete":
for card in self.getCardInfo(set["info"], withImages,
indexes["card"]):
indexes["card"] = card["index"]
if card["status"] == "exists":
self.conn.execute("""INSERT OR IGNORE INTO types
VALUES (?, "Unavailable")""",
[card["info"][4]])
self.conn.execute("""INSERT OR IGNORE INTO subtypes
VALUES (?, "Unavailable")""",
[card["info"][5]])
self.conn.execute("""INSERT OR IGNORE INTO
attributes
VALUES (?, "Unavailable")""",
[card["info"][7]])
self.conn.execute("""INSERT OR IGNORE INTO levels
VALUES (?, "Unavailable")""",
[card["info"][8]])
self.conn.execute("""INSERT OR IGNORE INTO cards
VALUES (?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?)""", card["info"])
self.conn.commit()
print "%s-%s: %s added to database."
% (card["info"][1], card["info"][2],
card["info"][3])
elif card["status"] == "missing" and
card["info"] not in missingCards:
missingCards.append(card["info"])
pickle.dump(state, open(self.statePath, "wb"))
indexes["card"] = 0
del set["info"][4]
self.conn.execute("""INSERT OR IGNORE INTO versions
VALUES (?)""", [set["info"][3]])
self.conn.execute("""INSERT OR IGNORE INTO sets
VALUES (?, ?, ?, ?)""", set["info"])
self.conn.commit()
print "%s: %s added to database."
%(set["info"][3], set["info"][2])
elif set["status"] == "missing" and set["info"] not in missingSets:
missingSets.append(set["info"])
elif set["status"] == "missing card list" and
set["info"] not in missingCardLists:
missingCardLists.append(set["info"])
pickle.dump(state, open(self.statePath, "wb"))
indexes["set"] = 0
self.conn.isolation_level = "None"
if __name__ == "__main__":
app = QCoreApplication(sys.argv)
app.setOrganizationName("Project 339")
app.setOrganizationDomain("p339.tuxfamily.org")
app.setApplicationName("P339 Database")
Database().fillTables(1)
Revise this Paste