# Psst.. new poll here.
# Psst.. new forums here.
# Microsoft is blocking us again (TY IP Reputation!) so just use OAuth login instead. :)
# Paste
# Pasted as Python by dude ( 15 years ago )
#!/usr/bin/python
import os, sys
import urllib.request
import socket
import shutil
from zipfile import ZipFile, ZIP_STORED
import binascii
import tempfile
import threading
import queue
from queue import Empty
from queue import Queue
import time, math
import tkinter
from tkinter import *
from tkinter import tix
from tkinter.constants import *
import traceback, tkinter.messagebox
from itertools import count
# Event-selection bit flags.
# NOTE(review): the names suggest they mirror the flag argument of Tcl's
# single-event dispatch call -- confirm against the Tcl headers before use.
# None of them is referenced in the visible part of this file.
TCL_DONT_WAIT = 1<<1
TCL_WINDOW_EVENTS = 1<<2
TCL_FILE_EVENTS = 1<<3
TCL_TIMER_EVENTS = 1<<4
TCL_IDLE_EVENTS = 1<<5
TCL_ALL_EVENTS = 0
# lxml is a third-party package: exit with a readable message instead of a
# traceback when it is not installed.
try:
    from lxml import etree
    import lxml # for lxml.etree._Element classinfo
except ImportError:
    sys.exit("The lxml library was not found. Check http://lxml.de/ \nExiting ...")
def parsem(*args, **kwargs):
    """ a workhorse for parsing the html

    Positional arguments:
    args[0] -- the lxml Element or ElementTree to walk
    args[1] -- local name of the wanted XHTML tag (e.g. "div"); when it is
               missing or not a string, "html" is used instead
    args[2] -- optional name of an attribute to collect from every match;
               when it is missing, empty or not a string, no attribute
               values are collected
    Keyword arguments:
    any keyword arguments are attached, as one dict, to every match

    Returns:
    no attribute, no kwargs -- a list of the matching elements
    attribute only          -- {element: (value,)}
    kwargs only             -- {element: (kwargs,)}
    attribute and kwargs    -- {element: (value, (kwargs,))}
    A match that lacks the attribute (or has it empty) gets the
    placeholder value "pass".

    Raises:
    TypeError -- args[0] is neither an Element nor an ElementTree
    """
    root = args[0]
    if not isinstance(root, (lxml.etree._Element, lxml.etree._ElementTree)):
        raise TypeError("The first argument should be a <class 'lxml.etree._Element'> or <class 'lxml.etree._ElementTree'>, not a %s containing '%s'" % (root.__class__, root))

    # Tags are looked up in the XHTML namespace.
    tag_name = args[1] if len(args) > 1 and isinstance(args[1], str) else "html"
    qualified_tag = "".join(("{http://www.w3.org/1999/xhtml}", tag_name))

    # A missing, empty or non-string attribute name simply disables the
    # attribute collection; it is not treated as an error.
    attribute = None
    if len(args) > 2 and isinstance(args[2], str) and args[2]:
        attribute = args[2]

    elements = []
    attribute_values = []
    for _event, element in etree.iterwalk(root, tag=qualified_tag):
        elements.append(element)
        if attribute:
            attribute_values.append(element.attrib.get(attribute) or "pass")

    if not attribute and not kwargs:
        return elements

    # One 1-tuple holding the kwargs dict per match.
    extras = [(kwargs,)] * len(elements)
    if attribute and kwargs:
        return dict(zip(elements, zip(attribute_values, extras)))
    if attribute:
        return dict(zip(elements, zip(attribute_values)))
    return dict(zip(elements, extras))
def extractor(*args, **kwargs):
    """ primary function for parsing the html

    Positional arguments:
    args[0] -- the html's url
    Any further positional arguments (callers in this file also pass the
    grace period and the reattempt count) are accepted and ignored.

    Returns:
    a 3-tuple of
    the next url (an empty tuple when no link was found),
    the dictionary mapping each image url to the id of the div holding it,
    and the same url that was given

    Raises:
    ValueError -- the document has no <body> element
    Errors from opening or parsing the url are passed on to the caller.
    """
    URL = args[0]
    parser = etree.XMLParser(resolve_entities=False, ns_clean=True, remove_blank_text=True, recover=True, attribute_defaults=True, dtd_validation=True, load_dtd=True)
    # Close the HTTP response as soon as the document has been parsed.
    with urllib.request.urlopen(URL) as response:
        s_tree = etree.parse(response, parser)

    body_elements = parsem(s_tree, "body")
    if not body_elements:
        raise ValueError("No <body> element found in %s" % URL)

    NEXT_URL = ()
    ImageDict = {}
    # parsem() maps every element to a 1-tuple holding the attribute value
    # and reports a missing attribute as the placeholder "pass".
    for division, division_id in parsem(body_elements[0], "div", "id").items():
        if division_id != ("p",):
            continue
        for (href,) in parsem(division, "a", "href").values():
            if href != "pass":
                NEXT_URL = "".join(("http://mangastream.com", href))
        for (src,) in parsem(division, "img", "src").values():
            if src != "pass":
                ImageDict[src] = division_id[0]
    return NEXT_URL, ImageDict, URL
def zipdir(*args, **kwargs):
    """ directory archiver

    Positional arguments:
    args[0] -- the directory to zip
    args[1] -- the archive's name

    Every file below the directory is stored (uncompressed) under its path
    relative to that directory; empty directories are not recorded.

    Returns:
    None
    """
    basedir = args[0]
    archivename = args[1]
    with ZipFile(archivename, "w", ZIP_STORED) as z:
        for root, dirs, files in os.walk(basedir):
            dirs.sort()  # walk sub-directories in a reproducible order
            for fn in sorted(files):
                absfn = os.path.join(root, fn)
                # relpath stays correct when basedir ends with a separator,
                # unlike slicing by len(basedir).
                z.write(absfn, os.path.relpath(absfn, basedir))
def meter(can, x, y, xx, yy, xxx):
    """ progress bar painter

    Draws a framed bar from (x, y) to (xx, yy) on a canvas and fills it
    from the left edge up to the x coordinate xxx.

    Arguments:
    can -- object providing create_rectangle() and create_line()
    x, y -- top-left corner of the bar
    xx, yy -- bottom-right corner of the bar
    xxx -- x coordinate where the filling ends; values left of x+6 are
           raised to x+6 and values right of xx are lowered to xx, so the
           filling never leaves the frame

    Returns:
    None
    """
    # x+6 is the smallest filling the bar can show (this is what an "empty"
    # bar, xxx == x, has always been drawn with).
    xxx = max(min(xxx, xx), x + 6)

    def rounded_rectangle(can, x, y, xx, yy, fill):
        # A rectangle plus two shorter vertical lines, leaving the four
        # corner pixels unpainted.
        can.create_rectangle(x+1, y, xx, yy, width=0, fill=fill)
        can.create_line(x, y+1, x, yy-1, width=0, fill=fill)
        can.create_line(xx, y+1, xx, yy-1, width=0, fill=fill)

    #bar: concentric frames, darkest outside
    for inset, colour in enumerate(("#676767", "#BDBDBD", "#efefef", "#ffffff")):
        rounded_rectangle(can, x+inset, y+inset, xx-inset, yy-inset, colour)
    #fillings: three green bands, each one pixel shorter in height
    for inset, colour in ((3, "#91ec93"), (4, "#79e47a"), (5, "#31d234")):
        rounded_rectangle(can, x+3, y+inset, xxx-3, yy-inset, colour)
class GUI(threading.Thread):
    """ gui tkinter classinfo

    Tix front end of the ripper, packaged as a thread.

    The widgets are built in __init__ (in the thread that instantiates the
    class); the Tk main loop is entered from run() once the thread is
    started.
    NOTE(review): Tk is therefore driven from a different thread than the
    one that created it -- whether that works depends on how Tcl was built;
    confirm on the target platform.

    Attributes used by the rest of this file:
    print_log  -- queue of text lines to show on the Log tab
    multiqueue -- queue of (page, image, barsize, fillbar) progress reports
    secs       -- tick counter maintained by listner_log(), as a string
    """

    def __init__(self):
        """ thread init
        defines some vars and builds the window when the class is called (gui=GUI())
        """
        threading.Thread.__init__(self)
        # Default so status texts can be formatted before listner_log() runs.
        self.secs = "0"
        self.root = tkinter.tix.Tk()
        window = self.root.winfo_toplevel()
        window.wm_title("ManagaStream.com Ripper")
        if window.winfo_screenwidth() <= 800:
            window.geometry("790x590+10+10")
        else:
            window.geometry("890x640+10+10")
        notebook = self.MkMainNotebook()
        statusbar = self.MkMainStatus()
        notebook.pack(side=TOP, expand=1, fill=BOTH, padx=4, pady=4)
        statusbar.pack(side=BOTTOM, fill=X)
        window.wm_protocol("WM_DELETE_WINDOW", self.stop)

    def run(self):
        """ thread start
        kick starts the main loop when the thread start()
        """
        self.root.mainloop()

    def stop(self):
        """ escape plan
        Asks the Tk main loop to return (which lets run() and with it the
        gui thread finish) and then raises SystemExit in the calling thread.
        """
        try:
            self.root.quit()
        except (RuntimeError, tkinter.TclError):
            # The loop is not running or Tk is already torn down: there is
            # nothing left to stop.
            pass
        raise SystemExit

    def MkMainStatus(self):
        """ status bar
        Builds the bottom frame (exit button and status label) and creates
        the two queues the worker code writes to.  Returns the frame.
        """
        bar = tkinter.tix.Frame(self.root, relief=tkinter.tix.RAISED, bd=1)
        self.status = tkinter.tix.Label(bar, anchor=E, bd=1)
        self.exitbutton = tkinter.Button(bar, text="Exit GUI Thread", width=20, command=self.stop)
        self.exitbutton.grid(row=0, column=0, sticky=W, padx=3, pady=3)
        self.status.grid(row=0, column=1, sticky=E, padx=3, pady=3)
        self.print_log = queue.Queue()
        self.multiqueue = queue.Queue()
        return bar

    def listner_log(self):
        """ log listener
        Starts a self-rescheduling poll of print_log: every tick advances
        the secs counter and moves at most one queued line onto the Log tab.
        """
        rate = 0.05 # seconds to re-read queue; 0.5=half a second, 1=a full second
        counter = count(0, rate)

        def update_func():
            self.secs = str(next(counter))
            seconds = self.secs.split(".")[0]
            try:
                message = self.print_log.get_nowait()
            except queue.Empty:
                self.status.config(text="%s(secs): Waiting for queue..." % seconds, fg="black")
            else:
                self.status.config(text="%s(secs): Processing log queue..." % seconds, fg="red")
                tix.Label(self.LogFrame, text=message).pack()
            self.status.after(int(rate*1000), update_func)

        update_func()

    def listner_pro(self):
        """ progress listener
        Clears both progress canvases and starts a self-rescheduling poll of
        multiqueue: every report updates the bar of its (page, image) pair
        and all known bars are redrawn in sorted order.
        """
        self.textPane.delete("all") # clear canvas ("all" is the value of tkinter's ALL)
        self.barsPane.delete("all") # clear canvas
        pre_bars = {}
        rate = 0.001

        def redraw():
            # One 40 pixel high row per (page, image) pair.
            for row, ((pagename, imagename), (barsize, fillbar)) in enumerate(sorted(pre_bars.items())):
                counter = row*40
                for stale in self.textPane.find_overlapping(20, counter, 1000, counter+37):
                    self.textPane.delete(stale) # deletes the item below - more efficient than .delete(ALL)
                self.textPane.create_text(20, counter, anchor=NW, text=("Page %s:" % pagename))
                self.textPane.create_text(20, counter+20, anchor=NW, text=("%s:" % imagename))
                for stale in self.barsPane.find_overlapping(20, counter, 1000, counter+37):
                    self.barsPane.delete(stale) # deletes the item below - more efficient than .delete(ALL)
                meter(self.barsPane, 20, counter+17, int(barsize)+20, counter+37, 20+int(fillbar))

        def update_func():
            seconds = self.secs.split(".")[0]
            try:
                report = self.multiqueue.get_nowait()
            except queue.Empty:
                self.status.config(text="%s(secs): Waiting for queue..." % seconds, fg="black")
            else:
                self.status.config(text="%s(secs): Processing progress queue..." % seconds, fg="red")
                try:
                    pagename, imagename = report[0], report[1]
                    # Reports use a 0..1000 scale; 2.2 shrinks it to the pane.
                    pre_bars[pagename, imagename] = (report[2]/2.2, report[3]/2.2)
                    redraw()
                except Exception:
                    # A bad report must not stop the polling; show what went
                    # wrong and carry on with the next one.
                    traceback.print_exc()
            # Exactly one follow-up poll, whatever happened above.
            self.status.after(int(rate*1000), update_func)

        update_func()

    def MkMainNotebook(self):
        """ the tabs frame
        defines the tabs; the Settings tab is only offered when the script
        was started without command line arguments.  Returns the notebook.
        """
        top = self.root
        w = tkinter.tix.NoteBook(top, ipadx=5, ipady=5, options="\ntagPadX 6\ntagPadY 4\nborderWidth 2\n")
        # This may be required if there is no *Background option
        top["bg"] = w["bg"]
        # open log first and discards settings tab if arguments were passed
        if sys.argv[1:]:
            w.add("log", label="Log", underline=0)
            w.add("pro", label="Progress", underline=0)
            self.MkLog(w, "log")
            self.MkProgress(w, "pro")
        else:
            w.add("set", label="Settings", underline=0)
            w.add("pro", label="Progress", underline=0)
            w.add("log", label="Log", underline=0)
            self.MkLog(w, "log")
            self.MkProgress(w, "pro")
            self.MkSettings(w, "set")
        return w

    def MkSettings(self, nb, name):
        """ settings tab
        Builds the address/name entries, the tuning controls and the help
        text; the OK button starts start_rip() in a new thread with the
        values entered.
        """
        w = nb.page(name)
        options = "label.width %d label.anchor %s entry.width %d" % (10, tkinter.tix.W, 13)
        control_options = "label.anchor %s entry.width %d" % (tkinter.tix.W, 13)
        settings_scr_win = tix.ScrolledWindow(w, width=400, height=400)
        settings_scr_win.pack(side=tkinter.tix.TOP, padx=2, pady=2, fill="both", expand=1)
        self.SettingsFrame = settings_scr_win.window

        def okidoki():
            if int(threadsno.entry.get()) < 3: threadsno["value"] = "4" # idiot test
            if float(graceno.entry.get()) < 1: graceno["value"] = float("5.0") # idiot test
            # start_rip() reads the address and archive name from an
            # argv-style list: [program, address, name].
            oki = threading.Thread(target=start_rip, args=([sys.argv[0].split("\\")[-1], address.entry.get(), zipname.entry.get()], self, int(threadsno.entry.get()), float(graceno.entry.get()), page_sel.cget("value"), int(reattemptno.entry.get()), int(timeoutno.entry.get())))
            oki.start()

        # --- "Rip" section: address, archive name, OK / Cancel ---
        rip_frame = tkinter.tix.Frame(self.SettingsFrame, relief=RIDGE)
        settings_lbl_frame = tkinter.tix.Frame(rip_frame)
        settings_lbl = tkinter.Label(settings_lbl_frame, text="Rip:")
        settings_lbl.pack(side=tkinter.tix.LEFT, padx=2, pady=2)
        settings_lbl_frame.pack(side=tkinter.tix.TOP, padx=2, pady=2, fill=tkinter.tix.X)
        confirm = tkinter.tix.Frame(rip_frame)
        ok = tkinter.tix.Button(confirm, text="OK", width=6)
        cancel = tkinter.tix.Button(confirm, text="Cancel", width=6)
        ok.pack(side=tkinter.tix.TOP, padx=0, pady=1)
        ok["command"] = okidoki
        cancel.pack(side=tkinter.tix.TOP, padx=0, pady=1)
        cancel["command"] = self.stop
        confirm.pack(side=tkinter.tix.RIGHT, padx=0, pady=0, fill=tkinter.tix.BOTH)
        address = tkinter.tix.LabelEntry(rip_frame, label="Address:", options=options)
        address.entry.insert(0, "http://mangastream.com/read/naruto/24027713/1")
        address.pack(side=tkinter.tix.TOP, padx=20, pady=1, fill=tkinter.tix.BOTH)
        zipname = tkinter.tix.LabelEntry(rip_frame, label="Name:", options=options)
        zipname.entry.insert(0, "Naruto_551_Kishimoto_Masashi")
        zipname.pack(side=tkinter.tix.TOP, padx=20, pady=1, fill=tkinter.tix.BOTH)
        rip_frame.pack(side=tkinter.tix.TOP, padx=2, pady=2, fill=tkinter.tix.X)

        # --- "Experimental" section: threads, grace, timeout, reattempts, page mode ---
        exp_frame = tkinter.tix.Frame(self.SettingsFrame, relief=RIDGE)
        valuesA = tkinter.tix.Frame(exp_frame)
        exp_lbl_frame = tkinter.tix.Frame(valuesA)
        exp_lbl = tkinter.Label(exp_lbl_frame, text="Experimental:")
        exp_lbl.pack(side=tkinter.tix.LEFT, padx=2, pady=2)
        exp_lbl_frame.pack(side=tkinter.tix.TOP, padx=2, pady=2, fill=tkinter.tix.X)
        threadsno = tkinter.tix.Control(valuesA, label="Threads:", value=8, options=control_options)
        graceno = tkinter.tix.Control(valuesA, label="Grace period:", value=2.0, options=control_options)
        threadsno.pack(side=tkinter.tix.LEFT, padx=20, pady=2)
        graceno.pack(side=tkinter.tix.LEFT, padx=20, pady=2)
        valuesA.pack(side=tkinter.tix.TOP, padx=0, pady=2, fill=tkinter.tix.X)
        valuesB = tkinter.tix.Frame(exp_frame)
        timeoutno = tkinter.tix.Control(valuesB, label="Timeout:", value=15, options=control_options)
        reattemptno = tkinter.tix.Control(valuesB, label="Reattempts:", value=8, options=control_options)
        timeoutno.pack(side=tkinter.tix.LEFT, padx=20, pady=2)
        reattemptno.pack(side=tkinter.tix.LEFT, padx=20, pady=2)
        page_frame = Frame(valuesB)
        page_sel = tkinter.tix.Select(page_frame, label="Page: ", allowzero=0, radio=1,
                                      orientation=tkinter.tix.HORIZONTAL,
                                      labelside=tkinter.tix.LEFT,
                                      options=control_options)
        page_sel.add("multi", text="Multi")
        page_sel.add("single", text="Single")
        page_sel.pack(side=tkinter.tix.LEFT, padx=0, pady=0, fill=tkinter.tix.X)
        page_sel.invoke("multi") # default
        page_frame.pack(side=tkinter.tix.LEFT, padx=20, pady=2)
        valuesB.pack(side=tkinter.tix.TOP, padx=0, pady=2, fill=tkinter.tix.X)
        exp_frame.pack(side=tkinter.tix.TOP, padx=2, pady=2, fill=tkinter.tix.X)

        # --- "Info" section: help text ---
        info_frame = tkinter.tix.Frame(self.SettingsFrame, relief=RIDGE)
        infotitle_lbl_frame = tkinter.tix.Frame(info_frame)
        infotitle_lbl = tkinter.Label(infotitle_lbl_frame, text="Info:")
        infotitle_lbl.pack(side=tkinter.tix.LEFT, padx=2, pady=2)
        infotitle_lbl_frame.pack(side=tkinter.tix.TOP, padx=2, pady=2, fill=tkinter.tix.X)
        # The backslash in "Multi\Single" is written as "\\" because "\S" is
        # not a valid escape sequence; the displayed text is unchanged.
        text = (
            "\n"
            "Threads: The number of images that are downloaded at the same time.\n\n"
            "Grace period: The delay in seconds when file operations may hinder thread safety.\n\n"
            "The slower the machine or connection, the higher the grace needs be.\n\n"
            "Timeout: Number of seconds before a download-socket is dropped.\n\n"
            "Reattempts: Number of attempts to restart a failed download.\n\n"
            "Page: Multi\\Single: Select whether to download only the page in the address box,\n\n"
            "or to continue untill last page of release.\n\n"
        )
        info_lbl_frame = tkinter.tix.Frame(info_frame)
        info_lbl = tkinter.Label(info_lbl_frame, text=text, justify=LEFT)
        info_lbl.pack(side=tkinter.tix.LEFT, padx=0, pady=2)
        info_lbl_frame.pack(side=tkinter.tix.TOP, padx=0, pady=2, fill=tkinter.tix.X)
        info_frame.pack(side=tkinter.tix.TOP, padx=2, pady=2, fill=tkinter.tix.X)

    def MkProgress(self, nb, name):
        """ progress tab
        Two canvases side by side (labels left, bars right) that share one
        scrollbar.
        """
        w = nb.page(name)
        progress_scr_win = tix.ScrolledWindow(w, width=400, height=400)
        progress_scr_win.pack(side=LEFT, padx=2, pady=2, fill=BOTH, expand=1)
        pane = tkinter.tix.PanedWindow(progress_scr_win, orientation='horizontal')
        p1 = pane.add('textPane', min=70, size=340)
        p2 = pane.add('barsPane', min=70, size=245)
        self.textPane = Canvas(p1, width=1, height=1)
        self.barsPane = Canvas(p2, width=1, height=1)
        self.textPane.config(scrollregion=(0, 0, 1000, 3000))
        self.barsPane.config(scrollregion=(0, 0, 1000, 3000))

        def update_widgets(*args):
            # Scroll both canvases together.
            self.textPane.yview(*args)
            self.barsPane.yview(*args)

        sbar = Scrollbar(w)
        sbar.config(command=update_widgets)
        self.textPane.config(yscrollcommand=sbar.set)
        sbar.pack(side=RIGHT, fill=Y, padx=2, pady=2)
        self.textPane.pack(expand=YES, fill=tkinter.tix.BOTH, padx=4, pady=6)
        self.barsPane.pack(expand=YES, fill=tkinter.tix.BOTH, padx=4, pady=6)
        pane.pack(side=LEFT, expand=YES, fill=BOTH)
        self.textPane.create_text(20, 20, anchor=NW, text="Test progress bar:")
        meter(self.barsPane, 20, 20, 200, 40, 50)

    def MkLog(self, nb, name):
        """ log
        a simple replacement to the console output.
        feeds off the main process.
        """
        w = nb.page(name)
        log_scr_win = tix.ScrolledWindow(w, width=400, height=400)
        log_scr_win.pack(side=tkinter.tix.TOP, padx=2, pady=2, fill="both", expand=1)
        self.LogFrame = log_scr_win.window
def start_rip(argv, gui, threads=8, grace=2.0, page="multi", reattempt=8, timeout=15):
    """ ripper
    the big bad function

    Follows the "next" links starting at the given address, downloads the
    images of every page into a temporary directory (one thread per image)
    and packs that directory into "<name>.cbz".

    Arguments:
    argv -- argv-style list; argv[1:-1] joined by spaces is the address of
            the first page, argv[2:] joined by spaces is the archive name
    gui -- the GUI instance that receives log lines and progress reports
    threads -- new downloads wait while more threads than this are alive
    grace -- seconds to sleep between polls and before a retry
    page -- "single" stops after the first page, anything else follows the
            links until the address ends in "/end" or no link is found
    reattempt -- number of retry rounds (two tries each) per page or image
    timeout -- default socket timeout in seconds (set process-wide)

    Returns:
    None
    """
    first_url = " ".join(argv[1:-1])
    ZIP_NAME = " ".join(argv[2:])
    EXTRACT_DIR = tempfile.mkdtemp()
    gui.listner_log()
    gui.listner_pro()
    gui.print_log.put("Running with %s threads, %s seconds grace period,\n %s seconds timeouts, %s no. of reattempts \nand in %s-page mode." % (threads, grace, timeout, reattempt, page))

    def dlProgress(block_count, blockSize, totalSize):
        # urlretrieve report hook: turns block counts into a bar report on
        # a scale of (at most) 1000 and queues it for the gui.
        try:
            blocks = math.ceil(totalSize/blockSize)
            if blocks <= 0:
                return # size unknown: nothing meaningful to report
            if blocks < 1000:
                barsize = int(1000/blocks)*blocks
            else:
                barsize = 1000
            fillbar = int(block_count/blocks*barsize)
            # The thread name carries "<page> <image name>".
            page_name, image_name = threading.current_thread().name.split(" ")[:2]
            gui.multiqueue.put((page_name, image_name, barsize, fillbar), True) # the bar progress report
        except Exception:
            print("multiqueue error")

    def with_retries(action, label):
        # Runs action() in up to `reattempt` rounds; each round tries once,
        # sleeps `grace` seconds on failure and tries once more.
        # Returns (True, result) on success and (False, None) otherwise.
        for round_no in range(reattempt):
            try:
                return True, action()
            except Exception:
                time.sleep(grace)
            try:
                result = action()
            except Exception:
                continue
            print("%s times reattempted %s" % (round_no, label))
            return True, result
        return False, None

    def downloader(key, dstfile, label):
        done, _ = with_retries(lambda: urllib.request.urlretrieve(key, dstfile, dlProgress), label)
        if not done:
            gui.print_log.put("Failed to download %s" % key)

    socket.setdefaulttimeout(timeout)
    dummythreads = threading.active_count()
    print("starting with", dummythreads, "threads")
    downloads = []
    visited = set()
    current_url = first_url
    try:
        # Stop at the "/end" address, when a page offers no next link, or
        # when a link leads back to a page that was already ripped.
        while (isinstance(current_url, str) and current_url
               and current_url.split("/")[-1] != "end"
               and current_url not in visited):
            visited.add(current_url)
            # open url
            fetched, result = with_retries(lambda: extractor(current_url, grace, reattempt), current_url)
            if not fetched:
                gui.print_log.put("Giving up on %s after %s reattempts." % (current_url, reattempt))
                break
            next_url, ImageDict, page_url = result
            gui.print_log.put("Ripping %s" % page_url)
            filename = page_url.split("/")[-1]
            # The page number is the last path segment, possibly behind a
            # four character prefix; fall back to the raw text otherwise.
            try:
                page_int = int(filename)
            except ValueError:
                try:
                    page_int = int(filename[4:])
                except ValueError:
                    page_int = filename[4:]
            page_padded = str(page_int).zfill(4)
            # Pages made of several images get a numbered suffix so the
            # parts do not overwrite each other.
            several = len(ImageDict) > 1
            for index, key in enumerate(ImageDict, 1):
                suffix = "_%02d" % index if several else ""
                dstfile = os.path.join(EXTRACT_DIR, "page%s%s.%s" % (page_padded, suffix, key[-3:]))
                download = threading.Thread(target=downloader, name=("%s %s" % (page_padded, key.split("/")[-1])), args=(key, dstfile, page_padded))
                downloads.append(download)
                download.start()
                while threading.active_count() > threads:
                    time.sleep(grace)
            if page == "single":
                break
            current_url = next_url
        # Wait for every download started above before zipping.
        for download in downloads:
            download.join()
        gui.print_log.put("Ripping Done.")
        archivename = ZIP_NAME + ".cbz"
        itr = 0
        while os.path.exists(archivename): # never overwrite an existing archive
            itr += 1
            archivename = ZIP_NAME + "." + str(itr) + ".cbz"
        zipdir(EXTRACT_DIR, archivename)
        gui.print_log.put("Zipping Done.")
    finally:
        shutil.rmtree(EXTRACT_DIR, ignore_errors=True)
    gui.print_log.put("Done in %s seconds." % gui.secs.split(".")[0])
    return None
def main(argv):
    """ main function

    Starts the gui thread and, when both an address and an archive name
    were given on the command line, rips right away and then stops the gui.

    Arguments:
    argv -- argv-style list; argv[1:-1] joined by spaces is the url of the
            first page of the release, argv[2:] joined by spaces is the
            name of the desired archive

    Returns:
    None (gui.stop() raises SystemExit after a command line rip)
    """
    #GUI
    gui = GUI()
    gui.start()
    time.sleep(3) # give the gui time to draw.
    base_url = " ".join(argv[1:-1])
    zip_name = " ".join(argv[2:])
    # A rip needs the address AND the archive name.
    if base_url and zip_name:
        gui.print_log.put("Arguments were passed.")
        start_rip(argv, gui)
        gui.stop() # forces closing the gui thread for batch processing to continue.
    else:
        gui.print_log.put("No arguments were passed.")
    return None
# Script entry point: run main() and use its return value as exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
# Revise this Paste
# Parent: 37421
# Children: 37457