영어로 된 만화를 다운로드하는 코드. 다운로드한 뒤 꿀뷰 같은 뷰어로 보는 것이 편해서 만들었다. ㅋㅋ
from twill.commands import *
import re,urllib2,os
def WebLinkSave(url, savedfilename):
    """Download the resource at *url* and write it to *savedfilename*.

    The payload is copied as raw bytes, so binary files such as images are
    stored unchanged.  If the URL cannot be opened, a message is printed and
    the program terminates with a non-zero exit status.

    Args:
        url: Address to fetch with ``urllib2.urlopen``.
        savedfilename: Path of the file to create or overwrite.

    Raises:
        SystemExit: If opening *url* fails.
    """
    try:
        response = urllib2.urlopen(url)
    except Exception:
        # urlopen can fail with several unrelated exception types (URLError,
        # ValueError for a malformed URL, low-level HTTP errors), so the
        # handler stays broad -- but unlike a bare ``except:`` it lets
        # KeyboardInterrupt and SystemExit propagate.
        print("Cannot save url: " + url)
        # Raise SystemExit directly rather than calling a bare ``exit()``,
        # which this file does not define itself, and report the failure
        # through a non-zero exit status.
        raise SystemExit(1)

    # Read the whole body before touching the output path so that a failed
    # transfer does not leave an empty or truncated file behind.
    try:
        payload = response.read()
    finally:
        response.close()

    with open(savedfilename, 'wb') as output:
        output.write(payload)
# NOTE: this script targets Python 2 (raw_input, urllib2).  Every print call
# below takes a single pre-formatted string so the output is identical whether
# ``print`` is parsed as a statement or as a function.

# Absolute links to chapter pages / picture files.  The dot in front of the
# extension is escaped so that only a literal '.' matches, and the patterns
# are compiled once instead of once per downloaded page.
_CHAPTER_URL_RE = re.compile(r"http://[A-Za-z0-9_./-]+\.html")
_IMAGE_URL_RE = re.compile(r"http://[A-Za-z0-9_./-]+\.jpg")
# Matches the page counter embedded in a chapter page, e.g. "total_pages=23".
_TOTAL_PAGES_RE = re.compile(r"total_pages\s*=\s*(\d+)")

_INDEX_TMPFILE = 'mangafox.htm'


def _load_index_html(url):
    """Open *url* with twill and return the page source as one string."""
    go(url)
    save_html(_INDEX_TMPFILE)
    try:
        with open(_INDEX_TMPFILE) as index_file:
            return index_file.read()
    finally:
        # Always clean up the temporary dump, even if reading it failed.
        if os.path.exists(_INDEX_TMPFILE):
            os.remove(_INDEX_TMPFILE)


def _extract_chapter_urls(html):
    """Return every chapter URL found on lines containing class="tips">."""
    marker = '''class="tips">'''
    tagged = [line for line in html.split('\n') if marker in line]
    return _CHAPTER_URL_RE.findall(''.join(tagged))


def _fetch_text(url):
    """Download *url* with urllib2 and return the response body."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _find_total_pages(page):
    """Return the number after 'total_pages=' in *page*, or None if absent."""
    found = _TOTAL_PAGES_RE.search(page)
    return int(found.group(1)) if found else None


def _find_image_url(page):
    """Return the first .jpg URL on the first '<img src=' line, else None."""
    for line in page.split('\n'):
        if '<img src=' in line:
            found = _IMAGE_URL_RE.search(line)
            return found.group(0) if found else None
    return None


def _ask_chapter_range(totalchapter):
    """Prompt for the start/end chapter numbers and validate them.

    Exits with an error message when fewer than two integers are given or
    when the range does not lie within 1..totalchapter.
    """
    answer = raw_input("Input start and end chapter number you want to save : ")
    try:
        # Only the first two tokens are used; extra tokens are ignored.
        startchapter, endchapter = [int(tok) for tok in answer.split()[:2]]
    except ValueError:
        raise SystemExit("Expected two integers: start and end chapter number")
    if not 1 <= startchapter <= endchapter <= totalchapter:
        raise SystemExit(
            "Chapter numbers must satisfy 1 <= start <= end <= %d" % totalchapter)
    return startchapter, endchapter


def _main():
    """Ask for a manga title and chapter range, then download every page.

    Pictures are written to a sub-folder of the current directory named after
    the title, as "<title>-c<chapter>-<page>.jpg".
    """
    title = raw_input('input manga title: ')
    # os.path.join keeps the '.\\<title>\\' layout on Windows and also works
    # on systems whose path separator is not a backslash.
    datafolder = os.path.join(os.curdir, title)

    chapters = _extract_chapter_urls(
        _load_index_html('http://www.mangafox.com/manga/' + title))
    totalchapter = len(chapters)
    print('Total number of chapters is %d' % totalchapter)
    if not chapters:
        raise SystemExit("No chapter links found for title: " + title)

    startchapter, endchapter = _ask_chapter_range(totalchapter)

    ### create data folder to save pictures into
    if not os.path.exists(datafolder):
        os.makedirs(datafolder)

    for chapternum in range(startchapter, endchapter + 1):
        # Chapters are counted from the end of the list: chapter 1 is the
        # last URL found on the index page (presumably the site lists the
        # newest chapter first -- confirm against the live page).
        chapter = chapters[-chapternum]
        # Drop the trailing 6 characters; this assumes the chapter URL ends
        # in "1.html" so that "<n>.html" can be appended for each page.
        pageheader = chapter[:-6]
        print('\nChapter %d' % chapternum)

        totalpages = _find_total_pages(_fetch_text(chapter))
        if totalpages is None:
            # Never reuse the page count of the previous chapter.
            print('Cannot find total_pages for chapter %d, skipping' % chapternum)
            continue
        print('total pages for this chapter is %d' % totalpages)

        for pagenum in range(1, totalpages + 1):
            pageurl = pageheader + str(pagenum) + '.html'
            imageurl = _find_image_url(_fetch_text(pageurl))
            if imageurl is None:
                # Never fall back to the picture link of the previous page.
                print('Cannot find picture link in ' + pageurl + ', skipping')
                continue
            # save picture
            filename = title + "-c%04d-%03d.jpg" % (chapternum, pagenum)
            WebLinkSave(imageurl, os.path.join(datafolder, filename))
            print(filename)


if __name__ == "__main__":
    _main()
'Coding > Python Matlab' 카테고리의 다른 글
파이썬 - multi-column data array (list) sorting (0) | 2012.02.24 |
---|---|
파이썬 - SciPy, NumPy (2) | 2012.02.23 |
파이썬 - wxPython (0) | 2012.01.29 |
파이썬 - lambda (0) | 2012.01.27 |
파이썬 - how to sort a Python dict (0) | 2012.01.26 |