您的位置 : 资讯 > 软件教程 > python实现爬虫下载漫画示例

python实现爬虫下载漫画示例

来源:菜鸟下载 | 更新时间:2025-04-29

代码如下:#!/usr/bin/python3.2 import os,socket import urllib import urllib.request,threading,time import re,sys global manhuaweb……

代码如下:

#!/usr/bin/python3.2
# Comic downloader: imports, shared settings and command-line handling.
# The published listing had all of these statements fused onto single
# lines (the shebang even swallowed the imports into a comment); they are
# restored here to one statement per line.
import os
import re
import socket
import sys
import threading
import time
import urllib
import urllib.request

# A module-level ``global`` statement has no effect; it is kept from the
# original listing as a list of the names the functions below share.
global manhuaweb,weburl,floder,chapterbegin,currentthreadnum,threadcount,mutex,mutex2

weburl = ''            # index page URL, taken from argv[1]
floder = ''            # output folder prefix, taken from argv[2]
chapterbegin = 0       # index of the first chapter to download (argv[3])
currentthreadnum = 0   # number of download threads currently counted as busy
threadcount = 6        # upper bound for currentthreadnum (argv[4])

if len(sys.argv) >= 3:
    weburl = sys.argv[1]
    floder = sys.argv[2]
else:
    print("usag: downloadmanhua weburl floder chapterbegin=0 threadnnum=6")
    sys.exit(0)
if len(sys.argv) >= 4:
    chapterbegin = int(sys.argv[3])
if len(sys.argv) >= 5:
    threadcount = int(sys.argv[4])

 

# NOTE(review): the published listing lost its line breaks, one level of
# backslash escapes, and every "<...>" span (eaten as HTML markup). Lines
# marked "reconstructed" below restore that text from the surrounding logic
# and should be confirmed against a clean copy of the script.

# Site root that chapter paths are prepended with.
manhuaweb = r'http://manhua.ali213.net'
# mutex guards currentthreadnum; mutex2 serialises folder creation.
mutex = threading.Lock()
mutex2 = threading.Lock()

# Responses shorter than this many bytes are treated as failed downloads.
# NOTE(review): the comparison was stripped from the listing
# ("if len(data)" with nothing after it); 2048 is a reconstruction - confirm.
MIN_PICTURE_BYTES = 2048

# Pattern selecting the part of the index page that lists the chapters.
# NOTE(review): the real pattern contained HTML tags that the hosting page
# swallowed; only this residue survived. It is kept verbatim and MUST be
# replaced with a pattern that matches the target site's markup.
CHAPTER_LIST_PATTERN = r'</p>.*'


def jin(i, jinzhi):
    """Return the non-negative integer *i* written in base *jinzhi*.

    Digit values above 9 are rendered as lowercase letters starting at 'a'.
    Integer division is used so large values stay exact.

    Raises:
        ValueError: if *i* is negative.
    """
    if i < 0:
        raise ValueError('jin() needs a non-negative integer, got %r' % (i,))
    digits = []
    while True:
        i, answer = divmod(i, jinzhi)
        if answer > 9:
            digits.append(chr(ord('a') + answer - 10))
        else:
            digits.append(str(answer))
        if i == 0:
            break
    # Digits were produced least-significant first.
    return ''.join(reversed(digits))


def urlparse(p, a, c, k):
    """Substitute single-character base-36 tokens in *p* with words from *k*.

    Args:
        p: text whose characters in [a-z0-9] are treated as tokens.
        a: unused; kept so the signature matches the caller.
        c: number of tokens; token n is ``jin(n, 36)``.
        k: list of replacement words; an empty entry means the token
            stands for itself.

    Returns:
        The text with every token replaced. Characters outside [a-z0-9]
        are copied unchanged.
    """
    d = {}
    for index in range(c):
        token = jin(index, 36)
        d[token] = k[index] or token
    pieces = []
    for ch in p:
        # Reconstructed: the upper bounds of both range checks were lost in
        # the listing ("tempi>=ord('a') and tempi"); 'z' and '9' are assumed.
        if 'a' <= ch <= 'z' or '0' <= ch <= '9':
            # A character without a dictionary entry stands for itself.
            pieces.append(d.get(ch, ch))
        else:
            pieces.append(ch)
    return ''.join(pieces)


def meispower(s):
    """Extract the image path from the page's ``eval(...)`` script text *s*.

    The text from the first ``}(`` onward is split on commas into the packed
    string, two integers and a '|'-separated word list, expanded with
    urlparse(), and the value assigned to ``imgpath`` is returned.

    Raises:
        IndexError: if *s* does not have the expected layout.
    """
    # Reconstructed: the listing shows r"(?=}().*", which does not compile;
    # the two backslashes were lost.
    s = re.findall(r"(?=\}\().*", s, re.IGNORECASE)[0]
    # Drop the 19-character tail  '.split('|'),0,{}))  (including the quote
    # that closes the word list).
    s = s[0:(len(s) - 19)]
    par = s.split(',')
    # par[3] still starts with the opening quote of the word list.
    answer = par[3][1:].split('|')
    chapterpath = urlparse(par[0], int(par[1]), int(par[2]), answer)
    allurl = re.findall('imgpath=[^;]*', chapterpath)[0]
    # Skip 'imgpath=' plus two more characters and drop the last two.
    return allurl[10:(len(allurl) - 2)]


def pictofile(weburl, filename, loop=100):
    """Download *weburl* into *filename*, retrying on failure.

    Nothing is downloaded when *filename* already exists. Up to ``loop + 1``
    attempts are made; timeouts, other errors and responses shorter than
    MIN_PICTURE_BYTES all count as failed attempts. A message is printed
    when every attempt has failed. Always returns None.
    """
    # Iterative retry instead of the original recursion from inside the
    # exception handlers; the response is closed on every path.
    for _ in range(loop + 1):
        if os.path.exists(filename):
            return
        try:
            with urllib.request.urlopen(weburl) as url:
                data = url.read()
            if len(data) < MIN_PICTURE_BYTES:
                continue
            print('download from %s name is %s' % (weburl, filename))
            with open('%s' % filename, 'wb') as myfile:
                myfile.write(data)
            return
        except socket.timeout:
            print('timeout')
        except Exception as e:
            # Best-effort download: report the problem and try again.
            print('error', e)
    # Reconstructed: the listing shows 'can't ...' with the escape lost.
    print("can't download the picture %s" % weburl)


def downloadpic(url, loadpicdir, num):
    """Thread body: save the picture at *url* into the folder *loadpicdir*.

    The file is named ``<num>-manhua<last path component of url>``. The
    folder is created when missing. currentthreadnum is decremented when
    the function finishes, whether or not the download worked.
    """
    global currentthreadnum
    try:
        try:
            # The original chdir()'d into the folder from every thread, which
            # changes the process-wide working directory and breaks relative
            # paths; only the existence of the folder is needed.
            with mutex2:
                if not os.path.isdir(loadpicdir):
                    print("can't open the floder %s will be create" % loadpicdir)
                    os.makedirs(loadpicdir, exist_ok=True)
                    print('create floder succeed')
        except OSError:
            print("can't create floder %s" % loadpicdir)
            return
        # Reconstructed: the listing shows r'[0-9a-z.]*Z' (backslash lost);
        # \Z anchors the match at the end of the URL.
        name = re.findall(r'[0-9a-z.]*\Z', url)
        filename = 'manhua' + name[0]
        pictofile(url, loadpicdir + '//' + str(num) + '-' + filename)
    finally:
        # Always give the slot back so downloadchapter() cannot wait forever.
        with mutex:
            currentthreadnum = currentthreadnum - 1


def downloadchapter(url, loadpicdir, num, begin=0):
    """Download pictures *begin* .. *num*-1 of the chapter page at *url*.

    *url* is appended to manhuaweb. The chapter name is read from the page
    title and appended to *loadpicdir* (no separator is inserted). One
    thread is started per picture, and no new thread is started while
    currentthreadnum has reached threadcount.
    """
    global currentthreadnum
    print(manhuaweb + url)
    with urllib.request.urlopen(manhuaweb + url) as response:
        webdata = response.read().decode('UTF-8')
    # Reconstructed: the tag was swallowed by the hosting page; '<title>' is
    # the 7 characters that the slice below removes.
    chaptername = re.findall(r'<title>[^_]*', webdata)[0]
    chaptername = chaptername[7:len(chaptername)]
    # Reconstructed: the listing shows r'eval.*[^]', which does not compile;
    # '<>' inside the character class was swallowed as an empty tag.
    webscrip = re.findall(r'eval.*[^<>]', webdata)
    chapterurl = 'http://mhimg.ali213.net' + meispower(webscrip[0])
    for i in range(begin, num):
        while currentthreadnum >= threadcount:
            time.sleep(0.5)
        with mutex:
            currentthreadnum = currentthreadnum + 1
        try:
            threading.Thread(
                target=downloadpic,
                args=(r'%s%d.jpg' % (chapterurl, i), loadpicdir + chaptername, num),
            ).start()
        except socket.error:
            # The thread never started, so release its slot. The original
            # also did "i=i-1", which has no effect inside a for loop; the
            # picture is skipped, exactly as before.
            with mutex:
                currentthreadnum = currentthreadnum - 1
        except Exception as error:
            with mutex:
                currentthreadnum = currentthreadnum - 1
            print(error, 'break')
            print('download chapter %d of picture make a error' % i)
            break


def main():
    """Read the index page at weburl and download every chapter from chapterbegin on.

    Chapters are taken from the end of the page's link list towards its
    start. The page count passed to downloadchapter() is the number found
    in the matching "N页" entry.
    """
    socket.setdefaulttimeout(60.0)
    with urllib.request.urlopen(weburl) as webfile:
        webdata = webfile.read().decode('UTF-8')
    meshdata = re.findall(CHAPTER_LIST_PATTERN, webdata)[0]
    indexdata = re.findall(r'([0-9]*页)', meshdata)
    picurldata = re.findall(r'/comic/[0-9/]*.html', meshdata)
    chapterlength = len(picurldata)
    # Reconstructed: the listing shows r'[d]+' (backslash lost).
    nummode = re.compile(r'[\d]+')
    for i in range(chapterbegin, chapterlength):
        position = chapterlength - i - 1
        pagecount = int(nummode.findall(indexdata[position])[0])
        downloadchapter(picurldata[position], floder, pagecount)


if __name__ == '__main__':
    main()

菜鸟下载发布此文仅为传递信息,不代表菜鸟下载认同其观点或证实其描述。

展开

相关文章

更多>>

热门游戏

更多>>

手机扫描此二维码,

在手机上查看此页面

关于本站 下载帮助 版权声明 网站地图

版权投诉请发邮件到 cn486com#outlook.com (把#改成@),我们会尽快处理

Copyright © 2019-2020 菜鸟下载(www.cn486.com).All Rights Reserved | 备案号:湘ICP备2022003375号-1

本站资源均收集整理于互联网,其著作权归原作者所有,如有侵犯你的版权,请来信告知,我们将及时下架删除相应资源