pg/lib/getsearchtxt.py

#!/usr/bin/env python3
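# Crawl PikPak share listings through a local proxy and append them to a ".raw" text file.
#
# Usage (inferred from the code below): ./getsearchtxt.py <input.json>
#   <input.json>         JSON array whose items carry a "type_id" holding a share id
#                        (optionally "shareid/fileid" or a ".../s/<shareid>/<fileid>" URL)
#   <input.json>.raw     tab-separated listing lines are appended here; entries already
#                        present are skipped on the next run
#   <input.json>.raw.gz  if present, it is extracted to <input.json>.raw before crawling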
import sys
import re
import json
import requests
import time
import traceback
import gzip
# captures everything after "/s/" in a share URL, i.e. "shareid/fileid"
p = re.compile(r'.*/s/(.*)')
# listing lines containing cover/screen/频道 ("channel") are skipped
skipp = re.compile(r'.*(cover|screen|频道).*', re.IGNORECASE)
reqcount = 1
# "shareid/fileid" keys already recorded in the raw file, to avoid re-fetching
sharedict = set()
def getlist(w, shareid, fileid, morepage):
    """Fetch one directory listing from the proxy, append its lines to w,
    and recurse into sub-folders and additional pages."""
    global p
    global skipp
    global reqcount
    global sharedict
    reqcount += 1
    if reqcount % 5 == 0:
        print(f"reqcount:{reqcount} shareid:{shareid} fileid:{fileid}", file=sys.stderr)
        #time.sleep(1)
    url = f'http://192.168.101.188:9978/proxy?do=pikpak&type=list&share_id={shareid}&file_id={fileid}&pass_code=&morepage={morepage}'
    print(f"url: {url}", file=sys.stderr)
    resp = requests.get(url)
    content = resp.content.decode('utf-8')
    lines = content.split("\n")
    # an empty or leaf listing has no folder entries and only a few header lines
    if "folder" not in content and len(lines) <= 4:
        return
    isfirst = True
    for line in lines:
        if isfirst:
            isfirst = False
            print(f"first line:{line}", file=sys.stderr)
        if skipp.match(line):
            continue
        linearr = line.split('\t')
        if len(linearr) > 2:
            m = p.match(linearr[0])
            if m:
                arr = m.group(1).split("/")
            else:
                arr = linearr[0].split("/")
            shareid = arr[0]
            fileid = arr[1] if len(arr) > 1 else ""
            if shareid + "/" + fileid in sharedict:
                print(f"skip shareid:{shareid} fileid:{fileid}", file=sys.stderr)
                continue
            w.write(line + "\n")
            w.flush()
            # recurse into sub-folders
            if linearr[2] == "folder":
                getlist(w, shareid, fileid, False)
    # request the next page; recursion stops once the listing comes back (nearly) empty
    if len(lines) > 0:
        getlist(w, shareid, fileid, True)
def main():
    # if a gzipped raw file exists, extract it to <input>.raw first
    try:
        f = gzip.open(sys.argv[1] + ".raw.gz", mode="rt", encoding="utf-8")
        if f is not None:
            print(f"found gz raw file:{sys.argv[1]}.raw.gz, extract it", file=sys.stderr)
            with open(sys.argv[1] + ".raw", "w", encoding="utf-8") as w:
                while True:
                    lines = f.readlines()
                    if len(lines) <= 0:
                        break
                    for line in lines:
                        line = line.strip()
                        w.write(line + "\n")
            f.seek(0)
    except Exception:
        traceback.print_exc()
    # load previously fetched "shareid/fileid" keys so they are not requested again
    try:
        f = open(sys.argv[1] + ".raw", "r", encoding="utf-8")
    except Exception:
        f = None
    if f is not None:
        print("found old raw file")
        while True:
            lines = f.readlines()
            if len(lines) <= 0:
                break
            for line in lines:
                linearr = line.split("\t")
                m = p.match(linearr[0])
                if m:
                    arr = m.group(1).split("/")
                else:
                    arr = linearr[0].split("/")
                if len(arr) > 1:
                    shareid = arr[0]
                    fileid = arr[1]
                    sharedict.add(shareid + "/" + fileid)
        f.close()
        print(f"old raw file record:{len(sharedict)}")
    else:
        print("no old raw file")
    # append the listings for every share found in the input JSON
    with open(sys.argv[1] + ".raw", "a+", encoding="utf-8") as w:
        with open(sys.argv[1], "r", encoding="utf-8") as f:
            j = json.load(f)
            for c in j:
                shareid = c.get("type_id")
                fileid = ""
                m = p.match(shareid)
                if m:
                    arr = m.group(1).split("/")
                else:
                    arr = shareid.split("/")
                shareid = arr[0]
                fileid = arr[1] if len(arr) > 1 else ""
                if shareid + "/" + fileid in sharedict:
                    continue
                getlist(w, shareid, fileid, False)
if __name__ == "__main__":
    main()