Using the pyquery library
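
pyquery wraps lxml in a jQuery-like API, so an HTML document can be queried with CSS selectors. As a quick orientation before the full scraper below, here is a minimal sketch of the calls the script relies on; the HTML snippet is made up (it only mirrors the .li03 class the script targets), not the actual 1905.com markup.

from pyquery import PyQuery as pq

html = '''
<ul>
  <li class="li03"><a href="/film/1">Movie A</a></li>
  <li class="li03"><a href="/film/2">Movie B</a></li>
</ul>
'''

doc = pq(html)                       # build a queryable document from an HTML string
print(doc('.li03 a').text())         # "Movie A Movie B" - text of all matches, space-joined
for a in doc('.li03 a').items():     # .items() yields each match as its own PyQuery object
    print(a.attr('href'), a.text())  # /film/1 Movie A, then /film/2 Movie B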

import csv
from pyquery import PyQuery as pq
import requests
import json

def Get_HTML(url):
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, headers=headers)
        r.raise_for_status()              # raise for 4xx/5xx status codes
        r.encoding = r.apparent_encoding  # let requests guess the real encoding
        return r.text
    except requests.RequestException as e:  # network or HTTP error
        print('Request failed:', e)
        return None

def parse_page(html):
    doc = pq(html)
    title = doc('.li03 a').text().split(' ')     # movie titles
    actor = doc('.li04 span').text().split(' ')  # cast
    index = doc('.li01 b').text().split(' ')     # rank numbers
    href = []  # detail-page links
    for i in doc('.li03 a').items():  # iterate over each <a> node
        href.append(i.attr('href'))
    score = doc('.li05 span').text().split(' ')  # ratings
    # Save as a CSV file
    # with open('1905电影排行榜.csv', 'a+', newline='', encoding='utf-8') as csvfile:
    #     writer = csv.writer(csvfile)
    #     writer.writerow(['序号', '名称', '链接', '演员', '评分'])
    #     for i in range(len(title)):
    #         writer.writerow([index[i], title[i], href[i], actor[i], score[i]])
    # Save into a dict keyed by rank number
    Result = {}
    for i in range(len(index)):  # use the actual list length, not a hard-coded 100
        result = {
            '序号': index[i],
            '名称': title[i],
            '链接': href[i],
            '演员': actor[i],
            '评分': score[i]
        }
        Result[index[i]] = result
    return Result

def write_to_txtfile(item):
    with open('1905电影排行榜.txt', 'a+', encoding='UTF-8') as f:
        f.write(json.dumps(item, ensure_ascii=False) + '\n')

def main():
    url = 'https://www.1905.com/vod/top/lst/'
    html = Get_HTML(url)
    if html is None:  # request failed, nothing to parse
        return
    item = parse_page(html)
    # Write the dict to a TXT file (one JSON object per line)
    # for i in range(1, len(item) + 1):
    #     write_to_txtfile(item[str(i)])
    # Write the dict to a CSV file
    # with open('1905电影排行榜_111.csv', 'a+', newline='', encoding='utf-8') as csvfile:
    #     fieldnames = ['序号', '名称', '链接', '演员', '评分']
    #     writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    #     writer.writeheader()
    #     for i in range(len(item)):
    #         writer.writerow(item[str(i + 1)])


if __name__ == '__main__':
    main()
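
As a side note, parse_page pairs its five lists by position. A zip()-based variant builds the same rank-keyed dict and simply stops at the shortest list, so a short or malformed page cannot raise an IndexError. This is only a sketch, assuming the five selectors return exactly one entry per film; parse_page_zip is a name introduced here, not part of the original script.

from pyquery import PyQuery as pq  # repeated so the sketch stands on its own

def parse_page_zip(html):
    # Same output as parse_page, but pairs the parallel lists with zip()
    # instead of indexing into them with a fixed-range loop.
    doc = pq(html)
    index = doc('.li01 b').text().split(' ')
    title = doc('.li03 a').text().split(' ')
    href = [a.attr('href') for a in doc('.li03 a').items()]
    actor = doc('.li04 span').text().split(' ')
    score = doc('.li05 span').text().split(' ')
    return {
        idx: {'序号': idx, '名称': t, '链接': h, '演员': a, '评分': s}
        for idx, t, h, a, s in zip(index, title, href, actor, score)
    }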
