爬取数据保存到MySQL

warning: 这篇文章距离上次修改已过765天,其中的内容可能已经有所变动。
import pymysql
from pyquery import PyQuery as pq
import requests

def Get_HTML(url, timeout=10):
    """Download a page and return its decoded text.

    :param url: address of the page to fetch.
    :param timeout: seconds to wait for the server before giving up;
        without a timeout ``requests.get`` may block indefinitely.
    :return: the response body as ``str``, or ``None`` when the request
        fails (connection error, timeout, or non-2xx status).
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
        # Let requests guess the charset from the body instead of trusting
        # the HTTP header alone.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are handled here; programming errors
        # and KeyboardInterrupt are allowed to propagate.
        print("Error!")
        return None
def parse_page(html):
    """Extract the ranked entries from the list page.

    The five columns (rank, title, link, cast, score) are read with CSS
    selectors and combined position by position.

    NOTE(review): each text column is obtained by splitting the combined
    selector text on single spaces, so this assumes that no individual
    title/cast/score value contains a space -- confirm against the page.

    :param html: page source as a string; ``None`` or an empty string is
        accepted and yields an empty result.
    :return: dict mapping each rank string to a dict with the keys
        '序号', '名称', '链接', '演员' and '评分'.
    """
    if not html:
        return {}

    doc = pq(html)
    title = doc('.li03 a').text().split(' ')    # titles
    actor = doc('.li04 span').text().split(' ')  # cast
    index = doc('.li01 b').text().split(' ')     # rank numbers
    href = [a.attr('href') for a in doc('.li03 a').items()]  # links
    score = doc('.li05 span').text().split(' ')  # scores

    # zip() stops at the shortest column, so a page with fewer rows than
    # expected no longer raises IndexError (the original indexed 0..99).
    Result = {}
    for idx, name, link, cast, rating in zip(index, title, href, actor, score):
        Result[idx] = {
            '序号': idx,
            '名称': name,
            '链接': link,
            '演员': cast,
            '评分': rating,
        }
    return Result


def write_to_mysql(dict):
    """Insert one record into the ``movie`` table.

    The dict keys are used as column names and the dict values as the row
    values. Column names are interpolated into the SQL text, so the keys
    must come from trusted code; the values are passed as query parameters.

    :param dict: mapping of column name to value for a single row.
    :return: None. Prints "successful" when a row was inserted, or
        "failed" followed by the error when the statement raised.
    """
    db = pymysql.connect(host='******', user='root', password='******', port=3306, db='xiaomishop')
    try:
        table = 'movie'
        keys = ','.join(dict.keys())
        values = ','.join(['%s'] * len(dict))
        sql = 'insert into {table}({keys}) values ({values})'.format(table=table, keys=keys, values=values)
        try:
            with db.cursor() as cursor:
                if cursor.execute(sql, tuple(dict.values())):
                    print("successful")
                    db.commit()
        except Exception as exc:
            # Best-effort write: report the cause and undo the transaction
            # instead of silently swallowing the error.
            print("failed", exc)
            db.rollback()
    finally:
        # Always release the connection, even if rollback itself fails.
        db.close()


def update_data(dict):
    """Insert one record into ``movie``, or update it if the key exists.

    Builds an ``INSERT ... ON DUPLICATE KEY UPDATE`` statement whose
    column list and update list both come from the dict keys. Column names
    are interpolated into the SQL text, so the keys must come from trusted
    code; the values are passed as query parameters.

    :param dict: mapping of column name to value for a single row.
    :return: None. Prints "successful" when the statement affected rows,
        or "failed" followed by the error when it raised.
    """
    # The original passed the port number as ``passwd='3306'`` (a password
    # keyword) next to ``password``; the port belongs in ``port``.
    db = pymysql.connect(host='******', user='root', password='******', port=3306, db='xiaomishop')
    try:
        table = 'movie'    # table name
        keys = ','.join(dict.keys())
        values = ','.join(['%s'] * len(dict))
        sql = 'insert into {table}({keys}) values ({values}) ON DUPLICATE KEY UPDATE'.format(table=table, keys=keys, values=values)
        # Each fragment starts with a space, which also separates the first
        # assignment from the UPDATE keyword.
        update = ','.join([' {key}=%s'.format(key=key) for key in dict])
        sql += update
        try:
            with db.cursor() as cursor:
                # The values are supplied twice: once for the INSERT part
                # and once for the UPDATE part.
                if cursor.execute(sql, tuple(dict.values()) * 2):
                    print("successful")
                    db.commit()
        except Exception as exc:
            # Best-effort write: report the cause and undo the transaction
            # instead of silently swallowing the error.
            print("failed", exc)
            db.rollback()
    finally:
        db.close()

# Query
def search():
    """Print the row count and every row of the ``movie`` table.

    :return: None. Prints "Error" followed by the cause when the query
        fails.
    """
    # NOTE(review): the credentials are hard-coded here; consider loading
    # them from configuration. The original passed the port number as
    # ``passwd='3306'`` (a password keyword); it belongs in ``port``.
    db = pymysql.connect(host='127.0.0.1', user='root', password='581581', port=3306, db='xiaomishop')
    try:
        sql = 'select * from movie'
        try:
            with db.cursor() as cursor:
                cursor.execute(sql)
                print(cursor.rowcount)
                # Fetch one row at a time rather than loading the whole
                # table into memory.
                row = cursor.fetchone()
                while row:
                    print('Row:', row)
                    row = cursor.fetchone()
        except Exception as exc:
            print("Error", exc)
    finally:
        db.close()

def main():
    """Fetch the ranking page, parse it, then print the stored rows.

    Get_HTML returns None when the download fails; in that case parsing is
    skipped so the database query below still runs instead of crashing.
    """
    url = 'https://www.1905.com/vod/top/lst/'
    html = Get_HTML(url)
    if html is not None:
        item = parse_page(html)
        # Persisting the parsed entries is currently disabled:
        # for i in range(len(item)):
        #     # write_to_mysql(item[str(i+1)])   # insert
        #     update_data(item[str(i+1)])       # insert or update
    search()                    # query


# Runs on import as well as on direct execution, as in the original script.
main()
none
最后修改于:2022年11月18日 21:12

添加新评论