目 录CONTENT

文章目录

python爬取图床壁纸

懿曲折扇情
2022-05-13 / 0 评论 / 8 点赞 / 313 阅读 / 363 字 / 正在检测是否收录...
温馨提示:
本文最后更新于 2022-05-15,若内容或图片失效,请留言反馈。部分素材来自网络,若不小心影响到您的利益,请联系我们删除。
广告 广告
# coding=utf-8
"""
    Author: gaojs
    Purpose: download wallpaper images from an image-host album, page by page
    New features:
    Date: 2022/5/13 20:14
"""
import os.path
import json
import pprint

import re

import requests
from faker import Factory


def get_random_ua():
    """
    Return a random user-agent string generated by Faker.

    A new Faker generator is created on every call.
    """
    return Factory.create().user_agent()


def get_photo(start_page=1, stop_page=10, save_dir='photo/'):
    """
    Download the images listed on the mengkai.fun album ``C7X``.

    Each album page in ``range(start_page, stop_page)`` is fetched, every
    ``<img>`` thumbnail tag of the expected shape is extracted, and the
    image is saved into ``save_dir``.

    The download URL is built from the directory part of the thumbnail
    ``src`` plus the ``alt`` text of the tag; the ``alt`` text is also used
    as the local file name.

    :param start_page: first album page to fetch (inclusive).
    :param stop_page: page number at which to stop (exclusive), so the
        defaults fetch pages 1 through 9.
    :param save_dir: directory the images are written to; created if missing.
    :return: None. Pages or images that fail to download are reported on
        stdout and skipped.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(save_dir, exist_ok=True)
    # One user agent is picked up front and reused for every request.
    headers = {
        'user-agent': get_random_ua()
    }
    # Compiled once; group 1 is the thumbnail src, group 2 the alt text.
    img_pattern = re.compile(
        r'<img src="(.*?)" alt="(.*?)" width="500" height="281">'
    )
    for page in range(start_page, stop_page):
        url = f'https://mengkai.fun:88/album/C7X/?sort=date_desc&page={page}'
        try:
            rsp = requests.get(url=url, headers=headers, timeout=50)
            rsp.raise_for_status()
        except requests.RequestException as err:
            # A single bad page should not abort the remaining pages.
            print(f'skip page {page}: {err}')
            continue

        for src, alt in img_pattern.findall(rsp.text):
            # The alt text comes from the remote page: keep only its final
            # path component so it cannot escape save_dir.
            file_name = os.path.basename(alt.replace('\\', '/'))
            if file_name in ('', '.', '..'):
                continue
            new_url_content = src[:src.rfind('/')] + '/' + alt
            print(new_url_content)
            try:
                img_rsp = requests.get(new_url_content, headers=headers, timeout=50)
                # Without this an HTTP error page would be saved as an image.
                img_rsp.raise_for_status()
            except requests.RequestException as err:
                print(f'skip image {new_url_content}: {err}')
                continue
            # The file is only created once the download has succeeded.
            with open(os.path.join(save_dir, file_name), mode='wb') as f:
                print(f'******************************正在下载 {alt} ****************************')
                f.write(img_rsp.content)


def get_page():
    """
    Placeholder for a paging helper; not implemented and returns None.
    """
    return None


# Start the download only when run as a script, so that importing this
# module does not trigger network requests.
if __name__ == '__main__':
    get_photo()


8

评论区