掌握以下内容后,你可以把任意感兴趣的内容整理后发送到邮箱,每日查看。
图片中我为了方便扩展分模块处理了,所以要注意一下
你只需要修改 email.py 里面的邮箱和密码部分,把它们都改成你自己的
纯净版.py
import requests
from bs4 import BeautifulSoup
from pythonPackageOwn import email
# Crawl the Zhihu hot list and mail the top entries.
url = 'https://www.zhihu.com/billboard?utm_id=0'  # replace with the URL you want to crawl

# A timeout keeps the scheduled job from hanging forever on a stalled
# connection; raise_for_status() stops an HTTP error page from being parsed
# and mailed out as if it were a valid (empty) hot list.
response = requests.get(url, timeout=10)
response.raise_for_status()

# HTML body returned by the endpoint.
html = response.text

# Parse the HTML with BeautifulSoup.
soup = BeautifulSoup(html, 'html.parser')

# Every hot-list entry is an <a class="HotList-item"> element.
a_tags = soup.find_all('a', class_='HotList-item')
# print(len(a_tags))

# Target file (including path) for the optional Markdown export below.
file_name = '知乎热榜.md'  # replace with the file name / path you want

# Optional: append the entries to a Markdown file instead of mailing them.
# with open(file_name, 'a', encoding='utf-8') as md_file:
#     for (index, a_tag) in enumerate(a_tags[:25]):
#         title = a_tag.find('div', class_='HotList-itemTitle').text.strip()
#         print(f"{index + 1}. {title}")
#         everyNew = f"{index + 1}. {title}"
#         md_file.write(everyNew + "\n")
# print(f"Markdown内容已成功追加到文件: {file_name}")

# Collect the first 25 entries as "<rank>. <title>" lines.
allNews = []
for rank, a_tag in enumerate(a_tags[:25], start=1):
    title_tag = a_tag.find('div', class_='HotList-itemTitle')
    if title_tag is None:
        # Entry without a title element: skip it instead of crashing
        # with AttributeError on None.
        continue
    title = title_tag.text.strip()
    everyNew = f"{rank}. {title}"
    print(everyNew)
    allNews.append(everyNew)

# Join with <br> because the text is sent as the HTML part of the mail.
allNews = "<br>".join(allNews)
# print(allNews)
email.sendEmail(allNews)
pythonPackageOwn/email.py
## -*- coding: utf-8
import smtplib, ssl
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.header import Header
def sendEmail(allContent):
    """Send the collected content by email through the 163 SMTP server.

    The message carries a short plain-text part followed by ``allContent``
    as an HTML part, and is delivered to both the "To" and the "Cc" address.

    Args:
        allContent: HTML string used as the HTML part of the message.

    Raises:
        Exceptions raised by ``smtplib`` (connection, login or delivery
        failures) propagate to the caller.
    """
    # Mail server configuration.
    smtp_server = 'smtp.163.com'
    smtp_port = 465

    # Sender configuration.
    mail_sender = '2@163.com'  # sender address
    # Sender password: the authorization code issued when the mailbox's
    # SMTP service is enabled, not the normal login password.
    mail_sender_password = ''

    # Recipients.
    receiver_email = "24222@qq.com"
    cc_email = '120@qq.com'

    # 1. Create the message object.
    message = MIMEMultipart()
    # 2. Subject (encoded so the non-ASCII text survives transport).
    message['Subject'] = Header('知乎热榜数据', 'utf-8').encode()
    # 3. Sender.
    message['From'] = mail_sender
    # 4. Recipient.
    message['To'] = receiver_email
    # 5. Carbon copy.
    message['Cc'] = cc_email

    # 6.1 Plain-text part.
    message.attach(MIMEText('这是我今天收集的你感兴趣的内容', 'plain', 'utf-8'))
    # 6.2 HTML part.
    message.attach(MIMEText(allContent, 'html', 'utf-8'))

    # 7. Optional attachment.
    # attachment = MIMEText(open('poycode.txt', 'rb').read(), 'base64', 'utf-8')
    # attachment["Content-Disposition"] = 'attachment; filename="poycode.txt"'
    # message.attach(attachment)

    # Pass the SSL context to the connection (it was previously created but
    # never used), and use a `with` block so the connection is closed even
    # when login or delivery raises.
    context = ssl.create_default_context()
    with smtplib.SMTP_SSL(smtp_server, smtp_port, context=context) as server:
        server.login(mail_sender, mail_sender_password)
        # The envelope recipient list must include the Cc address; the
        # "Cc" header alone does not cause delivery.
        server.sendmail(
            mail_sender,
            [receiver_email, cc_email],
            message.as_string(),
        )
    print("Email sent successfully.")
然后你再使用 GitHub 的 Actions 服务,让脚本每日运行一次,就可以实现每日自动推送
# Runs the crawler once a day (and on manual trigger) and mails the result.
name: Crawl Zhihu and Append to Markdown
on:
  schedule:
    - cron: '0 0 * * *'  # once a day at 00:00 UTC
  workflow_dispatch:  # allow manual runs from the Actions tab
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Check Out Repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML never parses the version as a float.
          python-version: '3.11'
      - name: Install Dependencies
        run: |
          pip install requests
          pip install beautifulsoup4
      - name: Run Crawler
        run: python 纯净版.py  # replace with your crawler script's file name