Pythonで、スクレイピングへの道(その2)

(その1)のプログラムで更新を検知したら、メールで通知する

実行コマンド
 python scraping-header_email.py https://www.aitake.net/ ./diffFileName

# Program Start
# URL UpdateCheck Program
# Execute: python scraping-header_email.py {urltxt} {difffilename}

# Import Files
import sys
import requests 
import bs4 
from datetime import datetime
import smtplib
from email.mime.text import MIMEText

def EmailUpdate(urltxt):
    """Send a notification e-mail reporting that a web site was updated.

    The recipient, sender and SMTP relay are fixed in the function body.

    Args:
        urltxt: URL of the updated site; appended to the message body.

    Returns:
        An empty tuple (kept so existing callers see the same return value).

    Raises:
        Any exception raised by smtplib while connecting or sending
        propagates to the caller; nothing is caught here.
    """
    # Send config
    to_email = "info@heroheo.co.jp"
    from_email = "dokoka@aaaa.co.jp"
    smtp_server = '172.16.0.1'
    smtp_port = 25

    # Build the MIME message
    message = "WebSite Update:" + urltxt
    msg = MIMEText(message, "html")
    msg["Subject"] = "WebSite Update"
    msg["To"] = to_email
    msg["From"] = from_email

    # The context manager closes the connection (QUIT) even when
    # send_message raises, so the socket is never leaked.
    with smtplib.SMTP(smtp_server, smtp_port) as server:
        server.send_message(msg)
    print('Email sent.')

    return ()

def ScrapingHeader(urltxt, diff_file_path):
    """Check a URL's Last-Modified header and notify by e-mail on change.

    The timestamp from the previous run is kept as text in
    ``diff_file_path``. When the current timestamp differs from it, the
    file is overwritten with the new value and ``EmailUpdate`` is called.

    Args:
        urltxt: URL to check with an HTTP HEAD request.
        diff_file_path: Path of the file holding the last seen timestamp.
            A missing file is treated as "no previous record", so the
            first run records the timestamp and sends a notification.

    Returns:
        An empty tuple (kept so existing callers see the same return value).

    Raises:
        KeyError: If the response has no ``Last-Modified`` header.
        ValueError: If the header does not match the expected date format.
    """
    # HEAD request only; the timeout keeps an unresponsive server from
    # blocking the script indefinitely.
    response = requests.head(urltxt, timeout=30)

    # Parse the Last-Modified header into a datetime
    html_timestamp = datetime.strptime(
        response.headers['Last-Modified'], "%a, %d %b %Y %H:%M:%S GMT"
    )
    print(html_timestamp)

    # Read the timestamp saved by the previous run, if any
    try:
        with open(diff_file_path) as diff_file:
            past_updateinfo = diff_file.read()
    except FileNotFoundError:
        past_updateinfo = ""
    print(past_updateinfo)

    # Compare as strings; strip so a trailing newline in a hand-edited
    # state file does not cause a false "Update".
    if str(html_timestamp) == past_updateinfo.strip():
        print("Not Update")
    else:
        # Persist the new timestamp, then notify
        with open(diff_file_path, 'w') as diff_file:
            diff_file.write(str(html_timestamp))
        EmailUpdate(urltxt)
        print("Update")
    return ()

if __name__ == '__main__':
    # Command-line entry point: expects <url> <diff_filename>.
    args = sys.argv
    # argv[0] is the script name, so two user arguments mean len(args) >= 3.
    # The previous check (2 <= len) let a single argument through and then
    # failed with IndexError on args[2].
    if 3 <= len(args):
        ScrapingHeader(args[1], args[2])
    else:
        print('Arguments are too short, Need url & diff_filename ')
# Program End
タイトルとURLをコピーしました