Files
upload-bot/rss_gen.py

71 lines
3.2 KiB
Python

import requests
from bs4 import BeautifulSoup
from datetime import datetime
import pandas as pd
import argparse
import os
from urlencode import urlencode
def _channel_header(title, feed_file):
    """Return the opening XML of an RSS channel, up to and including atom:link.

    Both feeds share everything except the channel title and the file name
    in the self-referencing ``atom:link`` URL, so only those two vary.
    """
    return (
        '<?xml version="1.0" encoding="utf-8" ?>'
        '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">'
        f"<channel><title>{title}</title>\n"
        "<link>https://torfiles.fieryeagle.org</link>\n"
        "<description>Torfiles RSS Feed</description>\n"
        "<language>en</language>\n"
        "<ttl>5</ttl>\n"
        f'<atom:link href="https://rss.torfiles.fieryeagle.org/{feed_file}"'
        ' rel="self" type="application/rss+xml" />'
    )


# Channel preamble for the "25 most recent" feed (items are appended after it).
XML_BEGIN_RECENT = _channel_header("Torfiles Last 25", "recent_25.xml")

# Channel preamble for the "last 48 hours" feed.
XML_BEGIN_LAST = _channel_header("Torfiles Last 48h", "latest_48h.xml")

# Closing tags shared by both feeds.
XML_END = "</channel></rss>"

# Password used for HTTP basic auth when scraping the upload server;
# None when the COPYPARTY_TOKEN environment variable is not set.
copyparty_token = os.environ.get("COPYPARTY_TOKEN")
def make_torrent_entry(name, timestamp, category):
    """Render one RSS ``<item>`` element for a torrent file.

    Args:
        name: File name of the torrent. Used as the item title and, after
            being passed through ``urlencode``, as the last path segment of
            the item link.
        timestamp: Upload time. ``datetime`` instances (pandas ``Timestamp``
            included) are rendered in the RFC 822 form that RSS 2.0 requires
            for ``pubDate``; naive values are taken to be UTC. Any other
            value is interpolated as-is followed by ``+0000``.
        category: Truthy for a DODI repack, falsy otherwise.

    Returns:
        The ``<item>...</item>`` XML fragment as a string.
    """
    from datetime import timezone
    from email.utils import format_datetime

    label = "DODI_Repack" if category else "Non_Repack"

    if isinstance(timestamp, datetime):
        # Naive timestamps are labelled UTC, matching the "+0000" suffix the
        # feed has always advertised for them.
        aware = timestamp if timestamp.tzinfo is not None else timestamp.replace(tzinfo=timezone.utc)
        pub_date = format_datetime(aware)  # e.g. "Mon, 01 Jan 2024 12:00:00 +0000"
    else:
        pub_date = f"{timestamp} +0000"

    # A literal "]]>" inside the name would terminate the CDATA section early;
    # split it across two adjacent CDATA sections so the title stays intact.
    safe_title = str(name).replace("]]>", "]]]]><![CDATA[>")

    return f"""<item>
<title><![CDATA[{safe_title}]]></title>
<pubDate>{pub_date}</pubDate>
<category>{label}</category>
<link><![CDATA[https://torfiles.fieryeagle.org/{urlencode(name)}]]></link>
<description><![CDATA[Category: {label}{timestamp} +0000]]></description>
</item>"""
def write_file(path, data):
    """Write *data* to *path* as UTF-8 text, creating its directory if needed.

    Args:
        path: Destination file path; resolved with ``os.path.realpath``.
        data: Text to write. Any existing file is overwritten.

    The feeds declare ``encoding="utf-8"`` in their XML prolog, so the file
    is always encoded as UTF-8 rather than with the platform default.
    """
    target = os.path.realpath(path)
    # Create the directory the file actually lives in (not a fixed "RSS"
    # folder), and tolerate it already existing.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "wt", encoding="utf-8") as f:
        f.write(data)
def create_25_recent_feed(df):
    """Write ``RSS/recent_25.xml`` with the 25 newest rows of *df*.

    Rows are ordered by the ``timestamp`` column, newest first, and each one
    is rendered with ``make_torrent_entry``.
    """
    newest_first = df.sort_values("timestamp", ascending=False)

    items = []
    for row in newest_first.head(25).values:
        # Columns are read by position: 0 -> name, 1 -> timestamp,
        # 3 -> repack flag.
        items.append(make_torrent_entry(row[0], row[1], row[3]))

    body = "\n".join(items)
    write_file("RSS/recent_25.xml", XML_BEGIN_RECENT + body + XML_END)
def create_48h_latest(df):
    """Write ``RSS/latest_48h.xml`` with every row of *df* from the last 48 hours.

    The cutoff is computed from the current UTC time: the feed publishes the
    ``timestamp`` column with a ``+0000`` offset, so comparing against local
    wall-clock time would skew the window on any host not running in UTC.
    Rows keep the order they have in *df*.
    """
    from datetime import timezone

    # The column holds naive datetimes, so drop tzinfo after taking UTC "now"
    # to keep the comparison naive-vs-naive.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    cutoff = now_utc - pd.Timedelta(days=2)

    last_48_hours = df[df["timestamp"] > cutoff]
    # Columns are read by position: 0 -> name, 1 -> timestamp, 3 -> repack flag.
    last_48_xml = "\n".join(
        make_torrent_entry(row[0], row[1], row[3]) for row in last_48_hours.values
    )
    write_file("RSS/latest_48h.xml", XML_BEGIN_LAST + last_48_xml + XML_END)
def create_df():
    """Scrape the upload server's file listing into a DataFrame.

    Fetches ``https://upload.fieryeagle.org/torfiles`` with HTTP basic auth
    (user ``torbot``, password ``copyparty_token``) and reads the rows of the
    first ``<tbody>`` in the page.

    Returns:
        A DataFrame with columns ``name``, ``timestamp`` (parsed
        ``datetime``), ``upload_ip`` and ``is_repack`` (whether the name
        contains "DODI", case-insensitively), restricted to rows whose
        ``upload_ip`` is ``49.12.185.202``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        IndexError: If the page has no ``<tbody>`` or a row has fewer cells
            than expected.
        ValueError: If a timestamp cell is not ``YYYY-MM-DD HH:MM:SS``.
    """
    response = requests.get(
        "https://upload.fieryeagle.org/torfiles",
        auth=("torbot", copyparty_token),
        timeout=30,  # never hang forever on an unresponsive server
    )
    # Fail loudly on 401/403/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    names = []
    timestamps = []
    upload_ips = []
    # Single pass over the listing; cell positions are 1 -> file name,
    # 4 -> uploader IP, 7 -> timestamp.
    for row in soup.find_all("tbody")[0].find_all("tr"):
        cells = row.find_all("td")
        names.append(cells[1].text)
        upload_ips.append(cells[4].text)
        timestamps.append(datetime.strptime(cells[7].text, "%Y-%m-%d %H:%M:%S"))

    df = pd.DataFrame({
        "name": names,
        "timestamp": timestamps,
        "upload_ip": upload_ips,
    })
    df["is_repack"] = df["name"].str.contains("DODI", case=False)
    # NOTE(review): presumably the address the upload bot posts from — confirm.
    return df[df["upload_ip"] == "49.12.185.202"]