added rss generator
.gitignore (vendored) | 1 +
@@ -1,2 +1,3 @@
 .env
 venv/
+RSS
@@ -5,6 +5,8 @@ import re
 import requests
 from bs4 import BeautifulSoup
 import logging
+import rss_gen
+from urlencode import urlencode
 
 ##########################################
 #Logging
@@ -33,9 +35,6 @@ intents.messages = True
 intents.message_content = True
 bot = discord.Bot(intents=intents)
 
-def urlencode(inp: str):
-    return "".join([i if i in ["~", "-", "_", "."] or i.isalnum() else "%"+str(hex(ord(i))[2:].upper()) for i in inp])
-
 def pixelget(url: str):
     response = requests.get(url)
     soup = BeautifulSoup(response.text, 'html.parser')
@@ -138,6 +137,9 @@ async def on_message(message):
         os.remove(i)
         # noqa: E501
         logger.info(f"Posted link for {i}")
+    df = rss_gen.create_df()
+    rss_gen.create_48h_latest(df)
+    rss_gen.create_25_recent_feed(df)
     return
 
 def main():
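Note: with this hunk, every on_message pass finishes by rescraping the upload listing and rewriting both feeds. A minimal sketch of the same flow as a standalone script (a hypothetical refresh_feeds.py, not part of this commit), e.g. for running from cron instead of the bot:

    import rss_gen

    def refresh_feeds():
        df = rss_gen.create_df()            # scrape the copyparty listing into a DataFrame
        rss_gen.create_48h_latest(df)       # writes RSS/latest_48/index
        rss_gen.create_25_recent_feed(df)   # writes RSS/recent_25/index

    if __name__ == "__main__":
        refresh_feeds()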
@@ -2,3 +2,4 @@ py-cord
 python-dotenv
 requests
 bs4
+pandas
rss_gen.py (new file) | 70 +
@@ -0,0 +1,70 @@
+import requests
+from bs4 import BeautifulSoup
+from datetime import datetime
+import pandas as pd
+import argparse
+import os
+from urlencode import urlencode
+
+XML_BEGIN_RECENT = """<?xml version="1.0" encoding="utf-8" ?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Torfiles Last 25</title>
+<link>https://torfiles.fieryeagle.org</link>
+<description>Torfiles RSS Feed</description>
+<language>en</language>
+<ttl>5</ttl>
+<atom:link href="http://rss.torfiles.fieryeagle.org" rel="self" type="application/rss+xml" />"""
+
+XML_BEGIN_LAST = """<?xml version="1.0" encoding="utf-8" ?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Torfiles Last 48h</title>
+<link>https://torfiles.fieryeagle.org</link>
+<description>Torfiles RSS Feed</description>
+<language>en</language>
+<ttl>5</ttl>
+<atom:link href="http://rss48h.torfiles.fieryeagle.org" rel="self" type="application/rss+xml" />"""
+
+XML_END = """</channel></rss>"""
+
+
+def make_torrent_entry(name, timestamp, category):
+    if category:
+        category = "DODI Repack"
+    else:
+        category = "Non_Repack"
+    return f"""<item>
+<title><![CDATA[{name}]]></title>
+<pubDate>{timestamp} +0000</pubDate>
+<category>{category}</category>
+<link><![CDATA[{urlencode(name)}]]></link>
+<description><![CDATA[Category: {category} → {timestamp} +0000]]></description>
+</item>"""
+
+
+def write_file(path, data):
+    if not os.path.exists("RSS"):
+        os.mkdir("RSS")
+    with open(os.path.realpath(path), "wt") as f:
+        f.write(data)
+
+
+def create_25_recent_feed(df):
+    most_recent_25 = df.sort_values("timestamp", ascending=False).head(25)
+    latest_25_xml = "\n".join([make_torrent_entry(i[0], i[1], i[3]) for i in most_recent_25.values])
+    write_file("RSS/recent_25/index", XML_BEGIN_RECENT+latest_25_xml+XML_END)
+
+
+def create_48h_latest(df):
+    last_48_hours = df[df['timestamp'] > datetime.now() - pd.Timedelta(days=2)]
+    last_48_xml = "\n".join([make_torrent_entry(i[0], i[1], i[3]) for i in last_48_hours.values])
+    write_file("RSS/latest_48/index", XML_BEGIN_LAST+last_48_xml+XML_END)
+
+
+def create_df():
+    torfiles_scrape = requests.get("https://upload.fieryeagle.org/torfiles", auth=("",os.getenv("copyparty")))
+
+    soup = BeautifulSoup(torfiles_scrape.content, "html.parser")
+    torrent_files_names = [i.find_all("td")[1].text for i in soup.find_all("tbody")[0].find_all("tr")]
+    torrent_files_timestamps = [i.find_all("td")[7].text for i in soup.find_all("tbody")[0].find_all("tr")]
+    torrent_files_timestamps = [datetime.strptime(i, "%Y-%m-%d %H:%M:%S") for i in torrent_files_timestamps]
+    torrent_files_upload_ip = [i.find_all("td")[4].text for i in soup.find_all("tbody")[0].find_all("tr")]
+
+    df = pd.DataFrame({
+        "name": torrent_files_names,
+        "timestamp": torrent_files_timestamps,
+        "upload_ip": torrent_files_upload_ip
+    })
+
+    df['is_repack'] = df['name'].str.contains('DODI', case=False)
+    return df[df["upload_ip"] == "49.12.185.202"]
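Two review notes on rss_gen.py. First, write_file only ensures the top-level RSS/ directory exists, but both callers write into subdirectories (RSS/recent_25/index, RSS/latest_48/index); unless those subdirectories are created out of band, open() raises FileNotFoundError. A sketch that creates the whole parent chain instead, same behaviour otherwise:

    import os

    def write_file(path, data):
        # Create every missing parent (e.g. RSS/recent_25), not just RSS/.
        os.makedirs(os.path.dirname(os.path.realpath(path)), exist_ok=True)
        with open(os.path.realpath(path), "wt") as f:
            f.write(data)

Second, RSS 2.0 expects RFC 822 dates in <pubDate>; make_torrent_entry emits "YYYY-MM-DD HH:MM:SS +0000", which many readers tolerate but feed validators flag. Since create_df already parses the timestamps into datetime objects, a conformant string is one stdlib call away:

    from datetime import timezone
    from email.utils import format_datetime

    # e.g. "Tue, 02 Jan 2024 03:04:05 +0000"
    pub_date = format_datetime(timestamp.replace(tzinfo=timezone.utc))

Smaller points: make_torrent_entry reads df.values positionally (i[0] is name, i[1] is timestamp, i[3] is is_repack; upload_ip is column 2), <link> receives only the encoded file name rather than an absolute URL, and import argparse is unused.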
urlencode.py (new file) | 2 +
@@ -0,0 +1,2 @@
+def urlencode(inp: str):
+    return "".join([i if i in ["~", "-", "_", "."] or i.isalnum() else "%"+str(hex(ord(i))[2:].upper()) for i in inp])
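Two caveats on this helper: hex(ord(i))[2:] is not zero-padded, and str.isalnum() is Unicode-aware, so non-ASCII letters pass through unencoded while other non-ASCII characters come out as invalid multi-digit escapes. A quick comparison against the stdlib's urllib.parse.quote, assuming urlencode.py is on the import path:

    from urllib.parse import quote
    from urlencode import urlencode

    print(urlencode("a b"), quote("a b"))    # a%20b  a%20b     -- agree on plain ASCII
    print(urlencode("a\nb"), quote("a\nb"))  # a%Ab   a%0Ab     -- no zero padding
    print(urlencode("é"), quote("é"))        # é      %C3%A9    -- isalnum() lets non-ASCII letters through
    print(urlencode("→"), quote("→"))        # %2192  %E2%86%92 -- code point vs. UTF-8 bytes

For plain-ASCII torrent names the output matches quote(name, safe=""), but quote is the safer drop-in.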