telegram: Improve web media handling
patch 3a9740db19a230cfa53758a4d89dbe82ba024ff5
Author: E. Bosch <presidev@AT@gmail.com>
Date: Sat Feb 19 02:10:00 CET 2022
* telegram: Improve web media handling
diff -rN -u old-irgramd/telegram.py new-irgramd/telegram.py
--- old-irgramd/telegram.py 2024-11-22 15:59:07.154942421 +0100
+++ new-irgramd/telegram.py 2024-11-22 15:59:07.154942421 +0100
@@ -11,7 +11,7 @@
from include import CHAN_MAX_LENGHT, NICK_MAX_LENGTH
from irc import IRCUser
-from utils import sanitize_filename, remove_slash, remove_http_s, get_human_size, get_human_duration
+from utils import sanitize_filename, is_url_equiv, extract_url, get_human_size, get_human_duration
# Telegram
@@ -410,12 +410,10 @@
async def handle_webpage(self, webpage, message):
media_type = 'web'
logo = await self.download_telegram_media(message)
- if webpage.url != webpage.display_url \
- and remove_slash(webpage.url) != webpage.display_url \
- and remove_http_s(webpage.url) != webpage.display_url:
- media_url_or_data = '{} | {}'.format(webpage.url, webpage.display_url)
+ if is_url_equiv(webpage.url, webpage.display_url):
+ url_data = webpage.url
else:
- media_url_or_data = webpage.url
+ url_data = '{} | {}'.format(webpage.url, webpage.display_url)
if message:
# sometimes the 1st line of message contains the title, don't repeat it
message_line = message.message.splitlines()[0]
@@ -423,8 +421,18 @@
title = webpage.title
else:
title = ''
+ # extract the URL in the message, don't repeat it
+ message_url = extract_url(message.message)
+ if is_url_equiv(message_url, webpage.url):
+ if is_url_equiv(message_url, webpage.display_url):
+ media_url_or_data = message.message
+ else:
+ media_url_or_data = '{} | {}'.format(message.message, webpage.display_url)
+ else:
+ media_url_or_data = '{} | {}'.format(message.message, url_data)
else:
title = webpage.title
+ media_url_or_data = url_data
if title and logo:
caption = ' | {} | {}'.format(title, logo)
diff -rN -u old-irgramd/utils.py new-irgramd/utils.py
--- old-irgramd/utils.py 2024-11-22 15:59:07.154942421 +0100
+++ new-irgramd/utils.py 2024-11-22 15:59:07.158942415 +0100
@@ -6,6 +6,7 @@
# Constants
FILENAME_INVALID_CHARS = re.compile('[/{}<>()"\'\\|&]')
+SIMPLE_URL = re.compile('http(|s)://[^ ]+')
# Utilities
@@ -62,6 +63,16 @@
surl = url
return remove_slash(surl)
+def is_url_equiv(url1, url2):
+ if url1 and url2:
+ return url1 == url2 or remove_slash(remove_http_s(url1)) == remove_slash(remove_http_s(url2))
+ else:
+ return False
+
+def extract_url(text):
+ url = SIMPLE_URL.search(text)
+ return url.group() if url else None
+
def get_human_size(size):
human_units = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')