patch 3a9740db19a230cfa53758a4d89dbe82ba024ff5
Author: E. Bosch
Date: Sat Feb 19 02:10:00 CET 2022
  * telegram: Improve web media handling
diff -rN -u old-irgramd/telegram.py new-irgramd/telegram.py
--- old-irgramd/telegram.py 2024-11-22 21:26:24.435762852 +0100
+++ new-irgramd/telegram.py 2024-11-22 21:26:24.439762845 +0100
@@ -11,7 +11,7 @@
 
 from include import CHAN_MAX_LENGHT, NICK_MAX_LENGTH
 from irc import IRCUser
-from utils import sanitize_filename, remove_slash, remove_http_s, get_human_size, get_human_duration
+from utils import sanitize_filename, is_url_equiv, extract_url, get_human_size, get_human_duration
 
 # Telegram
 
@@ -410,12 +410,10 @@
     async def handle_webpage(self, webpage, message):
         media_type = 'web'
         logo = await self.download_telegram_media(message)
-        if webpage.url != webpage.display_url \
-           and remove_slash(webpage.url) != webpage.display_url \
-           and remove_http_s(webpage.url) != webpage.display_url:
-            media_url_or_data = '{} | {}'.format(webpage.url, webpage.display_url)
+        if is_url_equiv(webpage.url, webpage.display_url):
+            url_data = webpage.url
         else:
-            media_url_or_data = webpage.url
+            url_data = '{} | {}'.format(webpage.url, webpage.display_url)
         if message:
             # sometimes the 1st line of message contains the title, don't repeat it
             message_line = message.message.splitlines()[0]
@@ -423,8 +421,18 @@
                 title = webpage.title
             else:
                 title = ''
+            # extract the URL in the message, don't repeat it
+            message_url = extract_url(message.message)
+            if is_url_equiv(message_url, webpage.url):
+                if is_url_equiv(message_url, webpage.display_url):
+                    media_url_or_data = message.message
+                else:
+                    media_url_or_data = '{} | {}'.format(message.message, webpage.display_url)
+            else:
+                media_url_or_data = '{} | {}'.format(message.message, url_data)
         else:
             title = webpage.title
+            media_url_or_data = url_data
 
         if title and logo:
             caption = ' | {} | {}'.format(title, logo)
diff -rN -u old-irgramd/utils.py new-irgramd/utils.py
--- old-irgramd/utils.py 2024-11-22 21:26:24.435762852 +0100
+++ new-irgramd/utils.py 2024-11-22 21:26:24.439762845 +0100
@@ -6,6 +6,7 @@
 
 # Constants
 FILENAME_INVALID_CHARS = re.compile('[/{}<>()"\'\\|&]')
+SIMPLE_URL = re.compile('http(|s)://[^ ]+')
 
 # Utilities
 
@@ -62,6 +63,16 @@
         surl = url
     return remove_slash(surl)
 
+def is_url_equiv(url1, url2):
+    if url1 and url2:
+        return url1 == url2 or remove_slash(remove_http_s(url1)) == remove_slash(remove_http_s(url2))
+    else:
+        return False
+
+def extract_url(text):
+    url = SIMPLE_URL.search(text)
+    return url.group() if url else None
+
 def get_human_size(size):
     human_units = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')
 