From b4f357090f780f22882228bb41b6f3b771aaadc5 Mon Sep 17 00:00:00 2001
From: datechnoman
Date: Sun, 2 Jun 2024 08:13:22 +0000
Subject: [PATCH] Updated to include doi.org extraction

---
 archiveteam_project_url_extractor.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/archiveteam_project_url_extractor.py b/archiveteam_project_url_extractor.py
index ea17b04..886a480 100644
--- a/archiveteam_project_url_extractor.py
+++ b/archiveteam_project_url_extractor.py
@@ -77,7 +77,8 @@ url_patterns = {
     r'(mediafire\.com|mfi\.re)\S+': 'filtered_mediafire_mfi.re.txt',
     r'\S*imgur\S*': 'filtered_imgur.txt',
     r'http(s)?://(www\.)?pastebin.com': 'filtered_pastebin.txt',
-    r'https?://\S+\.pdf\b': 'filtered_pdf_files.txt'
+    r'https?://\S+\.pdf\b': 'filtered_pdf_files.txt',
+    r'https?://(www\.)?doi\.org\S*': 'filtered_doi_org.txt'
 }
 
 # Ask the user for the number of concurrent instances