Updated to extract Pastebin URLs

This commit is contained in:
datechnoman 2023-12-19 00:23:55 +00:00
parent 1036de64a7
commit fd9376cbe0

View File

@@ -28,6 +28,7 @@ for file in "$directory"/*_urls.txt; do
# Bare name of the current input file (directory stripped); the condition
# that follows compares it against a list of file names.
filename=$(basename "$file")
# Pull the URLs of each host of interest out of the current file and append
# them to that host's export list. Dots in host names are escaped so they
# match a literal '.' only; an unescaped '.' would also accept look-alike
# hosts such as "pastebinXcom".
# NOTE: sort -u removes duplicates within this one file only. Because the
# results are appended (>>), the same URL can still appear more than once in
# an export list when it occurs in several input files.
grep -E "http(s)?://(www\.)?mediafire\.com" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/mediafire_urls.txt"
grep -E "http(s)?://(www\.)?i\.imgur\.com" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/imgur_urls.txt"
grep -E "http(s)?://(www\.)?pastebin\.com" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/pastebin_urls.txt"
# The Discord CDN prefix contains no regex operators we want, so match it as
# a fixed string (-F) rather than as a pattern.
grep -F "https://cdn.discordapp.com/" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/discord_urls.txt"
if [[ $filename != "mediafire_urls.txt" && $filename != "t.me_urls.txt" && $filename != "telegram.me_urls.txt" && $filename != "sitemap_urls.txt" ]]; then