From b881641c69375ccf8d3d53d172bd26980d5b1e2e Mon Sep 17 00:00:00 2001
From: datechnoman
Date: Fri, 12 Jan 2024 04:02:06 +0000
Subject: [PATCH] Commented out URL Extractions (to be done post downloading of files)

---
 urlextractor_archiveteam.sh | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/urlextractor_archiveteam.sh b/urlextractor_archiveteam.sh
index 5878834..b8289fb 100644
--- a/urlextractor_archiveteam.sh
+++ b/urlextractor_archiveteam.sh
@@ -26,10 +26,12 @@ export -f gzip_file
 for file in "$directory"/*_urls.txt; do
     filename=$(basename "$file")
 
-    grep -E "http(s)?://(www\.)?mediafire.com" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/mediafire_urls.txt"
-    grep -E "http(s)?://(www\.)?i.imgur.com" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/imgur_urls.txt"
-    grep -E "http(s)?://(www\.)?pastebin.com" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/pastebin_urls.txt"
-    grep "https://cdn.discordapp.com/" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/discord_urls.txt"
+
+    # Commented out the lines that extract URLs
+    # grep -E "http(s)?://(www\.)?mediafire.com" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/mediafire_urls.txt"
+    # grep -E "http(s)?://(www\.)?i.imgur.com" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/imgur_urls.txt"
+    # grep -E "http(s)?://(www\.)?pastebin.com" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/pastebin_urls.txt"
+    # grep "https://cdn.discordapp.com/" "$file" | sort -u >> "/opt/CommonCrawl_URL_Processor/export/discord_urls.txt"
 
     if [[ $filename != "mediafire_urls.txt" && $filename != "t.me_urls.txt" && $filename != "telegram.me_urls.txt" && $filename != "sitemap_urls.txt" ]]; then
         parallel gzip_file ::: "$file" &
@@ -49,4 +51,4 @@
 done
 wait
 
 # Move compressed files to /opt/CommonCrawl_URL_Processor
-# mv "$directory"/*.gz /opt/CommonCrawl_URL_Processor/
\ No newline at end of file
+# mv "$directory"/*.gz /opt/CommonCrawl_URL_Processor/