From 27a2b56936802b83ac445b9d7712573f371b3def Mon Sep 17 00:00:00 2001 From: datechnoman Date: Tue, 12 Mar 2024 04:19:43 +0000 Subject: [PATCH] Update due to bug in folder location --- urls_automated_cdx_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/urls_automated_cdx_processor.py b/urls_automated_cdx_processor.py index 008da3e..8e8497c 100644 --- a/urls_automated_cdx_processor.py +++ b/urls_automated_cdx_processor.py @@ -8,7 +8,7 @@ from urllib.parse import urlparse # Define constants URLS_DIRECTORY = "/opt/cdxfiles/urls" -URLS_FILES_DIRECTORY = "/opt/cdxfiles/urls_files" +URLS_FILES_DIRECTORY = "/opt/cdxfiles/urls_tophosts" ROOT_DIRECTORY = "/root/urls_files" CONCURRENCY = 10 BATCH_SIZE = 10 @@ -209,7 +209,7 @@ def main(): if filename.endswith(".cdx.json"): directory_output_file.write(f"{filename}\n") - # Process older files in /opt/cdxfiles/urls, URLS_FILES_DIRECTORY, and the ROOT_DIRECTORY + # Process older files in /opt/cdxfiles/urls and urls_tophosts older_than_48_hours = datetime.now() - timedelta(days=2) directories_to_check = [URLS_DIRECTORY, URLS_FILES_DIRECTORY]