Updated to delete older tophost json files

This commit is contained in:
datechnoman 2024-03-12 04:08:42 +00:00
parent 3fb177fb70
commit 7b4651b07e

View File

@ -8,6 +8,7 @@ from urllib.parse import urlparse
# Define constants # Define constants
URLS_DIRECTORY = "/opt/cdxfiles/urls" URLS_DIRECTORY = "/opt/cdxfiles/urls"
URLS_FILES_DIRECTORY = "/opt/cdxfiles/urls_files"
ROOT_DIRECTORY = "/root/urls_files" ROOT_DIRECTORY = "/root/urls_files"
CONCURRENCY = 10 CONCURRENCY = 10
BATCH_SIZE = 10 BATCH_SIZE = 10
@ -208,13 +209,16 @@ def main():
if filename.endswith(".cdx.json"): if filename.endswith(".cdx.json"):
directory_output_file.write(f"{filename}\n") directory_output_file.write(f"{filename}\n")
# Process older files in /opt/cdxfiles/urls # Process older files in URLS_DIRECTORY and URLS_FILES_DIRECTORY (ROOT_DIRECTORY is not scanned by this loop)
older_than_48_hours = datetime.now() - timedelta(days=2) older_than_48_hours = datetime.now() - timedelta(days=2)
for filename in os.listdir(URLS_DIRECTORY): directories_to_check = [URLS_DIRECTORY, URLS_FILES_DIRECTORY]
file_path = os.path.join(URLS_DIRECTORY, filename)
if filename.endswith(".cdx.json") and os.path.getmtime(file_path) < older_than_48_hours.timestamp(): for directory in directories_to_check:
os.remove(file_path) for filename in os.listdir(directory):
print(f"Deleted '{filename}' as it is older than 48 hours.") file_path = os.path.join(directory, filename)
if filename.endswith(".cdx.json") and os.path.getmtime(file_path) < older_than_48_hours.timestamp():
os.remove(file_path)
print(f"Deleted '{filename}' in '{directory}' as it is older than 48 hours.")
run_ia_command() # Run IA command after processing older files run_ia_command() # Run IA command after processing older files