Updated to delete older tophost json files
This commit is contained in:
parent
3fb177fb70
commit
7b4651b07e
@ -8,6 +8,7 @@ from urllib.parse import urlparse
|
|||||||
|
|
||||||
# Define constants
|
# Define constants
|
||||||
URLS_DIRECTORY = "/opt/cdxfiles/urls"
|
URLS_DIRECTORY = "/opt/cdxfiles/urls"
|
||||||
|
URLS_FILES_DIRECTORY = "/opt/cdxfiles/urls_files"
|
||||||
ROOT_DIRECTORY = "/root/urls_files"
|
ROOT_DIRECTORY = "/root/urls_files"
|
||||||
CONCURRENCY = 10
|
CONCURRENCY = 10
|
||||||
BATCH_SIZE = 10
|
BATCH_SIZE = 10
|
||||||
@ -208,13 +209,16 @@ def main():
|
|||||||
if filename.endswith(".cdx.json"):
|
if filename.endswith(".cdx.json"):
|
||||||
directory_output_file.write(f"{filename}\n")
|
directory_output_file.write(f"{filename}\n")
|
||||||
|
|
||||||
# Process older files in /opt/cdxfiles/urls
|
# Process older files in URLS_DIRECTORY and URLS_FILES_DIRECTORY (ROOT_DIRECTORY is not scanned here)
|
||||||
older_than_48_hours = datetime.now() - timedelta(days=2)
|
older_than_48_hours = datetime.now() - timedelta(days=2)
|
||||||
for filename in os.listdir(URLS_DIRECTORY):
|
directories_to_check = [URLS_DIRECTORY, URLS_FILES_DIRECTORY]
|
||||||
file_path = os.path.join(URLS_DIRECTORY, filename)
|
|
||||||
if filename.endswith(".cdx.json") and os.path.getmtime(file_path) < older_than_48_hours.timestamp():
|
for directory in directories_to_check:
|
||||||
os.remove(file_path)
|
for filename in os.listdir(directory):
|
||||||
print(f"Deleted '{filename}' as it is older than 48 hours.")
|
file_path = os.path.join(directory, filename)
|
||||||
|
if filename.endswith(".cdx.json") and os.path.getmtime(file_path) < older_than_48_hours.timestamp():
|
||||||
|
os.remove(file_path)
|
||||||
|
print(f"Deleted '{filename}' in '{directory}' as it is older than 48 hours.")
|
||||||
|
|
||||||
run_ia_command() # Run IA command after processing older files
|
run_ia_command() # Run IA command after processing older files
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user