Updated to delete older tophost json files
This commit is contained in:
parent
3fb177fb70
commit
7b4651b07e
@ -8,6 +8,7 @@ from urllib.parse import urlparse
|
||||
|
||||
# Define constants
|
||||
URLS_DIRECTORY = "/opt/cdxfiles/urls"
|
||||
URLS_FILES_DIRECTORY = "/opt/cdxfiles/urls_files"
|
||||
ROOT_DIRECTORY = "/root/urls_files"
|
||||
CONCURRENCY = 10
|
||||
BATCH_SIZE = 10
|
||||
@ -208,13 +209,16 @@ def main():
|
||||
if filename.endswith(".cdx.json"):
|
||||
directory_output_file.write(f"{filename}\n")
|
||||
|
||||
# Process older files in /opt/cdxfiles/urls
|
||||
# Process older files in URLS_DIRECTORY and URLS_FILES_DIRECTORY (NOTE: ROOT_DIRECTORY is defined but not included in directories_to_check)
|
||||
older_than_48_hours = datetime.now() - timedelta(days=2)
|
||||
for filename in os.listdir(URLS_DIRECTORY):
|
||||
file_path = os.path.join(URLS_DIRECTORY, filename)
|
||||
if filename.endswith(".cdx.json") and os.path.getmtime(file_path) < older_than_48_hours.timestamp():
|
||||
os.remove(file_path)
|
||||
print(f"Deleted '{filename}' as it is older than 48 hours.")
|
||||
directories_to_check = [URLS_DIRECTORY, URLS_FILES_DIRECTORY]
|
||||
|
||||
for directory in directories_to_check:
|
||||
for filename in os.listdir(directory):
|
||||
file_path = os.path.join(directory, filename)
|
||||
if filename.endswith(".cdx.json") and os.path.getmtime(file_path) < older_than_48_hours.timestamp():
|
||||
os.remove(file_path)
|
||||
print(f"Deleted '{filename}' in '{directory}' as it is older than 48 hours.")
|
||||
|
||||
run_ia_command() # Run IA command after processing older files
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user