Update urls_automated_cdx_processor.py

This commit is contained in:
datechnoman 2024-03-11 23:47:50 +00:00
parent 7106415581
commit eaa8278db2

View File

@@ -9,13 +9,13 @@ from urllib.parse import urlparse
# Define constants
URLS_DIRECTORY = "/opt/cdxfiles/urls"
ROOT_DIRECTORY = "/root/urls_files"
CONCURRENCY = 5
BATCH_SIZE = 5
CONCURRENCY = 2
BATCH_SIZE = 2
# Function to run cdxsummary command
def run_cdxsummary(file_path, json_filepath):
# Construct the cdxsummary command
cdxsummary_command = f"/usr/local/bin/cdxsummary -t 30 --json {file_path}"
cdxsummary_command = f"/usr/local/bin/cdxsummary -t 50 --json {file_path}"
try:
# Run the cdxsummary command and capture the output