Update urls_automated_cdx_processor.py

This commit is contained in:
datechnoman 2024-03-11 23:47:50 +00:00
parent 7106415581
commit eaa8278db2

View File

@@ -9,13 +9,13 @@ from urllib.parse import urlparse
# Define constants # Define constants
URLS_DIRECTORY = "/opt/cdxfiles/urls" URLS_DIRECTORY = "/opt/cdxfiles/urls"
ROOT_DIRECTORY = "/root/urls_files" ROOT_DIRECTORY = "/root/urls_files"
CONCURRENCY = 5 CONCURRENCY = 2
BATCH_SIZE = 5 BATCH_SIZE = 2
# Function to run cdxsummary command # Function to run cdxsummary command
def run_cdxsummary(file_path, json_filepath): def run_cdxsummary(file_path, json_filepath):
# Construct the cdxsummary command # Construct the cdxsummary command
cdxsummary_command = f"/usr/local/bin/cdxsummary -t 30 --json {file_path}" cdxsummary_command = f"/usr/local/bin/cdxsummary -t 50 --json {file_path}"
try: try:
# Run the cdxsummary command and capture the output # Run the cdxsummary command and capture the output