Update warc_wat_url_processor.py

This commit is contained in:
datechnoman 2024-03-31 11:45:33 +00:00
parent f95513b9fd
commit 7c157c5a48

View File

@@ -98,7 +98,7 @@ def process_file(file_path):
def download_and_process_file(url): def download_and_process_file(url):
try: try:
command = f'axel -n 3 {url}' command = f'axel -q -n 1 {url}'
result = subprocess.run(command, shell=True, check=True) result = subprocess.run(command, shell=True, check=True)
if result.returncode == 0: if result.returncode == 0:
file_path = os.path.join(os.getcwd(), os.path.basename(url)) file_path = os.path.join(os.getcwd(), os.path.basename(url))
@@ -117,7 +117,7 @@ def main():
urls = [url.strip() for url in urls] urls = [url.strip() for url in urls]
download_concurrency_level = 15 download_concurrency_level = 7
with ProcessPoolExecutor(max_workers=download_concurrency_level) as executor: with ProcessPoolExecutor(max_workers=download_concurrency_level) as executor:
print("Submitting tasks to the ProcessPoolExecutor...") print("Submitting tasks to the ProcessPoolExecutor...")