From d0fa7c84f48dd2ed496eeb31fd71833c76c7ad46 Mon Sep 17 00:00:00 2001 From: datechnoman Date: Sun, 28 Jan 2024 09:10:17 +0000 Subject: [PATCH] Update warc_wat_url_processor.py --- warc_wat_url_processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/warc_wat_url_processor.py b/warc_wat_url_processor.py index 61f4619..3f2b6b3 100644 --- a/warc_wat_url_processor.py +++ b/warc_wat_url_processor.py @@ -48,6 +48,7 @@ def process_file(file_path): # Extract URLs from the gzipped file urls = extract_urls_from_file(file_path) + print(f"Extracted {len(urls)} URLs from {file_path}") # Create the output file path with '_urls.txt' extension output_file_path = os.path.splitext(file_path)[0] + '_urls.txt'