diff --git a/warc_wat_url_processor.py b/warc_wat_url_processor.py index 61f4619..3f2b6b3 100644 --- a/warc_wat_url_processor.py +++ b/warc_wat_url_processor.py @@ -48,6 +48,7 @@ def process_file(file_path): # Extract URLs from the gzipped file urls = extract_urls_from_file(file_path) + print(f"Extracted {len(urls)} URLs from {file_path}") # Create the output file path with '_urls.txt' extension output_file_path = os.path.splitext(file_path)[0] + '_urls.txt'