From 95abf80bd14a1934ec875393170f634978047ed5 Mon Sep 17 00:00:00 2001 From: datechnoman Date: Sun, 4 Feb 2024 22:14:27 +0000 Subject: [PATCH] Updated script to stream compressed files --- url_extractor.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/url_extractor.py b/url_extractor.py index c2b0a2d..0fe2dfb 100644 --- a/url_extractor.py +++ b/url_extractor.py @@ -10,11 +10,14 @@ def process_file(file, directory_path, output_file_path, keyword, counter_lock, # Run the command and append the output to the same output file command = f"zcat {file_path} | grep '{keyword}'" - result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, text=True) + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, text=True) - # Append the output to the same output file + # Stream the output line by line and append to the output file with open(output_file_path, "a") as output_file: - output_file.write(result.stdout) + for line in process.stdout: + output_file.write(line) + + process.wait() # Wait for the process to finish # Update the processed files count with counter_lock: @@ -68,4 +71,4 @@ with ThreadPoolExecutor(max_workers=num_concurrent_instances) as executor: for future in futures: future.result() -print(f"\nAll files processed. URLs appended to {output_file_path}") +print(f"\nAll files processed. URLs appended to {output_file_path}") \ No newline at end of file