diff --git a/url_extractor.py b/url_extractor.py
index c2b0a2d..0fe2dfb 100644
--- a/url_extractor.py
+++ b/url_extractor.py
@@ -10,11 +10,14 @@ def process_file(file, directory_path, output_file_path, keyword, counter_lock,
 
     # Run the command and append the output to the same output file
     command = f"zcat {file_path} | grep '{keyword}'"
-    result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, text=True)
+    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, text=True)
 
-    # Append the output to the same output file
+    # Stream the output line by line and append to the output file
     with open(output_file_path, "a") as output_file:
-        output_file.write(result.stdout)
+        for line in process.stdout:
+            output_file.write(line)
+
+    process.wait() # Wait for the process to finish
 
     # Update the processed files count
     with counter_lock:
@@ -68,4 +71,4 @@ with ThreadPoolExecutor(max_workers=num_concurrent_instances) as executor:
     for future in futures:
         future.result()
 
-print(f"\nAll files processed. URLs appended to {output_file_path}")
+print(f"\nAll files processed. URLs appended to {output_file_path}")
\ No newline at end of file