diff --git a/url_extractor.py b/url_extractor.py index b7de15e..29deac6 100644 --- a/url_extractor.py +++ b/url_extractor.py @@ -19,15 +19,18 @@ def process_file(file, directory_path, output_file_path, keyword, counter_lock, print(f"Skipping {file_path}. Unsupported file extension.") return - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, text=True) + # Load the entire file content into memory + try: + file_content = subprocess.check_output(command, shell=True, text=True) + except subprocess.CalledProcessError as e: + print(f"Error processing {file_path}: {e}") + return # Stream the output line by line and append to the output file with open(output_file_path, "a") as output_file: - for line in process.stdout: + for line in file_content.splitlines(): if keyword in line: - output_file.write(line) - - process.wait() # Wait for the process to finish + output_file.write(line + '\n') # Update the processed files count with counter_lock: @@ -81,4 +84,4 @@ with ThreadPoolExecutor(max_workers=num_concurrent_instances) as executor: for future in futures: future.result() -print(f"\nAll files processed. Matching lines appended to {output_file_path}") \ No newline at end of file +print(f"\nAll files processed. Matching lines appended to {output_file_path}")