Stream to RAM + regex at same time
This commit is contained in:
parent
b4f357090f
commit
46d8e2e718
@ -2,6 +2,7 @@ import subprocess
|
|||||||
import os
|
import os
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
|
import re
|
||||||
|
|
||||||
# Function to process each file
|
# Function to process each file
|
||||||
def process_file(file, directory_path, output_directory, patterns, counter_lock, processed_counter):
|
def process_file(file, directory_path, output_directory, patterns, counter_lock, processed_counter):
|
||||||
@ -19,20 +20,30 @@ def process_file(file, directory_path, output_directory, patterns, counter_lock,
|
|||||||
print(f"Skipping {file_path}. Unsupported file extension.")
|
print(f"Skipping {file_path}. Unsupported file extension.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Iterate through each pattern and process the file accordingly
|
# Load the entire file content into memory
|
||||||
for pattern, output_filename in patterns.items():
|
try:
|
||||||
grep_command = f"grep -E '{pattern}'"
|
file_content = subprocess.check_output(command, shell=True, text=True)
|
||||||
full_command = f"{command} | {grep_command}"
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"Error processing {file_path}: {e}")
|
||||||
|
return
|
||||||
|
|
||||||
# Open the output file in append mode
|
# Create output files if they don't exist and prepare for appending matches
|
||||||
|
output_files = {}
|
||||||
|
for output_filename in patterns.values():
|
||||||
output_file_path = os.path.join(output_directory, output_filename)
|
output_file_path = os.path.join(output_directory, output_filename)
|
||||||
if not os.path.exists(output_file_path):
|
if not os.path.exists(output_file_path):
|
||||||
open(output_file_path, 'a').close() # Create the file if it doesn't exist
|
open(output_file_path, 'a').close() # Create the file if it doesn't exist
|
||||||
|
output_files[output_filename] = open(output_file_path, "a")
|
||||||
|
|
||||||
with subprocess.Popen(full_command, shell=True, stdout=subprocess.PIPE, text=True) as proc:
|
# Process the file content and apply all patterns
|
||||||
with open(output_file_path, "a") as output_file:
|
for line in file_content.splitlines():
|
||||||
for line in proc.stdout:
|
for pattern, output_filename in patterns.items():
|
||||||
output_file.write(line)
|
if re.search(pattern, line):
|
||||||
|
output_files[output_filename].write(line + '\n')
|
||||||
|
|
||||||
|
# Close all output files
|
||||||
|
for output_file in output_files.values():
|
||||||
|
output_file.close()
|
||||||
|
|
||||||
# Update the processed files count outside the inner loop
|
# Update the processed files count outside the inner loop
|
||||||
with counter_lock:
|
with counter_lock:
|
||||||
@ -99,4 +110,4 @@ with ThreadPoolExecutor(max_workers=num_concurrent_instances) as executor:
|
|||||||
# Check file size for any remaining files
|
# Check file size for any remaining files
|
||||||
check_file_size(output_directory)
|
check_file_size(output_directory)
|
||||||
|
|
||||||
print(f"\nAll files processed. URLs appended to corresponding output files.")
|
print(f"\nAll files processed. URLs appended to corresponding output files.")
|
||||||
|
Loading…
Reference in New Issue
Block a user