diff --git a/archiveteam_project_url_extractor.py b/archiveteam_project_url_extractor.py
index 088b844..954854a 100644
--- a/archiveteam_project_url_extractor.py
+++ b/archiveteam_project_url_extractor.py
@@ -11,6 +11,8 @@ def process_file(file, directory_path, output_directory, patterns, counter_lock,
     # Determine the appropriate command based on file extension
     if file.endswith(".gz"):
         command = f"zcat {file_path}"
+    elif file.endswith(".zst"):
+        command = f"zstdcat {file_path}"
     elif file.endswith(".txt"):
         command = f"cat {file_path}"
     else:
@@ -38,20 +40,20 @@ def process_file(file, directory_path, output_directory, patterns, counter_lock,
         remaining_count = len(gzipped_files) - processed_counter[0]
         print(f"{file_path} processed. Remaining files: {remaining_count}")
 
-# Ask the user for the directory containing .txt and .txt.gz files
-directory_path = input("Enter the directory path containing .txt and .txt.gz files: ")
+# Ask the user for the directory containing .txt, .txt.gz, and .zst files
+directory_path = input("Enter the directory path containing .txt, .txt.gz, and .zst files: ")
 
 # Ensure the directory exists
 if not os.path.exists(directory_path):
     print(f"Error: The directory '{directory_path}' does not exist.")
     exit()
 
-# List all files in the directory that end with .txt or .txt.gz
-gzipped_files = [file for file in os.listdir(directory_path) if file.endswith(".txt") or file.endswith(".txt.gz")]
+# List all files in the directory that end with .txt, .txt.gz, or .zst
+gzipped_files = [file for file in os.listdir(directory_path) if file.endswith((".txt", ".txt.gz", ".zst"))]
 
-# Check if there are any .txt or .txt.gz files in the directory
+# Check if there are any .txt, .txt.gz, or .zst files in the directory
 if not gzipped_files:
-    print("Error: No .txt or .txt.gz files found in the specified directory.")
+    print("Error: No .txt, .txt.gz, or .zst files found in the specified directory.")
     exit()
 
 # Ask the user for the output directory