Updated to use the command-line zstd tool

This commit is contained in:
datechnoman 2024-01-20 11:29:57 +00:00
parent 78f6b69cdf
commit e98f80aec4

View File

@ -1,7 +1,6 @@
import subprocess import subprocess
import os import os
import gzip import gzip
import zstandard as zstd
import re import re
import traceback import traceback
from multiprocessing import Pool from multiprocessing import Pool
@ -20,7 +19,7 @@ def extract_urls_from_file(file_path):
print(f"An unexpected error occurred while processing '{file_path}': {e}") print(f"An unexpected error occurred while processing '{file_path}': {e}")
print("Full traceback:") print("Full traceback:")
traceback.print_exc() traceback.print_exc()
return urls return urls
def process_file(file_path): def process_file(file_path):
@ -38,10 +37,9 @@ def process_file(file_path):
output_file.write('\n'.join(urls)) output_file.write('\n'.join(urls))
print(f"URLs written to {output_file_path}") print(f"URLs written to {output_file_path}")
# Compress the output file using zstd with compression level -18 # Use zstd command-line tool for compression
with open(output_file_path, 'rb') as input_file, open(output_file_path + '.zst', 'wb') as output_zstd_file: command = f'zstd -T0 -18 --long {output_file_path} -o {output_file_path}.zst'
cctx = zstd.ZstdCompressor(level=18) subprocess.run(command, shell=True)
output_zstd_file.write(cctx.compress(input_file.read()))
print(f"Compressed file saved as '{output_file_path}.zst'") print(f"Compressed file saved as '{output_file_path}.zst'")
# Remove the original gzipped file # Remove the original gzipped file