diff --git a/commoncrawl_wat_path_comparer.py b/commoncrawl_wat_path_comparer.py
index 942480c..18ea40b 100644
--- a/commoncrawl_wat_path_comparer.py
+++ b/commoncrawl_wat_path_comparer.py
@@ -9,7 +9,7 @@ def read_paths_from_file(file_path):
         return [line.strip() for line in file]
 
 def list_files_in_directory(directory):
-    return [f.replace("_urls.txt", "") for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
+    return [f.replace("_urls.txt.zst", ".gz") for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
 
 def main():
     # Get the directory containing wat.paths file
@@ -54,4 +54,4 @@ def main():
     print(f"List of modified files in {compare_directory} written to {output_file_path}")
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file