diff --git a/commoncrawl_wat_path_comparer.py b/commoncrawl_wat_path_comparer.py index 18ea40b..81083fc 100644 --- a/commoncrawl_wat_path_comparer.py +++ b/commoncrawl_wat_path_comparer.py @@ -35,8 +35,8 @@ def main(): # Read paths from the wat.paths file paths = read_paths_from_file(wat_paths_file_path) - # Filter out paths that partially match directory lines - filtered_paths = [path for path in paths if not any(directory_line in path for directory_line in directory_files)] + # Filter out paths whose basename exactly matches a directory line + filtered_paths = [path for path in paths if os.path.basename(path) not in directory_files] # Write the list of modified files to a text file output_file_path = 'missing_wat_files.txt'