# Patch for commoncrawl_transfer.ps1 (PowerShell script using a WinSCP SFTP session).
#
# What this patch changes:
#   * Points the session at a different server: HostName and
#     SshHostKeyFingerprint are replaced together.
#   * Adds $recordFilePath, a local text file that is created when missing.
#   * Inside the per-file loop, skips any remote file whose FullName is already
#     listed in the record file, and appends FullName to the record file after
#     a successful download (before the remote delete is attempted).
#   * Removes the remote ".lock" handling: the FileExists check, the PutFiles
#     of /dev/null that created the lock, and the RemoveFiles that cleared it.
#
# NOTE(review): the line structure below was restored from a whitespace-collapsed
#   copy; line breaks were validated against each hunk header's line counts, but
#   the leading indentation of the script lines is reconstructed -- confirm it
#   against the target file (or apply with whitespace-insensitive matching).
# NOTE(review): the unchanged context lines show the SFTP user name and password
#   stored in plain text in the script; moving them out of source control would
#   be a separate change and is not part of this patch.
# NOTE(review): the added skip check calls Get-Content on the record file once
#   per remote file, so the record is re-read on every loop iteration.
diff --git a/commoncrawl_transfer.ps1 b/commoncrawl_transfer.ps1
index f8dab9c..e93995f 100644
--- a/commoncrawl_transfer.ps1
+++ b/commoncrawl_transfer.ps1
@@ -17,10 +17,13 @@ $sessionOptions = New-Object $sessionOptionsType
 
 # Set properties for session options
 $sessionOptions.Protocol = [WinSCP.Protocol]::Sftp
-$sessionOptions.HostName = "37.27.11.121"
+$sessionOptions.HostName = "65.109.140.15"
 $sessionOptions.UserName = "root"
 $sessionOptions.Password = "Tcft65rdx!"
-$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 lM6ZIlmihQWkYz3iJONHYEkUPeJaUMUNOx7Av9CftuY"
+$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 UeJ2ZrZQte4L0Inyewiu0BJ/PtPf+pc3OxF01xExWes"
+
+# Specify the path for the local record file
+$recordFilePath = "E:\CommonCrawl\transferred_files.txt"
 
 try
 {
@@ -33,6 +36,11 @@ try
     # Specify the remote directory path
     $remotePath = "/opt/"
 
+    # Check if the record file exists, if not, create it
+    if (-not (Test-Path $recordFilePath)) {
+        New-Item -ItemType File -Path $recordFilePath | Out-Null
+    }
+
     do
     {
         # Get a list of files matching the pattern "_urls.txt.zst"
@@ -48,22 +56,18 @@ try
         # Iterate through each file and download/delete it
         foreach ($fileInfo in $remoteFiles)
         {
+            # Check if the file has been transferred before
+            if ((Get-Content $recordFilePath) -contains $fileInfo.FullName) {
+                Write-Host "File already transferred: $($fileInfo.FullName)"
+                continue
+            }
+
             # Construct the full remote file path
             $remoteFilePath = [WinSCP.RemotePath]::Combine($remotePath, $fileInfo.FullName)
 
             # Construct the local file path
             $localFilePath = [System.IO.Path]::Combine("E:\CommonCrawl", $fileInfo.Name)
 
-            # Check if lock file exists
-            $lockFilePath = $remoteFilePath + ".lock"
-            if ($session.FileExists($lockFilePath)) {
-                Write-Host "Lock file exists. Skipping file: $remoteFilePath"
-                continue
-            }
-
-            # Create lock file
-            $session.PutFiles("/dev/null", $lockFilePath).Check()
-
             # Download the file
             $transferResult = $session.GetFiles($remoteFilePath, $localFilePath, $True)
 
@@ -72,6 +76,9 @@ try
             {
                 Write-Host "Download successful: $localFilePath"
 
+                # Record the transferred file
+                Add-Content -Path $recordFilePath -Value $fileInfo.FullName
+
                 # Attempt to delete the remote file
                 $removalResult = $session.RemoveFiles($remoteFilePath)
 
@@ -89,9 +96,6 @@ try
             {
                 Write-Host "Failed to download file: $remoteFilePath"
             }
-
-            # Remove lock file
-            $session.RemoveFiles($lockFilePath).Check()
         }
 
         # Get a new list of files after downloading