# commoncrawl_tranfer.ps1
#
# Purpose: connect to a remote host over SFTP with the WinSCP .NET assembly,
# download every file matching "*_urls.txt.gz" found under /opt/commoncrawl/
# (searched recursively) into E:\CommonCrawl\, and delete each remote file once
# its download has succeeded. The listing is repeated until the remote side has
# no matching files left, or until a pass makes no progress.

# Full path to WinSCPnet.dll
$assemblyPath = "C:\Program Files (x86)\WinSCP\WinSCPnet.dll"

# Load the WinSCP .NET assembly; the returned Assembly object is discarded so it
# is not echoed to the console.
[Reflection.Assembly]::LoadFrom($assemblyPath) | Out-Null

# Path to winscp.exe, which the .NET assembly drives behind the scenes
$winscpPath = "C:\Program Files (x86)\WinSCP\winscp.exe"

# Transfer settings
$remotePath = "/opt/commoncrawl/"
$fileMask = "*_urls.txt.gz"
$localDirectory = "E:\CommonCrawl\"

# SECURITY NOTE(review): a root password is committed in this script. Prefer
# supplying it through the COMMONCRAWL_SFTP_PASSWORD environment variable (or
# switching to key-based authentication with a non-root account), then rotate
# the password and delete the literal fallback below. The fallback is kept only
# so existing unattended runs keep working.
$password = $env:COMMONCRAWL_SFTP_PASSWORD
if ([string]::IsNullOrEmpty($password))
{
    $password = "Tcft65rdx!"
}

# Set up session options
$sessionOptions = New-Object WinSCP.SessionOptions
$sessionOptions.Protocol = [WinSCP.Protocol]::Sftp
$sessionOptions.HostName = "37.27.11.121"
$sessionOptions.UserName = "root"
$sessionOptions.Password = $password
$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 lM6ZIlmihQWkYz3iJONHYEkUPeJaUMUNOx7Av9CftuY"

# Create a session instance
$session = New-Object WinSCP.Session

try
{
    # Point the session at the WinSCP executable
    $session.ExecutablePath = $winscpPath

    # Make sure the download target exists so transfers do not fail on a missing directory
    if (-not (Test-Path -LiteralPath $localDirectory))
    {
        New-Item -ItemType Directory -Path $localDirectory -Force | Out-Null
    }

    # Connect
    $session.Open($sessionOptions)

    $pass = 0
    while ($true)
    {
        $pass++

        # EnumerateRemoteFiles returns a lazily evaluated enumerable, which has no real
        # Count. Wrapping it in @() materializes the listing into an array, so .Count is
        # meaningful and the listing is complete before any remote file is deleted.
        $remoteFiles = @($session.EnumerateRemoteFiles($remotePath, $fileMask, [WinSCP.EnumerationOptions]::AllDirectories))

        if ($remoteFiles.Count -eq 0)
        {
            # Only report "nothing to do" when the very first listing is empty
            if ($pass -eq 1)
            {
                Write-Host "No files found to download."
            }
            break
        }

        # Number of remote files downloaded AND removed during this pass
        $removedCount = 0

        foreach ($fileInfo in $remoteFiles)
        {
            # FullName is already the full remote path of the file
            $remoteFilePath = $fileInfo.FullName

            # GetFiles/RemoveFiles treat their path argument as a file mask, so escape it
            # to address exactly this one file.
            $remoteFileMask = [WinSCP.RemotePath]::EscapeFileMask($remoteFilePath)

            # NOTE(review): files are stored flat by name; two remote files with the same
            # name in different subdirectories would map to the same local file.
            $localFilePath = [System.IO.Path]::Combine($localDirectory, $fileInfo.Name)

            # Download without auto-removal; the remote file is deleted explicitly below
            # only after the download is known to have succeeded.
            $transferResult = $session.GetFiles($remoteFileMask, $localFilePath, $False)

            if (-not $transferResult.IsSuccess)
            {
                Write-Host "Failed to download file: $remoteFilePath"
                continue
            }

            Write-Host "Download successful: $localFilePath"

            # Attempt to delete the remote file
            $removalResult = $session.RemoveFiles($remoteFileMask)

            if ($removalResult.IsSuccess)
            {
                Write-Host "Deletion successful: $remoteFilePath"
                $removedCount++
            }
            else
            {
                Write-Host "Failed to delete remote file: $remoteFilePath"
            }
        }

        # If nothing could be removed in this pass, the next listing would return the
        # same files again; stop instead of retrying forever.
        if ($removedCount -eq 0)
        {
            Write-Warning "No remote file could be downloaded and removed in this pass; stopping."
            break
        }
    }
}
finally
{
    # Always release the session (and the underlying winscp.exe process)
    $session.Dispose()
}