<#
.SYNOPSIS
    Moves CommonCrawl URL lists from a remote SFTP host to a local folder.

.DESCRIPTION
    Connects to the SFTP host through the WinSCP .NET assembly, recursively
    lists every remote file under -RemotePath that matches -FileMask, downloads
    each one into -LocalPath and deletes the remote copy once the download has
    succeeded. Listing is repeated until no matching files remain or until a
    whole pass fails to move a single file.

    Provenance: commoncrawl_transfer.ps1 (renamed from commoncrawl_tranfer.ps1),
    commit 727d2c31872b29435727b8f874aa164132f9ada3 by datechnoman,
    Mon, 18 Dec 2023, "[PATCH] Update commoncrawl_transfer.ps1".

.PARAMETER HostName
    SFTP server address.

.PARAMETER UserName
    Account used to log in.

.PARAMETER Password
    Account password as a SecureString. When omitted, the password is read
    from the COMMONCRAWL_SFTP_PASSWORD environment variable.

.PARAMETER SshHostKeyFingerprint
    Expected SSH host key fingerprint of the server.

.PARAMETER RemotePath
    Remote directory that is searched recursively.

.PARAMETER LocalPath
    Local directory that receives the downloaded files (flat, by file name).

.PARAMETER FileMask
    WinSCP file mask selecting the remote files to move.

.PARAMETER WinScpDirectory
    Directory that contains WinSCPnet.dll and winscp.exe.

.NOTES
    The account password is intentionally NOT stored in this file. The password
    that was committed with earlier revisions of this script must be treated as
    compromised and rotated on the server.
#>
[CmdletBinding()]
param(
    [string] $HostName = "37.27.11.121",
    [string] $UserName = "root",
    [System.Security.SecureString] $Password,
    [string] $SshHostKeyFingerprint = "ssh-ed25519 255 lM6ZIlmihQWkYz3iJONHYEkUPeJaUMUNOx7Av9CftuY",
    [string] $RemotePath = "/opt/commoncrawl/",
    [string] $LocalPath = "E:\CommonCrawl\",
    [string] $FileMask = "*_urls.txt.gz",
    [string] $WinScpDirectory = "C:\Program Files (x86)\WinSCP"
)

# Load the WinSCP .NET assembly; the WinSCP types used below do not exist before this.
$assemblyPath = Join-Path $WinScpDirectory "WinSCPnet.dll"
$winscpPath = Join-Path $WinScpDirectory "winscp.exe"
Add-Type -Path $assemblyPath

# Set up session options
$sessionOptions = New-Object WinSCP.SessionOptions
$sessionOptions.Protocol = [WinSCP.Protocol]::Sftp
$sessionOptions.HostName = $HostName
$sessionOptions.UserName = $UserName
$sessionOptions.SshHostKeyFingerprint = $SshHostKeyFingerprint

# Resolve the password without ever keeping it in source control.
if ($null -ne $Password)
{
    $sessionOptions.SecurePassword = $Password
}
elseif (-not [string]::IsNullOrEmpty($env:COMMONCRAWL_SFTP_PASSWORD))
{
    $sessionOptions.Password = $env:COMMONCRAWL_SFTP_PASSWORD
}
else
{
    throw "No SFTP password supplied. Pass -Password or set the COMMONCRAWL_SFTP_PASSWORD environment variable."
}

# Make sure the download target exists before the first transfer.
if (-not (Test-Path -LiteralPath $LocalPath))
{
    New-Item -ItemType Directory -Path $LocalPath -Force | Out-Null
}

$session = New-Object WinSCP.Session

try
{
    # Point the assembly at the winscp.exe that belongs to it
    $session.ExecutablePath = $winscpPath

    # Connect
    $session.Open($sessionOptions)

    $firstPass = $true

    while ($true)
    {
        # EnumerateRemoteFiles returns a lazy enumerable with no Count property of
        # its own; wrapping the call in @() materialises it into an array so the
        # emptiness check below is meaningful and the listing is not affected by
        # the deletions performed while iterating.
        $remoteFiles = @($session.EnumerateRemoteFiles($RemotePath, $FileMask, [WinSCP.EnumerationOptions]::AllDirectories))

        if ($remoteFiles.Count -eq 0)
        {
            if ($firstPass)
            {
                Write-Host "No files found to download."
            }
            break
        }

        $firstPass = $false
        $movedCount = 0

        # Iterate through each file and download/delete it
        foreach ($fileInfo in $remoteFiles)
        {
            # FullName is already the complete remote path of the file
            $remoteFilePath = $fileInfo.FullName

            # GetFiles/RemoveFiles interpret the last path component as a file
            # mask, so escape it to address exactly this one file.
            $remoteFileMask = [WinSCP.RemotePath]::EscapeFileMask($remoteFilePath)

            # Construct the local file path
            $localFilePath = [System.IO.Path]::Combine($LocalPath, $fileInfo.Name)

            # Download only (remove = $false); the remote copy is deleted
            # explicitly below so that each step is reported exactly once.
            $transferResult = $session.GetFiles($remoteFileMask, $localFilePath, $false)

            if (-not $transferResult.IsSuccess)
            {
                Write-Host "Failed to download file: $remoteFilePath"
                continue
            }

            Write-Host "Download successful: $localFilePath"

            # Attempt to delete the remote file
            $removalResult = $session.RemoveFiles($remoteFileMask)

            if ($removalResult.IsSuccess)
            {
                Write-Host "Deletion successful: $remoteFilePath"
                $movedCount++
            }
            else
            {
                Write-Host "Failed to delete remote file: $remoteFilePath"
            }
        }

        # Files that could not be moved are still on the server and would be
        # listed again; stop instead of retrying them forever.
        if ($movedCount -eq 0)
        {
            Write-Warning "No file could be moved in this pass; stopping to avoid an endless retry loop."
            break
        }
    }
}
finally
{
    $session.Dispose()
}