Update commoncrawl_transfer.ps1

This commit is contained in:
datechnoman 2023-12-18 04:27:30 +00:00
parent 171d3e2d2d
commit 727d2c3187

View File

@ -1,91 +1,91 @@
# Moves CommonCrawl URL archives (*_urls.txt.gz) from a remote SFTP host into a
# local directory using the WinSCP .NET assembly. Each remote file is deleted
# only after it has been downloaded successfully. The remote tree is re-scanned
# until nothing matching the pattern is left, or until a pass makes no progress.

# Full paths to the WinSCP .NET assembly and the winscp.exe it drives.
$assemblyPath = "C:\Program Files (x86)\WinSCP\WinSCPnet.dll"
$winscpPath = "C:\Program Files (x86)\WinSCP\winscp.exe"

# Remote directory to scan (recursively), the file mask to match, and the
# local directory that receives the downloads.
$remotePath = "/opt/commoncrawl/"
$filePattern = "*_urls.txt.gz"
$localDirectory = "E:\CommonCrawl\"

# Load the WinSCP .NET assembly.
[Reflection.Assembly]::LoadFrom($assemblyPath) | Out-Null

# Set up session options.
$sessionOptions = New-Object WinSCP.SessionOptions
$sessionOptions.Protocol = [WinSCP.Protocol]::Sftp
$sessionOptions.HostName = "37.27.11.121"
$sessionOptions.UserName = "root"
$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 lM6ZIlmihQWkYz3iJONHYEkUPeJaUMUNOx7Av9CftuY"

# The password must not live in source control. Take it from the
# COMMONCRAWL_SFTP_PASSWORD environment variable (for unattended runs) and
# fall back to an interactive secure prompt when the variable is not set.
if ($env:COMMONCRAWL_SFTP_PASSWORD)
{
    $sessionOptions.Password = $env:COMMONCRAWL_SFTP_PASSWORD
}
else
{
    $prompt = "SFTP password for $($sessionOptions.UserName)@$($sessionOptions.HostName)"
    $sessionOptions.SecurePassword = Read-Host -Prompt $prompt -AsSecureString
}

# Create the session instance.
$session = New-Object WinSCP.Session

try
{
    # Point the assembly at the winscp.exe it should launch.
    $session.ExecutablePath = $winscpPath

    # Connect
    $session.Open($sessionOptions)

    # Number of files fully processed (downloaded AND removed remotely) so far.
    $totalCompleted = 0

    while ($true)
    {
        # EnumerateRemoteFiles yields results lazily; wrap it in @() so that
        # .Count is a real element count and the listing is taken only once
        # per pass.
        $remoteFiles = @($session.EnumerateRemoteFiles($remotePath, $filePattern, [WinSCP.EnumerationOptions]::AllDirectories))

        if ($remoteFiles.Count -eq 0)
        {
            # Only report "nothing found" when there was never anything to do.
            if ($totalCompleted -eq 0)
            {
                Write-Host "No files found to download."
            }
            break
        }

        $completedThisPass = 0

        # Iterate through each file and download/delete it.
        foreach ($fileInfo in $remoteFiles)
        {
            # FullName is already the complete remote path of the file.
            $remoteFilePath = $fileInfo.FullName

            # GetFiles/RemoveFiles interpret their path argument as a file
            # mask, so escape any mask characters in the concrete file name.
            $remoteFileMask = [WinSCP.RemotePath]::EscapeFileMask($remoteFilePath)

            # Construct the local file path (sub-directories are flattened).
            $localFilePath = [System.IO.Path]::Combine($localDirectory, $fileInfo.Name)

            # Download without the built-in "remove source" option: the remote
            # file is deleted explicitly below so that the deletion result can
            # be checked and reported on its own.
            $transferResult = $session.GetFiles($remoteFileMask, $localFilePath, $False)

            if ($transferResult.IsSuccess)
            {
                Write-Host "Download successful: $localFilePath"

                # Attempt to delete the remote file.
                $removalResult = $session.RemoveFiles($remoteFileMask)

                if ($removalResult.IsSuccess)
                {
                    Write-Host "Deletion successful: $remoteFilePath"
                    $completedThisPass++
                }
                else
                {
                    Write-Host "Failed to delete remote file: $remoteFilePath"
                }
            }
            else
            {
                Write-Host "Failed to download file: $remoteFilePath"
            }
        }

        $totalCompleted += $completedThisPass

        # If every remaining file failed, another pass would just repeat the
        # same failures forever; stop instead.
        if ($completedThisPass -eq 0)
        {
            Write-Warning "No file could be fully transferred in this pass; stopping."
            break
        }
    }
}
finally
{
    # Always close the connection and terminate the WinSCP process.
    $session.Dispose()
}