Update commoncrawl_transfer.ps1
This commit is contained in:
parent
171d3e2d2d
commit
727d2c3187
@@ -1,91 +1,91 @@
|
||||
# Moves every "*_urls.txt.gz" file found (recursively) under /opt/commoncrawl/
# on the SFTP server into E:\CommonCrawl\ and deletes each remote file once its
# download has succeeded. Passes are repeated until the server has no matching
# files left, or until a whole pass moves nothing (to avoid retrying forever).

# Specify the full path to WinSCPnet.dll
$assemblyPath = "C:\Program Files (x86)\WinSCP\WinSCPnet.dll"

# Load WinSCP .NET assembly using [System.Reflection.Assembly]::LoadFrom
[Reflection.Assembly]::LoadFrom($assemblyPath) | Out-Null

# Specify the path to winscp.exe
$winscpPath = "C:\Program Files (x86)\WinSCP\winscp.exe"

# Remote directory to scan, mask of the files to move, and local target directory
$remotePath = "/opt/commoncrawl/"
$fileMask = "*_urls.txt.gz"
$localDirectory = "E:\CommonCrawl\"

# Create a session instance
$session = New-Object WinSCP.Session

# Set up session options
# NOTE(review): SECURITY - a root password is stored in plain text in this
# script (and in version control). Rotate it and load the credential from a
# protected store, or switch to key-based authentication.
$sessionOptions = New-Object WinSCP.SessionOptions
$sessionOptions.Protocol = [WinSCP.Protocol]::Sftp
$sessionOptions.HostName = "37.27.11.121"
$sessionOptions.UserName = "root"
$sessionOptions.Password = "Tcft65rdx!"
$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 lM6ZIlmihQWkYz3iJONHYEkUPeJaUMUNOx7Av9CftuY"

try
{
    # Tell the assembly where winscp.exe lives (plain property assignment; no reflection needed)
    $session.ExecutablePath = $winscpPath

    # Connect
    $session.Open($sessionOptions)

    do
    {
        # EnumerateRemoteFiles yields files lazily instead of returning a collection,
        # so wrap it in @() to get a real array whose .Count is the number of files.
        $remoteFiles = @($session.EnumerateRemoteFiles($remotePath, $fileMask, [WinSCP.EnumerationOptions]::AllDirectories))

        # Check if there are any files to download
        if ($remoteFiles.Count -eq 0)
        {
            Write-Host "No files found to download."
            break
        }

        # Number of files that were both downloaded and removed from the server in this pass
        $movedCount = 0

        # Iterate through each file and download/delete it
        foreach ($fileInfo in $remoteFiles)
        {
            # FullName is already the complete remote path of the file
            $remoteFilePath = $fileInfo.FullName

            # GetFiles/RemoveFiles interpret their argument as a file mask, so escape mask characters
            $remoteFileMask = [WinSCP.RemotePath]::EscapeFileMask($remoteFilePath)

            # Construct the local file path
            # NOTE(review): sub-directories are flattened, so two remote files with the
            # same name in different directories would land on the same local path.
            $localFilePath = [System.IO.Path]::Combine($localDirectory, $fileInfo.Name)

            # Download the file; keep the remote copy ($False) so that it is deleted
            # exactly once, by the explicit RemoveFiles call below.
            $transferResult = $session.GetFiles($remoteFileMask, $localFilePath, $False)

            # Check if the download was successful
            if ($transferResult.IsSuccess)
            {
                Write-Host "Download successful: $localFilePath"

                # Attempt to delete the remote file
                $removalResult = $session.RemoveFiles($remoteFileMask)

                # Check if the removal was successful
                if ($removalResult.IsSuccess)
                {
                    $movedCount++
                    Write-Host "Deletion successful: $remoteFilePath"
                }
                else
                {
                    Write-Host "Failed to delete remote file: $remoteFilePath"
                }
            }
            else
            {
                Write-Host "Failed to download file: $remoteFilePath"
            }
        }

        # If nothing left the server in this pass, another pass would only repeat the same failures
        if ($movedCount -eq 0)
        {
            Write-Host "No files could be moved in this pass; stopping."
        }
    } while ($movedCount -gt 0)
}
finally
{
    # Always close the session and terminate the underlying winscp.exe process
    $session.Dispose()
}
|
||||
# Moves every "*_urls.txt.gz" file found (recursively) under /opt/commoncrawl/
# on the SFTP server into E:\CommonCrawl\ and deletes each remote file once its
# download has succeeded. Passes are repeated until the server has no matching
# files left, or until a whole pass moves nothing (to avoid retrying forever).

# Specify the full path to WinSCPnet.dll
$assemblyPath = "C:\Program Files (x86)\WinSCP\WinSCPnet.dll"

# Load WinSCP .NET assembly using [System.Reflection.Assembly]::LoadFrom
[Reflection.Assembly]::LoadFrom($assemblyPath) | Out-Null

# Specify the path to winscp.exe
$winscpPath = "C:\Program Files (x86)\WinSCP\winscp.exe"

# Remote directory to scan, mask of the files to move, and local target directory
$remotePath = "/opt/commoncrawl/"
$fileMask = "*_urls.txt.gz"
$localDirectory = "E:\CommonCrawl\"

# Create a session instance
$session = New-Object WinSCP.Session

# Set up session options
# NOTE(review): SECURITY - a root password is stored in plain text in this
# script (and in version control). Rotate it and load the credential from a
# protected store, or switch to key-based authentication.
$sessionOptions = New-Object WinSCP.SessionOptions
$sessionOptions.Protocol = [WinSCP.Protocol]::Sftp
$sessionOptions.HostName = "37.27.11.121"
$sessionOptions.UserName = "root"
$sessionOptions.Password = "Tcft65rdx!"
$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 lM6ZIlmihQWkYz3iJONHYEkUPeJaUMUNOx7Av9CftuY"

try
{
    # Tell the assembly where winscp.exe lives (plain property assignment; no reflection needed)
    $session.ExecutablePath = $winscpPath

    # Connect
    $session.Open($sessionOptions)

    do
    {
        # EnumerateRemoteFiles yields files lazily instead of returning a collection,
        # so wrap it in @() to get a real array whose .Count is the number of files.
        $remoteFiles = @($session.EnumerateRemoteFiles($remotePath, $fileMask, [WinSCP.EnumerationOptions]::AllDirectories))

        # Check if there are any files to download
        if ($remoteFiles.Count -eq 0)
        {
            Write-Host "No files found to download."
            break
        }

        # Number of files that were both downloaded and removed from the server in this pass
        $movedCount = 0

        # Iterate through each file and download/delete it
        foreach ($fileInfo in $remoteFiles)
        {
            # FullName is already the complete remote path of the file
            $remoteFilePath = $fileInfo.FullName

            # GetFiles/RemoveFiles interpret their argument as a file mask, so escape mask characters
            $remoteFileMask = [WinSCP.RemotePath]::EscapeFileMask($remoteFilePath)

            # Construct the local file path
            # NOTE(review): sub-directories are flattened, so two remote files with the
            # same name in different directories would land on the same local path.
            $localFilePath = [System.IO.Path]::Combine($localDirectory, $fileInfo.Name)

            # Download the file; keep the remote copy ($False) so that it is deleted
            # exactly once, by the explicit RemoveFiles call below.
            $transferResult = $session.GetFiles($remoteFileMask, $localFilePath, $False)

            # Check if the download was successful
            if ($transferResult.IsSuccess)
            {
                Write-Host "Download successful: $localFilePath"

                # Attempt to delete the remote file
                $removalResult = $session.RemoveFiles($remoteFileMask)

                # Check if the removal was successful
                if ($removalResult.IsSuccess)
                {
                    $movedCount++
                    Write-Host "Deletion successful: $remoteFilePath"
                }
                else
                {
                    Write-Host "Failed to delete remote file: $remoteFilePath"
                }
            }
            else
            {
                Write-Host "Failed to download file: $remoteFilePath"
            }
        }

        # If nothing left the server in this pass, another pass would only repeat the same failures
        if ($movedCount -eq 0)
        {
            Write-Host "No files could be moved in this pass; stopping."
        }
    } while ($movedCount -gt 0)
}
finally
{
    # Always close the session and terminate the underlying winscp.exe process
    $session.Dispose()
}
|
Loading…
Reference in New Issue
Block a user