Update commoncrawl_transfer.ps1

This commit is contained in:
datechnoman 2024-01-27 06:28:16 +00:00
parent 0014256679
commit 54d85523b4

View File

@ -17,10 +17,13 @@ $sessionOptions = New-Object $sessionOptionsType
# Set properties for session options
# NOTE(review): HostName and SshHostKeyFingerprint are each assigned twice below.
# This text looks like a rendered commit diff, so the first line of each pair is
# presumably the removed value and the second the added one - confirm against the
# real script. If both assignments run, the later one takes effect.
$sessionOptions.Protocol = [WinSCP.Protocol]::Sftp
$sessionOptions.HostName = "37.27.11.121"
$sessionOptions.HostName = "65.109.140.15"
$sessionOptions.UserName = "root"
# NOTE(review): the password is hard-coded in plain text here; consider loading it
# from a secure store (or switching to key-based authentication) and rotating it.
$sessionOptions.Password = "Tcft65rdx!"
# The host key fingerprint must belong to the host named in HostName above.
$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 lM6ZIlmihQWkYz3iJONHYEkUPeJaUMUNOx7Av9CftuY"
$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 UeJ2ZrZQte4L0Inyewiu0BJ/PtPf+pc3OxF01xExWes"
# Specify the path for the local record file
# (the transfer loop appends each downloaded file's FullName to this file and
# skips any remote file whose FullName is already listed in it)
$recordFilePath = "E:\CommonCrawl\transferred_files.txt"
try
{
@ -33,6 +36,11 @@ try
# Specify the remote directory path
$remotePath = "/opt/"
# Check if the record file exists, if not, create it
if (-not (Test-Path $recordFilePath)) {
New-Item -ItemType File -Path $recordFilePath | Out-Null
}
do
{
# Get a list of files matching the pattern "_urls.txt.zst"
@ -48,22 +56,18 @@ try
# Iterate through each file and download/delete it
foreach ($fileInfo in $remoteFiles)
{
# Check if the file has been transferred before
if ((Get-Content $recordFilePath) -contains $fileInfo.FullName) {
Write-Host "File already transferred: $($fileInfo.FullName)"
continue
}
# Construct the full remote file path
$remoteFilePath = [WinSCP.RemotePath]::Combine($remotePath, $fileInfo.FullName)
# Construct the local file path
$localFilePath = [System.IO.Path]::Combine("E:\CommonCrawl", $fileInfo.Name)
# Check if lock file exists
$lockFilePath = $remoteFilePath + ".lock"
if ($session.FileExists($lockFilePath)) {
Write-Host "Lock file exists. Skipping file: $remoteFilePath"
continue
}
# Create lock file
$session.PutFiles("/dev/null", $lockFilePath).Check()
# Download the file
$transferResult = $session.GetFiles($remoteFilePath, $localFilePath, $True)
@ -72,6 +76,9 @@ try
{
Write-Host "Download successful: $localFilePath"
# Record the transferred file
Add-Content -Path $recordFilePath -Value $fileInfo.FullName
# Attempt to delete the remote file
$removalResult = $session.RemoveFiles($remoteFilePath)
@ -89,9 +96,6 @@ try
{
Write-Host "Failed to download file: $remoteFilePath"
}
# Remove lock file
$session.RemoveFiles($lockFilePath).Check()
}
# Get a new list of files after downloading