Update commoncrawl_transfer.ps1
This commit is contained in:
parent
0014256679
commit
54d85523b4
@ -17,10 +17,13 @@ $sessionOptions = New-Object $sessionOptionsType
|
||||
|
||||
# Configure the WinSCP session: protocol, target host, credentials and the
# expected SSH host key fingerprint.
|
||||
$sessionOptions.Protocol = [WinSCP.Protocol]::Sftp
|
||||
# NOTE(review): HostName is assigned twice in this view. If both statements
# run, the later one wins. This looks like the old and new side of a diff
# rendered together - confirm which single value belongs in the script.
$sessionOptions.HostName = "37.27.11.121"
|
||||
$sessionOptions.HostName = "65.109.140.15"
|
||||
# NOTE(review): the session logs in as root with a password stored in plain
# text in the script. Consider a less privileged account and loading the
# secret from outside the source file.
$sessionOptions.UserName = "root"
|
||||
$sessionOptions.Password = "Tcft65rdx!"
|
||||
# NOTE(review): the fingerprint is also assigned twice; as with HostName, the
# later assignment wins if both run. The fingerprint must belong to the host
# that is actually used - confirm the pairing.
$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 lM6ZIlmihQWkYz3iJONHYEkUPeJaUMUNOx7Av9CftuY"
|
||||
$sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 UeJ2ZrZQte4L0Inyewiu0BJ/PtPf+pc3OxF01xExWes"
|
||||
|
||||
# Specify the path for the local record file
|
||||
$recordFilePath = "E:\CommonCrawl\transferred_files.txt"
|
||||
|
||||
try
|
||||
{
|
||||
@ -33,6 +36,11 @@ try
|
||||
# Specify the remote directory path
|
||||
$remotePath = "/opt/"
|
||||
|
||||
# Make sure the local record file exists: when Test-Path reports that
# $recordFilePath is missing, create it as an empty file.
|
||||
if (-not (Test-Path $recordFilePath)) {
|
||||
# Out-Null discards the object New-Item returns so nothing is written
# to the output stream.
New-Item -ItemType File -Path $recordFilePath | Out-Null
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
# Get a list of files matching the pattern "_urls.txt.zst"
|
||||
@ -48,22 +56,18 @@ try
|
||||
# Iterate through each file and download/delete it
|
||||
foreach ($fileInfo in $remoteFiles)
|
||||
{
|
||||
# Skip files that are already listed in the record file.
# Get-Content returns the record file's lines, and -contains tests whether one
# of them equals the remote file's FullName (whole-line comparison,
# case-insensitive by default in PowerShell).
# NOTE(review): the record file is re-read for every file in the loop; reading
# it once before the loop would avoid the repeated I/O.
|
||||
if ((Get-Content $recordFilePath) -contains $fileInfo.FullName) {
|
||||
Write-Host "File already transferred: $($fileInfo.FullName)"
|
||||
# Move on to the next entry of the enclosing foreach.
continue
|
||||
}
|
||||
|
||||
# Construct the full remote file path from $remotePath and the entry's FullName.
# NOTE(review): FullName presumably already holds the complete remote path, so
# combining it with $remotePath may be redundant or produce an unexpected
# result - verify against the WinSCP RemotePath.Combine documentation.
|
||||
$remoteFilePath = [WinSCP.RemotePath]::Combine($remotePath, $fileInfo.FullName)
|
||||
|
||||
# Construct the local destination path: the file keeps its own name and is
# placed directly under E:\CommonCrawl.
|
||||
$localFilePath = [System.IO.Path]::Combine("E:\CommonCrawl", $fileInfo.Name)
|
||||
|
||||
# Lock handling: a sibling file named "<remote file>.lock" marks a file as
# being processed. If the lock already exists, the file is skipped.
|
||||
$lockFilePath = $remoteFilePath + ".lock"
|
||||
if ($session.FileExists($lockFilePath)) {
|
||||
Write-Host "Lock file exists. Skipping file: $remoteFilePath"
|
||||
# Move on to the next entry of the enclosing foreach.
continue
|
||||
}
|
||||
|
||||
# Create the lock file by uploading a local file to the lock path; Check()
# raises an error if the upload failed.
# NOTE(review): the existence check and the creation are separate calls, so
# two clients could both pass the check before either creates the lock.
# NOTE(review): the local source is "/dev/null" while the other local paths in
# this script are Windows-style (E:\...) - confirm this path exists on the
# machine running the script.
|
||||
$session.PutFiles("/dev/null", $lockFilePath).Check()
|
||||
|
||||
# Download the file
|
||||
$transferResult = $session.GetFiles($remoteFilePath, $localFilePath, $True)
|
||||
|
||||
@ -72,6 +76,9 @@ try
|
||||
{
|
||||
Write-Host "Download successful: $localFilePath"
|
||||
|
||||
# Record the transferred file
|
||||
Add-Content -Path $recordFilePath -Value $fileInfo.FullName
|
||||
|
||||
# Attempt to delete the remote file
|
||||
$removalResult = $session.RemoveFiles($remoteFilePath)
|
||||
|
||||
@ -89,9 +96,6 @@ try
|
||||
{
|
||||
Write-Host "Failed to download file: $remoteFilePath"
|
||||
}
|
||||
|
||||
# Remove lock file
|
||||
$session.RemoveFiles($lockFilePath).Check()
|
||||
}
|
||||
|
||||
# Get a new list of files after downloading
|
||||
|
Loading…
Reference in New Issue
Block a user