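# CommonCrawl_URL_Processor/commoncrawl_local_to_share_move.ps1
#
# PowerShell script: uploads local "*_urls.txt.zst" files to a remote
# directory over SFTP using the WinSCP .NET assembly, deleting each local
# copy once its upload has succeeded.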

# WinSCP installation directory; the .NET wrapper and the executable both live here
$winscpInstallDir = "C:\Program Files (x86)\WinSCP"
# Full path to the WinSCP .NET assembly (WinSCPnet.dll)
$assemblyPath = "$winscpInstallDir\WinSCPnet.dll"
# Full path to winscp.exe
$winscpPath = "$winscpInstallDir\winscp.exe"
# Load the WinSCP .NET assembly; the returned Assembly object is discarded
[void][System.Reflection.Assembly]::LoadFrom($assemblyPath)
# Local directory that is searched for files to upload
$localPath = "E:\CommonCrawl"
# Remote directory that receives the uploaded files
$remotePath = "/mnt/user/ArchiveTeam/CommonCrawl_Files/WAT_URLs/CC-MAIN-2022-40_September_October_2022"
# Upload loop.
#
# Each pass scans $localPath (recursively) for "*_urls.txt.zst" files, uploads
# them in random order to $remotePath over SFTP and deletes every local file
# whose upload succeeded. The loop rescans after each pass and exits once a
# scan finds no matching files.
#
# Session-level errors (for example a failed connect) are not caught here:
# the finally block disposes the session and the error propagates to the caller.
#
# NOTE(review): files are discovered recursively but uploaded under their bare
# file name, so identically named files from different subdirectories map to
# the same remote path.

# Pause between passes when a pass had failed uploads, so a persistent failure
# does not turn into a tight reconnect/retry loop.
$retryDelaySeconds = 30

while ($true) {
    # Scan first: there is no point opening an SFTP session with nothing to send.
    # @() guarantees an array, so .Count is reliable for zero or one result.
    $localFiles = @(Get-ChildItem -Path $localPath -Filter "*_urls.txt.zst" -File -Recurse)

    # The emptiness check must precede the shuffle: Get-Random rejects -Count 0.
    if ($localFiles.Count -eq 0) {
        Write-Host "No files found to upload."
        break
    }

    # Shuffle the upload order
    $localFiles = @($localFiles | Get-Random -Count $localFiles.Count)

    $failedUploads = 0
    $session = $null
    try {
        # Create the session and point it at the WinSCP executable
        $session = New-Object WinSCP.Session
        $session.ExecutablePath = $winscpPath

        # Connection settings
        $sessionOptions = New-Object WinSCP.SessionOptions
        $sessionOptions.Protocol = [WinSCP.Protocol]::Sftp
        $sessionOptions.HostName = "192.168.1.248"
        $sessionOptions.UserName = "root"
        # NOTE(review): a root password is hard-coded in source. Set
        # COMMONCRAWL_SFTP_PASSWORD to override it; the literal is kept only as a
        # backward-compatible fallback and should be rotated and removed.
        if ($env:COMMONCRAWL_SFTP_PASSWORD) {
            $sessionOptions.Password = $env:COMMONCRAWL_SFTP_PASSWORD
        } else {
            $sessionOptions.Password = "Tcft65rdx!"
        }
        $sessionOptions.SshHostKeyFingerprint = "ssh-ed25519 255 xyTP5uPM3w6ebT7P7Mx1945YIYrK7NXWlZNNOGt2geY"

        # Connect
        $session.Open($sessionOptions)

        foreach ($fileInfo in $localFiles) {
            $localFilePath = $fileInfo.FullName
            $remoteFilePath = [WinSCP.RemotePath]::Combine($remotePath, $fileInfo.Name)

            # Upload without letting WinSCP remove the source ($False); the local
            # file is deleted below only after the transfer is reported successful.
            $transferResult = $session.PutFiles($localFilePath, $remoteFilePath, $False)

            if ($transferResult.IsSuccess) {
                Write-Host "Upload successful: $localFilePath to $remoteFilePath"
                Remove-Item -Path $localFilePath -Force
            } else {
                $failedUploads++
                Write-Host "Failed to upload file: $localFilePath"
                # Surface the reason(s) WinSCP reported for the failure
                foreach ($failure in $transferResult.Failures) {
                    Write-Host "  $($failure.Message)"
                }
            }
        }
    } finally {
        # Always release the session (and its winscp.exe process), even on error
        if ($null -ne $session) {
            $session.Dispose()
        }
    }

    # Files that failed are still on disk and will be picked up by the next scan;
    # wait before retrying them.
    if ($failedUploads -gt 0) {
        Write-Host "$failedUploads file(s) failed to upload; retrying in $retryDelaySeconds seconds."
        Start-Sleep -Seconds $retryDelaySeconds
    }
}