Implemented subprocess for running multiple JSON extractions at once
This commit is contained in:
parent
8496391064
commit
182c58f1ce
@ -1,8 +1,8 @@
|
|||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
|
||||||
import json
|
import json
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
MEDIAFIRE_DIRECTORY = "/opt/MediaFire"
|
MEDIAFIRE_DIRECTORY = "/opt/MediaFire"
|
||||||
CONCURRENCY = 4 # Set the desired concurrency for downloading multiple files
|
CONCURRENCY = 4 # Set the desired concurrency for downloading multiple files
|
||||||
@ -62,13 +62,14 @@ def process_batch(urls, start_index, end_index):
|
|||||||
downloaded_files = download_files(batch_urls)
|
downloaded_files = download_files(batch_urls)
|
||||||
|
|
||||||
# Move files and run cdxsummary for each downloaded file
|
# Move files and run cdxsummary for each downloaded file
|
||||||
for file_path in downloaded_files:
|
with ThreadPoolExecutor(max_workers=CONCURRENCY) as executor:
|
||||||
# Construct file paths
|
for file_path in downloaded_files:
|
||||||
file_path = os.path.join(os.getcwd(), file_path)
|
# Construct file paths
|
||||||
json_filepath = os.path.join(MEDIAFIRE_DIRECTORY, file_path.replace(".cdx.gz", ".cdx.json"))
|
file_path = os.path.join(os.getcwd(), file_path)
|
||||||
|
json_filepath = os.path.join(MEDIAFIRE_DIRECTORY, file_path.replace(".cdx.gz", ".cdx.json"))
|
||||||
|
|
||||||
# Run cdxsummary and delete .cdx.gz file
|
# Run cdxsummary and delete .cdx.gz file
|
||||||
run_cdxsummary(file_path, json_filepath)
|
executor.submit(run_cdxsummary, file_path, json_filepath)
|
||||||
|
|
||||||
def run_ia_command():
|
def run_ia_command():
|
||||||
# Get the current date formatted as YYYY-MM-DD
|
# Get the current date formatted as YYYY-MM-DD
|
||||||
@ -133,4 +134,3 @@ def main():
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user