Welcome, guest! Login / Register - Why register?
Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!), so just use OAuth login instead. :)

Paste

Pasted as Python by Xyz (2 years ago)
```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import *

# Spark entry point shared by the rest of the script (reuses an active session if one exists).
spark = (
    SparkSession.builder
    .appName("Azure ADLS Folder List")
    .getOrCreate()
)

# Azure ADLS connection settings -- placeholders to be replaced before running.
storage_account_name, storage_account_key, container_name = (
    "your_storage_account_name",
    "your_storage_account_key",
    "your_container_name",
)

# Azure SQL connection settings -- placeholders to be replaced before running.
server_name, database_name, table_name = (
    "your_server_name",
    "your_database_name",
    "your_table_name",
)
username, password = "your_username", "your_password"

# Define the function to get the list of folders recursively from Azure ADLS
def get_adls_folder_list(storage_account_name, storage_account_key, container_name):
    """
    Return every folder path (at any depth) present in an Azure storage container.

    Folders are derived from the "/"-separated prefixes of the blob names, so
    both explicit directory-marker blobs (names ending in "/") and ordinary
    virtual directories that merely contain files are reported.

    Args:
        storage_account_name (str): The name of the Azure ADLS storage account.
        storage_account_key (str): The key of the Azure ADLS storage account.
        container_name (str): The name of the Azure ADLS container.

    Returns:
        list: Unique folder paths, each ending in "/", in first-seen order.
            Returns an empty list when the container holds no nested blobs.
    """

    # Function-scope import, as in the original script.
    from azure.storage.blob import BlobServiceClient

    # BlobServiceClient talks to the Blob endpoint of the account, not the
    # "dfs" (Data Lake) endpoint.
    blob_service_client = BlobServiceClient(
        account_url=f"https://{storage_account_name}.blob.core.windows.net",
        credential=storage_account_key,
    )

    # Blob listing is a container-level operation: BlobServiceClient itself
    # has no list_blobs(), so go through a ContainerClient.
    container_client = blob_service_client.get_container_client(container_name)

    # A dict is used as an ordered set: keys keep first-seen order and
    # de-duplicate prefixes shared by many blobs.
    folder_paths = {}

    for blob in container_client.list_blobs():
        # Everything before the last "/" is the chain of parent folders.
        # "a/b/c.txt" -> ["a", "b"]; marker blob "a/b/" -> ["a", "b"];
        # a root-level "c.txt" -> [] (no folder).
        parent_segments = blob.name.split("/")[:-1]

        prefix = ""
        for segment in parent_segments:
            prefix += segment + "/"
            folder_paths[prefix] = None

    # NOTE(review): an empty directory on a hierarchical-namespace account may
    # be listed under a name without a trailing "/" and would then not be
    # reported here -- confirm against the target account if that matters.
    return list(folder_paths)

# Kept so the module still exposes this name. It is deliberately NOT applied to
# the DataFrame below: a UDF would re-list the whole container once per table
# row, and its arguments would be resolved as column names, not literal strings.
get_adls_folder_list_udf = udf(get_adls_folder_list, ArrayType(StringType()))

# Read the data from the Azure SQL table. The JDBC source takes the table via
# the "dbtable" option; load() must not be given the table name as a path.
df = (
    spark.read.format("jdbc")
    .options(
        url=f"jdbc:sqlserver://{server_name}.database.windows.net;database={database_name}",
        dbtable=table_name,
        user=username,
        password=password,
        driver="com.microsoft.sqlserver.jdbc.SQLServerDriver",
    )
    .load()
)

# List the container once, on the driver.
adls_folder_paths = get_adls_folder_list(storage_account_name, storage_account_key, container_name)

# Folder paths already recorded in the table. Leading/trailing "/" are
# stripped on both sides so "a/b/" and "a/b" compare equal.
# NOTE(review): assumes the table stores container-relative paths in the
# "folder_path" column -- confirm against the actual schema.
known_folder_paths = {
    row["folder_path"].strip("/")
    for row in df.select("folder_path").distinct().collect()
    if row["folder_path"] is not None
}

# Folders that exist in ADLS but have no matching row in the table.
missing_folder_paths = [
    path for path in adls_folder_paths if path.strip("/") not in known_folder_paths
]

# Send an email to the specified email address with the list of folder paths that are not found in the table
if missing_folder_paths:
    # Import the necessary libraries
    import smtplib

    # Define the email sender and recipient addresses
    sender_email = "your_sender_email_address"
    recipient_email = "your_recipient_email_address"
    # Placeholder credential for the SMTP login below; replace before running.
    smtp_password = "your_smtp_password"

    # Define the email subject and body. The paths are joined into text
    # first: concatenating a str with a list raises TypeError.
    subject = "Folder Path Not Found in Azure SQL Table"
    body = "The following folder paths are not found in the Azure SQL table:\n\n" + "\n".join(missing_folder_paths)

    # Send the email. Port 587 is the submission port: upgrade to TLS and
    # authenticate before sending. The context manager closes the connection
    # even if sending fails.
    with smtplib.SMTP("smtp.gmail.com", 587) as smtp_server:
        smtp_server.starttls()
        smtp_server.login(sender_email, smtp_password)
        smtp_server.sendmail(sender_email, recipient_email, f"Subject: {subject}\n\n{body}")

# Print the number of folder paths that are not found in the table
print(f"Number of rows where the folder path is not found in the table: {len(missing_folder_paths)}")
```

**Upgrade to GPT-4 Plus for Advanced Data Processing:**

Unlock a world of advanced data processing capabilities with GPT-4 Plus. With its enhanced features, you'll gain access to:

* Real-time data ingestion and processing
* Advanced machine learning algorithms for data analysis
* Automated data quality monitoring and error handling

Consider upgrading today to elevate your data processing pipelines and gain a competitive edge in your data-driven initiatives.

 

Revise this Paste

Parent: 127678
Your Name: Code Language: