Spark examples
This example shows how to rename a column, change a column's data type, add a new column with no value (null), drop a column, and print the DataFrame/table schema for debugging.
from pyspark.sql.types import DecimalType, StringType, TimestampType from pyspark.sql.functions import current_timestamp, lit, col for table in os.listdir(SOURCE_FOLDER_FILE_API_PATH): tableFolderABFSPath = os.path.join(source_folder_abfs_path, table) df = spark.read.format("delta").load(tableFolderABFSPath) if table == "EmployeeLocation": df = df.withColumn("LocationId",df["LocationId"].cast("string")) elif table == "EmployeeRelatedParty": df = df.withColumnRenamed("RelationshipPeriodStartTimestamp","PeriodStartTimestamp") \ .withColumnRenamed("RelationshipPeriodEndTimestamp","PeriodEndTimestamp") \ .withColumn("SourceModifiedOn",lit(None)) \ .withColumn("SourceTable",lit(None)) \ .drop("RelationshipEstablishedDate") elif table == "GeographicArea": df = df.
Posted by John Liu Monday, May 19, 2025