Structured Streaming

Read in Streaming Data

Reading JSON files from storage

from pyspark.sql.types import *
# Location of the JSON files used as the streaming source.
inputPath = "/mnt/data/jsonfiles/"

# Define your schema if it's known (rather than relying on Spark to infer the schema)
# The third StructField argument marks each column as nullable.
jsonSchema = StructType([
    StructField("time", TimestampType(), True),
    StructField("id", IntegerType(), True),
    StructField("value", StringType(), True),
])

# NOTE: `spark` is not defined in this snippet; presumably it is the
# SparkSession supplied by the notebook/runtime -- confirm before running
# this as a standalone script.
#
# The chained call is wrapped in parentheses instead of being joined with
# backslashes: a backslash continuation must be the last character on its
# line, so placing a comment after it is a SyntaxError.
streamingInputDF = (
    spark.readStream
    .schema(jsonSchema)
    # Treat a sequence of files as a stream by picking one file at a time
    .option("maxFilesPerTrigger", 1)
    .json(inputPath)
)

References