# Recipes for Data Pipelines
from prefect import task, Flow, Parameter
import pandas as pd
import numpy as np
@task
def get_csv(f: str) -> pd.DataFrame:
    """Load the CSV located at ``f`` into a DataFrame.

    Args:
        f: Location of the CSV, handed unchanged to ``pandas.read_csv``.

    Returns:
        The parsed table.
    """
    frame = pd.read_csv(f)
    return frame
@task
def transform_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Add 1000 to every integer and floating-point column of ``df``.

    Args:
        df: Frame to transform. It is modified in place.

    Returns:
        The same ``df`` object, with its integer/float columns shifted
        by 1000. Columns of any other dtype are left untouched.
    """
    # ``df.dtypes`` holds concrete dtype instances (e.g. ``int64``), so an
    # ``isin`` test against the abstract ``np.integer`` / ``np.floating``
    # classes does not match them as intended. ``select_dtypes`` performs a
    # subclass check and picks up every integer and float width.
    numeric_cols = df.select_dtypes(include=[np.integer, np.floating]).columns
    for col in numeric_cols:
        df[col] = df[col] + 1000
    return df
# Sample dataset fed to the flow's ``filename`` parameter.
DIAMONDS_CSV_URL = 'https://raw.githubusercontent.com/tidyverse/ggplot2/master/data-raw/diamonds.csv'

# Build the flow: the ``filename`` parameter feeds ``get_csv``, whose result
# is then passed to ``transform_cols``.
with Flow('a flow') as flow:
    filename = Parameter('filename')
    x = transform_cols(get_csv(filename))

# Execute the flow immediately, supplying the parameter value by keyword.
flow.run(filename=DIAMONDS_CSV_URL)