attrs==22.1.0
certifi==2022.12.7
charset-normalizer==2.1.1
click==8.1.3
colorama==0.4.6
contourpy==1.0.6
cycler==0.11.0
dash==2.7.0
dash-core-components==2.0.0
dash-html-components==2.0.0
dash-table==5.0.0
Flask==2.2.2
fonttools==4.38.0
htmlmin==0.1.12
idna==3.4
ImageHash==4.3.1
itsdangerous==2.1.2
Jinja2==3.1.2
joblib==1.2.0
kiwisolver==1.4.4
MarkupSafe==2.1.1
matplotlib==3.6.2
multimethod==1.9
networkx==2.8.8
numpy==1.23.5
packaging==22.0
pandas==1.5.2
patsy==0.5.3
phik==0.12.3
Pillow==9.3.0
plotly==5.11.0
pydantic==1.10.2
pyparsing==3.0.9
python-dateutil==2.8.2
pytz==2022.6
PyWavelets==1.4.1
PyYAML==6.0
requests==2.28.1
scikit-learn==1.2.0
scipy==1.9.3
seaborn==0.12.1
six==1.16.0
statsmodels==0.13.5
tangled-up-in-unicode==0.2.0
tenacity==8.1.0
threadpoolctl==3.1.0
tqdm==4.64.1
typeguard==2.13.3
typing_extensions==4.4.0
urllib3==1.26.13
visions==0.7.5
Werkzeug==2.2.2
ydata-profiling==4.1.2
import base64
import datetime
import io
import webbrowser
import dash
from dash.dependencies import Input, Output, State
from dash import dcc
from dash import html
from dash import dash_table
import plotly.express as px
import pandas as pd
from flask import Flask, send_file
# import numpy as np
import sklearn.linear_model as lm
from sklearn import tree, neighbors
# from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from ydata_profiling import ProfileReport
# Stylesheet used by the Dash documentation examples. The URL must be bare:
# wrapped in angle brackets (a Markdown autolink artifact) the browser
# requests an invalid address and the stylesheet is never applied.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# suppress_callback_exceptions=True is needed because the callbacks below
# reference component ids ('stored-data', 'eda-type', 'submit-button', ...)
# that only exist after parse_contents() has rendered an uploaded file.
app = dash.Dash(__name__, external_stylesheets=external_stylesheets,
                suppress_callback_exceptions=True)

# Upload widget adapted from the Dash docs:
# https://dash.plotly.com/dash-core-components/upload
app.layout = html.Div([
    dcc.Upload(
        id='upload-data',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select a CSV File')
        ]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
        # Allow multiple files to be uploaded
        multiple=True
    ),
    # Filled by update_graph with the generated figure.
    html.Div(id='output-div'),
    # Filled by update_output with one panel per uploaded file.
    html.Div(id='output-datatable'),
])
def parse_contents(contents, filename, date):
    """Build the control panel and preview table for one uploaded file.

    Args:
        contents: Upload payload of the form "<header>,<base64 data>".
        filename: Name of the uploaded file; a name containing "csv" is
            read with ``pd.read_csv`` and one containing "xls" with
            ``pd.read_excel`` (matched case-insensitively).
        date: Last-modified time, passed to ``datetime.fromtimestamp``.

    Returns:
        An ``html.Div`` holding the axis dropdowns, regression and EDA
        selectors, a paged data table, a ``dcc.Store`` with the rows and
        the placeholder for the profiling report. If the file cannot be
        decoded or parsed, or its type is not recognised, a ``html.Div``
        with an error message is returned instead.
    """
    error_message = html.Div([
        'There was an error processing this file.'
    ])

    df = None
    lowered_name = filename.lower()
    try:
        # Split on the first comma only: everything after it is payload.
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        if 'csv' in lowered_name:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in lowered_name:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        # Best-effort boundary: any decode/parse failure becomes a message
        # in the page rather than a failed callback.
        print(e)
        return error_message

    if df is None:
        # Neither "csv" nor "xls" matched. Previously this fell through and
        # crashed with an unbound ``df`` further down.
        print('Unsupported file type: {}'.format(filename))
        return error_message

    # Both axis dropdowns offer the same list of columns.
    column_options = [{'label': x, 'value': x} for x in df.columns]
    records = df.to_dict('records')

    # Create a placeholder for the profiling report
    profile_div = html.Div(id='output-profiling')

    return html.Div([
        html.H5(filename),
        html.H6(datetime.datetime.fromtimestamp(date)),
        html.P("Inset X axis data"),
        dcc.Dropdown(id='xaxis-data', options=column_options),
        html.P("Inset Y axis data"),
        dcc.Dropdown(id='yaxis-data', options=column_options),
        html.Button(id="submit-button", children="Create Graph"),
        html.P("Select type of regression"),
        dcc.RadioItems(
            id="regression-type",
            options=[
                {"label": "Linear Regression", "value": "linear"},
                {"label": "Decision Tree", "value": "tree"},
                {"label": "k-NN", "value": "neighbors"},
            ],
        ),
        html.P("Select EDA button to generate a report"),
        dcc.RadioItems(
            id="eda-type",
            options=[{"label": "EDA", "value": "eda"}],
        ),
        html.P("*Carry on while we generate a report. (Find at the bottom)"),
        html.Hr(),
        dash_table.DataTable(
            data=records,
            columns=[{'name': i, 'id': i} for i in df.columns],
            page_size=15
        ),
        # The rows are kept client-side so the graph and profiling
        # callbacks can read them back as State.
        dcc.Store(id='stored-data', data=records),
        html.Hr(),  # horizontal line
        # Add the placeholder for the profiling report
        profile_div,
    ])
@app.callback(Output('output-profiling', 'children'),
              Input('eda-type', 'value'),
              State('stored-data', 'data'))
def update_profiling(edatype, data):
    """Generate the profiling report when the EDA option is selected.

    Args:
        edatype: Current value of the 'eda-type' radio items.
        data: Rows held in the 'stored-data' store (list of records).

    Returns:
        An ``html.Div`` with a link to ``/report`` once the report has been
        written, or ``None`` when the EDA option is not selected.
    """
    import os  # local import keeps this block self-contained

    if edatype != 'eda':
        return None

    # Generate the profiling report
    df = pd.DataFrame.from_dict(data)
    profile = ProfileReport(df)

    # Save next to the Flask app rather than in the current working
    # directory: flask.send_file('report.html') in the /report route is
    # resolved against the app's root_path, so writing anywhere else makes
    # the link below fail whenever the server was started from another
    # directory.
    report_path = os.path.join(app.server.root_path, "report.html")
    profile.to_file(report_path)

    # Open the HTML report in a new browser tab. This runs in the server
    # process, so the tab opens on the machine hosting the app.
    webbrowser.open_new_tab(report_path)

    return html.Div([
        html.A(
            'Open report',
            href='/report',
            target='_blank'
        )
    ])
# Flask route (not a Dash callback) that serves the generated report as HTML
@app.server.route('/report')
def report():
    """Send the file 'report.html' to the client with MIME type text/html."""
    report_file = 'report.html'
    response = send_file(report_file, mimetype='text/html')
    return response
# #New callback for downloading report in PDF format
# @app.server.route('/download_report_pdf')
# def download_report_pdf():
# return send_file('report.pdf',
# mimetype='application/pdf',
# # attachment_filename='report.pdf',
# as_attachment=True)
@app.callback(Output('output-datatable', 'children'),
              Input('upload-data', 'contents'),
              State('upload-data', 'filename'),
              State('upload-data', 'last_modified'))
def update_output(list_of_contents, list_of_names, list_of_dates):
    """Render one panel per uploaded file.

    Each (contents, filename, last_modified) triple is handed to
    ``parse_contents``. Returns the list of resulting components, or
    ``None`` while nothing has been uploaded.
    """
    if list_of_contents is None:
        return None
    # map() over several iterables stops at the shortest one, like zip().
    return list(map(parse_contents,
                    list_of_contents, list_of_names, list_of_dates))
@app.callback(Output('output-div', 'children'),
              Input('submit-button', 'n_clicks'),
              State('stored-data', 'data'),
              State('xaxis-data', 'value'),
              State('yaxis-data', 'value'),
              State('regression-type', 'value'))
def update_graph(n, data, x_data, y_data, regression_type):
    """Plot the selected columns, optionally with a fitted regression curve.

    Args:
        n: Click count of the "Create Graph" button (``None`` before the
            first click).
        data: Rows held in the 'stored-data' store (list of records).
        x_data: Name of the column chosen for the x axis.
        y_data: Name of the column chosen for the y axis.
        regression_type: 'linear', 'tree', 'neighbors', or ``None``.

    Returns:
        A ``dcc.Graph``. With a regression type selected it shows the data
        points plus the model's predictions as a line; otherwise it shows a
        plain scatter plot. ``dash.no_update`` is returned until the button
        has been clicked and both axes have been chosen.
    """
    # Nothing to draw yet: avoids a KeyError on df[[None]] when the button
    # is pressed before both axes are selected.
    if n is None or x_data is None or y_data is None:
        return dash.no_update

    df = pd.DataFrame.from_dict(data)
    # Only the two plotted columns matter; a missing value elsewhere must
    # not discard the row. Sorting by x makes the 'lines' trace run left to
    # right instead of zig-zagging in upload order.
    df = df.dropna(subset=[x_data, y_data]).sort_values(x_data)

    # regression type -> (estimator class, colour of the fitted line)
    regressors = {
        'linear': (lm.LinearRegression, 'purple'),
        'tree': (tree.DecisionTreeRegressor, 'green'),
        'neighbors': (neighbors.KNeighborsRegressor, 'cyan'),
    }

    if regression_type not in regressors:
        # No model selected: show the plain scatter plot. The original built
        # this figure but discarded it, leaving the output empty.
        return dcc.Graph(figure=px.scatter(df, x=x_data, y=y_data))

    model_class, line_color = regressors[regression_type]
    x = df[[x_data]]  # 2-D feature frame for scikit-learn
    y = df[y_data]

    model = model_class()
    model.fit(x, y)
    y_pred = model.predict(x)

    # Scatter plot of the data with the model's predictions overlaid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df[x_data], y=y, mode='markers'))
    fig.add_trace(go.Scatter(x=df[x_data], y=y_pred, mode='lines',
                             line_color=line_color, name='regression'))
    return dcc.Graph(figure=fig)
# Start the Dash development server only when executed as a script.
if '__main__' == __name__:
    app.run_server(debug=False)