ML Dashboard Code


attrs==22.1.0
certifi==2022.12.7
charset-normalizer==2.1.1
click==8.1.3
colorama==0.4.6
contourpy==1.0.6
cycler==0.11.0
dash==2.7.0
dash-core-components==2.0.0
dash-html-components==2.0.0
dash-table==5.0.0
Flask==2.2.2
fonttools==4.38.0
htmlmin==0.1.12
idna==3.4
ImageHash==4.3.1
itsdangerous==2.1.2
Jinja2==3.1.2
joblib==1.2.0
kiwisolver==1.4.4
MarkupSafe==2.1.1
matplotlib==3.6.2
multimethod==1.9
networkx==2.8.8
numpy==1.23.5
packaging==22.0
pandas==1.5.2
patsy==0.5.3
phik==0.12.3
Pillow==9.3.0
plotly==5.11.0
pydantic==1.10.2
pyparsing==3.0.9
python-dateutil==2.8.2
pytz==2022.6
PyWavelets==1.4.1
PyYAML==6.0
requests==2.28.1
scikit-learn==1.2.0
scipy==1.9.3
seaborn==0.12.1
six==1.16.0
statsmodels==0.13.5
tangled-up-in-unicode==0.2.0
tenacity==8.1.0
threadpoolctl==3.1.0
tqdm==4.64.1
typeguard==2.13.3
typing_extensions==4.4.0
urllib3==1.26.13
visions==0.7.5
Werkzeug==2.2.2
ydata-profiling==4.1.2

import base64
import datetime
import io
import webbrowser
import dash
from dash.dependencies import Input, Output, State
from dash import dcc
from dash import html
from dash import dash_table
import plotly.express as px

import pandas as pd
from flask import Flask, send_file
# import numpy as np
import sklearn.linear_model as lm
from sklearn import tree, neighbors
# from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from ydata_profiling import ProfileReport

# External CSS loaded by the Dash app. The URL must be a bare URL: the
# surrounding "<...>" (a markdown autolink artifact) made the href invalid,
# so the stylesheet was never applied.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# The Dash application. Callback-exception checks are suppressed because the
# callbacks below target components (dropdowns, store, buttons) that are only
# added to the page by parse_contents after a file has been uploaded.
app = dash.Dash(
    __name__,
    external_stylesheets=external_stylesheets,
    suppress_callback_exceptions=True,
)

# Page layout: an upload area followed by two empty containers that the
# callbacks fill in. The upload widget is adapted from the Dash docs:
# https://dash.plotly.com/dash-core-components/upload
_UPLOAD_BOX_STYLE = dict(
    width='100%',
    height='60px',
    lineHeight='60px',
    borderWidth='1px',
    borderStyle='dashed',
    borderRadius='5px',
    textAlign='center',
    margin='10px',
)

_upload_prompt = html.Div(children=[
    'Drag and Drop or ',
    html.A(children='Select a CSV File'),
])

app.layout = html.Div(children=[
    dcc.Upload(
        id='upload-data',
        children=_upload_prompt,
        style=_UPLOAD_BOX_STYLE,
        # Allow multiple files to be uploaded
        multiple=True,
    ),
    html.Div(id='output-div'),
    html.Div(id='output-datatable'),
])

def parse_contents(contents, filename, date):
    """Build the controls, preview table and data store for one uploaded file.

    Args:
        contents: Upload payload as a single string made of a header, a
            comma, and the base64-encoded file data.
        filename: Name of the uploaded file. A name containing ``csv`` is
            parsed as UTF-8 CSV; a name containing ``xls`` is parsed as Excel.
        date: Timestamp of the file, in the form accepted by
            ``datetime.datetime.fromtimestamp``.

    Returns:
        An ``html.Div`` holding the axis dropdowns, the regression and EDA
        selectors, a paginated ``DataTable`` preview, a ``dcc.Store`` with the
        rows as records, and an empty ``output-profiling`` placeholder. If the
        payload cannot be decoded or parsed, or the file type is not
        recognised, a ``html.Div`` with an error message is returned instead.
    """
    try:
        # Split on the first comma only, so the two-value unpacking cannot
        # fail because of additional commas in the payload.
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)

        if 'csv' in filename:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in filename:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            # Without this branch ``df`` stayed unbound and the code below
            # crashed with a NameError instead of showing the error message.
            raise ValueError(f'Unsupported file type: {filename!r}')
    except Exception as e:
        # Boundary handler: any decode/parse failure becomes a UI message.
        print(e)
        return html.Div([
            'There was an error processing this file.'
        ])

    # Computed once and reused by both dropdowns / both data consumers.
    column_options = [{'label': x, 'value': x} for x in df.columns]
    records = df.to_dict('records')

    # Create a placeholder for the profiling report
    profile_div = html.Div(id='output-profiling')

    return html.Div([
        html.H5(filename),
        html.H6(datetime.datetime.fromtimestamp(date)),
        html.P("Inset X axis data"),
        dcc.Dropdown(id='xaxis-data', options=column_options),
        html.P("Inset Y axis data"),
        dcc.Dropdown(id='yaxis-data', options=column_options),
        html.Button(id="submit-button", children="Create Graph"),
        html.P("Select type of regression"),
        dcc.RadioItems(
            id="regression-type",
            options=[
                {"label": "Linear Regression", "value": "linear"},
                {"label": "Decision Tree", "value": "tree"},
                {"label": "k-NN", "value": "neighbors"},
            ],
        ),

        html.P("Select EDA button to generate a report"),
        dcc.RadioItems(
            id="eda-type",
            options=[{"label": "EDA", "value": "eda"}],
        ),

        html.P("*Carry on while we generate a report. (Find at the bottom)"),
        html.Hr(),

        dash_table.DataTable(
            data=records,
            columns=[{'name': i, 'id': i} for i in df.columns],
            page_size=15
        ),
        # The callbacks read the uploaded rows back from this store.
        dcc.Store(id='stored-data', data=records),

        html.Hr(),  # horizontal line

        # Add the placeholder for the profiling report
        profile_div,
    ])

@app.callback(Output('output-profiling', 'children'),
              Input('eda-type', 'value'),
              State('stored-data', 'data'))
def update_profiling(edatype, data):
    """Generate the profiling report when the EDA option is selected.

    Args:
        edatype: Current value of the ``eda-type`` radio group; the report is
            generated only when it equals ``'eda'``.
        data: Rows held in the ``stored-data`` store, as a list of records.

    Returns:
        A ``html.Div`` containing a link to ``/report`` once the report has
        been written, or ``dash.no_update`` when the EDA option is not
        selected.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    if edatype != 'eda':
        # Nothing requested: leave the placeholder as it is.
        return dash.no_update

    # Generate the profiling report
    df = pd.DataFrame.from_dict(data)
    profile = ProfileReport(df)

    # Save report to an HTML file
    profile.to_file("report.html")

    # Open the HTML report in a new browser tab. A bare relative filename is
    # not a portable argument for the webbrowser module, so hand it an
    # absolute file:// URI that points at the file just written.
    report_uri = Path("report.html").resolve().as_uri()
    webbrowser.open_new_tab(report_uri)

    return html.Div([
        html.A(
            'Open report',
            href='/report',
            target='_blank'
        )
    ])

# Flask route that serves the generated profiling report as an HTML page.
# NOTE(review): the report is written by update_profiling through a relative
# path; confirm both paths resolve to the same directory when the app is
# started from a different working directory.
@app.server.route('/report')
def report():
    """Return ``report.html`` with its MIME type set to ``text/html``."""
    response = send_file('report.html', mimetype='text/html')
    return response

# #New callback for downloading report in PDF format
# @app.server.route('/download_report_pdf')
# def download_report_pdf():
#     return send_file('report.pdf',
#                           mimetype='application/pdf',
#                         #   attachment_filename='report.pdf',
#                           as_attachment=True)

@app.callback(Output('output-datatable', 'children'),
              Input('upload-data', 'contents'),
              State('upload-data', 'filename'),
              State('upload-data', 'last_modified'))
def update_output(list_of_contents, list_of_names, list_of_dates):
    """Render one panel per uploaded file.

    Each (contents, filename, last_modified) triple is passed to
    ``parse_contents``; the resulting components are returned as a list.
    Returns ``None`` when nothing has been uploaded.
    """
    if list_of_contents is None:
        return None

    panels = []
    uploads = zip(list_of_contents, list_of_names, list_of_dates)
    for payload, name, modified in uploads:
        panels.append(parse_contents(payload, name, modified))
    return panels

@app.callback(Output('output-div', 'children'),
              Input('submit-button','n_clicks'),
              State('stored-data','data'),
              State('xaxis-data','value'),
              State('yaxis-data', 'value'),
              State('regression-type', 'value'))
def update_graph(n, data, x_data, y_data, regression_type):
    """Draw the selected columns, optionally with a fitted regression curve.

    Args:
        n: Click count of the "Create Graph" button; ``None`` until the
            button has been pressed.
        data: Rows held in the ``stored-data`` store, as a list of records.
        x_data: Name of the column chosen for the X axis, or ``None``.
        y_data: Name of the column chosen for the Y axis, or ``None``.
        regression_type: ``'linear'``, ``'tree'``, ``'neighbors'``, or
            ``None`` when no regression has been selected.

    Returns:
        ``dash.no_update`` before the first click; a short message when an
        axis is missing or no usable rows remain; otherwise a ``dcc.Graph``
        with the scatter plot, plus the model's predictions as a line when a
        regression type is selected.
    """
    if n is None:
        return dash.no_update

    if x_data is None or y_data is None:
        # Indexing the frame with None would raise a KeyError.
        return html.Div(['Select both an X and a Y column before creating a graph.'])

    df = pd.DataFrame.from_dict(data)
    # Only drop rows missing a value in the plotted columns (gaps elsewhere
    # are irrelevant), and sort by x so the 'lines' trace is drawn left to
    # right instead of zigzagging across the plot.
    df = df.dropna(subset=[x_data, y_data]).sort_values(x_data)

    if df.empty:
        return html.Div(['No rows have values in both selected columns.'])

    # Regression type -> (model class, colour of the fitted line).
    regressors = {
        'linear': (lm.LinearRegression, 'purple'),
        'tree': (tree.DecisionTreeRegressor, 'green'),
        'neighbors': (neighbors.KNeighborsRegressor, 'cyan'),
    }

    if regression_type not in regressors:
        # No regression selected: show the plain scatter plot. Previously
        # this figure was built and then thrown away, so nothing appeared.
        return dcc.Graph(figure=px.scatter(df, x=x_data, y=y_data))

    model_cls, line_color = regressors[regression_type]
    x = df[[x_data]]  # 2-D frame of features, as passed to fit/predict
    y = df[y_data]

    model = model_cls()
    model.fit(x, y)
    y_pred = model.predict(x)

    # Scatter of the observations with the fitted curve on top.
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df[x_data], y=y, mode='markers'))
    fig.add_trace(go.Scatter(x=df[x_data], y=y_pred, mode='lines',
                             line_color=line_color, name='regression'))
    return dcc.Graph(figure=fig)

# Script entry point: start the Dash server only when this file is executed
# directly, with debug mode switched off.
if __name__ == '__main__':
    app.run_server(debug=False)