# Download a BigQuery table into a pandas DataFrame via the BigQuery Storage API.
import os

# google cloud imports
import google.auth
from google.cloud import bigquery, bigquery_storage

# Point the Google auth library at the local credentials file. This must be
# set before google.auth.default() is called below.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "cred_access_token.json"

# Explicitly create a credentials object. This allows you to use the same
# credentials for both the BigQuery and BigQuery Storage clients, avoiding
# unnecessary API calls to fetch duplicate authentication tokens.
credentials, your_project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/bigquery"]
)

# Make clients.
bqclient = bigquery.Client(credentials=credentials, project=your_project_id)
bqstorageclient = bigquery_storage.BigQueryReadClient(credentials=credentials)

# Download query results.
query_string = """
SELECT
user_id, chargebee_cancelled_at, chargebee_plan_id, chargebee_started_at, chargebee_status
FROM
`zetland-master.user_churn_eda.test_data`
"""

# Run the query, wait for it to finish, and pull the rows into a DataFrame
# using the storage client created above.
raw_data = (
    bqclient.query(query_string)
    .result()
    .to_dataframe(bqstorage_client=bqstorageclient)
)
print(raw_data.columns)
# NAVIGATE TO PROJECT FOLDER
% python -m venv .my-venv
% source .my-venv/bin/activate # activates the .my-venv environment created above
# INSTALL BASE REQS
% pip install -r /Users/nicolai/Desktop/repos/base_reqs.txt
% ipython kernel install --user --name=projectname
# IF VSC OPEN, RESTART VSC.
% deactivate
# Reorder `lst` in place so the values closest to the target `i` come first.
i = 6
lst = [3, 7, 8, 10, 5, 12]


def _gap_to_target(value):
    """Return the absolute distance between `value` and the target `i`."""
    return abs(value - int(i))


# list.sort is stable, so equally distant values (7 and 5) keep their
# original relative order.
lst.sort(key=_gap_to_target)
# [7, 5, 8, 3, 10, 12]
import numpy as np
import pandas as pd

# Happiness-report style indicators for South Sudan, one row per year.
current_country = pd.DataFrame({
    'Country': ['South Sudan', 'South Sudan', 'South Sudan', 'South Sudan'],
    'Region': ['Sub-Saharan Africa', 'Sub-Saharan Africa', 'Sub-Saharan Africa', 'Sub-Saharan Africa'],
    'Happiness Rank': [143, 147, 154, 156],
    'Score': [3.83200, 3.59100, 3.25400, 2.85300],
    # The second value was written as 397249 (decimal point dropped); every
    # other entry in this column lies between 0.3 and 0.4.
    'GDP per capita': [0.393940, 0.397249, 0.337000, 0.306000],
    'Family': [0.185190, 0.601323, 0.608000, 0.575000],
    'Life Expectancy': [0.157810, 0.163486, 0.177000, 0.295000],
    'Freedom': [0.196620, 0.147062, 0.112000, 0.010000],
    'Trust in Government': [0.130150, 0.116794, 0.106000, 0.091000],
    'Generosity': [0.258990, 0.285671, 0.224000, 0.202000],
    'Dystopia Residual': [2.509300, 1.879416, 1.690000, 1.374000],
    'Year': ['2016', '2017', '2018', '2019'],
})

# Placeholder row for the missing year: identifying columns are copied from
# the first existing row, every indicator is NaN so it can be estimated.
placeholder_row = {
    'Country': current_country["Country"].iloc[0],
    'Region': current_country["Region"].iloc[0],
    'Happiness Rank': 0,
    'Score': np.nan,
    'GDP per capita': np.nan,
    'Family': np.nan,
    'Life Expectancy': np.nan,
    'Freedom': np.nan,
    'Trust in Government': np.nan,
    'Generosity': np.nan,  # was left out of the row; listed explicitly now
    'Dystopia Residual': np.nan,
    'Year': '2015',  # string, to match the other entries of the column
}

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
interpol_subset = pd.concat(
    [current_country, pd.DataFrame([placeholder_row])],
    ignore_index=True,
)

# Interpolate the numeric columns only: the text columns carry nothing to
# interpolate, and interpolating object-dtype columns is deprecated in
# recent pandas. `order` is dropped because it only applies to the
# polynomial/spline methods, not to pchip.
# NOTE(review): the interpolation runs along the row index, and the 2015 row
# sits last, so its values are extrapolated past the 2019 row. If a back-cast
# before 2016 is intended, put the row first and pass
# limit_direction="backward" - confirm the intent.
numeric_columns = interpol_subset.select_dtypes(include="number").columns
interpol_subset[numeric_columns] = interpol_subset[numeric_columns].interpolate(
    method="pchip"
)
from matplotlib.pyplot import figure

# Open a new matplotlib figure, 8 inches wide by 6 inches tall.
fig_size_inches = (8, 6)
figure(figsize=fig_size_inches)