Make a table in IPython Notebook:

1
2
3
4
5
# Import only the names this snippet uses rather than `import *`, so the
# notebook namespace is not silently overwritten by other ipy_table names.
from ipy_table import apply_theme, make_table, set_global_style

# `data` is not defined in this snippet; it is assumed to be the table
# content (presumably a list of rows) created earlier in the notebook.
make_table(data)
apply_theme('basic')
# Right-align every cell and format floats with two decimals.
set_global_style(align='right', float_format='%0.2F')

Matplotlib

Network plot with matplotlib and networkx:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns

# Request a 16x12 inch default figure size through seaborn's context settings.
# NOTE(review): newer seaborn releases appear to accept only context-related
# keys in `rc` here, so "figure.figsize" may be ignored — confirm against the
# installed seaborn version.
sns.set_context(rc={"figure.figsize": (16, 12)})
def network(nodeList, FTLList, LTLList, posiList=None, archieve=0):
    """Draw a directed network whose edges come in two styles (FTL and LTL).

    Parameters
    ----------
    nodeList : iterable
        Node identifiers to add to the graph.
    FTLList : list
        Edges drawn as solid green lines.
    LTLList : list
        Edges drawn as dashed blue lines. Must be a list as well, because the
        two edge lists are concatenated with ``+``.
    posiList : optional
        Node positions passed straight to the networkx drawing functions.
        When falsy, positions are computed with the Graphviz layout.
    archieve : optional
        When truthy, the figure is also written to ``volumes.pdf``.

    Notes
    -----
    Node labels are read from a module-level mapping named ``store``, which
    is not defined in this snippet and must exist before the call.
    """
    G = nx.DiGraph()

    G.add_nodes_from(nodeList)
    G.add_edges_from(FTLList + LTLList)

    if posiList:
        pos = posiList
    else:
        # `nx.graphviz_layout` only exists in old networkx releases; newer
        # ones expose the same function under `nx.nx_agraph`.
        graphviz_layout = getattr(nx, "graphviz_layout", None)
        if graphviz_layout is None:
            graphviz_layout = nx.nx_agraph.graphviz_layout
        pos = graphviz_layout(G)

    nx.draw_networkx_nodes(G, pos, node_size=1600, node_color="w")
    nx.draw_networkx_edges(G, pos, edgelist=FTLList, width=2, edge_color="g", style="solid")
    nx.draw_networkx_edges(G, pos, edgelist=LTLList, width=2, edge_color="b", style="dashed")

    labels = {key: store[key] for key in nodeList}
    nx.draw_networkx_labels(G, pos, labels=labels, font_size=12, font_family='sans-serif')

    plt.axis('off')
    if archieve:
        # Save before showing: once show() has displayed (and possibly
        # closed) the figure, a later savefig() can write an empty page.
        plt.savefig('volumes.pdf', format='pdf', bbox_inches='tight')
    plt.show()

Numpy

Grubbs' Outlier Test in Python:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
import numpy as np
from scipy import stats

def outlier(Y, alpha=0.95):
    """Run a two-sided Grubbs' test for a single outlier in ``Y``.

    Parameters
    ----------
    Y : numpy.ndarray or numpy.ma.MaskedArray
        One-dimensional sample. Masked entries are ignored.
    alpha : float, optional
        Confidence level of the test (0.95 means a 5% significance level).

    Returns
    -------
    list
        ``[is_outlier, index]`` where ``index`` is the position of the value
        farthest from the mean and ``is_outlier`` tells whether its Grubbs'
        statistic exceeds the critical value. ``[False, 0]`` is returned when
        the test is undefined (fewer than three valid values, or no spread).
    """
    # Count only valid observations; `.size` would include masked entries.
    N = int(np.ma.count(Y))
    if N < 3:
        # The critical value needs N - 2 > 0 degrees of freedom.
        return [False, 0]

    # Grubbs' statistic is defined with the sample standard deviation.
    s = Y.std(ddof=1)
    if s == 0:
        # All values are identical: nothing can be an outlier.
        return [False, 0]

    G = np.abs(Y - Y.mean()) / s
    # Upper critical value of Student's t at significance (1 - alpha) / (2N).
    t = stats.t.isf((1 - alpha) / (2 * N), N - 2)
    GTest = (N - 1) / np.sqrt(N) * np.sqrt(t**2 / (N - 2 + t**2))
    return [G.max() > GTest, G.argmax()]

# Repeatedly apply the test, masking each detected outlier before re-testing.
# `data` is not defined in this snippet; it is assumed to be a 1-D NumPy
# array created elsewhere — TODO confirm.
result = outlier(data)
# One flag per observation; non-zero entries are used as the mask below.
# (The original read `self.data`, but there is no `self` at module level.)
outliers = np.zeros(len(data))
while result[0]:
    outliers[result[1]] = 1
    result = outlier(np.ma.array(data, mask=outliers))

Pandas

Un-pivot all sheets in an excel file:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
def unpivotor(filename, columns, output='Unpivoted.xlsx'):
    """Melt every sheet of an Excel workbook into one long table and save it.

    Parameters
    ----------
    filename : str
        Path of the workbook to read.
    columns : list
        Four column names, in output order: ``columns[0]`` receives the sheet
        name, ``columns[1]`` is the identifier column kept while melting (it
        must exist in every sheet), ``columns[2]`` receives the former column
        headers and ``columns[3]`` the cell values.
    output : str, optional
        Path of the workbook to write; the result goes to a sheet named
        ``'Unpivoted'`` without the index.
    """
    frames = []
    # The context manager closes the workbook once all sheets are read.
    with pd.ExcelFile(filename) as xl:
        for s in xl.sheet_names:
            df = pd.melt(
                xl.parse(s),
                id_vars=columns[1], value_vars=None,
                var_name=columns[2], value_name=columns[3]
            )
            df[columns[0]] = s
            frames.append(df)

    # Concatenate once at the end; DataFrame.append was removed from pandas
    # and re-copied the accumulated frame on every iteration.
    if frames:
        unpivoted = pd.concat(frames, ignore_index=True)
    else:
        unpivoted = pd.DataFrame(columns=columns)

    unpivoted[columns].to_excel(output, sheet_name='Unpivoted', index=False)

Calculate business days between two dates, taking holidays into account:

1
2
3
4
5
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import USFederalHolidayCalendar

# Business-day offset that uses the US federal holiday calendar.
bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar())
# 'STARTING DATE' and 'ENDING DATE' are placeholders: replace them with real
# dates before running. The result is a date range stepping by `bday_us`.
bussinesdays = pd.date_range('STARTING DATE','ENDING DATE',freq=bday_us)

Sort count of grouped pandas dataframe:

1
# Select the 'count' column of the unstacked describe() output, ordered from
# largest to smallest. `grouped` is assumed to be a pandas groupby object
# defined elsewhere — TODO confirm.
# NOTE(review): `.sort(...)` was removed from pandas in favour of
# `.sort_values(...)`, and the shape returned by `describe()` on groupby
# objects has changed across versions — confirm the targeted pandas version.
grouped.describe().unstack().sort('count', ascending=False)['count']

Useful pandas snippets (Source: Useful Pandas Snippets):

1
2
# Convert a Series to a numeric dtype; values that cannot be parsed as
# numbers become NaN. `convert_objects` was removed from pandas, so
# `pd.to_numeric(..., errors='coerce')` is used as its replacement.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

Get a list from pandas dataframe:

1
# Nested list with one inner list per DataFrame row; only cell values are
# included, not the index or the column labels.
df.values.tolist()

Skip BOM in utf-8 file with pandas’ read_csv:

1
2
# The 'utf-8-sig' codec drops a leading byte-order mark, so it does not end up
# in the first value. The file is read as tab-separated with no header row.
# 'YOUR FILE NAME', `names=[]` and `dtype={}` are placeholders: supply the
# path, the column names and the per-column dtypes before running.
df = pd.read_csv('YOUR FILE NAME', encoding='utf-8-sig', sep='\t',
    header=None, names=[], dtype={})

Scikit-learn

Generate random training and testing data set from arrays:

1
2
# `sklearn.cross_validation` was removed from scikit-learn; the function now
# lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# `training_data` is a placeholder for the array(s) to split. 20% of the
# samples go to the test set, and the fixed seed makes the split reproducible.
train, test = train_test_split(training_data, test_size=0.2, random_state=42)