# Hello World!
def hello_world():
    """Print the classic greeting to standard output."""
    print("Hello World!")


hello_world()
Hello World!
# list comprehension: build the list of squares of the integers 0..9
[i * i for i in range(10)]
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
# set comprehension: squares of the odd integers between -10 and 10.
# Python's % yields a non-negative result for a positive divisor, so negative
# odd numbers also satisfy i % 2 == 1; the set then removes the duplicate squares.
{i * i for i in range(-10, 11) if i % 2 == 1}
{1, 9, 25, 49, 81}
# range & slicing: a range accepts extended slices, just like a list
a = range(0, 100)
b = a[::2]  # every second element, i.e. the even numbers below 100
print([*b])
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98]
# slicing a range gives back another range object (no list is materialised)
b[10:20:3]
range(20, 40, 6)
Source : Jake VanderPlas (University of Washington)
Python has continued its upward trajectory from last year and jumped two places to the No. 1 slot, though the top four—Python, C, Java, and C++—all remain very close in popularity.
Source IEEE : https://spectrum.ieee.org/computing/software/the-2017-top-programming-languages
Python, whose usage has been growing faster than R for the last several years, has finally caught up with R, and (barely) overtook it, with 52.6% respondents using it v. 52.1% for R.
Source : KDNuggets 2017 Poll http://www.kdnuggets.com/2017/05/poll-analytics-data-science-machine-learning-software-leaders.html
Source : David Robinson – Why is Python Growing So Quickly? https://stackoverflow.blog/2017/09/14/python-growing-quickly/
# launch Spyder by executing its launcher script with IPython's %run magic
%run C:/Test/Anaconda3/Scripts/spyder-script.py
# fonctions de Bessel
from numpy import linspace, pi
from scipy.special import jn
from matplotlib.pylab import plot
%matplotlib inline
# Sample the interval [0, 4*pi] (linspace's default number of points) and
# draw the integer-order Bessel functions J_5 down to J_0 on the same axes.
x = linspace(0, 4 * pi)
for order in range(5, -1, -1):
    plot(x, jn(order, x))
$J_\nu (x) = \sum_{n=0}^\infty \frac{(-1)^n}{n! \cdot \Gamma(n+1+\nu)} \left ( \frac{x}{2} \right )^{2n+\nu}$
# Multimedia interface: embed an external web page in the notebook output
from IPython.display import IFrame
IFrame("http://calcul.math.cnrs.fr/spip.php?article287", width=800, height=600)
Librairie destinée à manipuler des matrices ou tableaux multidimensionnels ainsi que des fonctions mathématiques opérant sur ces tableaux
Caractéristiques principales
NumPy est optimisé par rapport à Python : ~ x100 pour des boucles numériques
# Python vs NumPy performance
# pure-Python loop: square 10 million integers with a list comprehension
%timeit [i * i for i in range(10000000)]
1 loop, best of 3: 943 ms per loop
# NumPy ufunc: square 1e7 values with a single vectorised expression
import numpy as np
%timeit np.arange(1e7) ** 2
10 loops, best of 3: 63.3 ms per loop
Librairie destinée à tracer et visualiser des données sous forme de graphiques 2D
Caractéristiques principales
Nouvelles librairies
import matplotlib.pyplot as plt
# Three views of the same 12 random samples, side by side in one figure.
x = np.random.random(12)
fig = plt.figure(figsize=(12, 3))

# left panel: the samples joined by a line, then drawn again as star markers
ax1 = fig.add_subplot(1, 3, 1)
ax1.set_title("Figure 1")
ax1.plot(x)
ax1.plot(x, '*')

# middle panel: red bars of width 1 plus a dash-dot black line at the mean
ax2 = fig.add_subplot(1, 3, 2)
ax2.set_title("Figure 2")
ax2.bar(np.arange(12), x, width=1, color='r')
ax2.axhline(y=x.mean(), color='k', linestyle='-.')

# right panel: polar axes; `r` holds the 12 angular positions of the samples
ax3 = fig.add_subplot(1, 3, 3, projection='polar')
ax3.set_title("Figure 3")
r = np.linspace(0, (2 - 1 / 6) * np.pi, 12)
ax3.plot(r, x, '*', color='darkred')
ax3.plot(r, x, 'c:');
Librairie visant à unifier et fédérer un ensemble de librairies Python à usage scientifique
Caractéristiques principales
# Draw 1000 samples from a normal distribution whose parameters are
# themselves picked at random, then show their histogram.
mu = np.random.randint(10)      # mean: integer in [0, 10)
std = np.random.randint(1, 4)   # standard deviation: integer in [1, 4)
print("mu={:.2f}, std={:.2f}".format(mu, std))
a = np.random.normal(loc=mu, scale=std, size=1000)
plt.hist(a, bins=20, color='g', alpha=0.6);
mu=6.00, std=3.00
# estimation of a normal distribution
from scipy.stats import norm
# Estimate the mean and standard deviation of the sample `a`.
mu, std = norm.fit(a)
print("mu={0:.2f}, std={1:.2f}".format(mu, std))
# density=True scales the histogram to unit area so it can be compared with
# the fitted pdf; the former `normed` keyword was removed in Matplotlib 3.1.
plt.hist(a, bins=20, density=True, color='g', alpha=0.6)
# Evaluate the fitted density over the x-range currently shown by the axes.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
y = norm.pdf(x, mu, std)
plt.plot(x, y, 'b--', linewidth=2);
mu=6.13, std=2.89
Librairie fournissant des structures de données et les outils d’analyse associés performants et faciles à utiliser
Caractéristiques principales
import pandas as pnd
# Load the tab-separated, Latin-1 encoded text file into a DataFrame
# and preview its first ten rows.
df = pnd.read_csv(
    "c:/Test/mydata/irmar/nat2015.txt",
    sep="\t",
    encoding='latin-1',
)
df.head(10)
 | sexe | preusuel | annais | nombre |
---|---|---|---|---|
0 | 1 | A | 1980 | 3.0 |
1 | 1 | A | 1998 | 3.0 |
2 | 1 | A | XXXX | 21.0 |
3 | 1 | AADEL | 1976 | 5.0 |
4 | 1 | AADEL | 1978 | 3.0 |
5 | 1 | AADEL | 1980 | 3.0 |
6 | 1 | AADEL | 1981 | 5.0 |
7 | 1 | AADEL | 1982 | 4.0 |
8 | 1 | AADEL | 1983 | 3.0 |
9 | 1 | AADEL | 1987 | 5.0 |
# Sum of `nombre` for every (annais, sexe) pair: one row per `annais` value,
# one column per `sexe` value.
tab = pnd.pivot_table(df, index="annais", columns="sexe", values="nombre", aggfunc="sum")
tab.plot(title="Evolution absolue du nombre de naissances par genre");

# Discard the rows whose `annais` is the "XXXX" placeholder.
df = df[df["annais"] != "XXXX"]

# Count of `preusuel` entries for every (annais, sexe) pair.
tab = pnd.pivot_table(df, index="annais", columns="sexe", values="preusuel", aggfunc="count")
tab.plot(title="Evolution absolue de la diversité des prénoms par genre");
def evol_prenom(prenom):
    """Plot, per year, the share of each sex among the records named *prenom*.

    Reads the module-level DataFrame ``df`` and draws a line plot; nothing
    is returned.

    Args:
        prenom: value compared for exact equality with the ``preusuel``
            column.
    """
    sel = df.loc[df['preusuel'] == prenom]
    evol = sel.pivot_table(index='annais',
                           columns='sexe',
                           values='nombre',
                           aggfunc='sum')
    # Divide every row by its own total so that, for a given year, the
    # values across the `sexe` columns add up to 1.
    tab = evol.div(evol.sum(axis=1), axis=0)
    tab.plot(title="Evolution relative du prénom {}".format(prenom))
evol_prenom('ALIX')
Librairie de machine learning et de data mining
Caractéristiques principales
from sklearn.model_selection import train_test_split
# 1000 random points in the unit square; a point is labelled True when its
# second coordinate lies above the curve sin(pi * first coordinate).
X = np.random.random((2, 1000))
y = X[1] > np.sin(np.pi * X[0])

# Keep 30% of the points for testing; X is transposed so that each row is
# one sample.
X_train, X_test, y_train, y_test = train_test_split(X.T, y, test_size=0.3)

# One colour per point: green for True labels, red for False labels.
c_train = np.where(y_train, 'g', 'r')
c_test = np.where(y_test, 'g', 'r')

fig = plt.figure(figsize=(9, 4))
ax1 = fig.add_subplot(1, 2, 1)
ax1.set_title("Base d'apprentissage")
ax1.scatter(X_train[:, 0], X_train[:, 1], color=c_train)
ax2 = fig.add_subplot(1, 2, 2)
ax2.set_title("Base de test")
ax2.scatter(X_test[:, 0], X_test[:, 1], color=c_test);
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Train a random forest on the training split and predict the test labels.
algo = RandomForestClassifier()
algo.fit(X_train, y_train)
y_pred = algo.predict(X_test)
# One colour per prediction: green for True, red for False.
c_pred = np.where(y_pred, 'g', 'r')

# accuracy_score returns a fraction in [0, 1]; the "%" presentation type
# multiplies by 100 before appending the percent sign (0.98 -> "98.00%").
acc_test = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2%}".format(acc_test))

# Compare the true test labels (left) with the predictions (right).
fig = plt.figure(figsize=(9, 4))
ax1 = fig.add_subplot(121)
ax1.set_title("Base de test")
ax1.scatter(X_test.T[0], X_test.T[1], color=c_test)
ax2 = fig.add_subplot(122)
ax2.set_title("Prédictions")
ax2.scatter(X_test.T[0], X_test.T[1], color=c_pred);
Accuracy: 0.98%