# Pandas
## Series
### Creating
```python
import pandas as pd  # conventional alias; the snippets refer to pandas as `pd`

# Build a Series from a list of values plus an explicit list of row labels.
s = pd.Series([-1, 10, 2], index=["a", "b", "c"])
print(s)
# a -1
# b 10
# c 2
# dtype: int64
print(s.index) # Index(['a', 'b', 'c'], dtype='object')

# Assigning to .index relabels the rows in place; the values are untouched.
# After this line the labels "a", "b", "c" no longer exist on `s`.
s.index = ["first", "second", "third"]
print(s.index) # Index(['first', 'second', 'third'], dtype='object')
```
### Selection
```python
# A single label returns the scalar stored under it.
# `s` was relabelled to first/second/third when its index was reassigned,
# so the old label "a" would raise KeyError here.
s["first"] # -1
# A list of labels returns a Series restricted to those labels.
s[["first", "third"]]
# first -1
# third 2
# dtype: int64
# A boolean mask keeps only the elements where the condition is True.
s[s > 0]
# second 10
# third 2
# dtype: int64
```
## Dataframes
### Reading
```python
# Load the CSV, using its "State" column as the row labels.
file = pd.read_csv("mottos.csv", index_col="State")
file.head(n=5)  # first five rows
file.tail(n=5)  # last five rows
```
### Creating
```python
# 1) From a list of rows, naming the columns explicitly.
pd.DataFrame(
    data=[[1, "one"], [2, "two"]],
    columns=["Number", "Description"],
)
# Number Description
# 0 1 one
# 1 2 two

# 2) From a dict that maps each column name to its list of values.
pd.DataFrame(
    data={
        "Fruit": ["Strawberry", "Orange"],
        "Price": [5.49, 3.99],
    }
)
# Fruit Price
# 0 Strawberry 5.49
# 1 Orange 3.99

# 3) From a dict of Series: each Series becomes a column and the shared
#    Series labels (r1..r3) become the row index.
s_a = pd.Series(data=["a1", "a2", "a3"], index=["r1", "r2", "r3"])
s_b = pd.Series(data=["b1", "b2", "b3"], index=["r1", "r2", "r3"])
pd.DataFrame(data={"A-column": s_a, "B-column": s_b})
# A-column B-column
# r1 a1 b1
# r2 a2 b2
# r3 a3 b3
```
### Selection
```python
# 3x3 example frame with labelled rows (x, y, z) and columns (a, b, c).
a = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    index=["x", "y", "z"],
    columns=["a", "b", "c"],
)
# shape is (rows, columns), size is the number of cells, ndim the number of axes.
print(a.shape, a.size, a.ndim)
# Row labels first, then column labels.
print(a.index, a.columns)
print(a)
# (3, 3) 9 2
# Index(['x', 'y', 'z'], dtype='object') Index(['a', 'b', 'c'], dtype='object')
# a b c
# x 1 2 3
# y 4 5 6
# z 7 8 9
```
```python
# .loc selects by LABEL as [rows, columns]; label slices include both endpoints.
a.loc["x":"y", "a":"c"]
# a b c
# x 1 2 3
# y 4 5 6
# .loc with lists of labels picks exactly the listed rows and columns.
a.loc[["x", "y"], ["a", "b"]]
# a b
# x 1 2
# y 4 5
# .iloc selects by integer POSITION; the slice end is excluded, as in plain Python.
a.iloc[0:2, 0:2]
# a b
# x 1 2
# y 4 5
# .iloc with lists of positions: rows 0 and 1, columns 1 and 2.
a.iloc[[0, 1], [1, 2]]
# b c
# x 2 3
# y 5 6
# Plain [] with a list of labels selects COLUMNS.
a[["a", "c"]]
# a c
# x 1 3
# y 4 6
# z 7 9
# Plain [] with a slice selects ROWS by position.
a[0:2]
# a b c
# x 1 2 3
# y 4 5 6
```
### Conditional Selection
```python
# NOTE: `babynames` is not defined in this document; the snippets assume a
# DataFrame with at least "Name", "Sex" and "Year" columns.
# Combine boolean masks with & (and); wrap each comparison in parentheses
# because & and | bind more tightly than == and <.
babynames[(babynames["Sex"] == "F") & (babynames["Year"] < 2000)]
# Match one column against several values by chaining | (or) ...
babynames[(babynames["Name"] == "Bella") |
(babynames["Name"] == "Alex") |
(babynames["Name"] == "Ani") |
(babynames["Name"] == "Lisa")]
# ... or, more compactly, test membership in a list with .isin().
names = ["Bella", "Alex", "Ani", "Lisa"]
babynames[babynames["Name"].isin(names)]
# The .str accessor applies a string method to every element; this keeps
# the rows whose name starts with "N".
babynames[babynames["Name"].str.startswith("N")]
```
### Handy Utility Functions
```python
# Summary statistics for the frame's columns.
babynames.describe()
# One randomly chosen row (sample() draws a single row when no count is given).
babynames.sample()
# Five random rows, keeping only the columns from position 2 onward.
babynames.sample(5).iloc[:, 2:]
# How many times each distinct name occurs.
babynames["Name"].value_counts()
# The distinct names, without counts.
babynames["Name"].unique()
# Length of each name, via the vectorised string accessor.
babynames["Name"].str.len()
# Each name converted to lower case.
babynames["Name"].str.lower()
# .map applies an arbitrary Python function to every element; with this
# lambda it yields the name lengths again (assuming every value is a string).
babynames["Name"].map(lambda x: len(x))
```
### Custom Sorts
```python
# Sort a single column (a Series) by its own values, ascending by default.
babynames["Name"].sort_values()
# Sort the whole frame by the "Count" column, largest first.
babynames.sort_values(by="Count", ascending=False)
# Filter first, then sort: rows for 2021 ordered by "Count", largest first.
babynames[babynames["Year"]==2021].sort_values("Count", ascending=False)
# key= receives the whole "Name" column and returns the values to sort by;
# here that is the name length, so the longest names come first.
babynames.sort_values(by="Name", key=lambda x:x.str.len(), ascending=False)
```
### Adding, Modifying, and Removing Columns
```python
# Add: compute each name's length and store it as a new column.
babyname_lengths = babynames["Name"].str.len()
babynames["name_lengths"] = babyname_lengths
# Modify, then remove: order the rows by the helper column (longest names
# first) and drop that column again once the ordering is in place.
babynames = (
    babynames
    .sort_values("name_lengths", ascending=False)
    .drop(columns="name_lengths")
)
```
### Groupby.agg
```python
import numpy as np  # supplies the random values for column "C"

df = pd.DataFrame({
    "A": [1, 1, 2, 2],
    "B": [1, 2, 3, 4],
    "C": np.random.randn(4),
})
# Group the rows by "A" and take the minimum of "B" and "C" within each group.
# agg also accepts any callable (e.g. a lambda), but naming the built-in
# aggregation as a string avoids calling a Python function once per group.
df.groupby("A")[["B", "C"]].agg("min")
# A dict applies different aggregations per column: min and max of "B",
# sum of "C". Columns with several aggregations produce one result column each.
df.groupby("A").agg({"B": ["min", "max"], "C": "sum"})
```