How Do I Make New Columns In Dataframe From A Row Of A Different Column?
Here's my current dataframe: >>>df = {'most_exhibitions' : pd.Series(['USA (1) Netherlands (5)' , 'United Kingdom (2)','China (3) India (5) Pakistan (8)','USA (11) India (
Solution 1:
You can try this approach, which mainly uses string methods. I then pivot the dataframe and replace the resulting NaN values with fillna. The original column most_exhibitions is lost in the process, but I hope it is not needed.
import pandas as pd

# Sample data: every 'most_exhibitions' cell packs one or more
# "Country (count)" pairs into a single string.
df = pd.DataFrame({
    'most_exhibitions': pd.Series(
        ['USA (1) Netherlands (5)',
         'United Kingdom (2)',
         'China (3) India (5) Pakistan (8)',
         'USA (11) India (4)'],
        index=['a', 'b', 'c', 'd']),
    'name': pd.Series(['Bob', 'Joe', 'Alex', 'Bill'],
                      index=['a', 'b', 'c', 'd']),
})

# change ordering of columns
df = df[['name', 'most_exhibitions']]
print(df)
#    name                  most_exhibitions
# a   Bob           USA (1) Netherlands (5)
# b   Joe                United Kingdom (2)
# c  Alex  China (3) India (5) Pakistan (8)
# d  Bill                USA (11) India (4)

# remove '(' and last ')'
# regex=False: '(' is a regex metacharacter, so match it literally.
df['most_exhibitions'] = df['most_exhibitions'].str.replace('(', '', regex=False)
df['most_exhibitions'] = df['most_exhibitions'].str.strip(')')

# http://stackoverflow.com/a/34065937/2901002
# Split on the remaining ')' into one column per pair, then stack the columns
# into a long Series; dropna() discards the padding of rows with fewer pairs.
s = df['most_exhibitions'].str.split(')', expand=True).stack().dropna()
s.index = s.index.droplevel(-1)
# Every piece after the first starts with a space (' Netherlands 5'); strip it
# so the same country always yields the same column name after the pivot.
s = s.str.strip()
s.name = 'most_exhibitions'
print(s)
# a       USA 1
# a    Netherlands 5
# b    United Kingdom 2
# c     China 3
# c     India 5
# c    Pakistan 8
# d      USA 11
# d     India 4
# Name: most_exhibitions, dtype: object

# Swap the packed column for the long one; rows of df are repeated per pair.
df = df.drop(['most_exhibitions'], axis=1)
df = df.join(s)
print(df)
#    name  most_exhibitions
# a   Bob             USA 1
# a   Bob     Netherlands 5
# b   Joe  United Kingdom 2
# c  Alex           China 3
# c  Alex           India 5
# c  Alex        Pakistan 8
# d  Bill            USA 11
# d  Bill           India 4

# extract numbers and convert them to integer
df['numbers'] = (df['most_exhibitions']
                 .str.extract(r'(\d+)', expand=False)
                 .astype('int'))
# extract text of most_exhibitions (everything before the last space)
df['most_exhibitions'] = df['most_exhibitions'].str.rsplit(' ', n=1).str[0]
print(df)
#    name most_exhibitions  numbers
# a   Bob              USA        1
# a   Bob      Netherlands        5
# b   Joe   United Kingdom        2
# c  Alex            China        3
# c  Alex            India        5
# c  Alex         Pakistan        8
# d  Bill              USA       11
# d  Bill            India        4

# pivot dataframe: one row per name, one column per country
df = df.pivot(index='name', columns='most_exhibitions', values='numbers')
# NaN to empty string
df = df.fillna('')
print(df)
# Result: rows Alex, Bill, Bob, Joe; columns China, India, Netherlands,
# Pakistan, USA, United Kingdom; cells without a count are empty strings.
EDIT:
I tried to add all the columns shown in the recommended output by using the function merge:
import pandas as pd

# Sample data: every 'most_exhibitions' cell packs one or more
# "Country (count)" pairs into a single string.
df = pd.DataFrame({
    'most_exhibitions': pd.Series(
        ['USA (1) Netherlands (5)',
         'United Kingdom (2)',
         'China (3) India (5) Pakistan (8)',
         'USA (11) India (4)'],
        index=['a', 'b', 'c', 'd']),
    'name': pd.Series(['Bob', 'Joe', 'Alex', 'Bill'],
                      index=['a', 'b', 'c', 'd']),
})

# change ordering of columns
df = df[['name', 'most_exhibitions']]
print(df)
#    name                  most_exhibitions
# a   Bob           USA (1) Netherlands (5)
# b   Joe                United Kingdom (2)
# c  Alex  China (3) India (5) Pakistan (8)
# d  Bill                USA (11) India (4)

# copy original to new dataframe for joining original df
# reset_index() turns the a/b/c/d labels into a regular 'index' column so
# they survive the merge below.
df1 = df.reset_index().copy()

# remove '(' and last ')'
# regex=False: '(' is a regex metacharacter, so match it literally.
df['most_exhibitions'] = df['most_exhibitions'].str.replace('(', '', regex=False)
df['most_exhibitions'] = df['most_exhibitions'].str.strip(')')

# http://stackoverflow.com/a/34065937/2901002
# Split on the remaining ')' into one column per pair, then stack the columns
# into a long Series; dropna() discards the padding of rows with fewer pairs.
s = df['most_exhibitions'].str.split(')', expand=True).stack().dropna()
s.index = s.index.droplevel(-1)
# Every piece after the first starts with a space (' Netherlands 5'); strip it
# so the same country always yields the same column name after the pivot.
s = s.str.strip()
s.name = 'most_exhibitions'
print(s)
# a       USA 1
# a    Netherlands 5
# b    United Kingdom 2
# c     China 3
# c     India 5
# c    Pakistan 8
# d      USA 11
# d     India 4
# Name: most_exhibitions, dtype: object

# Swap the packed column for the long one; rows of df are repeated per pair.
df = df.drop(['most_exhibitions'], axis=1)
df = df.join(s)
print(df)
#    name  most_exhibitions
# a   Bob             USA 1
# a   Bob     Netherlands 5
# b   Joe  United Kingdom 2
# c  Alex           China 3
# c  Alex           India 5
# c  Alex        Pakistan 8
# d  Bill            USA 11
# d  Bill           India 4

# extract numbers and convert them to integer
df['numbers'] = (df['most_exhibitions']
                 .str.extract(r'(\d+)', expand=False)
                 .astype('int'))
# extract text of most_exhibitions (everything before the last space)
df['most_exhibitions'] = df['most_exhibitions'].str.rsplit(' ', n=1).str[0]
print(df)
#    name most_exhibitions  numbers
# a   Bob              USA        1
# a   Bob      Netherlands        5
# b   Joe   United Kingdom        2
# c  Alex            China        3
# c  Alex            India        5
# c  Alex         Pakistan        8
# d  Bill              USA       11
# d  Bill            India        4

# pivot dataframe: one row per name, one column per country
df = df.pivot(index='name', columns='most_exhibitions', values='numbers')
# NaN to empty string
df = df.fillna('')
# Make 'name' a regular column again so it can serve as the merge key.
df = df.reset_index()
print(df)
# Result: a 'name' column (Alex, Bill, Bob, Joe) followed by one column per
# country; cells without a count are empty strings.

print(df1)
#   index  name                  most_exhibitions
# 0     a   Bob           USA (1) Netherlands (5)
# 1     b   Joe                United Kingdom (2)
# 2     c  Alex  China (3) India (5) Pakistan (8)
# 3     d  Bill                USA (11) India (4)

# Attach the per-country columns to the original rows, then restore the
# original a/b/c/d labels as the index.
df = pd.merge(df1, df, on=['name'])
df = df.set_index('index')
print(df)
# Result: rows a-d with the original 'name' and 'most_exhibitions' columns
# plus one column per country.
Post a Comment for "How Do I Make New Columns In Dataframe From A Row Of A Different Column?"