Skip to content Skip to sidebar Skip to footer

How Do I Make New Columns In Dataframe From A Row Of A Different Column?

Here's my current dataframe: >>>df = {'most_exhibitions' : pd.Series(['USA (1) Netherlands (5)' , 'United Kingdom (2)','China (3) India (5) Pakistan (8)','USA (11) India (

Solution 1:

You can try this approach, which mainly uses string methods. Then I pivot the dataframe and fill the missing values with `fillna`. The original column most_exhibitions is lost along the way, but I hope it is not needed.

import pandas as pd

# Sample data: every row lists one or more countries, each followed by an
# exhibition count in parentheses.
df = pd.DataFrame({
    'most_exhibitions': pd.Series(
        ['USA (1) Netherlands (5)',
         'United Kingdom (2)',
         'China (3) India (5) Pakistan (8)',
         'USA (11) India (4)'],
        index=['a', 'b', 'c', 'd']),
    'name': pd.Series(['Bob', 'Joe', 'Alex', 'Bill'],
                      index=['a', 'b', 'c', 'd']),
})
# change ordering of columns
df = df[['name', 'most_exhibitions']]
print(df)
#   name                  most_exhibitions
# a   Bob           USA (1) Netherlands (5)
# b   Joe                United Kingdom (2)
# c  Alex  China (3) India (5) Pakistan (8)
# d  Bill                USA (11) India (4)

# remove '(' and the last ')'; regex=False because '(' is meant literally,
# not as the start of a regular-expression group
df['most_exhibitions'] = df['most_exhibitions'].str.replace('(', '', regex=False)
df['most_exhibitions'] = df['most_exhibitions'].str.strip(')')

# One row per "Country N" entry. explode() repeats the row label for every
# list element, which is what the older split/apply(pd.Series)/stack/droplevel
# idiom (http://stackoverflow.com/a/34065937/2901002) produced.
# Every entry after the first still carries the space that followed ')';
# strip it, otherwise ' India' and 'India' become different pivot columns.
s = df['most_exhibitions'].str.split(')').explode().str.strip()
s.name = 'most_exhibitions'
print(s)
# a               USA 1
# a       Netherlands 5
# b    United Kingdom 2
# c             China 3
# c             India 5
# c          Pakistan 8
# d              USA 11
# d             India 4
# Name: most_exhibitions, dtype: object

# swap the combined column for the one-entry-per-row version
df = df.drop(['most_exhibitions'], axis=1)
df = df.join(s)
print(df)
#    name  most_exhibitions
# a   Bob             USA 1
# a   Bob     Netherlands 5
# b   Joe  United Kingdom 2
# c  Alex           China 3
# c  Alex           India 5
# c  Alex        Pakistan 8
# d  Bill            USA 11
# d  Bill           India 4

# extract numbers and convert them to integer
# (expand=False returns a Series rather than a one-column DataFrame)
df['numbers'] = df['most_exhibitions'].str.extract(r"(\d+)", expand=False).astype('int')
# extract text of most_exhibitions: everything before the last space
df['most_exhibitions'] = df['most_exhibitions'].str.rsplit(' ', n=1).str[0]
print(df)
#    name most_exhibitions  numbers
# a   Bob              USA        1
# a   Bob      Netherlands        5
# b   Joe   United Kingdom        2
# c  Alex            China        3
# c  Alex            India        5
# c  Alex         Pakistan        8
# d  Bill              USA       11
# d  Bill            India        4

# pivot dataframe: one row per name, one column per country
df = df.pivot(index='name', columns='most_exhibitions', values='numbers')
# NaN to empty string
df = df.fillna('')
print(df)
# Approximate layout (spacing and number formatting depend on the pandas
# version; counts may display as 5.0 because the NaN holes made the pivoted
# columns float before fillna):
# most_exhibitions China India Netherlands Pakistan USA United Kingdom
# name
# Alex                 3     5                    8
# Bill                       4                       11
# Bob                                    5            1
# Joe                                                                2

EDIT:

I tried to add all the columns from the recommended output by using the `merge` function:

import pandas as pd

# Sample data: every row lists one or more countries, each followed by an
# exhibition count in parentheses.
df = pd.DataFrame({
    'most_exhibitions': pd.Series(
        ['USA (1) Netherlands (5)',
         'United Kingdom (2)',
         'China (3) India (5) Pakistan (8)',
         'USA (11) India (4)'],
        index=['a', 'b', 'c', 'd']),
    'name': pd.Series(['Bob', 'Joe', 'Alex', 'Bill'],
                      index=['a', 'b', 'c', 'd']),
})
# change ordering of columns
df = df[['name', 'most_exhibitions']]
print(df)
#   name                  most_exhibitions
# a   Bob           USA (1) Netherlands (5)
# b   Joe                United Kingdom (2)
# c  Alex  China (3) India (5) Pakistan (8)
# d  Bill                USA (11) India (4)

# copy original to a new dataframe so it can be merged back at the end;
# reset_index() turns the a/b/c/d labels into a regular 'index' column
df1 = df.reset_index().copy()

# remove '(' and the last ')'; regex=False because '(' is meant literally,
# not as the start of a regular-expression group
df['most_exhibitions'] = df['most_exhibitions'].str.replace('(', '', regex=False)
df['most_exhibitions'] = df['most_exhibitions'].str.strip(')')

# One row per "Country N" entry. explode() repeats the row label for every
# list element, which is what the older split/apply(pd.Series)/stack/droplevel
# idiom (http://stackoverflow.com/a/34065937/2901002) produced.
# Every entry after the first still carries the space that followed ')';
# strip it, otherwise ' India' and 'India' become different pivot columns.
s = df['most_exhibitions'].str.split(')').explode().str.strip()
s.name = 'most_exhibitions'
print(s)
# a               USA 1
# a       Netherlands 5
# b    United Kingdom 2
# c             China 3
# c             India 5
# c          Pakistan 8
# d              USA 11
# d             India 4
# Name: most_exhibitions, dtype: object

# swap the combined column for the one-entry-per-row version
df = df.drop(['most_exhibitions'], axis=1)
df = df.join(s)
print(df)
#    name  most_exhibitions
# a   Bob             USA 1
# a   Bob     Netherlands 5
# b   Joe  United Kingdom 2
# c  Alex           China 3
# c  Alex           India 5
# c  Alex        Pakistan 8
# d  Bill            USA 11
# d  Bill           India 4

# extract numbers and convert them to integer
# (expand=False returns a Series rather than a one-column DataFrame)
df['numbers'] = df['most_exhibitions'].str.extract(r"(\d+)", expand=False).astype('int')
# extract text of most_exhibitions: everything before the last space
df['most_exhibitions'] = df['most_exhibitions'].str.rsplit(' ', n=1).str[0]
print(df)
#    name most_exhibitions  numbers
# a   Bob              USA        1
# a   Bob      Netherlands        5
# b   Joe   United Kingdom        2
# c  Alex            China        3
# c  Alex            India        5
# c  Alex         Pakistan        8
# d  Bill              USA       11
# d  Bill            India        4

# pivot dataframe: one row per name, one column per country
df = df.pivot(index='name', columns='most_exhibitions', values='numbers')
# NaN to empty string
df = df.fillna('')
# move 'name' out of the index so it can serve as the merge key
df = df.reset_index()
print(df)
# Approximate layout (spacing and number formatting depend on the pandas
# version; counts may display as 5.0 because the NaN holes made the pivoted
# columns float before fillna):
# most_exhibitions  name China India Netherlands Pakistan USA United Kingdom
# 0                 Alex     3     5                    8
# 1                 Bill           4                       11
# 2                  Bob                     5              1
# 3                  Joe                                                   2

print(df1)
#   index  name                  most_exhibitions
# 0     a   Bob           USA (1) Netherlands (5)
# 1     b   Joe                United Kingdom (2)
# 2     c  Alex  China (3) India (5) Pakistan (8)
# 3     d  Bill                USA (11) India (4)

# attach the per-country columns to the original rows, then restore the
# original a/b/c/d labels as the index
df = pd.merge(df1, df, on=['name'])
df = df.set_index('index')
print(df)
# Approximate layout (pandas may wrap the wide table across several chunks):
#        name                  most_exhibitions China India Netherlands Pakistan USA United Kingdom
# index
# a       Bob           USA (1) Netherlands (5)                       5            1
# b       Joe                United Kingdom (2)                                                   2
# c      Alex  China (3) India (5) Pakistan (8)     3     5                    8
# d      Bill                USA (11) India (4)           4                       11

Post a Comment for "How Do I Make New Columns In Dataframe From A Row Of A Different Column?"