Python和Pandas日期工具的区别

Source

unit定义

pandas时间序列解释

import pandas as pd
import numpy as np
# 引入datetime模块,创建date、time和datetime对象,微秒microsecond
import datetime
# Build one date, one time and one datetime object. The datetime carries the
# same calendar day and wall-clock reading as the separate date and time, so
# the printed values are easy to compare.
date = datetime.date(year=2020, month=6, day=7)
time = datetime.time(hour=12, minute=30, second=19, microsecond=463198)
dt = datetime.datetime(year=2020, month=6, day=7,
                       hour=12, minute=30, second=19, microsecond=463198)

print("date is ", date)
print("time is", time)
print("datetime is", dt)
# date is  2020-06-07
# time is 12:30:19.463198
# datetime is 2020-06-07 12:30:19.463198

# Create and print a timedelta object.
td = datetime.timedelta(weeks=2, days=5, hours=10, minutes=20,
                        seconds=6.73, milliseconds=99, microseconds=8)
print(td)
#19 days, 10:20:06.829008

# date and datetime can both be combined with a timedelta by addition/subtraction.
print('new date is', date + td)
print('new datetime is', dt + td)
#new date is 2020-06-26
#new datetime is 2020-06-26 22:50:26.292206

# A bare time cannot be added to a timedelta: the addition raises TypeError.
# Catch it and print the message so the demonstration does not abort the
# rest of the script.
try:
    time + td
except TypeError as err:
    print('TypeError:', err)
#TypeError: unsupported operand type(s) for +: 'datetime.time' and 'datetime.timedelta'
# Now look at pandas' Timestamp object. The Timestamp constructor is flexible
# and accepts many kinds of input.
pd.Timestamp(year=2021, month=2, day=28, hour=5, minute=10, second=8, microsecond=99)
#Timestamp('2021-02-28 05:10:08.000099')
pd.Timestamp('2016/1/10')
#Timestamp('2016-01-10 00:00:00')
pd.Timestamp('2014-5/10')
#Timestamp('2014-05-10 00:00:00')
pd.Timestamp('Jan 3, 2019 20:45.56')
#Timestamp('2019-01-03 20:45:33')
pd.Timestamp('2016-01-05T05:34:43.123456789')
#Timestamp('2016-01-05 05:34:43.123456789')
# An integer or float can also be passed; it is interpreted as the amount of
# time elapsed since 1970-01-01 (nanoseconds unless a unit is given).
pd.Timestamp(500)
#Timestamp('1970-01-01 00:00:00.000000500')
pd.Timestamp(5000, unit='d') # 'd' means days; unit is a string such as D, s, ms, us, ns (default 'ns')
#Timestamp('1983-09-10 00:00:00')
pd.Timestamp(5000, unit='s')
#Timestamp('1970-01-01 01:23:20')
# pandas' to_datetime function is similar to Timestamp, but some of its
# parameters differ.
pd.to_datetime('2015-5-13')
#Timestamp('2015-05-13 00:00:00')
pd.to_datetime('2021-1-3', dayfirst=True) 
# In many international date formats the day comes before the month; passing
# dayfirst=True is meant to handle that case.
# NOTE(review): the recorded result below reads this year-first string as
# 3 January, so dayfirst=True does not appear to change the outcome for this
# particular input - confirm, or pick an example where it matters.
#Timestamp('2021-01-03 00:00:00')
pd.Timestamp('Saturday September 30th, 2017')
#Timestamp('2017-09-30 00:00:00')
# An explicit format string lets to_datetime parse text with fixed literal
# parts around the date and time fields.
pd.to_datetime('Start Date: Sep 30, 2017 Start Time: 1:30 pm', 
               format='Start Date: %b %d, %Y Start Time: %I:%M %p')
#Timestamp('2017-09-30 13:30:00')
# origin moves the reference point: 100 days counted from 2021-01-01.
pd.to_datetime(100, unit='D', origin='2021-1-1')
#Timestamp('2021-04-11 00:00:00')
# to_datetime can convert a list or Series of strings or integers into
# timestamps.
s = pd.Series([10, 100, 1000, 10000])
# The integers are interpreted as day counts since 1970-01-01 (unit='D').
pd.to_datetime(s, unit='D')
'''
0   1970-01-11
1   1970-04-11
2   1972-09-27
3   1997-05-19
dtype: datetime64[ns]

'''
s = pd.Series(['12-5-2015', '14-1-2013', '20/12/2017', '40/23/2017'])
# dayfirst=True reads these strings as day-month-year; errors='coerce' turns
# the unparseable '40/23/2017' into NaT instead of raising (see the recorded
# output below).
pd.to_datetime(s, dayfirst=True, errors='coerce')
'''
0   2015-05-12
1   2013-01-14
2   2017-12-20
3          NaT
dtype: datetime64[ns]
'''
# A plain list of strings yields a DatetimeIndex.
pd.to_datetime(['Aug 3 1999 3:45:56', '10/31/2017'])
#DatetimeIndex(['1999-08-03 03:45:56', '2017-10-31 00:00:00'], dtype='datetime64[ns]', freq=None)
# pandas' Timedelta and to_timedelta can be used to represent an amount of time.
# The to_timedelta function produces a Timedelta object.
# Like to_datetime, to_timedelta can also convert a list or Series into
# Timedelta values.
pd.Timedelta('12 days 5 hours 3 minutes 123456789 nanoseconds')
# NOTE(review): the recorded repr below stops at microseconds although the
# input specifies 123456789 nanoseconds; newer pandas versions may print the
# full nanosecond value - confirm against the pandas version in use.
#Timedelta('12 days 05:03:00.123456')
pd.Timedelta(days=5, minutes=7.34)
#Timedelta('5 days 00:07:20.400000')
pd.Timedelta(100, unit='W') # 'W' means weeks
#Timedelta('700 days 00:00:00')
# With errors='ignore' an unparseable input is handed back unchanged.
# NOTE(review): errors='ignore' is reported as deprecated in recent pandas
# releases - confirm before relying on it.
pd.to_timedelta('5 dayz', errors='ignore')
#'5 dayz'
pd.to_timedelta('67:15:45.454')
#Timedelta('2 days 19:15:45.454000')
s = pd.Series([10, 100])
pd.to_timedelta(s, unit='s')
'''
0   00:00:10
1   00:01:40
dtype: timedelta64[ns]
'''
time_strings = ['2 days 24 minutes 89.67 seconds', '00:45:23.6']
pd.to_timedelta(time_strings)
'''
TimedeltaIndex(['2 days 00:25:29.670000', '0 days 00:45:23.600000'], dtype='timedelta64[ns]', freq=None)
'''
# Timedeltas support arithmetic: scaling, adding to a Timestamp, element-wise
# addition of two TimedeltaIndex objects, and division by another Timedelta.
pd.Timedelta('12 days 5 hours 3 minutes') * 2
#Timedelta('24 days 10:06:00')
pd.Timestamp('1/1/2017') + pd.Timedelta('12 days 5 hours 3 minutes') * 2
#Timestamp('2017-01-25 10:06:00')
td1 = pd.to_timedelta([10, 100], unit='s')
td2 = pd.to_timedelta(['3 hours', '4 hours'])
td1 + td2
#TimedeltaIndex(['03:00:10', '04:01:40'], dtype='timedelta64[ns]', freq=None)
pd.Timedelta('12 days') / pd.Timedelta('3 days')
#4.0
# Timestamps and Timedeltas have many useful attributes and methods; a few are
# listed below.
ts = pd.Timestamp('2020-10-1 4:23:23.9')
ts.ceil('d')# the argument chooses the unit to round up to (here: whole days)
#Timestamp('2020-10-02 00:00:00')
ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second
#(2020, 10, 1, 4, 23, 23)

 

{Total number of days in the year of the given date}
  DaysInYear(dt);        //365

{Total number of days in the month of the given date}
  DaysInMonth(dt);       //31

{Total number of days in the given year}
  DaysInAYear(2009);     //365

{Total number of days in the given month of the given year}
  DaysInAMonth(2009, 5); //31

{Total number of weeks in the year of the given date}
  WeeksInYear(dt);       //53

{Total number of weeks in the given year}
  WeeksInAYear(2009);    //53

ts.dayofweek, ts.dayofyear, ts.daysinmonth 
# NOTE: MySQL's DAYOFWEEK(d) takes a DATE or DATETIME value and returns 1..7
# for Sunday..Saturday. pandas' Timestamp.dayofweek is numbered differently:
# Monday is 0 and Sunday is 6, which is why the first value recorded below is 3.
# dayofyear is the ordinal day within the year (MySQL's DAYOFYEAR(d) likewise
# returns a value in 1..366).
# daysinmonth is the total number of days in the month of the timestamp.
#(3, 275, 31)
ts.to_pydatetime()
# Timestamp.to_pydatetime() converts the timestamp into a native Python
# datetime object.
#datetime.datetime(2020, 10, 1, 4, 23, 23, 900000)
# The same number gives very different durations depending on the unit.
td = pd.Timedelta(125.8723, unit='h')
td
#Timedelta('5 days 05:52:20.280000')
td = pd.Timedelta(125.8723, unit='d')
td
#Timedelta('125 days 20:56:06.720000')
td.round('min')
#Timedelta('125 days 20:56:00')
td.components
#Components(days=125, hours=20, minutes=56, seconds=6, milliseconds=720, microseconds=0, nanoseconds=0)
td.total_seconds() # total duration expressed in seconds
#10875366.72
# Compare how fast strings are converted to Timestamps with and without an
# explicit format directive.
# (%timeit is an IPython/Jupyter magic, so these lines only run in such a shell.)
date_string_list = ['Sep 30 1984'] * 10000
%timeit pd.to_datetime(date_string_list, format='%b %d %Y')
#2.93 ms ± 109 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%timeit pd.to_datetime(date_string_list)
#3.07 ms ± 87.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

其他: