{"id":513,"date":"2019-07-06T23:18:00","date_gmt":"2019-07-06T15:18:00","guid":{"rendered":"http:\/\/note.systw.net\/note\/?p=513"},"modified":"2023-11-03T23:20:01","modified_gmt":"2023-11-03T15:20:01","slug":"python-pandas-view","status":"publish","type":"post","link":"https:\/\/systw.net\/note\/archives\/513","title":{"rendered":"Python pandas view"},"content":{"rendered":"\n<p>\u6aa2\u8996dataframe\u72c0\u6cc1<\/p>\n\n\n\n<p>&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;.<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>\u986f\u793a\u8cc7\u6599\u6982\u6cc1<\/strong><\/h2>\n\n\n\n<p><strong>\u986f\u793a\u8cc7\u6599\u7dad\u5ea6(\u65e5\u8a8c\u7b46\u6578\u548c\u6b04\u4f4d\u6578\u91cf)<\/strong><br>df.shape<br>ex:<br>(100, 5)<\/p>\n\n\n\n<p><strong>\u986f\u793a\u8cc7\u6599\u524d5\u884c<\/strong><br>df.head()<br>ex:<br>\u53ea\u986f\u793a3\u7b46<br>df.head(3)<\/p>\n\n\n\n<p><strong>\u8cc7\u6599\u63cf\u8ff0<\/strong><br>\u4e86\u89e3data\u88e1\u9762\u7684\u72c0\u614b<br>df.describe(include=&#8217;all&#8217;)<br>ex:<br>categorical numeric object<br>count 3 3.0 3<br>unique 3 NaN 3<br>top f NaN c<br>freq 1 NaN 1<br>mean NaN 2.0 NaN<br>std NaN 1.0 NaN<br>min NaN 1.0 NaN<br>25% NaN 1.5 NaN<br>50% NaN 2.0 NaN<br>75% NaN 2.5 NaN<br>max NaN 3.0 NaN<\/p>\n\n\n\n<p><br><strong>\u986f\u793a\u6240\u6709\u6b04\u4f4d\u540d\u7a31<\/strong><br>df.columns.values<br>df.columns<\/p>\n\n\n\n<p><strong>\u986f\u793a\u6240\u6709\u6b04\u4f4d\u7684\u985e\u578b<\/strong><br>df.dtypes<\/p>\n\n\n\n<p><strong>\u986f\u793a\u7279\u5b9a\u6b04\u4f4d<\/strong><br>df.columns[ ]<br>ex:<br>\u986f\u793a\u6b04\u4f4d0\u7684\u540d\u7a31<br>print df.columns[0]<\/p>\n\n\n\n<p><strong>\u986f\u793a\u7279\u5b9a\u5217\u6578<\/strong><br>df[ &lt; start&gt;:&lt; end&gt; ]<br>ex:<br>\u986f\u793a\u7b2c10\u5217\u5230\u7b2c11\u5217\u7684\u8cc7\u6599<br>df[10:11]<\/p>\n\n\n\n<p><strong>\u8cc7\u6599\u6392\u5e8f<\/strong><br>sort_values( &lt; fieldset &gt; [,parameter] 
)<br>ex:<br>dataphoto.sort_values(by=&#8217;field1&#8242;,ascending=True)<br>ex:<br>dataphoto[&#8216;field1&#8217;].sort_values(ascending=True)<br>ex:<br>df.groupby(&#8216;job&#8217;).sort_values([&#8216;job&#8217;,&#8217;count&#8217;],ascending=False)<br>ps:<br>\u53c3\u6578\u52a0 inplace=True\u53ef\u4ee5\u5132\u5b58\u6392\u5e8f\u7d50\u679c<br>&#8230;<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>\u986f\u793a\u8cc7\u6599\u7d71\u8a08\u7d50\u679c<\/strong><\/h2>\n\n\n\n<p><strong>\u8a08\u7b97\u5e73\u5747\u503c<\/strong><br>df[&lt; field name &gt;].mean()<br>ex:<br>&gt; df[\u2018byte&#8217;].mean()<\/p>\n\n\n\n<p><br><strong>\u5217\u51fa\u8a72\u6b04\u4f4d\u6709\u591a\u5c11\u6578\u91cf<\/strong><br>df[&lt; field name &gt;].count()<\/p>\n\n\n\n<p><strong>\u5217\u51fa\u552f\u4e00\u7684\u503c\u548c\u51fa\u73fe\u6b21\u6578<\/strong><br>df[&lt; field name &gt;].value_counts()<br>ex:<br>&gt; df[\u2018protocol&#8217;].value_counts()<br>Tcp: 523<br>TCP: 52<br>udp: 45<br>UDP: 12<\/p>\n\n\n\n<p><strong><br>\u4e86\u89e3\u6bcf\u500b\u6b04\u4f4d\u7a7a\u503c\u7684\u6578\u91cf<\/strong><br>&gt; df.isnull().sum()<br>ex:<br>df.isnull().sum()<br>ipv4 52<br>ipv6 129<br>protocol 0<br>port 0<\/p>\n\n\n\n<p><strong>\u5217\u51fa\u552f\u4e00\u7684\u503c<\/strong><br>df[&lt; field name &gt;].unique()<\/p>\n\n\n\n<p><br><strong>\u5217\u51fa\u4e00\u4e9b\u7d71\u8a08\u8cc7\u8a0a<\/strong><br>df[&lt; field name &gt;].describe()<\/p>\n\n\n\n<p><br>&#8230;.<br>\u7528groupby\u770b\u8cc7\u6599<\/p>\n\n\n\n<p><strong>size()<\/strong><br>\u7528size()\u770bsector\u88e1\u9762\u6bcf\u500b\u7d44\u5225\u5167\u7684\u5927\u5c0f<br>ex:<br>sector = df.groupby(&#8220;fileid&#8221;)<br>print sector.size()<\/p>\n\n\n\n<p><strong>count(*)<\/strong><br>df.groupby(&#8220;&lt; group by column&gt;&#8221;).count()<br>ex:<br>print df.groupby(&#8220;fileid&#8221;).count()<\/p>\n\n\n\n<p><strong>count(distinct column)<\/strong><br>df.groupby(&#8220;&lt; group by column&gt;&#8221;).&lt; column&gt;.nunique()<br>ex:<br>print 
df.groupby(&#8220;fileid&#8221;).userid.nunique()<\/p>\n\n\n\n<p><strong>average<\/strong><br>ex:<br>pddata2=pddata.groupby(by=&#8217;fileid&#8217;).mean()<\/p>\n\n\n\n<p>\u5176\u4ed6groupby\u652f\u63f4\u7684\u529f\u80fd<br>max,min,sum,std,median<\/p>\n\n\n\n<p>&#8230;<\/p>\n\n\n\n<h2 class=\"wp-block-heading\"><strong>\u641c\u5c0b\u8cc7\u6599&nbsp;<\/strong><\/h2>\n\n\n\n<p><strong>\u9078\u64c7\u6307\u5b9a\u689d\u4ef6\u7684\u884c,\u5b8c\u6574\u6bd4\u5c0d<\/strong><br>ex:<br>df[ df.name != &#8216;Tina&#8217; ]<\/p>\n\n\n\n<p><strong>\u9078\u64c7\u6307\u5b9a\u689d\u4ef6\u7684\u884c,\u7ce2\u7cca\u6bd4\u5c0d<\/strong><br>.str.contains( &lt; string&gt; )<br>ex:<br>df[ df[\u2018field1&#8242;].str.contains(&#8216;@&#8217;,na = False) ]<\/p>\n\n\n\n<p><strong>\u9078\u53d6\u7279\u5b9a\u6b04\u4f4d\u975enull\u7684\u884c<\/strong><br>&lt; column name&gt;.notnull()<br>ex:<br>data[data[&#8216;\u653b\u64ca\u8005\u4f7f\u7528\u8005\u540d\u7a31&#8217;].notnull()]<\/p>\n\n\n\n<p><strong>\u9078\u53d6\u7279\u5b9a\u6b04\u4f4d\u70banull\u7684\u884c<\/strong><br>&lt; column name&gt;.isnull()<br>ex:<br>data[data[&#8216;\u653b\u64ca\u8005\u4f7f\u7528\u8005\u540d\u7a31&#8217;].isnull()]<\/p>\n\n\n\n<p><strong>\u9078\u53d6\u7279\u5b9a\u578b\u614b\u7684\u6b04\u4f4d<\/strong><br>select_dtypes()<br>ex:<br>df.select_dtypes(include=[&#8216;int64&#8217;])<br>df.select_dtypes(exclude=[object])<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6aa2\u8996dataframe\u72c0\u6cc1 &#8230;&#8230;&#038;# 
&#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"fifu_image_url":"","fifu_image_alt":"","_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":false,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2}},"categories":[14],"tags":[],"class_list":["post-513","post","type-post","status-publish","format-standard","hentry","category-develop"],"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/systw.net\/note\/wp-json\/wp\/v2\/posts\/513","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/systw.net\/note\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/systw.net\/note\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/systw.net\/note\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/systw.net\/note\/wp-json\/wp\/v2\/comments?post=513"}],"version-history":[{"count":0,"href":"https:\/\/systw.net\/note\/wp-json\/wp\/v2\/posts\/513\/revisions"}],"wp:attachment":[{"href":"https:\/\/systw.net\/note\/wp-json\/wp\/v2\/media?parent=513"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/systw.net\/note\/wp-json\/wp\/v2\/categories?post=513"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/systw.net\/note\/wp-json\/wp\/v2\/tags?post=513"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}