#!/usr/bin/env python
import os
import argparse
from collections import OrderedDict
import json
def listdironly(d):
    """Return the paths of the immediate subdirectories of ``d``.

    Each returned path is ``os.path.join(d, name)`` for an entry ``name`` of
    ``os.listdir(d)`` that ``os.path.isdir`` accepts. The order is the one
    ``os.listdir`` yields.
    """
    candidates = (os.path.join(d, name) for name in os.listdir(d))
    return [path for path in candidates if os.path.isdir(path)]


def listfileonly(d):
    """Return the paths of the files located directly inside ``d``.

    Each returned path is ``os.path.join(d, name)`` for an entry ``name`` of
    ``os.listdir(d)`` that ``os.path.isfile`` accepts. The order is the one
    ``os.listdir`` yields.
    """
    candidates = (os.path.join(d, name) for name in os.listdir(d))
    return [path for path in candidates if os.path.isfile(path)]
def get_file_list_recursively(d):
    """Return the paths of all files found under ``d`` at any depth.

    The files directly inside ``d`` come first, followed by the contents of
    each subdirectory in turn (pre-order traversal). Only entries accepted by
    ``os.path.isfile`` are reported, and directories reached through symbolic
    links are descended into.

    Raises:
        OSError: if ``d`` or one of its subdirectories cannot be listed.
    """
    def _reraise(error):
        # os.walk ignores listing errors by default; propagate them so an
        # unreadable directory is not silently counted as empty.
        raise error

    found = []
    for root, _subdirs, names in os.walk(d, followlinks=True, onerror=_reraise):
        paths = (os.path.join(root, name) for name in names)
        # os.walk reports every non-directory entry; keep only what
        # os.path.isfile accepts (e.g. drop dangling symlinks).
        found.extend(path for path in paths if os.path.isfile(path))
    return found
"""
+-----------+---------+-------+------------+
| Names | Weights | Costs | Unit_Costs |
+===========+=========+=======+============+
| bar | 0.050 | 2 | 40 |
+-----------+---------+-------+------------+
| chocolate | 0.100 | 5 | 50 |
+-----------+---------+-------+------------+
| chips | 0.250 | 3 | 12 |
+-----------+---------+-------+------------+
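The table above is rendered from an ordered mapping shaped like this
(the 'header' entry lists the column titles; every other key is the first
cell of a row and its value supplies the remaining cells):
OrderedDict({
    'header': ['Names', 'Weights', 'Costs', 'Unit_Costs'],
    'bar': [0.05, 2, 40],
    ...
})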
"""
def print_texttab(tab_kv):
    """Render ``tab_kv`` as an ASCII table with ``texttable`` and print it.

    ``tab_kv`` is a mapping whose ``'header'`` entry holds the list of column
    titles. Every other entry becomes one row: the key is the first cell,
    followed by the items of the value when it is a list or tuple, or by the
    value itself otherwise. All columns are centre-aligned.

    Raises:
        KeyError: if ``tab_kv`` is non-empty but has no ``'header'`` entry.
    """
    # Local import: texttable is only needed when a table is printed.
    import texttable as tt

    tab = tt.Texttable()
    if tab_kv:
        headings = tab_kv['header']
        tab.header(headings)
        tab.set_cols_align(["c"] * len(headings))

    for key, value in tab_kv.items():
        if key == 'header':
            # The header is identified by name rather than by position, so a
            # mapping whose first entry is not 'header' keeps all its rows.
            continue
        if isinstance(value, (tuple, list)):
            # list(...) lets tuple values be concatenated as well.
            row_data = [key] + list(value)
        else:
            row_data = [key, value]
        tab.add_row(row_data)

    print(tab.draw())
def main(args):
    """Print the subdirectories of ``args.dir`` ranked by recursive file count.

    For every immediate subdirectory of ``args.dir`` the number of files it
    contains (at any depth) is computed, and the result is printed as a
    two-column table, largest count first.

    Args:
        args: object with a ``dir`` attribute naming the directory to scan
            (the namespace produced by the script's argument parser).
    """
    counts = [
        (os.path.basename(subdir), len(get_file_list_recursively(subdir)))
        for subdir in listdironly(args.dir)
    ]
    # Largest first; the sort is stable, so ties keep directory-listing order.
    counts.sort(key=lambda item: item[1], reverse=True)

    # Create the table: the 'header' entry first, then one row per directory.
    table_rank = OrderedDict()
    table_rank['header'] = ['Directory', 'File Count']
    # NOTE(review): a subdirectory literally named "header" would replace the
    # header entry here -- confirm whether that case needs handling.
    for name, count in counts:
        table_rank[name] = count
    print_texttab(table_rank)
if __name__ == "__main__":
    # Script entry point: parse the command line and print the ranking table.
    parser = argparse.ArgumentParser(description="Count files number.")
    parser.add_argument(
        '--dir', type=str, default='.',
        help="directory whose subdirectories are ranked by file count "
             "(default: current directory)",
    )
    args = parser.parse_args()
    main(args)
显示效果如下:
+----------------------+------------+
| Directory | File Count |
+======================+============+
| Apps | 44254 |
+----------------------+------------+
| Pictures | 2349 |
+----------------------+------------+
就几行,我也懒得贴 gist 了 :)
1
MakeItGreat 2020-12-29 23:27:33 +08:00 via Android
如果你是 Windows 用户的话,用 PowerShell 更方便,我就是这样干的。
|
2
douglas1997 OP @MakeItGreat 好吧,python 可能更通用一些。
|
3
xmoiduts 2020-12-30 00:47:06 +08:00
不懂就问:我的 OneDrive 里存了 20 万+ 个中小文件(但仅为占位符),这会多占用很多电脑内存(RAM)吗?
|
4
qq316107934 2020-12-30 01:09:55 +08:00
@xmoiduts #3 算个数:假设每个文件名为 100 字节,作为一个 key 存在一个 map 里,20 万个文件名只占用约 19 MB 的内存(200,000 × 100 B = 20,000,000 B ≈ 19 MiB)。
|
5
douglas1997 OP @xmoiduts 不是占 RAM,而是每一次启动的时候,OneDrive 会对比时间戳来判断要不要更新。如果文件很多的话,容易导致电脑变得非常卡顿。
|