Python3自动化_文件批量处理(文本、PDF;读取、筛选、导出)
利用Python3脚本语言的简练语法,高级语言的丰富类库,快速写了几个文件读取、筛选、导出的“脚本”。
这里简单总结一下关键功能。
读取ini配置文件
检查ini文件是否存在;检查输入的key在ini文件里是否有定义。
import configparser
def getConfigInfo(_ini_nm):
    """Prompt for a system code and return that section of an ini file.

    The file ``<_ini_nm>.ini`` is looked up in the current working
    directory.  The user is asked repeatedly for a system code until the
    code matches a section name of the ini file.

    Args:
        _ini_nm: File name of the ini file without the ``.ini`` extension.

    Returns:
        A ``dict`` holding the options of the chosen section.

    Raises:
        SystemExit: When the user submits an empty line.
    """
    # Open Ini File
    config = configparser.ConfigParser()
    ini_path = os.path.join(os.getcwd(), _ini_nm + '.ini')
    # ConfigParser.read() returns the list of files it managed to parse,
    # so an empty (falsy) result means the ini file could not be read.
    if not config.read(ini_path):
        printLog('E', 'Read Ini file fail.')

    while True:
        sysCode = input(r'Please input the system code : (Press [Enter] to quit):').strip()
        if not sysCode:
            # Equivalent to exit(), but does not depend on the helper that
            # the ``site`` module injects for interactive sessions.
            raise SystemExit
        # Init ConnectionSettings
        if sysCode in config.sections():
            return dict(config[sysCode])
        print('Ini info of System [%s] is blank.\n' % sysCode)


# Article section heading: "多参数输入的获取" (obtaining multi-parameter input)
检查参数个数;检查参数合法性(长度,是否目录);检查参数是否整个都是汉字。
def _main():
    """Ask for a PDF directory and a Chinese key word until both are valid.

    One input line is expected to contain exactly two whitespace-separated
    values: the directory and the key word.  The prompt is repeated when
    the number of values is wrong, the directory does not exist, or the
    key word contains a character outside U+4E00..U+9FA5.

    Returns:
        A ``(path, keyWord)`` tuple with the validated values.

    Raises:
        SystemExit: When the user submits an empty line.
    """
    while True:
        para = input(r'Please input the PDF directory and Key Word: (Press [Enter] to quit):').strip().split()
        # The empty check has to come before the argument-count check;
        # otherwise an empty line is just "wrong count" and Enter never quits.
        if not para:
            raise SystemExit
        if 2 != len(para):
            continue
        path, keyWord = para

        # The prompt asks for a directory, so a plain file must be rejected.
        if not os.path.isdir(path):
            print('input path is not an existing directory.' + '\n')
            continue

        # Inclusive bounds: U+4E00 and U+9FA5 are themselves Chinese
        # characters and must be accepted.
        if not all('\u4e00' <= char <= '\u9fa5' for char in keyWord):
            print('Please input the Chinese Key Word for search.(Such as \'物流\').' + '\n')
            continue

        return path, keyWord


# Article section heading: "PostgreSQL数据库处理" (PostgreSQL database handling)
根据ini文件定义的数据库连接信息,尝试连库;执行SQL文。
import psycopg2
import traceback
def connDB(_cfg):
    """Open a PostgreSQL connection described by *_cfg*.

    Args:
        _cfg: Mapping that provides the keys ``servicename``, ``dbuser``,
            ``dbpw``, ``host`` and ``port``.

    Returns:
        The object returned by ``psycopg2.connect``.  If connecting fails,
        the traceback is reported through ``printLog('E', ...)``; should
        that call return, the result is ``None``.
    """
    try:
        return psycopg2.connect(
            database=_cfg['servicename'],
            user=_cfg['dbuser'],
            password=_cfg['dbpw'],
            host=_cfg['host'],
            port=_cfg['port'],
        )
    except Exception:
        # A missing key in _cfg ends up here as well, because the lookups
        # happen inside the try block.
        printLog('E', 'Exception occur at DB Connection.' + '\n' + traceback.format_exc())
        return None
def executeSql(_cfg, _sql, _params=None):
    """Run a query and return the first column of every result row.

    A fresh connection is opened through ``connDB`` for each call and is
    always rolled back and closed afterwards, so nothing this function
    executes is ever committed.

    Args:
        _cfg: Connection settings, passed on to ``connDB``.
        _sql: SQL text to execute.
        _params: Optional query parameters handed to ``cursor.execute``.
            Prefer this over building ``_sql`` from untrusted values.

    Returns:
        A list with the first column of each fetched row.  On failure the
        traceback is reported through ``printLog('E', ...)``; should that
        call return, the result is ``None``.
    """
    # Bind both names up front: the finally block runs even when connDB()
    # or cursor() fails, and must not raise on unbound names and thereby
    # hide the original error.
    conn = None
    cur = None
    try:
        conn = connDB(_cfg)
        cur = conn.cursor()
        cur.execute(_sql, _params)
        return [row[0] for row in cur.fetchall()]
    except Exception:
        printLog('E', 'Exception occur at Execute SQL.' + '\n' + traceback.format_exc())
        return None
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.rollback()
            conn.close()


# Article section heading: "日志处理" (log handling)
定义输出日志的级别;异常级别时,处理结束。
import logging
import sys
from datetime import datetime

# One log file per calendar day, named log_YYYYMMDD.txt.  The threshold is
# INFO, so messages logged with level 'D' (DEBUG) are not written.
logging.basicConfig(filename='log_' + datetime.now().strftime('%Y%m%d') + '.txt',
                    level=logging.INFO,
                    format=' %(asctime)s - %(levelname)s - %(message)s')

# Single-letter level codes accepted by printLog().
logLevel = {'D': logging.DEBUG,
            'I': logging.INFO,
            'W': logging.WARNING,
            'E': logging.ERROR,
            'C': logging.CRITICAL}


def printLog(_lvl, _msg):
    """Log *_msg* and terminate the program for error-class levels.

    Args:
        _lvl: One of the keys of ``logLevel`` ('D', 'I', 'W', 'E', 'C').
        _msg: Text to log.

    Raises:
        KeyError: If *_lvl* is not a key of ``logLevel``.
        SystemExit: With exit status 1 when the level is ERROR or
            CRITICAL, after the message has also been printed.
    """
    level = logLevel[_lvl]
    logging.log(level, _msg)
    # CRITICAL is more severe than ERROR, so it has to stop processing too.
    if level >= logging.ERROR:
        print(_msg)
        # sys.exit(1) instead of exit(): exit() is only installed by the
        # ``site`` module, and without an argument it reports success (0).
        sys.exit(1)
# Usage examples for printLog().  They are two independent illustrations:
# a call with level 'E' terminates the program, so run back to back the
# second call would never be reached.
printLog('E', 'srcpath is not a exists path.')
printLog('I', 'Get Src Path : %s' % srcPath)


# Article section heading: "MAP函数运用" (using the map function)
列表元素批量处理,按第二个下划线字符截取字符串。
def getPreOfNm(x):
    """Return the part of *x* that precedes its second underscore.

    Args:
        x: Name to shorten, e.g. ``'tb_user_info'``.

    Returns:
        The text before the second ``'_'`` (``'tb_user'`` for the example
        above), or *x* unchanged when it holds fewer than two underscores.
    """
    # Splitting at most twice yields three parts exactly when a second
    # underscore exists; the first two parts are the wanted prefix.
    parts = x.split('_', 2)
    if len(parts) < 3:
        return x
    return '_'.join(parts[:2])
# Get prefix of CRUD object name: shorten every entry of lstTb with
# getPreOfNm, drop duplicates, and keep the result sorted.
prefixObjNm = sorted(set(map(getPreOfNm, lstTb)))


# Article section heading: "目录处理" (directory handling)
目录/文件判断;目录的路径分割;完整路径的文件名取得;
# Check the srcPath: collect the full path of every file whose name
# matches fileNmReg.  srcPath may be a directory (walked recursively) or a
# single file; anything else leaves the list empty.
# NOTE(review): fileNmReg is presumably a compiled regular expression
# defined elsewhere - confirm.
fullFilePaths = []
if os.path.isdir(srcPath):
    for folderName, subFolders, fileNames in os.walk(srcPath):
        # Files that sit directly in a folder named 'tcs' or 'doc' are
        # skipped.
        # NOTE(review): os.walk still descends into the sub-folders of
        # such a folder, so files deeper down are collected - confirm
        # that this is intended.
        if os.path.split(folderName)[1] in ['tcs', 'doc']:
            continue
        for fn in fileNames:
            # Get src file
            if fileNmReg.search(fn):
                fullFilePaths.append(os.path.join(folderName, fn))
elif os.path.isfile(srcPath):
    # Get src file: match against the file name of the resolved path, but
    # store srcPath exactly as it was given.
    fn = os.path.basename(os.path.realpath(srcPath))
    if fileNmReg.search(fn):
        fullFilePaths.append(srcPath)


# Article section heading: "PDF文件读取" (reading PDF files)
来源:https://www.cnblogs.com/alexzhang92/p/11488949.html
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
import os
def read_pdf(pdf):
    """Extract the text of a PDF and return it as a list of lines.

    Args:
        pdf: The PDF source, handed unchanged to pdfminer's
            ``process_pdf``.  Presumably an open binary file object -
            TODO confirm against the pdfminer version in use.

    Returns:
        The extracted text split on ``"\\n"``.
    """
    # StringIO is not imported anywhere in the visible file, so bring it
    # into scope here.
    from io import StringIO

    # resource manager
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()
    with StringIO() as retstr:
        # device
        device = TextConverter(rsrcmgr, retstr, laparams=laparams)
        try:
            process_pdf(rsrcmgr, device, pdf)
        finally:
            # Close the device even when parsing fails.
            device.close()
        content = retstr.getvalue()
    # Split into lines
    return str(content).split("\n")


# Article section heading: "CSV文件导出" (CSV file export)
import csv
from datetime import datetime

# Init result file: a tab-separated file named "[CRUD]<timestamp>.csv"
# inside srcPath.  newline='' leaves line endings to the csv writer.
# NOTE: rstFile stays open so further rows can be written; call
# rstFile.close() once the export is finished.
rstFile = open(os.path.join(srcPath, '[CRUD]' + datetime.now().strftime('%Y%m%d%H%M%S') + '.csv'), 'w', newline='')
rstWtr = csv.writer(rstFile, delimiter='\t', lineterminator='\n')
# Write head
rstWtr.writerow(['TYPE', 'CI', 'ENCODE', 'LINE NUM', 'CRUD', 'TABLE NM', 'FULL PATH'])
转载请注明原文链接,谢谢。
相关推荐
chuckchen 2020-10-31
Dreamhome 2020-10-09
xirongxudlut 2020-09-28
星辰大海的路上 2020-09-13
chaochao 2020-08-31
猪猪侠喜欢躲猫猫 2020-08-17
快递小可 2020-08-16
shengge0 2020-07-26
巩庆奎 2020-07-21
张文倩数据库学生 2020-07-19
xirongxudlut 2020-07-18
Ericbig 2020-07-18
kyelu 2020-07-09
liangzhouqu 2020-07-07
GuoSir 2020-06-28
chaigang 2020-06-27
pythonxuexi 2020-06-25