利用shell编程,提取注释文件中指定范围内的行
目的:
经常需要对信息分析得到的结果文件做进一步分析,有时希望只提取指定染色体上、指定位置区间内的行。
代码:
#!/bin/bash
# Print the greeting banner (wrapped in ANSI colour escape sequences).
echo -e "\033[32m\n地球是圆的,而看似像终点的地方可能也只是起点。\n \033[0m"
# Default output name "result.<YYYY-MM-DD>"; it stays in effect unless the
# -o option assigns a different name later in the script.
outfile="result.$(date +%Y-%m-%d)"
# Site: write the header line of $infile plus every matching data row to
# $outfile.
#
# Reads the globals infile, chr, newstart, newend and outfile.
# The input is treated as tab-separated: field 2 is compared with $chr and
# field 3 with the position bounds.
function Site() {
    echo -e "\033[31m输出结果名称为:$outfile\033[0m"
    # newstart/newend are start-1 / end+1, so the strict comparisons below
    # keep rows whose position lies in [start, end] inclusive.
    #
    # The shell values are handed to awk with -v instead of being spliced
    # into the program text: spliced in, a chromosome such as "X" or "chr1"
    # would be parsed as an (empty) awk variable and never match.
    #
    # NOTE(review): `less` is kept as the reader; in a pipeline it behaves
    # like cat, and it may be relied on for input preprocessing - confirm
    # before replacing it.
    less "$infile" \
        | awk -F "\t" -v chr="$chr" -v lo="$newstart" -v hi="$newend" \
            'NR < 2 { print $0 } NR > 1 { if ($2 == chr && $3 > lo && $3 < hi) print $0 }' \
        > "$outfile"
}
# x2t: convert the text result file $outfile into ${outfile}.xlsx by calling
# the xls2txt.py helper in "t2x" mode.
#
# Reads the global outfile. The expansions are quoted so that an output name
# containing spaces or glob characters is passed as a single argument.
function x2t() {
    echo -e "结果文件进行格式转换:${outfile} ===> ${outfile}.xlsx"
    python /WORK/Disease/lmt/Code/xls2txt/xls2txt.py "${outfile}" "${outfile}.xlsx" t2x
}
# usage: print the help text.
function usage() {
    echo -e '
使用帮助:
1)
site -i infile -c chr -s start -e end
2)
site -i infile -c chr -s start -e end -o result # 自己指定输出结果名称
3)
site -i infile -c chr -s start -e end [-o result] -f # 是否对生成的结果文件进行格式转换
4)
site -o file -f # 对file文件进行格式转换
'
}

# require_position: abort unless $2 (the value given to option -$1) is a
# non-negative integer, so the bound arithmetic below cannot fail or
# silently evaluate a non-numeric string.
function require_position() {
    if [[ ! $2 =~ ^[0-9]+$ ]]; then
        echo "选项 -$1 需要一个非负整数: $2" >&2
        exit 1
    fi
}

# -f is a pure switch; start from "not requested" so the later tests only
# see a value when the option was actually given.
f=""

# Option parsing. The leading ':' in the optstring selects silent error
# reporting, so missing arguments and unknown options are handled by the
# ':' and '?' branches below.
while getopts ":i:c:s:e:o:hf" opt; do
    case $opt in
        i)
            infile=$OPTARG
            echo -e "\033[32m输入文件为:$infile\033[0m"
            ;;
        c)
            chr=$OPTARG
            echo -e "\033[32m查询的染色体为:$chr\033[0m"
            ;;
        s)
            start=$OPTARG
            require_position s "$start"
            # Site uses strict comparisons, so widen the lower bound by one
            # to make the requested start inclusive. 10# forces base 10 for
            # values with leading zeros.
            newstart=$((10#$start - 1))
            echo -e "\033[32m查询的起始位置为:$start\033[0m"
            ;;
        e)
            end=$OPTARG
            require_position e "$end"
            # Same as above for the upper bound.
            newend=$((10#$end + 1))
            echo -e "\033[32m查询的终止位置为:$end\033[0m"
            ;;
        o)
            outfile=$OPTARG
            ;;
        f)
            # Record that format conversion was requested; previously the
            # flag was parsed but never stored.
            f="yes"
            ;;
        h)
            usage
            exit 0
            ;;
        :)
            echo "选项 -$OPTARG 需要参数" >&2
            usage
            exit 1
            ;;
        \?)
            echo "未知选项: -$OPTARG" >&2
            usage
            exit 1
            ;;
    esac
done

# Dispatch:
#   * all of -i/-c/-s/-e given -> extract rows, then convert if -f was given
#   * only -f with an existing, non-empty output file -> convert that file
#   * anything else -> show the help text
if [[ -n $infile && -n $chr && -n $start && -n $end ]]; then
    echo "开始提取结果......"
    Site
    if [[ -n $f ]]; then
        x2t
    fi
elif [[ -s $outfile && -n $f ]]; then
    x2t
else
    usage
fi
中间有个格式转换的代码,如下:
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import openpyxl
from openpyxl import load_workbook #读取excel文件
from openpyxl import Workbook # 创建xlsx文件
class ChangeType(object):
    """Convert a table between an .xlsx workbook and tab-separated text.

    ``args`` is a mapping that provides at least the keys ``"source"``
    (path of the file to read) and ``"target"`` (path of the file to write).
    """

    def __init__(self, args):
        self.source = args["source"]
        self.target = args["target"]

    @staticmethod
    def _format_row(values):
        """Return *values* joined into one tab-separated line (no newline).

        ``None`` (an empty cell) becomes an empty field rather than the
        literal text "None", and no trailing tab is emitted.
        """
        return "\t".join("" if value is None else str(value) for value in values)

    @staticmethod
    def _split_line(line):
        """Split one text line into its tab-separated fields.

        Only the line terminator is removed, so empty leading or trailing
        columns are preserved and the remaining columns do not shift.
        """
        return line.rstrip("\r\n").split("\t")

    def xls2txt(self):
        """Write the active sheet of the source workbook as tab-separated text."""
        wb = load_workbook(self.source)
        ws = wb.active
        # The context manager guarantees the output is flushed and closed.
        with open(self.target, "w") as out:
            for row in ws.rows:
                out.write(self._format_row(cell.value for cell in row))
                out.write("\n")

    def txt2xls(self):
        """Write the tab-separated source file into a sheet named "Result"."""
        wb = Workbook()
        ws = wb.create_sheet("Result", index=0)
        with open(self.source, "r") as fr:
            for line in fr:
                ws.append(self._split_line(line))
        wb.save(self.target)
def main():
    """Run the conversion selected by the module-level ``args`` mapping.

    ``args["type"]`` chooses the direction: ``"x2t"`` calls
    ``ChangeType.xls2txt`` and ``"t2x"`` calls ``ChangeType.txt2xls``.
    Any other value terminates the program with an error message.
    """
    # sys.exit is used instead of the bare ``exit`` helper, which is
    # installed by the ``site`` module for interactive sessions and is not
    # guaranteed to exist in every run mode.
    import sys

    converter = ChangeType(args)
    mode = args["type"]
    if mode == "x2t":
        converter.xls2txt()
    elif mode == "t2x":
        converter.txt2xls()
    else:
        sys.exit("请输入想要转换的格式")
if __name__ == "__main__":
    import argparse

    # Positional command-line arguments, in the order they must be given,
    # each paired with the keyword options passed to add_argument().
    positional_specs = (
        ("source", {"help": "原格式文件"}),
        ("target", {"help": "想要转换的格式文件"}),
        ("type", {"help": "请输入想要转换的格式", "choices": ["x2t", "t2x"]}),
    )

    parser = argparse.ArgumentParser()
    for arg_name, options in positional_specs:
        parser.add_argument(arg_name, **options)

    # main() reads this module-level mapping.
    args = vars(parser.parse_args())
    main()