123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- #coding=utf-8
- import datetime
- import traceback
- import tablib
- import base64
- from datetime import timedelta
- from docx import Document
- from docx.oxml.ns import qn
- from openpyxl.utils.exceptions import InvalidFileException
def strfdate(d):
    """Format *d* as ``YYYY-MM-DD``.

    Any falsy value (for example ``None``) yields an empty string, so
    callers never have to special-case missing dates.
    """
    return d.strftime('%Y-%m-%d') if d else ''
def strftime(t):
    """Format *t* as ``YYYY-MM-DD HH:MM``.

    Any falsy value (for example ``None``) yields an empty string.
    """
    if not t:
        return ''
    return t.strftime('%Y-%m-%d %H:%M')
def strfsecond(second):
    """Render a duration given in seconds as a compact Chinese string.

    Only the non-zero units are shown, e.g. ``3661`` -> ``"1小时1分钟1秒"``
    and ``3600`` -> ``"1小时"``; a zero duration is rendered as ``"0秒"``.
    Hours are not capped at 23: a duration of a day or more is expressed
    in total hours (``90000`` -> ``"25小时"``) instead of wrapping around.

    Args:
        second: Duration in seconds (int or float). Any fractional part is
            discarded after ``timedelta`` has normalized the value.

    Returns:
        The formatted duration string.

    Raises:
        OverflowError: If ``second`` is negative or too large for
            ``timedelta``.
        TypeError: If ``second`` is not a number accepted by ``timedelta``.
    """
    # Let timedelta normalize the input (float rounding, range checks) and
    # then work on whole seconds only.
    delta = timedelta(seconds=second)
    total = delta.days * 86400 + delta.seconds
    if total < 0:
        # A negative duration has no sensible rendering; keep the exception
        # type the datetime arithmetic used to raise for this input.
        raise OverflowError("duration must not be negative: %r" % (second,))

    hours, remainder = divmod(total, 3600)
    minutes, seconds = divmod(remainder, 60)

    parts = []
    if hours > 0:
        parts.append("%d小时" % hours)
    if minutes > 0:
        parts.append("%d分钟" % minutes)
    # Seconds are shown when non-zero, or when nothing else was emitted so
    # that a zero duration still produces "0秒".
    if seconds > 0 or not parts:
        parts.append("%d秒" % seconds)
    return "".join(parts)
def clean_datetime_range(data, fieldname):
    """Split a ``"start - end"`` range value into two separate bounds.

    When ``data`` holds a non-empty value under ``fieldname``, a copy of
    ``data`` is returned in which that key is replaced by
    ``<fieldname>_after`` (the part before ``' - '``) and
    ``<fieldname>_before`` (the part after it, with ``' 23:59:59'``
    appended). The object passed in is never modified.

    If ``data`` is ``None``, lacks ``fieldname``, or holds an empty string
    there, ``data`` itself is returned unchanged.

    Raises:
        IndexError: If the value does not contain the ``' - '`` separator.
    """
    if data is None or fieldname not in data:
        return data
    if data[fieldname] == '':
        return data

    bounds = data[fieldname].split(' - ')
    # Work on a copy so the caller's mapping stays intact.
    cleaned = data.copy()
    cleaned[fieldname + '_after'] = bounds[0]
    cleaned[fieldname + '_before'] = bounds[1] + ' 23:59:59'
    cleaned.pop(fieldname)
    return cleaned
class ExcelImporter():
    """Load an uploaded ``.xlsx`` file and report the outcome as a status dict."""

    @staticmethod
    def validity(file):
        """Read ``file`` as an xlsx workbook and return a status dictionary.

        Args:
            file: The uploaded file object handed to ``tablib.import_set``,
                or ``None`` when nothing was uploaded.

        Returns:
            A dict with three keys:

            * ``'success'`` - ``True`` only when rows were read.
            * ``'errors'`` - a user-facing message, empty on success.
            * ``'data'`` - the ``.dict`` view of the imported dataset on
              success, otherwise an empty list.

        No exception escapes this method: every failure is converted into
        ``success=False`` plus a message. Unexpected errors additionally
        have their traceback printed.
        """
        status = {
            'success': True,
            'errors': '',
            'data': []
        }
        if file is None:
            status['success'] = False
            status['errors'] = u"请上传数据文件"
            return status

        try:
            data = tablib.import_set(file, format='xlsx').dict
        except InvalidFileException:
            # Raised by openpyxl when the upload is not a valid xlsx file.
            status['success'] = False
            status['errors'] = u"请上传<strong>xlsx</strong>格式的数据文件,老版本的xls格式不被支持!"
        except Exception:
            # Boundary handler: keep the traceback for diagnosis but give
            # the caller a generic failure message.
            traceback.print_exc()
            status['success'] = False
            status['errors'] = u"导入失败"
        else:
            if not data:
                status['success'] = False
                status['errors'] = u"上传的文件内没有发现数据"
            else:
                status['data'] = data
        return status
class WordImporter():
    """Extract question records from a Word document that uses 【…】 tags."""

    # Tags whose paragraph text maps directly onto one question field.
    _FIELD_TAGS = (
        ("【题文】", 'title'),
        ("【章节】", 'chapter'),
        ("【题型】", 'type'),
        ("【答案】", 'answer'),
        ("【解析】", 'analysis'),
        ("【难度】", 'difficulty'),
        ("【分数】", 'score'),
    )

    @staticmethod
    def parse(file):
        """Parse ``file`` and return one dict per question.

        Paragraphs are scanned in document order:

        * ``【序号】`` starts a new question and stores its ``'num'``.
        * The tags in ``_FIELD_TAGS`` store the rest of the paragraph under
          the matching key.
        * ``【图片】`` starts an ``'images'`` list; later paragraphs without
          text but with an embedded picture are appended to it as base64
          strings.
        * ``【选项】`` starts an ``'options'`` list; later untagged text
          paragraphs are appended to it.

        Tagged paragraphs that appear before the first ``【序号】`` are
        collected into a record that is never returned.

        Args:
            file: Anything accepted by ``docx.Document``.

        Returns:
            A list of question dictionaries in document order.
        """
        doc = Document(file)
        question_data = []
        question = {}
        is_option = False
        is_image = False

        for para in doc.paragraphs:
            text = para.text.strip()

            if not text:
                # Text-less paragraph: only of interest as a picture holder
                # while an image section is open.
                if is_image and para.runs:
                    image_data = WordImporter.extract_image(para)
                    # Runs without an embedded picture yield None; skip them
                    # instead of failing inside b64encode.
                    if image_data is not None:
                        encoded = base64.b64encode(image_data).decode('utf-8')
                        question['images'].append(encoded)
                continue

            if text.startswith("【序号】"):
                # A new question begins: reset the per-question state.
                question = {}
                is_option = False
                is_image = False
                question_data.append(question)
                question['num'] = text[len("【序号】"):].strip()
                continue

            if text.startswith("【图片】"):
                is_image = True
                question['images'] = []
                continue

            if text.startswith("【选项】"):
                is_option = True
                question['options'] = []
                continue

            for tag, key in WordImporter._FIELD_TAGS:
                if text.startswith(tag):
                    # Strip only the leading tag so a value that happens to
                    # contain the tag text again is kept in full.
                    question[key] = text[len(tag):].strip()
                    break
            else:
                # Untagged text only matters inside an option section.
                if is_option:
                    question['options'].append(text)

        return question_data

    @staticmethod
    def extract_image(para):
        """Return the bytes of the first picture embedded in ``para``.

        Args:
            para: A paragraph object exposing ``runs``.

        Returns:
            The image blob of the first run that contains an ``a:blip``
            element, or ``None`` when no run contains one.
        """
        for run in para.runs:
            blips = run.element.xpath('.//a:blip')
            if blips:
                # The blip's r:embed attribute is the relationship id of
                # the image part stored in the document package.
                embed = blips[0].get(qn('r:embed'))
                return run.part.related_parts[embed].blob
        return None
|