#coding=utf-8
import base64
import datetime
import traceback
from datetime import timedelta

import tablib
from docx import Document
from docx.oxml.ns import qn
from openpyxl.utils.exceptions import InvalidFileException


def strfdate(d):
    """Format a date-like object as ``YYYY-MM-DD``.

    Returns an empty string when ``d`` is falsy (e.g. ``None``).
    """
    return d.strftime('%Y-%m-%d') if d else ''


def strftime(t):
    """Format a datetime-like object as ``YYYY-MM-DD HH:MM``.

    Returns an empty string when ``t`` is falsy (e.g. ``None``).
    """
    return t.strftime('%Y-%m-%d %H:%M') if t else ''


def strfsecond(second):
    """Render a duration given in seconds as Chinese text.

    Only non-zero components are emitted (hours, minutes, seconds); a
    zero-length duration renders as ``"0秒"``. Fractions of a second are
    dropped.

    Hours are computed from the total duration, so values of a day or more
    are rendered with an hour count above 23 instead of silently wrapping
    around at 24 hours.

    Raises:
        OverflowError: if the duration is negative.
    """
    duration = timedelta(seconds=second)
    if duration < timedelta(0):
        # The previous datetime-based implementation overflowed on negative
        # durations; keep raising the same exception type for callers.
        raise OverflowError("duration must not be negative")

    # timedelta normalises to whole days plus 0 <= seconds < 86400.
    total_seconds = duration.days * 86400 + duration.seconds
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    parts = []
    if hours > 0:
        parts.append("%d小时" % hours)
    if minutes > 0:
        parts.append("%d分钟" % minutes)
    # Always show seconds when nothing else was emitted, so 0 -> "0秒".
    if seconds > 0 or not parts:
        parts.append("%d秒" % seconds)
    return "".join(parts)


def clean_datetime_range(data, fieldname):
    """Split a ``"<start> - <end>"`` range field into two bound fields.

    When ``data`` contains a non-empty value under ``fieldname``, a copy of
    ``data`` is returned in which that key is replaced by
    ``<fieldname>_after`` (the start) and ``<fieldname>_before`` (the end
    with ``' 23:59:59'`` appended, so the whole end day is included).
    Otherwise ``data`` is returned unchanged (including ``None``).

    ``data`` must be a dict-like object supporting ``in``, item access,
    ``copy()`` and ``pop()``; the input object itself is never modified.

    Raises:
        IndexError: if the value does not contain the ``' - '`` separator.
    """
    if data is None or fieldname not in data or data[fieldname] == '':
        return data

    bounds = data[fieldname].split(' - ')
    data = data.copy()
    data[fieldname + '_after'] = bounds[0]
    data[fieldname + '_before'] = bounds[1] + ' 23:59:59'
    data.pop(fieldname)
    return data


class ExcelImporter():
    """Helpers for validating uploaded xlsx data files."""

    @staticmethod
    def validity(file):
        """Load an xlsx upload and report whether it contains usable data.

        Args:
            file: the uploaded content handed to ``tablib.import_set``, or
                ``None`` when nothing was uploaded.

        Returns:
            A dict with three keys: ``success`` (bool), ``errors`` (a
            user-facing message, empty on success) and ``data`` (the
            imported dataset's ``.dict`` value on success, otherwise an
            empty list).
        """
        status = {
            'success': True,
            'errors': '',
            'data': []
        }

        if file is None:
            status['success'] = False
            status['errors'] = u"请上传数据文件"
            return status

        try:
            data = tablib.import_set(file, format='xlsx').dict
        except InvalidFileException:
            status['success'] = False
            status['errors'] = u"请上传xlsx格式的数据文件,老版本的xls格式不被支持!"
        except Exception:
            # Any other failure is reported generically to the user; the
            # traceback is printed so the cause is not lost.
            traceback.print_exc()
            status['success'] = False
            status['errors'] = u"导入失败"
        else:
            if not data:
                status['success'] = False
                status['errors'] = u"上传的文件内没有发现数据"
            else:
                status['data'] = data
        return status


class WordImporter():
    """Parser for question documents written as tagged .docx paragraphs."""

    # Tags whose remaining text is stored directly under the given key.
    _FIELD_TAGS = (
        ("【题文】", 'title'),
        ("【章节】", 'chapter'),
        ("【题型】", 'type'),
        ("【答案】", 'answer'),
        ("【解析】", 'analysis'),
        ("【难度】", 'difficulty'),
        ("【分数】", 'score'),
    )

    @staticmethod
    def parse(file):
        """Parse a .docx file into a list of question dicts.

        Paragraphs are read in document order. A paragraph starting with
        ``【序号】`` begins a new question and sets its ``num``. The tags in
        ``_FIELD_TAGS`` set the matching key to the text after the tag.
        ``【图片】`` starts an ``images`` list: every following text-less
        paragraph that holds an embedded picture is appended to it as a
        base64 string. ``【选项】`` starts an ``options`` list: every
        following untagged text paragraph is appended to it. Both modes
        stay active until the next ``【序号】``.

        Args:
            file: path or file-like object accepted by ``docx.Document``.

        Returns:
            A list of dicts, one per ``【序号】`` paragraph; each dict only
            has the keys whose tags appeared for that question.
        """
        doc = Document(file)
        question_data = []
        question = {}
        is_option = False
        is_image = False

        for para in doc.paragraphs:
            text = para.text.strip()

            if not text:
                # Text-less paragraphs only matter as picture carriers.
                if is_image:
                    image_bytes = WordImporter.extract_image(para)
                    # Skip blank paragraphs that hold no picture instead of
                    # failing inside base64.b64encode(None).
                    if image_bytes is not None:
                        encoded = base64.b64encode(image_bytes).decode('utf-8')
                        question['images'].append(encoded)
                continue

            if text.startswith("【序号】"):
                question = {}
                is_option = False
                is_image = False
                question_data.append(question)
                question['num'] = text.split("【序号】")[-1].strip()
            elif text.startswith("【图片】"):
                is_image = True
                question['images'] = []
            elif text.startswith("【选项】"):
                is_option = True
                question['options'] = []
            else:
                for tag, key in WordImporter._FIELD_TAGS:
                    if text.startswith(tag):
                        question[key] = text.split(tag)[-1].strip()
                        break
                else:
                    # Untagged text is an option line while in option mode
                    # and is ignored otherwise.
                    if is_option:
                        question['options'].append(text)

        return question_data

    @staticmethod
    def extract_image(para):
        """Return the bytes of the first embedded picture in a paragraph.

        Each run is searched for an ``a:blip`` element; the blip's
        ``r:embed`` relationship id is resolved through the run's part to
        obtain the image blob.

        Returns:
            The image bytes, or ``None`` when no run contains a blip.
        """
        for run in para.runs:
            blips = run.element.xpath('.//a:blip')
            if blips:
                embed = blips[0].get(qn('r:embed'))
                related_part = run.part.related_parts[embed]
                return related_part.blob
        return None