import re
import subprocess
import zipfile

from django import forms
from django.utils.translation import ugettext_lazy as _


class UploadedFileInMemoryError(Exception):
    """Raised when an upload is held in memory and has no on-disk path."""


# Fragments of the `file` command's output that identify an acceptable
# upload. "Zip archive data" is accepted provisionally because .docx files
# are zip containers; those are inspected further by _is_docx().
# NOTE(review): some `file` releases may word these descriptions
# differently -- confirm against the version deployed on the server.
_DOC_TYPE_RE = re.compile(
    r'PDF document|Microsoft Office Document|Zip archive data')

# Archive member that is looked for to tell a .docx from an arbitrary zip.
_DOCX_MEMBER = 'word/document.xml'


def _describe_file(path):
    """Return the `file` command's description of *path* ('' on failure)."""
    try:
        # Argument list (no shell): the path is never parsed by a shell.
        # -b omits the file name from the output, so the type pattern can
        # only match the detected type, never the name itself.
        proc = subprocess.Popen(
            ['file', '-b', path],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
    except OSError:
        # `file` could not be started. The previous os.popen() call
        # produced empty output in this situation, which ends up as a
        # 'not_doc' validation error; keep that outcome.
        return ''
    return proc.communicate()[0] or ''


def _is_docx(path):
    """Return True if *path* is a zip archive containing a Word body part."""
    if not zipfile.is_zipfile(path):
        return False
    try:
        archive = zipfile.ZipFile(path)
    except zipfile.BadZipfile:
        # Passed the signature check but the archive cannot be opened.
        return False
    try:
        return _DOCX_MEMBER in archive.namelist()
    finally:
        archive.close()


class DocField(forms.FileField):
    """
    Form field for PDF or Microsoft Word documents (both .doc and .docx).

    It validates that the uploaded file is a PDF or an MS Word document.

    ~~~~~~~~~
    Usage:

        class MyForm(forms.Form):
            doc = DocField()
    ~~~~~~~~~

    It extends forms.FileField, so it accepts all the arguments relevant
    to FileField.

    IMPORTANT NOTE:
    Validation is performed by running the *nix shell command 'file',
    therefore:
    1. only *nix systems can use this class.
    2. The uploaded file must be saved on disk, meaning the upload handler
       must be TemporaryFileUploadHandler only.

        # (i.e. put this in your settings.py)
        FILE_UPLOAD_HANDLERS = (
            "django.core.files.uploadhandler.TemporaryFileUploadHandler",
        )
    """

    default_error_messages = {
        'invalid': _(u"No file was submitted. Check the encoding type on the form."),
        'missing': _(u"No file was submitted."),
        'empty': _(u"The submitted file is empty."),
        'not_doc': _(u"Upload a valid document. "
                     u"The file you uploaded was not a acceptable document "
                     u"or a corrupted document."),
    }

    def clean(self, data, initial=None):
        """
        Validate the upload as a PDF, .doc or .docx document.

        Parameters:
            data: the submitted file object, or a false value when nothing
                was uploaded.
            initial: the field's initial value, passed through to
                FileField.clean().

        Returns the submitted file when one was uploaded and accepted;
        otherwise whatever FileField.clean() returns for an absent upload.

        Raises:
            forms.ValidationError: base FileField validation failed, or the
                file is not recognised as a PDF / Word document.
            UploadedFileInMemoryError: a file was uploaded but it exposes
                no temporary_file_path(), so `file` cannot inspect it.
        """
        # Run the standard FileField checks on the new upload itself and
        # let the base class decide what an absent upload means.
        cleaned = super(DocField, self).clean(data, initial)
        if not data:
            # Nothing new was uploaded, so there is nothing to inspect.
            return cleaned

        if not hasattr(data, 'temporary_file_path'):
            # throw an error because uploaded file in memory
            raise UploadedFileInMemoryError('The file uploaded is stored in memory instead of disk and the validation cannot be performed.')
        path = data.temporary_file_path()

        found = _DOC_TYPE_RE.search(_describe_file(path))
        if found is None:
            raise forms.ValidationError(self.error_messages['not_doc'])

        # A zip match only proves "some archive"; require the Word body
        # part before accepting it as a .docx.
        if found.group(0).startswith('Z') and not _is_docx(path):
            raise forms.ValidationError(self.error_messages['not_doc'])

        return data