# Copyright (c) 2006,2007 Mitch Garnaat http://garnaat.org/
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import xml.sax.saxutils


class Question(object):
    """
    A single ``<Question>`` element.

    ``content`` and ``answer_spec`` are rendered by calling their
    ``get_as_xml()`` methods, so any object providing that method works
    (e.g. QuestionContent and AnswerSpecification).
    """
    template = "<Question>%(items)s</Question>"

    def __init__(self, identifier, content, answer_spec,
                 is_required=False, display_name=None):
        """
        :param identifier: value of the QuestionIdentifier element
        :param content: object rendering the question body
        :param answer_spec: object rendering the answer specification
        :param is_required: rendered lower-cased as the IsRequired element
        :param display_name: optional DisplayName; omitted when None
        """
        self.identifier = identifier
        self.content = content
        self.answer_spec = answer_spec
        self.is_required = is_required
        self.display_name = display_name

    def get_as_params(self, label='Question'):
        """Return a one-entry dict mapping ``label`` to the rendered XML."""
        return {label: self.get_as_xml()}

    def get_as_xml(self):
        """Render the question, with DisplayName right after the identifier."""
        items = [SimpleField('QuestionIdentifier', self.identifier)]
        if self.display_name is not None:
            items.append(SimpleField('DisplayName', self.display_name))
        items.extend([
            SimpleField('IsRequired', str(self.is_required).lower()),
            self.content,
            self.answer_spec,
        ])
        rendered = ''.join(item.get_as_xml() for item in items)
        return self.template % {'items': rendered}


# lxml is optional: keep the try body down to the import itself so that an
# unrelated ImportError cannot silently select the no-op implementation.
try:
    from lxml import etree
except ImportError:
    class ValidatingXML(object):
        """Fallback mixin used when lxml is not installed."""

        def validate(self):
            """Do nothing; schema validation is unavailable without lxml."""
            pass
else:
    class ValidatingXML(object):
        """
        Mixin that validates ``self.get_as_xml()`` against the XML schema
        downloaded from ``self.schema_url``.
        """

        def validate(self):
            """
            Fetch the schema and assert that the rendered XML conforms.

            Raises whatever ``schema.assertValid`` raises on a schema
            violation, and any error from fetching/parsing the schema.
            """
            # urllib2 only exists on Python 2; prefer the Python 3 location.
            try:
                from urllib.request import urlopen
            except ImportError:
                from urllib2 import urlopen
            schema_src_file = urlopen(self.schema_url)
            try:
                schema_doc = etree.parse(schema_src_file)
            finally:
                # The original never released the network handle.
                schema_src_file.close()
            schema = etree.XMLSchema(schema_doc)
            doc = etree.fromstring(self.get_as_xml())
            schema.assertValid(doc)


class ExternalQuestion(ValidatingXML):
    """
    An object for constructing an External Question.
    """
    schema_url = "http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd"
    # The namespace is filled in once here; the doubled %% placeholders
    # survive as %(...)s and are filled per instance in get_as_xml().
    template = '<ExternalQuestion xmlns="%(schema_url)s"><ExternalURL>%%(external_url)s</ExternalURL><FrameHeight>%%(frame_height)s</FrameHeight></ExternalQuestion>' % {'schema_url': schema_url}

    def __init__(self, external_url, frame_height):
        """Store the XML-escaped ``external_url`` and the frame height."""
        self.external_url = xml.sax.saxutils.escape(external_url)
        self.frame_height = frame_height

    def get_as_params(self, label='ExternalQuestion'):
        """Return a one-entry dict mapping ``label`` to the rendered XML."""
        return {label: self.get_as_xml()}

    def get_as_xml(self):
        """Fill the template from this instance's attributes."""
        return self.template % vars(self)


class XMLTemplate(object):
    """Base class rendering ``self.template`` with the instance attributes."""

    def get_as_xml(self):
        """Return ``self.template`` %-formatted with ``vars(self)``."""
        return self.template % vars(self)


class SimpleField(XMLTemplate):
    """
    A Simple name/value pair that can be easily rendered as XML.

    >>> SimpleField('Text', 'A text string').get_as_xml()
    '<Text>A text string</Text>'
    """
    template = '<%(field)s>%(value)s</%(field)s>'

    def __init__(self, field, value):
        self.field = field
        self.value = value


class Binary(XMLTemplate):
    """A ``<Binary>`` element: MIME type/subtype, data URL and alt text."""
    template = """<Binary><MimeType><Type>%(type)s</Type><SubType>%(subtype)s</SubType></MimeType><DataURL>%(url)s</DataURL><AltText>%(alt_text)s</AltText></Binary>"""

    def __init__(self, type, subtype, url, alt_text):
        # ``type`` shadows the builtin but is part of the public signature.
        self.type = type
        self.subtype = subtype
        self.url = url
        self.alt_text = alt_text


class List(list):
    """A bulleted list suitable for OrderedContent or Overview content"""

    def get_as_xml(self):
        """Wrap each element in ``<ListItem>`` inside a single ``<List>``."""
        items = ''.join('<ListItem>%s</ListItem>' % item for item in self)
        return '<List>%s</List>' % items


class Application(object):
    """
    Base for ``<Application>`` content; the inner element is named after
    the concrete subclass (see JavaApplet and Flash).
    """
    template = "<Application><%(class_)s>%(content)s</%(class_)s></Application>"
    parameter_template = "<Name>%(name)s</Name><Value>%(value)s</Value>"

    def __init__(self, width, height, **parameters):
        """
        :param width: rendered as the Width element
        :param height: rendered as the Height element
        :param parameters: each becomes an ApplicationParameter element
        """
        self.width = width
        self.height = height
        self.parameters = parameters

    def get_inner_content(self, content):
        """
        Append Width, Height and the application parameters to ``content``.

        ``content`` is mutated in place (it must offer ``append_field``);
        nothing is returned.
        """
        content.append_field('Width', self.width)
        content.append_field('Height', self.height)
        for name, value in self.parameters.items():
            rendered = self.parameter_template % {'name': name, 'value': value}
            content.append_field('ApplicationParameter', rendered)

    def get_as_xml(self):
        """Render the application, collecting fields via get_inner_content."""
        content = OrderedContent()
        self.get_inner_content(content)
        return self.template % {
            'class_': self.__class__.__name__,
            'content': content.get_as_xml(),
        }


class HTMLQuestion(ValidatingXML):
    """An ``<HTMLQuestion>`` whose HTML form is embedded in a CDATA section."""
    schema_url = 'http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2011-11-11/HTMLQuestion.xsd'
    # Namespace filled in here; the %% placeholders are filled per instance.
    template = '<HTMLQuestion xmlns="%(schema_url)s"><HTMLContent><![CDATA[<!DOCTYPE html>%%(html_form)s]]></HTMLContent><FrameHeight>%%(frame_height)s</FrameHeight></HTMLQuestion>' % {'schema_url': schema_url}

    def __init__(self, html_form, frame_height):
        self.html_form = html_form
        self.frame_height = frame_height

    def get_as_params(self, label="HTMLQuestion"):
        """Return a one-entry dict mapping ``label`` to the rendered XML."""
        return {label: self.get_as_xml()}

    def get_as_xml(self):
        """Fill the template from this instance's attributes."""
        return self.template % vars(self)


class JavaApplet(Application):
    """Application content with AppletPath and AppletFilename fields."""

    def __init__(self, path, filename, *args, **kwargs):
        """``args``/``kwargs`` are forwarded to Application.__init__."""
        self.path = path
        self.filename = filename
        super(JavaApplet, self).__init__(*args, **kwargs)

    def get_inner_content(self, content):
        """
        Append the applet fields, then the common Application fields.

        The fields must go into the caller's ``content``: rebinding it to
        a fresh OrderedContent (as this method used to) left the caller's
        container empty and produced an empty ``<JavaApplet>`` element.
        """
        content.append_field('AppletPath', self.path)
        content.append_field('AppletFilename', self.filename)
        super(JavaApplet, self).get_inner_content(content)


class Flash(Application):
    """Application content with a FlashMovieURL field."""

    def __init__(self, url, *args, **kwargs):
        """``args``/``kwargs`` are forwarded to Application.__init__."""
        self.url = url
        super(Flash, self).__init__(*args, **kwargs)

    def get_inner_content(self, content):
        """
        Append the movie URL, then the common Application fields.

        Writes into the caller's ``content`` rather than a fresh local
        container, so the fields actually reach the rendered XML.
        """
        content.append_field('FlashMovieURL', self.url)
        super(Flash, self).get_inner_content(content)


class FormattedContent(XMLTemplate):
    """``<FormattedContent>`` wrapping ``content`` in a CDATA section."""
    schema_url = 'http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/FormattedContentXHTMLSubset.xsd'
    template = '<FormattedContent><![CDATA[%(content)s]]></FormattedContent>'

    def __init__(self, content):
        self.content = content


class OrderedContent(list):
    """A list of renderable items, concatenated in order by get_as_xml()."""

    def append_field(self, field, value):
        """Append ``SimpleField(field, value)``."""
        self.append(SimpleField(field, value))

    def get_as_xml(self):
        """Concatenate ``get_as_xml()`` of every item, in list order."""
        return ''.join(item.get_as_xml() for item in self)


class Overview(OrderedContent):
    """Ordered content wrapped in an ``<Overview>`` element."""
    template = '<Overview>%(content)s</Overview>'

    def get_as_params(self, label='Overview'):
        """Return a one-entry dict mapping ``label`` to the rendered XML."""
        return {label: self.get_as_xml()}

    def get_as_xml(self):
        content = super(Overview, self).get_as_xml()
        return self.template % {'content': content}


class QuestionForm(ValidatingXML, list):
    """
    From the AMT API docs:

    The top-most element of the QuestionForm data structure is a
    QuestionForm element. This element contains optional Overview
    elements and one or more Question elements. There can be any
    number of these two element types listed in any order. The
    following example structure has an Overview element and a
    Question element followed by a second Overview element and
    Question element--all within the same QuestionForm.

    ::

        <QuestionForm xmlns="[the QuestionForm schema URL]">
            <Overview>
                [...]
            </Overview>
            <Question>
                [...]
            </Question>
            <Overview>
                [...]
            </Overview>
            <Question>
                [...]
            </Question>
            [...]
        </QuestionForm>

    QuestionForm is implemented as a list, so to construct a
    QuestionForm, simply append Questions and Overviews (with at least
    one Question).
    """
    schema_url = "http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2005-10-01/QuestionForm.xsd"
    # Namespace filled in here; %%(items)s is filled in get_as_xml().
    xml_template = """<QuestionForm xmlns="%(schema_url)s">%%(items)s</QuestionForm>""" % {'schema_url': schema_url}

    def is_valid(self):
        """
        Return True when the form holds at least one Question and nothing
        but Question and Overview instances.
        """
        return (
            any(isinstance(item, Question) for item in self)
            and
            all(isinstance(item, (Question, Overview)) for item in self)
        )

    def get_as_xml(self):
        """Render the form; asserts is_valid() first."""
        assert self.is_valid(), "QuestionForm contains invalid elements"
        items = ''.join(item.get_as_xml() for item in self)
        return self.xml_template % {'items': items}


class QuestionContent(OrderedContent):
    """Ordered content wrapped in a ``<QuestionContent>`` element."""
    template = '<QuestionContent>%(content)s</QuestionContent>'

    def get_as_xml(self):
        content = super(QuestionContent, self).get_as_xml()
        return self.template % {'content': content}


class AnswerSpecification(object):
    """Wraps ``spec.get_as_xml()`` in ``<AnswerSpecification>``."""
    template = '<AnswerSpecification>%(spec)s</AnswerSpecification>'

    def __init__(self, spec):
        self.spec = spec

    def get_as_xml(self):
        return self.template % {'spec': self.spec.get_as_xml()}


class Constraints(OrderedContent):
    """Ordered constraints wrapped in a ``<Constraints>`` element."""
    template = '<Constraints>%(content)s</Constraints>'

    def get_as_xml(self):
        content = super(Constraints, self).get_as_xml()
        return self.template % {'content': content}


class Constraint(object):
    """
    Base for constraints rendered as one self-closing element.

    Subclasses provide ``template``, ``attribute_names`` and
    ``attribute_values`` (parallel sequences).
    """

    def get_attributes(self):
        """
        Return ``name="value"`` pairs joined by spaces, skipping values
        that are None. Values are formatted with ``%d``.
        """
        pairs = zip(self.attribute_names, self.attribute_values)
        return ' '.join(
            '%s="%d"' % (name, value)
            for (name, value) in pairs
            if value is not None
        )

    def get_as_xml(self):
        return self.template % {'attrs': self.get_attributes()}


class NumericConstraint(Constraint):
    """``<IsNumeric>`` with optional minValue/maxValue attributes."""
    attribute_names = 'minValue', 'maxValue'
    template = '<IsNumeric %(attrs)s />'

    def __init__(self, min_value=None, max_value=None):
        self.attribute_values = min_value, max_value


class LengthConstraint(Constraint):
    """``<Length>`` with optional minLength/maxLength attributes."""
    attribute_names = 'minLength', 'maxLength'
    template = '<Length %(attrs)s />'

    def __init__(self, min_length=None, max_length=None):
        self.attribute_values = min_length, max_length


class RegExConstraint(Constraint):
    """``<AnswerFormatRegex>`` with regex, errorText and flags attributes."""
    attribute_names = 'regex', 'errorText', 'flags'
    template = '<AnswerFormatRegex %(attrs)s />'

    def __init__(self, pattern, error_text=None, flags=None):
        self.attribute_values = pattern, error_text, flags

    def get_attributes(self):
        """
        Same as Constraint.get_attributes but formats values with ``%s``,
        since these attributes are strings. Values are inserted as given
        (no XML escaping is applied here).
        """
        pairs = zip(self.attribute_names, self.attribute_values)
        return ' '.join(
            '%s="%s"' % (name, value)
            for (name, value) in pairs
            if value is not None
        )


class NumberOfLinesSuggestion(object):
    """A ``<NumberOfLinesSuggestion>`` element."""
    template = '<NumberOfLinesSuggestion>%(num_lines)s</NumberOfLinesSuggestion>'

    def __init__(self, num_lines=1):
        self.num_lines = num_lines

    def get_as_xml(self):
        return self.template % {'num_lines': self.num_lines}


class FreeTextAnswer(object):
    """A ``<FreeTextAnswer>`` with constraints, default text and line hint."""
    template = '<FreeTextAnswer>%(items)s</FreeTextAnswer>'

    def __init__(self, default=None, constraints=None, num_lines=None):
        """
        :param default: DefaultText value; omitted from the XML when falsy
        :param constraints: iterable of constraint objects, or None
        :param num_lines: NumberOfLinesSuggestion; omitted when falsy
        """
        self.default = default
        if constraints is None:
            self.constraints = Constraints()
        else:
            self.constraints = Constraints(constraints)
        self.num_lines = num_lines

    def get_as_xml(self):
        """Render constraints first (always), then the optional parts."""
        items = [self.constraints]
        if self.default:
            items.append(SimpleField('DefaultText', self.default))
        if self.num_lines:
            items.append(NumberOfLinesSuggestion(self.num_lines))
        rendered = ''.join(item.get_as_xml() for item in items)
        return self.template % {'items': rendered}


class FileUploadAnswer(object):
    """A ``<FileUploadAnswer>`` with minimum and maximum file sizes."""
    template = """<FileUploadAnswer><MaxFileSizeInBytes>%(max_bytes)d</MaxFileSizeInBytes><MinFileSizeInBytes>%(min_bytes)d</MinFileSizeInBytes></FileUploadAnswer>"""

    def __init__(self, min_bytes, max_bytes):
        """Asserts ``0 <= min_bytes <= max_bytes <= 2 * 10 ** 9``."""
        assert 0 <= min_bytes <= max_bytes <= 2 * 10 ** 9
        self.min_bytes = min_bytes
        self.max_bytes = max_bytes

    def get_as_xml(self):
        return self.template % vars(self)


class SelectionAnswer(object):
    """
    A class to generate SelectionAnswer XML data structures.
    Does not yet implement Binary selection options.
    """
    SELECTIONANSWER_XML_TEMPLATE = """<SelectionAnswer>%s%s<Selections>%s</Selections></SelectionAnswer>"""  # % (count_xml, style_xml, selections_xml)
    SELECTION_XML_TEMPLATE = """<Selection><SelectionIdentifier>%s</SelectionIdentifier>%s</Selection>"""  # (identifier, value_xml)
    SELECTION_VALUE_XML_TEMPLATE = """<%s>%s</%s>"""  # (type, value, type)
    STYLE_XML_TEMPLATE = """<StyleSuggestion>%s</StyleSuggestion>"""  # (style)
    MIN_SELECTION_COUNT_XML_TEMPLATE = """<MinSelectionCount>%s</MinSelectionCount>"""  # count
    MAX_SELECTION_COUNT_XML_TEMPLATE = """<MaxSelectionCount>%s</MaxSelectionCount>"""  # count
    ACCEPTED_STYLES = ['radiobutton', 'dropdown', 'checkbox', 'list', 'combobox', 'multichooser']
    OTHER_SELECTION_ELEMENT_NAME = 'OtherSelection'

    def __init__(self, min=1, max=1, style=None, selections=None, type='text', other=False):
        """
        :param min: minimum selection count (also a lower bound on
            ``len(selections)``, enforced with an assert)
        :param max: maximum selection count
        :param style: None or one of ACCEPTED_STYLES
        :param selections: sequence of (content, identifier) tuples
        :param type: 'text' or 'binary' (checked in get_as_xml)
        :param other: falsy for no OtherSelection, a FreeTextAnswer to
            render one from it, or any other truthy value for an empty one
        :raises ValueError: on an unknown style or when selections is None
        """
        # ``min``, ``max`` and ``type`` shadow builtins but are part of the
        # public keyword interface, so the names are kept.
        if style is not None and style not in SelectionAnswer.ACCEPTED_STYLES:
            raise ValueError("style '%s' not recognized; should be one of %s" % (style, ', '.join(SelectionAnswer.ACCEPTED_STYLES)))
        self.style_suggestion = style

        if selections is None:
            raise ValueError("SelectionAnswer.__init__(): selections must be a non-empty list of (content, identifier) tuples")
        self.selections = selections

        self.min_selections = min
        self.max_selections = max

        assert len(selections) >= self.min_selections, "# of selections is less than minimum of %d" % self.min_selections
        # NOTE: len(selections) is not checked against max_selections.

        self.type = type
        self.other = other

    def get_as_xml(self):
        """
        Render the ``<SelectionAnswer>`` element.

        Selection counts are omitted for the 'radiobutton' style.

        :raises ValueError: when ``self.type`` is not 'text' or 'binary'
        """
        if self.type == 'text':
            type_tag = "Text"
        elif self.type == 'binary':
            type_tag = "Binary"
        else:
            raise ValueError("illegal type: %s; must be either 'text' or 'binary'" % str(self.type))

        # build list of <Selection> elements; each tuple is
        # (content, identifier)
        parts = []
        for tpl in self.selections:
            value_xml = SelectionAnswer.SELECTION_VALUE_XML_TEMPLATE % (type_tag, tpl[0], type_tag)
            parts.append(SelectionAnswer.SELECTION_XML_TEMPLATE % (tpl[1], value_xml))

        if self.other:
            # add OtherSelection element as xml if available
            if hasattr(self.other, 'get_as_xml'):
                assert isinstance(self.other, FreeTextAnswer), 'OtherSelection can only be a FreeTextAnswer'
                # Reuse the FreeTextAnswer rendering under the other tag name.
                parts.append(self.other.get_as_xml().replace('FreeTextAnswer', 'OtherSelection'))
            else:
                parts.append("<OtherSelection />")
        selections_xml = ''.join(parts)

        if self.style_suggestion is not None:
            style_xml = SelectionAnswer.STYLE_XML_TEMPLATE % self.style_suggestion
        else:
            style_xml = ""

        if self.style_suggestion != 'radiobutton':
            count_xml = SelectionAnswer.MIN_SELECTION_COUNT_XML_TEMPLATE % self.min_selections
            count_xml += SelectionAnswer.MAX_SELECTION_COUNT_XML_TEMPLATE % self.max_selections
        else:
            count_xml = ""

        return SelectionAnswer.SELECTIONANSWER_XML_TEMPLATE % (count_xml, style_xml, selections_xml)