• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2"""
3Python Markdown
4
5A Python implementation of John Gruber's Markdown.
6
7Documentation: https://python-markdown.github.io/
8GitHub: https://github.com/Python-Markdown/markdown/
9PyPI: https://pypi.org/project/Markdown/
10
11Started by Manfred Stienstra (http://www.dwerg.net/).
12Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
13Currently maintained by Waylan Limberg (https://github.com/waylan),
14Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
15
16Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
17Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
18Copyright 2004 Manfred Stienstra (the original version)
19
20License: BSD (see LICENSE.md for details).
21"""
22
23from markdown.test_tools import TestCase
24import markdown
25
26
27class TestHTMLBlocks(TestCase):
28
29    def test_raw_paragraph(self):
30        self.assertMarkdownRenders(
31            '<p>A raw paragraph.</p>',
32            '<p>A raw paragraph.</p>'
33        )
34
35    def test_raw_skip_inline_markdown(self):
36        self.assertMarkdownRenders(
37            '<p>A *raw* paragraph.</p>',
38            '<p>A *raw* paragraph.</p>'
39        )
40
41    def test_raw_indent_one_space(self):
42        self.assertMarkdownRenders(
43            ' <p>A *raw* paragraph.</p>',
44            '<p>A *raw* paragraph.</p>'
45        )
46
47    def test_raw_indent_two_spaces(self):
48        self.assertMarkdownRenders(
49            '  <p>A *raw* paragraph.</p>',
50            '<p>A *raw* paragraph.</p>'
51        )
52
53    def test_raw_indent_three_spaces(self):
54        self.assertMarkdownRenders(
55            '   <p>A *raw* paragraph.</p>',
56            '<p>A *raw* paragraph.</p>'
57        )
58
59    def test_raw_indent_four_spaces(self):
60        self.assertMarkdownRenders(
61            '    <p>code block</p>',
62            self.dedent(
63                """
64                <pre><code>&lt;p&gt;code block&lt;/p&gt;
65                </code></pre>
66                """
67            )
68        )
69
70    def test_raw_span(self):
71        self.assertMarkdownRenders(
72            '<span>*inline*</span>',
73            '<p><span><em>inline</em></span></p>'
74        )
75
76    def test_code_span(self):
77        self.assertMarkdownRenders(
78            '`<p>code span</p>`',
79            '<p><code>&lt;p&gt;code span&lt;/p&gt;</code></p>'
80        )
81
82    def test_code_span_open_gt(self):
83        self.assertMarkdownRenders(
84            '*bar* `<` *foo*',
85            '<p><em>bar</em> <code>&lt;</code> <em>foo</em></p>'
86        )
87
88    def test_raw_empty(self):
89        self.assertMarkdownRenders(
90            '<p></p>',
91            '<p></p>'
92        )
93
94    def test_raw_empty_space(self):
95        self.assertMarkdownRenders(
96            '<p> </p>',
97            '<p> </p>'
98        )
99
100    def test_raw_empty_newline(self):
101        self.assertMarkdownRenders(
102            '<p>\n</p>',
103            '<p>\n</p>'
104        )
105
106    def test_raw_empty_blank_line(self):
107        self.assertMarkdownRenders(
108            '<p>\n\n</p>',
109            '<p>\n\n</p>'
110        )
111
112    def test_raw_uppercase(self):
113        self.assertMarkdownRenders(
114            '<DIV>*foo*</DIV>',
115            '<DIV>*foo*</DIV>'
116        )
117
118    def test_raw_uppercase_multiline(self):
119        self.assertMarkdownRenders(
120            self.dedent(
121                """
122                <DIV>
123                *foo*
124                </DIV>
125                """
126            ),
127            self.dedent(
128                """
129                <DIV>
130                *foo*
131                </DIV>
132                """
133            )
134        )
135
136    def test_multiple_raw_single_line(self):
137        self.assertMarkdownRenders(
138            '<p>*foo*</p><div>*bar*</div>',
139            self.dedent(
140                """
141                <p>*foo*</p>
142                <div>*bar*</div>
143                """
144            )
145        )
146
147    def test_multiple_raw_single_line_with_pi(self):
148        self.assertMarkdownRenders(
149            "<p>*foo*</p><?php echo '>'; ?>",
150            self.dedent(
151                """
152                <p>*foo*</p>
153                <?php echo '>'; ?>
154                """
155            )
156        )
157
158    def test_multiline_raw(self):
159        self.assertMarkdownRenders(
160            self.dedent(
161                """
162                <p>
163                    A raw paragraph
164                    with multiple lines.
165                </p>
166                """
167            ),
168            self.dedent(
169                """
170                <p>
171                    A raw paragraph
172                    with multiple lines.
173                </p>
174                """
175            )
176        )
177
178    def test_blank_lines_in_raw(self):
179        self.assertMarkdownRenders(
180            self.dedent(
181                """
182                <p>
183
184                    A raw paragraph...
185
186                    with many blank lines.
187
188                </p>
189                """
190            ),
191            self.dedent(
192                """
193                <p>
194
195                    A raw paragraph...
196
197                    with many blank lines.
198
199                </p>
200                """
201            )
202        )
203
204    def test_raw_surrounded_by_Markdown(self):
205        self.assertMarkdownRenders(
206            self.dedent(
207                """
208                Some *Markdown* text.
209
210                <p>*Raw* HTML.</p>
211
212                More *Markdown* text.
213                """
214            ),
215            self.dedent(
216                """
217                <p>Some <em>Markdown</em> text.</p>
218                <p>*Raw* HTML.</p>
219
220                <p>More <em>Markdown</em> text.</p>
221                """
222            )
223        )
224
225    def test_raw_surrounded_by_text_without_blank_lines(self):
226        self.assertMarkdownRenders(
227            self.dedent(
228                """
229                Some *Markdown* text.
230                <p>*Raw* HTML.</p>
231                More *Markdown* text.
232                """
233            ),
234            self.dedent(
235                """
236                <p>Some <em>Markdown</em> text.</p>
237                <p>*Raw* HTML.</p>
238                <p>More <em>Markdown</em> text.</p>
239                """
240            )
241        )
242
243    def test_multiline_markdown_with_code_span(self):
244        self.assertMarkdownRenders(
245            self.dedent(
246                """
247                A paragraph with a block-level
248                `<p>code span</p>`, which is
249                at the start of a line.
250                """
251            ),
252            self.dedent(
253                """
254                <p>A paragraph with a block-level
255                <code>&lt;p&gt;code span&lt;/p&gt;</code>, which is
256                at the start of a line.</p>
257                """
258            )
259        )
260
261    def test_raw_block_preceded_by_markdown_code_span_with_unclosed_block_tag(self):
262        self.assertMarkdownRenders(
263            self.dedent(
264                """
265                A paragraph with a block-level code span: `<div>`.
266
267                <p>*not markdown*</p>
268
269                This is *markdown*
270                """
271            ),
272            self.dedent(
273                """
274                <p>A paragraph with a block-level code span: <code>&lt;div&gt;</code>.</p>
275                <p>*not markdown*</p>
276
277                <p>This is <em>markdown</em></p>
278                """
279            )
280        )
281
282    def test_raw_one_line_followed_by_text(self):
283        self.assertMarkdownRenders(
284            '<p>*foo*</p>*bar*',
285            self.dedent(
286                """
287                <p>*foo*</p>
288                <p><em>bar</em></p>
289                """
290            )
291        )
292
293    def test_raw_one_line_followed_by_span(self):
294        self.assertMarkdownRenders(
295            "<p>*foo*</p><span>*bar*</span>",
296            self.dedent(
297                """
298                <p>*foo*</p>
299                <p><span><em>bar</em></span></p>
300                """
301            )
302        )
303
304    def test_raw_with_markdown_blocks(self):
305        self.assertMarkdownRenders(
306            self.dedent(
307                """
308                <div>
309                    Not a Markdown paragraph.
310
311                    * Not a list item.
312                    * Another non-list item.
313
314                    Another non-Markdown paragraph.
315                </div>
316                """
317            ),
318            self.dedent(
319                """
320                <div>
321                    Not a Markdown paragraph.
322
323                    * Not a list item.
324                    * Another non-list item.
325
326                    Another non-Markdown paragraph.
327                </div>
328                """
329            )
330        )
331
332    def test_adjacent_raw_blocks(self):
333        self.assertMarkdownRenders(
334            self.dedent(
335                """
336                <p>A raw paragraph.</p>
337                <p>A second raw paragraph.</p>
338                """
339            ),
340            self.dedent(
341                """
342                <p>A raw paragraph.</p>
343                <p>A second raw paragraph.</p>
344                """
345            )
346        )
347
348    def test_adjacent_raw_blocks_with_blank_lines(self):
349        self.assertMarkdownRenders(
350            self.dedent(
351                """
352                <p>A raw paragraph.</p>
353
354                <p>A second raw paragraph.</p>
355                """
356            ),
357            self.dedent(
358                """
359                <p>A raw paragraph.</p>
360
361                <p>A second raw paragraph.</p>
362                """
363            )
364        )
365
366    def test_nested_raw_one_line(self):
367        self.assertMarkdownRenders(
368            '<div><p>*foo*</p></div>',
369            '<div><p>*foo*</p></div>'
370        )
371
372    def test_nested_raw_block(self):
373        self.assertMarkdownRenders(
374            self.dedent(
375                """
376                <div>
377                <p>A raw paragraph.</p>
378                </div>
379                """
380            ),
381            self.dedent(
382                """
383                <div>
384                <p>A raw paragraph.</p>
385                </div>
386                """
387            )
388        )
389
390    def test_nested_indented_raw_block(self):
391        self.assertMarkdownRenders(
392            self.dedent(
393                """
394                <div>
395                    <p>A raw paragraph.</p>
396                </div>
397                """
398            ),
399            self.dedent(
400                """
401                <div>
402                    <p>A raw paragraph.</p>
403                </div>
404                """
405            )
406        )
407
408    def test_nested_raw_blocks(self):
409        self.assertMarkdownRenders(
410            self.dedent(
411                """
412                <div>
413                <p>A raw paragraph.</p>
414                <p>A second raw paragraph.</p>
415                </div>
416                """
417            ),
418            self.dedent(
419                """
420                <div>
421                <p>A raw paragraph.</p>
422                <p>A second raw paragraph.</p>
423                </div>
424                """
425            )
426        )
427
428    def test_nested_raw_blocks_with_blank_lines(self):
429        self.assertMarkdownRenders(
430            self.dedent(
431                """
432                <div>
433
434                <p>A raw paragraph.</p>
435
436                <p>A second raw paragraph.</p>
437
438                </div>
439                """
440            ),
441            self.dedent(
442                """
443                <div>
444
445                <p>A raw paragraph.</p>
446
447                <p>A second raw paragraph.</p>
448
449                </div>
450                """
451            )
452        )
453
454    def test_nested_inline_one_line(self):
455        self.assertMarkdownRenders(
456            '<p><em>foo</em><br></p>',
457            '<p><em>foo</em><br></p>'
458        )
459
460    def test_raw_nested_inline(self):
461        self.assertMarkdownRenders(
462            self.dedent(
463                """
464                <div>
465                    <p>
466                        <span>*text*</span>
467                    </p>
468                </div>
469                """
470            ),
471            self.dedent(
472                """
473                <div>
474                    <p>
475                        <span>*text*</span>
476                    </p>
477                </div>
478                """
479            )
480        )
481
482    def test_raw_nested_inline_with_blank_lines(self):
483        self.assertMarkdownRenders(
484            self.dedent(
485                """
486                <div>
487
488                    <p>
489
490                        <span>*text*</span>
491
492                    </p>
493
494                </div>
495                """
496            ),
497            self.dedent(
498                """
499                <div>
500
501                    <p>
502
503                        <span>*text*</span>
504
505                    </p>
506
507                </div>
508                """
509            )
510        )
511
512    def test_raw_html5(self):
513        self.assertMarkdownRenders(
514            self.dedent(
515                """
516                <section>
517                    <header>
518                        <hgroup>
519                            <h1>Hello :-)</h1>
520                        </hgroup>
521                    </header>
522                    <figure>
523                        <img src="image.png" alt="" />
524                        <figcaption>Caption</figcaption>
525                    </figure>
526                    <footer>
527                        <p>Some footer</p>
528                    </footer>
529                </section>
530                """
531            ),
532            self.dedent(
533                """
534                <section>
535                    <header>
536                        <hgroup>
537                            <h1>Hello :-)</h1>
538                        </hgroup>
539                    </header>
540                    <figure>
541                        <img src="image.png" alt="" />
542                        <figcaption>Caption</figcaption>
543                    </figure>
544                    <footer>
545                        <p>Some footer</p>
546                    </footer>
547                </section>
548                """
549            )
550        )
551
552    def test_raw_pre_tag(self):
553        self.assertMarkdownRenders(
554            self.dedent(
555                """
556                Preserve whitespace in raw html
557
558                <pre>
559                class Foo():
560                    bar = 'bar'
561
562                    @property
563                    def baz(self):
564                        return self.bar
565                </pre>
566                """
567            ),
568            self.dedent(
569                """
570                <p>Preserve whitespace in raw html</p>
571                <pre>
572                class Foo():
573                    bar = 'bar'
574
575                    @property
576                    def baz(self):
577                        return self.bar
578                </pre>
579                """
580            )
581        )
582
583    def test_raw_pre_tag_nested_escaped_html(self):
584        self.assertMarkdownRenders(
585            self.dedent(
586                """
587                <pre>
588                &lt;p&gt;foo&lt;/p&gt;
589                </pre>
590                """
591            ),
592            self.dedent(
593                """
594                <pre>
595                &lt;p&gt;foo&lt;/p&gt;
596                </pre>
597                """
598            )
599        )
600
601    def test_raw_p_no_end_tag(self):
602        self.assertMarkdownRenders(
603            '<p>*text*',
604            '<p>*text*'
605        )
606
607    def test_raw_multiple_p_no_end_tag(self):
608        self.assertMarkdownRenders(
609            self.dedent(
610                """
611                <p>*text*'
612
613                <p>more *text*
614                """
615            ),
616            self.dedent(
617                """
618                <p>*text*'
619
620                <p>more *text*
621                """
622            )
623        )
624
625    def test_raw_p_no_end_tag_followed_by_blank_line(self):
626        self.assertMarkdownRenders(
627            self.dedent(
628                """
629                <p>*raw text*'
630
631                Still part of *raw* text.
632                """
633            ),
634            self.dedent(
635                """
636                <p>*raw text*'
637
638                Still part of *raw* text.
639                """
640            )
641        )
642
643    def test_raw_nested_p_no_end_tag(self):
644        self.assertMarkdownRenders(
645            '<div><p>*text*</div>',
646            '<div><p>*text*</div>'
647        )
648
649    def test_raw_open_bracket_only(self):
650        self.assertMarkdownRenders(
651            '<',
652            '<p>&lt;</p>'
653        )
654
655    def test_raw_open_bracket_followed_by_space(self):
656        self.assertMarkdownRenders(
657            '< foo',
658            '<p>&lt; foo</p>'
659        )
660
661    def test_raw_missing_close_bracket(self):
662        self.assertMarkdownRenders(
663            '<foo',
664            '<p>&lt;foo</p>'
665        )
666
667    def test_raw_unclosed_tag_in_code_span(self):
668        self.assertMarkdownRenders(
669            self.dedent(
670                """
671                `<div`.
672
673                <div>
674                hello
675                </div>
676                """
677            ),
678            self.dedent(
679                """
680                <p><code>&lt;div</code>.</p>
681                <div>
682                hello
683                </div>
684                """
685            )
686        )
687
688    def test_raw_unclosed_tag_in_code_span_space(self):
689        self.assertMarkdownRenders(
690            self.dedent(
691                """
692                ` <div `.
693
694                <div>
695                hello
696                </div>
697                """
698            ),
699            self.dedent(
700                """
701                <p><code>&lt;div</code>.</p>
702                <div>
703                hello
704                </div>
705                """
706            )
707        )
708
709    def test_raw_attributes(self):
710        self.assertMarkdownRenders(
711            '<p id="foo", class="bar baz", style="margin: 15px; line-height: 1.5; text-align: center;">text</p>',
712            '<p id="foo", class="bar baz", style="margin: 15px; line-height: 1.5; text-align: center;">text</p>'
713        )
714
715    def test_raw_attributes_nested(self):
716        self.assertMarkdownRenders(
717            self.dedent(
718                """
719                <div id="foo, class="bar", style="background: #ffe7e8; border: 2px solid #e66465;">
720                    <p id="baz", style="margin: 15px; line-height: 1.5; text-align: center;">
721                        <img scr="../foo.jpg" title="with 'quoted' text." valueless_attr weirdness="<i>foo</i>" />
722                    </p>
723                </div>
724                """
725            ),
726            self.dedent(
727                """
728                <div id="foo, class="bar", style="background: #ffe7e8; border: 2px solid #e66465;">
729                    <p id="baz", style="margin: 15px; line-height: 1.5; text-align: center;">
730                        <img scr="../foo.jpg" title="with 'quoted' text." valueless_attr weirdness="<i>foo</i>" />
731                    </p>
732                </div>
733                """
734            )
735        )
736
737    def test_raw_comment_one_line(self):
738        self.assertMarkdownRenders(
739            '<!-- *foo* -->',
740            '<!-- *foo* -->'
741        )
742
743    def test_raw_comment_one_line_with_tag(self):
744        self.assertMarkdownRenders(
745            '<!-- <tag> -->',
746            '<!-- <tag> -->'
747        )
748
749    def test_comment_in_code_span(self):
750        self.assertMarkdownRenders(
751            '`<!-- *foo* -->`',
752            '<p><code>&lt;!-- *foo* --&gt;</code></p>'
753        )
754
755    def test_raw_comment_one_line_followed_by_text(self):
756        self.assertMarkdownRenders(
757            '<!-- *foo* -->*bar*',
758            self.dedent(
759                """
760                <!-- *foo* -->
761                <p><em>bar</em></p>
762                """
763            )
764        )
765
766    def test_raw_comment_one_line_followed_by_html(self):
767        self.assertMarkdownRenders(
768            '<!-- *foo* --><p>*bar*</p>',
769            self.dedent(
770                """
771                <!-- *foo* -->
772                <p>*bar*</p>
773                """
774            )
775        )
776
    # Note: Trailing (insignificant) whitespace is not preserved, which does not match the
    # reference implementation. However, this is not a change in behavior for Python-Markdown.
779    def test_raw_comment_trailing_whitespace(self):
780        self.assertMarkdownRenders(
781            '<!-- *foo* --> ',
782            '<!-- *foo* -->'
783        )
784
    # Note: This is a change in behavior for Python-Markdown which does *not* match the reference
    # implementation. However, it does match the HTML5 spec. Declarations must start with either
    # `<!DOCTYPE` or `<![`. Anything else that starts with `<!` is a comment. According to the
    # HTML5 spec, a comment without the hyphens is a "bogus comment", but it is a comment nonetheless.
    # See https://www.w3.org/TR/html52/syntax.html#markup-declaration-open-state.
    # If we wanted to change this behavior, we could override `HTMLParser.parse_bogus_comment()`.
791    def test_bogus_comment(self):
792        self.assertMarkdownRenders(
793            '<!*foo*>',
794            '<!--*foo*-->'
795        )
796
797    def test_raw_multiline_comment(self):
798        self.assertMarkdownRenders(
799            self.dedent(
800                """
801                <!--
802                *foo*
803                -->
804                """
805            ),
806            self.dedent(
807                """
808                <!--
809                *foo*
810                -->
811                """
812            )
813        )
814
815    def test_raw_multiline_comment_with_tag(self):
816        self.assertMarkdownRenders(
817            self.dedent(
818                """
819                <!--
820                <tag>
821                -->
822                """
823            ),
824            self.dedent(
825                """
826                <!--
827                <tag>
828                -->
829                """
830            )
831        )
832
833    def test_raw_multiline_comment_first_line(self):
834        self.assertMarkdownRenders(
835            self.dedent(
836                """
837                <!-- *foo*
838                -->
839                """
840            ),
841            self.dedent(
842                """
843                <!-- *foo*
844                -->
845                """
846            )
847        )
848
849    def test_raw_multiline_comment_last_line(self):
850        self.assertMarkdownRenders(
851            self.dedent(
852                """
853                <!--
854                *foo* -->
855                """
856            ),
857            self.dedent(
858                """
859                <!--
860                *foo* -->
861                """
862            )
863        )
864
865    def test_raw_comment_with_blank_lines(self):
866        self.assertMarkdownRenders(
867            self.dedent(
868                """
869                <!--
870
871                *foo*
872
873                -->
874                """
875            ),
876            self.dedent(
877                """
878                <!--
879
880                *foo*
881
882                -->
883                """
884            )
885        )
886
887    def test_raw_comment_with_blank_lines_with_tag(self):
888        self.assertMarkdownRenders(
889            self.dedent(
890                """
891                <!--
892
893                <tag>
894
895                -->
896                """
897            ),
898            self.dedent(
899                """
900                <!--
901
902                <tag>
903
904                -->
905                """
906            )
907        )
908
909    def test_raw_comment_with_blank_lines_first_line(self):
910        self.assertMarkdownRenders(
911            self.dedent(
912                """
913                <!-- *foo*
914
915                -->
916                """
917            ),
918            self.dedent(
919                """
920                <!-- *foo*
921
922                -->
923                """
924            )
925        )
926
927    def test_raw_comment_with_blank_lines_last_line(self):
928        self.assertMarkdownRenders(
929            self.dedent(
930                """
931                <!--
932
933                *foo* -->
934                """
935            ),
936            self.dedent(
937                """
938                <!--
939
940                *foo* -->
941                """
942            )
943        )
944
945    def test_raw_comment_indented(self):
946        self.assertMarkdownRenders(
947            self.dedent(
948                """
949                <!--
950
951                    *foo*
952
953                -->
954                """
955            ),
956            self.dedent(
957                """
958                <!--
959
960                    *foo*
961
962                -->
963                """
964            )
965        )
966
967    def test_raw_comment_indented_with_tag(self):
968        self.assertMarkdownRenders(
969            self.dedent(
970                """
971                <!--
972
973                    <tag>
974
975                -->
976                """
977            ),
978            self.dedent(
979                """
980                <!--
981
982                    <tag>
983
984                -->
985                """
986            )
987        )
988
989    def test_raw_comment_nested(self):
990        self.assertMarkdownRenders(
991            self.dedent(
992                """
993                <div>
994                <!-- *foo* -->
995                </div>
996                """
997            ),
998            self.dedent(
999                """
1000                <div>
1001                <!-- *foo* -->
1002                </div>
1003                """
1004            )
1005        )
1006
1007    def test_comment_in_code_block(self):
1008        self.assertMarkdownRenders(
1009            '    <!-- *foo* -->',
1010            self.dedent(
1011                """
1012                <pre><code>&lt;!-- *foo* --&gt;
1013                </code></pre>
1014                """
1015            )
1016        )
1017
    # Note: This is a change in behavior. Previously, Python-Markdown interpreted this in the same
    # manner as browsers do, and all text after the opening comment tag was considered to be in a
    # comment. However, that did not match the reference implementation. The new behavior does.
1021    def test_unclosed_comment_(self):
1022        self.assertMarkdownRenders(
1023            self.dedent(
1024                """
1025                <!-- unclosed comment
1026
1027                *not* a comment
1028                """
1029            ),
1030            self.dedent(
1031                """
1032                <p>&lt;!-- unclosed comment</p>
1033                <p><em>not</em> a comment</p>
1034                """
1035            )
1036        )
1037
1038    def test_raw_processing_instruction_one_line(self):
1039        self.assertMarkdownRenders(
1040            "<?php echo '>'; ?>",
1041            "<?php echo '>'; ?>"
1042        )
1043
    # Note: This is a change in behavior and does not match the reference implementation.
    # We have no way to determine whether the text is on the same line as the processing
    # instruction, so the trailing text is rendered as a separate paragraph. TODO: reevaluate!
1046    def test_raw_processing_instruction_one_line_followed_by_text(self):
1047        self.assertMarkdownRenders(
1048            "<?php echo '>'; ?>*bar*",
1049            self.dedent(
1050                """
1051                <?php echo '>'; ?>
1052                <p><em>bar</em></p>
1053                """
1054            )
1055        )
1056
1057    def test_raw_multiline_processing_instruction(self):
1058        self.assertMarkdownRenders(
1059            self.dedent(
1060                """
1061                <?php
1062                echo '>';
1063                ?>
1064                """
1065            ),
1066            self.dedent(
1067                """
1068                <?php
1069                echo '>';
1070                ?>
1071                """
1072            )
1073        )
1074
1075    def test_raw_processing_instruction_with_blank_lines(self):
1076        self.assertMarkdownRenders(
1077            self.dedent(
1078                """
1079                <?php
1080
1081                echo '>';
1082
1083                ?>
1084                """
1085            ),
1086            self.dedent(
1087                """
1088                <?php
1089
1090                echo '>';
1091
1092                ?>
1093                """
1094            )
1095        )
1096
1097    def test_raw_processing_instruction_indented(self):
1098        self.assertMarkdownRenders(
1099            self.dedent(
1100                """
1101                <?php
1102
1103                    echo '>';
1104
1105                ?>
1106                """
1107            ),
1108            self.dedent(
1109                """
1110                <?php
1111
1112                    echo '>';
1113
1114                ?>
1115                """
1116            )
1117        )
1118
1119    def test_raw_processing_instruction_code_span(self):
1120        self.assertMarkdownRenders(
1121            self.dedent(
1122                """
1123                `<?php`
1124
1125                <div>
1126                foo
1127                </div>
1128                """
1129            ),
1130            self.dedent(
1131                """
1132                <p><code>&lt;?php</code></p>
1133                <div>
1134                foo
1135                </div>
1136                """
1137            )
1138        )
1139
1140    def test_raw_declaration_one_line(self):
1141        self.assertMarkdownRenders(
1142            '<!DOCTYPE html>',
1143            '<!DOCTYPE html>'
1144        )
1145
1146    # This is a change in behavior and does not match the reference implementation.
1147    # We have no way to determine if text is on the same line, so we get this. TODO: reevaluate!
1148    def test_raw_declaration_one_line_followed_by_text(self):
1149        self.assertMarkdownRenders(
1150            '<!DOCTYPE html>*bar*',
1151            self.dedent(
1152                """
1153                <!DOCTYPE html>
1154                <p><em>bar</em></p>
1155                """
1156            )
1157        )
1158
1159    def test_raw_multiline_declaration(self):
1160        self.assertMarkdownRenders(
1161            self.dedent(
1162                """
1163                <!DOCTYPE html PUBLIC
1164                  "-//W3C//DTD XHTML 1.1//EN"
1165                  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
1166                """
1167            ),
1168            self.dedent(
1169                """
1170                <!DOCTYPE html PUBLIC
1171                  "-//W3C//DTD XHTML 1.1//EN"
1172                  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
1173                """
1174            )
1175        )
1176
1177    def test_raw_declaration_code_span(self):
1178        self.assertMarkdownRenders(
1179            self.dedent(
1180                """
1181                `<!`
1182
1183                <div>
1184                foo
1185                </div>
1186                """
1187            ),
1188            self.dedent(
1189                """
1190                <p><code>&lt;!</code></p>
1191                <div>
1192                foo
1193                </div>
1194                """
1195            )
1196        )
1197
1198    def test_raw_cdata_one_line(self):
1199        self.assertMarkdownRenders(
1200            '<![CDATA[ document.write(">"); ]]>',
1201            '<![CDATA[ document.write(">"); ]]>'
1202        )
1203
1204    # Note: this is a change. Neither previous output nor this match reference implementation.
1205    def test_raw_cdata_one_line_followed_by_text(self):
1206        self.assertMarkdownRenders(
1207            '<![CDATA[ document.write(">"); ]]>*bar*',
1208            self.dedent(
1209                """
1210                <![CDATA[ document.write(">"); ]]>
1211                <p><em>bar</em></p>
1212                """
1213            )
1214        )
1215
1216    def test_raw_multiline_cdata(self):
1217        self.assertMarkdownRenders(
1218            self.dedent(
1219                """
1220                <![CDATA[
1221                document.write(">");
1222                ]]>
1223                """
1224            ),
1225            self.dedent(
1226                """
1227                <![CDATA[
1228                document.write(">");
1229                ]]>
1230                """
1231            )
1232        )
1233
1234    def test_raw_cdata_with_blank_lines(self):
1235        self.assertMarkdownRenders(
1236            self.dedent(
1237                """
1238                <![CDATA[
1239
1240                document.write(">");
1241
1242                ]]>
1243                """
1244            ),
1245            self.dedent(
1246                """
1247                <![CDATA[
1248
1249                document.write(">");
1250
1251                ]]>
1252                """
1253            )
1254        )
1255
1256    def test_raw_cdata_indented(self):
1257        self.assertMarkdownRenders(
1258            self.dedent(
1259                """
1260                <![CDATA[
1261
1262                    document.write(">");
1263
1264                ]]>
1265                """
1266            ),
1267            self.dedent(
1268                """
1269                <![CDATA[
1270
1271                    document.write(">");
1272
1273                ]]>
1274                """
1275            )
1276        )
1277
1278    def test_raw_cdata_code_span(self):
1279        self.assertMarkdownRenders(
1280            self.dedent(
1281                """
1282                `<![`
1283
1284                <div>
1285                foo
1286                </div>
1287                """
1288            ),
1289            self.dedent(
1290                """
1291                <p><code>&lt;![</code></p>
1292                <div>
1293                foo
1294                </div>
1295                """
1296            )
1297        )
1298
1299    def test_charref(self):
1300        self.assertMarkdownRenders(
1301            '&sect;',
1302            '<p>&sect;</p>'
1303        )
1304
1305    def test_nested_charref(self):
1306        self.assertMarkdownRenders(
1307            '<p>&sect;</p>',
1308            '<p>&sect;</p>'
1309        )
1310
1311    def test_entityref(self):
1312        self.assertMarkdownRenders(
1313            '&#167;',
1314            '<p>&#167;</p>'
1315        )
1316
1317    def test_nested_entityref(self):
1318        self.assertMarkdownRenders(
1319            '<p>&#167;</p>',
1320            '<p>&#167;</p>'
1321        )
1322
1323    def test_amperstand(self):
1324        self.assertMarkdownRenders(
1325            'AT&T & AT&amp;T',
1326            '<p>AT&amp;T &amp; AT&amp;T</p>'
1327        )
1328
1329    def test_startendtag(self):
1330        self.assertMarkdownRenders(
1331            '<hr>',
1332            '<hr>'
1333        )
1334
1335    def test_startendtag_with_attrs(self):
1336        self.assertMarkdownRenders(
1337            '<hr id="foo" class="bar">',
1338            '<hr id="foo" class="bar">'
1339        )
1340
1341    def test_startendtag_with_space(self):
1342        self.assertMarkdownRenders(
1343            '<hr >',
1344            '<hr >'
1345        )
1346
1347    def test_closed_startendtag(self):
1348        self.assertMarkdownRenders(
1349            '<hr />',
1350            '<hr />'
1351        )
1352
1353    def test_closed_startendtag_without_space(self):
1354        self.assertMarkdownRenders(
1355            '<hr/>',
1356            '<hr/>'
1357        )
1358
1359    def test_closed_startendtag_with_attrs(self):
1360        self.assertMarkdownRenders(
1361            '<hr id="foo" class="bar" />',
1362            '<hr id="foo" class="bar" />'
1363        )
1364
1365    def test_nested_startendtag(self):
1366        self.assertMarkdownRenders(
1367            '<div><hr></div>',
1368            '<div><hr></div>'
1369        )
1370
1371    def test_nested_closed_startendtag(self):
1372        self.assertMarkdownRenders(
1373            '<div><hr /></div>',
1374            '<div><hr /></div>'
1375        )
1376
1377    def test_auto_links_dont_break_parser(self):
1378        self.assertMarkdownRenders(
1379            self.dedent(
1380                """
1381                <https://example.com>
1382
1383                <email@example.com>
1384                """
1385            ),
1386            '<p><a href="https://example.com">https://example.com</a></p>\n'
1387            '<p><a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#101;&#109;'
1388            '&#97;&#105;&#108;&#64;&#101;&#120;&#97;&#109;&#112;&#108;&#101;'
1389            '&#46;&#99;&#111;&#109;">&#101;&#109;&#97;&#105;&#108;&#64;&#101;'
1390            '&#120;&#97;&#109;&#112;&#108;&#101;&#46;&#99;&#111;&#109;</a></p>'
1391        )
1392
1393    def test_text_links_ignored(self):
1394        self.assertMarkdownRenders(
1395            self.dedent(
1396                """
1397                https://example.com
1398
1399                email@example.com
1400                """
1401            ),
1402            self.dedent(
1403                """
1404                <p>https://example.com</p>
1405                <p>email@example.com</p>
1406                """
1407            ),
1408        )
1409
1410    def text_invalid_tags(self):
1411        self.assertMarkdownRenders(
1412            self.dedent(
1413                """
1414                <some [weird](http://example.com) stuff>
1415
1416                <some>> <<unbalanced>> <<brackets>
1417                """
1418            ),
1419            self.dedent(
1420                """
1421                <p><some <a href="http://example.com">weird</a> stuff></p>
1422                <p><some>&gt; &lt;<unbalanced>&gt; &lt;<brackets></p>
1423                """
1424            )
1425        )
1426
1427    def test_script_tags(self):
1428        self.assertMarkdownRenders(
1429            self.dedent(
1430                """
1431                <script>
1432                *random stuff* <div> &amp;
1433                </script>
1434
1435                <style>
1436                **more stuff**
1437                </style>
1438                """
1439            ),
1440            self.dedent(
1441                """
1442                <script>
1443                *random stuff* <div> &amp;
1444                </script>
1445
1446                <style>
1447                **more stuff**
1448                </style>
1449                """
1450            )
1451        )
1452
1453    def test_unclosed_script_tag(self):
1454        # Ensure we have a working fix for https://bugs.python.org/issue41989
1455        self.assertMarkdownRenders(
1456            self.dedent(
1457                """
1458                <script>
1459                *random stuff* <div> &amp;
1460
1461                Still part of the *script* tag
1462                """
1463            ),
1464            self.dedent(
1465                """
1466                <script>
1467                *random stuff* <div> &amp;
1468
1469                Still part of the *script* tag
1470                """
1471            )
1472        )
1473
1474    def test_inline_script_tags(self):
1475        # Ensure inline script tags doesn't cause the parser to eat content (see #1036).
1476        self.assertMarkdownRenders(
1477            self.dedent(
1478                """
1479                Text `<script>` more *text*.
1480
1481                <div>
1482                *foo*
1483                </div>
1484
1485                <div>
1486
1487                bar
1488
1489                </div>
1490
1491                A new paragraph with a closing `</script>` tag.
1492                """
1493            ),
1494            self.dedent(
1495                """
1496                <p>Text <code>&lt;script&gt;</code> more <em>text</em>.</p>
1497                <div>
1498                *foo*
1499                </div>
1500
1501                <div>
1502
1503                bar
1504
1505                </div>
1506
1507                <p>A new paragraph with a closing <code>&lt;/script&gt;</code> tag.</p>
1508                """
1509            )
1510        )
1511
1512    def test_hr_only_start(self):
1513        self.assertMarkdownRenders(
1514            self.dedent(
1515                """
1516                *emphasis1*
1517                <hr>
1518                *emphasis2*
1519                """
1520            ),
1521            self.dedent(
1522                """
1523                <p><em>emphasis1</em></p>
1524                <hr>
1525                <p><em>emphasis2</em></p>
1526                """
1527            )
1528        )
1529
1530    def test_hr_self_close(self):
1531        self.assertMarkdownRenders(
1532            self.dedent(
1533                """
1534                *emphasis1*
1535                <hr/>
1536                *emphasis2*
1537                """
1538            ),
1539            self.dedent(
1540                """
1541                <p><em>emphasis1</em></p>
1542                <hr/>
1543                <p><em>emphasis2</em></p>
1544                """
1545            )
1546        )
1547
1548    def test_hr_start_and_end(self):
1549        # Browsers ignore ending hr tags, so we don't try to do anything to handle them special.
1550        self.assertMarkdownRenders(
1551            self.dedent(
1552                """
1553                *emphasis1*
1554                <hr></hr>
1555                *emphasis2*
1556                """
1557            ),
1558            self.dedent(
1559                """
1560                <p><em>emphasis1</em></p>
1561                <hr>
1562                <p></hr>
1563                <em>emphasis2</em></p>
1564                """
1565            )
1566        )
1567
1568    def test_hr_only_end(self):
1569        # Browsers ignore ending hr tags, so we don't try to do anything to handle them special.
1570        self.assertMarkdownRenders(
1571            self.dedent(
1572                """
1573                *emphasis1*
1574                </hr>
1575                *emphasis2*
1576                """
1577            ),
1578            self.dedent(
1579                """
1580                <p><em>emphasis1</em>
1581                </hr>
1582                <em>emphasis2</em></p>
1583                """
1584            )
1585        )
1586
1587    def test_hr_with_content(self):
1588        # Browsers ignore ending hr tags, so we don't try to do anything to handle them special.
1589        # Content is not allowed and will be treated as normal content between two hr tags.
1590        self.assertMarkdownRenders(
1591            self.dedent(
1592                """
1593                *emphasis1*
1594                <hr>
1595                **content**
1596                </hr>
1597                *emphasis2*
1598                """
1599            ),
1600            self.dedent(
1601                """
1602                <p><em>emphasis1</em></p>
1603                <hr>
1604                <p><strong>content</strong>
1605                </hr>
1606                <em>emphasis2</em></p>
1607                """
1608            )
1609        )
1610
1611    def test_placeholder_in_source(self):
1612        # This should never occur, but third party extensions could create weird edge cases.
1613        md = markdown.Markdown()
1614        # Ensure there is an htmlstash so relevant code (nested in `if replacements`) is run.
1615        md.htmlStash.store('foo')
1616        # Run with a placeholder which is not in the stash
1617        placeholder = md.htmlStash.get_placeholder(md.htmlStash.html_counter + 1)
1618        result = md.postprocessors['raw_html'].run(placeholder)
1619        self.assertEqual(placeholder, result)
1620