xref: /AOO41X/main/editeng/source/editeng/eehtml.cxx (revision 190118d08a3be86671f4129b3e9a490e144719cd)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_editeng.hxx"
26 
27 #include <vcl/wrkwin.hxx>
28 #include <vcl/dialog.hxx>
29 #include <vcl/msgbox.hxx>
30 #include <vcl/svapp.hxx>
31 #include <eehtml.hxx>
32 #include <impedit.hxx>
33 #include <editeng/adjitem.hxx>
34 #include <editeng/flditem.hxx>
35 #include <tools/urlobj.hxx>
36 #include <editeng/fhgtitem.hxx>
37 #include <editeng/fontitem.hxx>
38 #include <editeng/ulspitem.hxx>
39 #include <editeng/wghtitem.hxx>
40 #include <svtools/htmltokn.h>
41 #include <svtools/htmlkywd.hxx>
42 #include <tools/tenccvt.hxx>
43 
44 #define ACTION_INSERTTEXT         1
45 #define ACTION_INSERTPARABRK      2
46 
47 #define STYLE_PRE               101
48 
EditHTMLParser(SvStream & rIn,const String & rBaseURL,SvKeyValueIterator * pHTTPHeaderAttrs)49 EditHTMLParser::EditHTMLParser( SvStream& rIn, const String& rBaseURL, SvKeyValueIterator* pHTTPHeaderAttrs )
50     : HTMLParser( rIn, true )
51     , aBaseURL( rBaseURL )
52 {
53     pImpEditEngine = 0;
54     pCurAnchor = 0;
55     bInPara = sal_False;
56     bWasInPara = sal_False;
57     nInTable = 0;
58     nInCell = 0;
59     bInTitle = sal_False;
60     nDefListLevel = 0;
61     nBulletLevel = 0;
62     nNumberingLevel = 0;
63     bFieldsInserted = sal_False;
64 
65     DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW == GetSrcEncoding( ), "EditHTMLParser::EditHTMLParser: Where does the encoding come from?" );
66     DBG_ASSERT( !IsSwitchToUCS2(), "EditHTMLParser::::EditHTMLParser: Switch to UCS2?" );
67 
68     // Altough the real default encoding is ISO8859-1, we use MS-1252
69     // als default encoding.
70     SetSrcEncoding( GetExtendedCompatibilityTextEncoding(  RTL_TEXTENCODING_ISO_8859_1 ) );
71 
72     // If the file starts with a BOM, switch to UCS2.
73     SetSwitchToUCS2( sal_True );
74 
75     if ( pHTTPHeaderAttrs )
76         SetEncodingByHTTPHeader( pHTTPHeaderAttrs );
77 }
78 
~EditHTMLParser()79 EditHTMLParser::~EditHTMLParser()
80 {
81     delete pCurAnchor;
82 }
83 
CallParser(ImpEditEngine * pImpEE,const EditPaM & rPaM)84 SvParserState EditHTMLParser::CallParser( ImpEditEngine* pImpEE, const EditPaM& rPaM )
85 {
86     DBG_ASSERT( pImpEE, "CallParser: ImpEditEngine ?!" );
87     pImpEditEngine = pImpEE;
88     SvParserState _eState = SVPAR_NOTSTARTED;
89     if ( pImpEditEngine )
90     {
91         // Umbrechmimik vom RTF-Import einbauen?
92         aCurSel = EditSelection( rPaM, rPaM );
93 
94         if ( pImpEditEngine->aImportHdl.IsSet() )
95         {
96             ImportInfo aImportInfo( HTMLIMP_START, this, pImpEditEngine->CreateESel( aCurSel ) );
97             pImpEditEngine->aImportHdl.Call( &aImportInfo );
98         }
99 
100         ImpSetStyleSheet( 0 );
101         _eState = HTMLParser::CallParser();
102 
103         if ( pImpEditEngine->aImportHdl.IsSet() )
104         {
105             ImportInfo aImportInfo( HTMLIMP_END, this, pImpEditEngine->CreateESel( aCurSel ) );
106             pImpEditEngine->aImportHdl.Call( &aImportInfo );
107         }
108 
109         if ( bFieldsInserted )
110             pImpEditEngine->UpdateFields();
111     }
112     return _eState;
113 }
114 
NextToken(int nToken)115 void EditHTMLParser::NextToken( int nToken )
116 {
117     #ifdef DBG_UTIL
118         HTML_TOKEN_IDS xID = (HTML_TOKEN_IDS)nToken;
119         (void)xID;
120     #endif
121 
122     switch( nToken )
123     {
124     case HTML_META:
125     {
126         const HTMLOptions *_pOptions = GetOptions();
127         sal_uInt16 nArrLen = _pOptions->Count();
128         sal_Bool bEquiv = sal_False;
129         for ( sal_uInt16 i = 0; i < nArrLen; i++ )
130         {
131             const HTMLOption *pOption = (*_pOptions)[i];
132             switch( pOption->GetToken() )
133             {
134                 case HTML_O_HTTPEQUIV:
135                 {
136                     bEquiv = sal_True;
137                 }
138                 break;
139                 case HTML_O_CONTENT:
140                 {
141                     if ( bEquiv )
142                     {
143                         rtl_TextEncoding eEnc = GetEncodingByMIME( pOption->GetString() );
144                         if ( eEnc != RTL_TEXTENCODING_DONTKNOW )
145                             SetSrcEncoding( eEnc );
146                     }
147                 }
148                 break;
149             }
150         }
151 
152     }
153     break;
154     case HTML_PLAINTEXT_ON:
155     case HTML_PLAINTEXT2_ON:
156         bInPara = sal_True;
157     break;
158     case HTML_PLAINTEXT_OFF:
159     case HTML_PLAINTEXT2_OFF:
160         bInPara = sal_False;
161     break;
162 
163     case HTML_LINEBREAK:
164     case HTML_NEWPARA:
165     {
166         if ( ( bInPara || nInTable ) &&
167             ( ( nToken == HTML_LINEBREAK ) || HasTextInCurrentPara() ) )
168         {
169             ImpInsertParaBreak();
170         }
171     }
172     break;
173     case HTML_HORZRULE:
174     {
175         if ( HasTextInCurrentPara() )
176             ImpInsertParaBreak();
177         ImpInsertParaBreak();
178     }
179     case HTML_NONBREAKSPACE:
180     {
181         if ( bInPara )
182         {
183             ImpInsertText( String( RTL_CONSTASCII_USTRINGPARAM( " " ) ) );
184         }
185     }
186     break;
187     case HTML_TEXTTOKEN:
188     {
189         // #i110937# for <title> content, call aImportHdl (no SkipGroup), but don't insert the text into the EditEngine
190         if (!bInTitle)
191         {
192             if ( !bInPara )
193                 StartPara( sal_False );
194 
195             // if ( bInPara || pCurAnchor )
196 
197             String aText = aToken;
198             if ( aText.Len() && ( aText.GetChar( 0 ) == ' ' )
199                     && ThrowAwayBlank() && !IsReadPRE() )
200                 aText.Erase( 0, 1 );
201 
202             if ( pCurAnchor )
203             {
204                 pCurAnchor->aText += aText;
205             }
206             else
207             {
208                 // Nur bis HTML mit 319 geschrieben ?!
209                 if ( IsReadPRE() )
210                 {
211                     sal_uInt16 nTabPos = aText.Search( '\t', 0 );
212                     while ( nTabPos != STRING_NOTFOUND )
213                     {
214                         aText.Erase( nTabPos, 1 );
215                         aText.Insert( String( RTL_CONSTASCII_USTRINGPARAM( "        " ) ), nTabPos );
216                         nTabPos = aText.Search( '\t', nTabPos+8 );
217                     }
218                 }
219                 ImpInsertText( aText );
220             }
221         }
222     }
223     break;
224 
225     case HTML_CENTER_ON:
226     case HTML_CENTER_OFF:   // if ( bInPara )
227                             {
228                                 sal_uInt16 nNode = pImpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
229                                 SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
230                                 aItems.ClearItem( EE_PARA_JUST );
231                                 if ( nToken == HTML_CENTER_ON )
232                                     aItems.Put( SvxAdjustItem( SVX_ADJUST_CENTER, EE_PARA_JUST ) );
233                                 pImpEditEngine->SetParaAttribs( nNode, aItems );
234                             }
235                             break;
236 
237     case HTML_ANCHOR_ON:    AnchorStart();
238                             break;
239     case HTML_ANCHOR_OFF:   AnchorEnd();
240                             break;
241 
242     case HTML_PARABREAK_ON:
243         if( bInPara && HasTextInCurrentPara() )
244             EndPara( sal_True );
245         StartPara( sal_True );
246         break;
247 
248     case HTML_PARABREAK_OFF:
249         if( bInPara )
250             EndPara( sal_True );
251         break;
252 
253     case HTML_HEAD1_ON:
254     case HTML_HEAD2_ON:
255     case HTML_HEAD3_ON:
256     case HTML_HEAD4_ON:
257     case HTML_HEAD5_ON:
258     case HTML_HEAD6_ON:
259     {
260         HeadingStart( nToken );
261     }
262     break;
263 
264     case HTML_HEAD1_OFF:
265     case HTML_HEAD2_OFF:
266     case HTML_HEAD3_OFF:
267     case HTML_HEAD4_OFF:
268     case HTML_HEAD5_OFF:
269     case HTML_HEAD6_OFF:
270     {
271         HeadingEnd( nToken );
272     }
273     break;
274 
275     case HTML_PREFORMTXT_ON:
276     case HTML_XMP_ON:
277     case HTML_LISTING_ON:
278     {
279         StartPara( sal_True );
280         ImpSetStyleSheet( STYLE_PRE );
281     }
282     break;
283 
284     case HTML_DEFLIST_ON:
285     {
286         nDefListLevel++;
287     }
288     break;
289 
290     case HTML_DEFLIST_OFF:
291     {
292         if( nDefListLevel )
293             nDefListLevel--;
294     }
295     break;
296 
297     case HTML_TABLE_ON:     nInTable++;
298                             break;
299     case HTML_TABLE_OFF:    DBG_ASSERT( nInTable, "Nicht in Table, aber TABLE_OFF?" );
300                             nInTable--;
301                             break;
302 
303     case HTML_TABLEHEADER_ON:
304     case HTML_TABLEDATA_ON:
305         nInCell++;
306     // fallthru
307     case HTML_BLOCKQUOTE_ON:
308     case HTML_BLOCKQUOTE_OFF:
309     case HTML_BLOCKQUOTE30_ON:
310     case HTML_BLOCKQUOTE30_OFF:
311     case HTML_LISTHEADER_ON:
312     case HTML_LI_ON:
313     case HTML_DD_ON:
314     case HTML_DT_ON:
315     case HTML_ORDERLIST_ON:
316     case HTML_UNORDERLIST_ON:
317     {
318         sal_Bool bHasText = HasTextInCurrentPara();
319         if ( bHasText )
320             ImpInsertParaBreak();
321         StartPara( sal_False );
322     }
323     break;
324 
325     case HTML_TABLEHEADER_OFF:
326     case HTML_TABLEDATA_OFF:
327     {
328         if ( nInCell )
329             nInCell--;
330     }
331     // fallthru
332     case HTML_LISTHEADER_OFF:
333     case HTML_LI_OFF:
334     case HTML_DD_OFF:
335     case HTML_DT_OFF:
336     case HTML_ORDERLIST_OFF:
337     case HTML_UNORDERLIST_OFF:  EndPara( sal_False );
338                                 break;
339 
340     case HTML_TABLEROW_ON:
341     case HTML_TABLEROW_OFF: // Nur nach einem CELL ein RETURN, fuer Calc
342 
343     case HTML_COL_ON:
344     case HTML_COLGROUP_ON:
345     case HTML_COLGROUP_OFF: break;
346 
347     case HTML_FONT_ON:      // ...
348                             break;
349     case HTML_FONT_OFF:     // ...
350                             break;
351 
352 
353     // #58335# kein SkipGroup on/off auf inline markup etc.
354 
355     case HTML_TITLE_ON:
356         bInTitle = sal_True;
357         break;
358     case HTML_TITLE_OFF:
359         bInTitle = sal_False;
360         break;
361 
362     // globals
363     case HTML_HTML_ON:
364     case HTML_HTML_OFF:
365     case HTML_BODY_ON:
366     case HTML_BODY_OFF:
367     case HTML_HEAD_ON:
368     case HTML_HEAD_OFF:
369     case HTML_FORM_ON:
370     case HTML_FORM_OFF:
371     case HTML_THEAD_ON:
372     case HTML_THEAD_OFF:
373     case HTML_TBODY_ON:
374     case HTML_TBODY_OFF:
375     // inline elements, structural markup
376     // HTML 3.0
377     case HTML_BANNER_ON:
378     case HTML_BANNER_OFF:
379     case HTML_DIVISION_ON:
380     case HTML_DIVISION_OFF:
381 //  case HTML_LISTHEADER_ON:        //! special handling
382 //  case HTML_LISTHEADER_OFF:
383     case HTML_NOTE_ON:
384     case HTML_NOTE_OFF:
385     // inline elements, logical markup
386     // HTML 2.0
387     case HTML_ADDRESS_ON:
388     case HTML_ADDRESS_OFF:
389 //  case HTML_BLOCKQUOTE_ON:        //! extra Behandlung
390 //  case HTML_BLOCKQUOTE_OFF:
391     case HTML_CITIATION_ON:
392     case HTML_CITIATION_OFF:
393     case HTML_CODE_ON:
394     case HTML_CODE_OFF:
395     case HTML_DEFINSTANCE_ON:
396     case HTML_DEFINSTANCE_OFF:
397     case HTML_EMPHASIS_ON:
398     case HTML_EMPHASIS_OFF:
399     case HTML_KEYBOARD_ON:
400     case HTML_KEYBOARD_OFF:
401     case HTML_SAMPLE_ON:
402     case HTML_SAMPLE_OFF:
403     case HTML_STRIKE_ON:
404     case HTML_STRIKE_OFF:
405     case HTML_STRONG_ON:
406     case HTML_STRONG_OFF:
407     case HTML_VARIABLE_ON:
408     case HTML_VARIABLE_OFF:
409     // HTML 3.0
410     case HTML_ABBREVIATION_ON:
411     case HTML_ABBREVIATION_OFF:
412     case HTML_ACRONYM_ON:
413     case HTML_ACRONYM_OFF:
414     case HTML_AUTHOR_ON:
415     case HTML_AUTHOR_OFF:
416 //  case HTML_BLOCKQUOTE30_ON:      //! extra Behandlung
417 //  case HTML_BLOCKQUOTE30_OFF:
418     case HTML_DELETEDTEXT_ON:
419     case HTML_DELETEDTEXT_OFF:
420     case HTML_INSERTEDTEXT_ON:
421     case HTML_INSERTEDTEXT_OFF:
422     case HTML_LANGUAGE_ON:
423     case HTML_LANGUAGE_OFF:
424     case HTML_PERSON_ON:
425     case HTML_PERSON_OFF:
426     case HTML_SHORTQUOTE_ON:
427     case HTML_SHORTQUOTE_OFF:
428     case HTML_SUBSCRIPT_ON:
429     case HTML_SUBSCRIPT_OFF:
430     case HTML_SUPERSCRIPT_ON:
431     case HTML_SUPERSCRIPT_OFF:
432     // inline elements, visual markup
433     // HTML 2.0
434     case HTML_BOLD_ON:
435     case HTML_BOLD_OFF:
436     case HTML_ITALIC_ON:
437     case HTML_ITALIC_OFF:
438     case HTML_TELETYPE_ON:
439     case HTML_TELETYPE_OFF:
440     case HTML_UNDERLINE_ON:
441     case HTML_UNDERLINE_OFF:
442     // HTML 3.0
443     case HTML_BIGPRINT_ON:
444     case HTML_BIGPRINT_OFF:
445     case HTML_STRIKETHROUGH_ON:
446     case HTML_STRIKETHROUGH_OFF:
447     case HTML_SMALLPRINT_ON:
448     case HTML_SMALLPRINT_OFF:
449     // figures
450     case HTML_FIGURE_ON:
451     case HTML_FIGURE_OFF:
452     case HTML_CAPTION_ON:
453     case HTML_CAPTION_OFF:
454     case HTML_CREDIT_ON:
455     case HTML_CREDIT_OFF:
456     // misc
457     case HTML_DIRLIST_ON:
458     case HTML_DIRLIST_OFF:
459     case HTML_FOOTNOTE_ON:          //! landen so im Text
460     case HTML_FOOTNOTE_OFF:
461     case HTML_MENULIST_ON:
462     case HTML_MENULIST_OFF:
463 //  case HTML_PLAINTEXT_ON:         //! extra Behandlung
464 //  case HTML_PLAINTEXT_OFF:
465 //  case HTML_PREFORMTXT_ON:        //! extra Behandlung
466 //  case HTML_PREFORMTXT_OFF:
467     case HTML_SPAN_ON:
468     case HTML_SPAN_OFF:
469     // obsolete
470 //  case HTML_XMP_ON:               //! extra Behandlung
471 //  case HTML_XMP_OFF:
472 //  case HTML_LISTING_ON:           //! extra Behandlung
473 //  case HTML_LISTING_OFF:
474     // Netscape
475     case HTML_BLINK_ON:
476     case HTML_BLINK_OFF:
477     case HTML_NOBR_ON:
478     case HTML_NOBR_OFF:
479     case HTML_NOEMBED_ON:
480     case HTML_NOEMBED_OFF:
481     case HTML_NOFRAMES_ON:
482     case HTML_NOFRAMES_OFF:
483     // Internet Explorer
484     case HTML_MARQUEE_ON:
485     case HTML_MARQUEE_OFF:
486 //  case HTML_PLAINTEXT2_ON:        //! extra Behandlung
487 //  case HTML_PLAINTEXT2_OFF:
488     break;
489 
490     default:
491     {
492         if ( nToken & HTML_TOKEN_ONOFF )
493         {
494             if ( ( nToken == HTML_UNKNOWNCONTROL_ON ) || ( nToken == HTML_UNKNOWNCONTROL_OFF ) )
495             {
496                 ;
497             }
498             else if ( !(nToken & 1) )
499             {
500                 DBG_ASSERT( !( nToken & 1 ), "Kein Start-Token ?!" );
501                 SkipGroup( nToken + 1 );
502             }
503         }
504     }
505     }   // SWITCH
506 
507     if ( pImpEditEngine->aImportHdl.IsSet() )
508     {
509         ImportInfo aImportInfo( HTMLIMP_NEXTTOKEN, this, pImpEditEngine->CreateESel( aCurSel ) );
510         aImportInfo.nToken = nToken;
511         aImportInfo.nTokenValue = (short)nTokenValue;
512         if ( nToken == HTML_TEXTTOKEN )
513             aImportInfo.aText = aToken;
514         pImpEditEngine->aImportHdl.Call( &aImportInfo );
515     }
516 
517 }
518 
ImpInsertParaBreak()519 void EditHTMLParser::ImpInsertParaBreak()
520 {
521     if ( pImpEditEngine->aImportHdl.IsSet() )
522     {
523         ImportInfo aImportInfo( HTMLIMP_INSERTPARA, this, pImpEditEngine->CreateESel( aCurSel ) );
524         pImpEditEngine->aImportHdl.Call( &aImportInfo );
525     }
526     aCurSel = pImpEditEngine->ImpInsertParaBreak( aCurSel );
527     nLastAction = ACTION_INSERTPARABRK;
528 }
529 
ImpSetAttribs(const SfxItemSet & rItems,EditSelection * pSel)530 void EditHTMLParser::ImpSetAttribs( const SfxItemSet& rItems, EditSelection* pSel )
531 {
532     // pSel, wenn Zeichenattribute, sonst Absatzattribute fuer den
533     // aktuellen Absatz.
534     DBG_ASSERT( pSel || ( aCurSel.Min().GetNode() == aCurSel.Max().GetNode() ), "ImpInsertAttribs: Selektion?" );
535 
536     EditPaM aStartPaM( pSel ? pSel->Min() : aCurSel.Min() );
537     EditPaM aEndPaM( pSel ? pSel->Max() : aCurSel.Max() );
538 
539     if ( !pSel )
540     {
541         aStartPaM.SetIndex( 0 );
542         aEndPaM.SetIndex( aEndPaM.GetNode()->Len() );
543     }
544 
545     if ( pImpEditEngine->aImportHdl.IsSet() )
546     {
547         EditSelection aSel( aStartPaM, aEndPaM );
548         ImportInfo aImportInfo( HTMLIMP_SETATTR, this, pImpEditEngine->CreateESel( aSel ) );
549         aImportInfo.pAttrs = (void*)&rItems;
550         pImpEditEngine->aImportHdl.Call( &aImportInfo );
551     }
552 
553     ContentNode* pSN = aStartPaM.GetNode();
554     sal_uInt16 nStartNode = pImpEditEngine->GetEditDoc().GetPos( pSN );
555 
556     // Wenn ein Attribut von 0 bis aktuelle Absatzlaenge geht,
557     // soll es ein Absatz-Attribut sein!
558 
559     // Achtung: Selektion kann ueber mehrere Absaetze gehen.
560     // Alle vollstaendigen Absaetze sind Absatzattribute...
561 
562     // HTML eigentlich nicht:
563 #ifdef DBG_UTIL
564     ContentNode* pEN = aEndPaM.GetNode();
565     sal_uInt16 nEndNode = pImpEditEngine->GetEditDoc().GetPos( pEN );
566     DBG_ASSERT( nStartNode == nEndNode, "ImpSetAttribs: Mehrere Absaetze?" );
567 #endif
568 
569 /*
570     for ( sal_uInt16 z = nStartNode+1; z < nEndNode; z++ )
571     {
572         DBG_ASSERT( pImpEditEngine->GetEditDoc().SaveGetObject( z ), "Node existiert noch nicht(RTF)" );
573         pImpEditEngine->SetParaAttribs( z, rSet.GetAttrSet() );
574     }
575 
576     if ( aStartPaM.GetNode() != aEndPaM.GetNode() )
577     {
578         // Den Rest des StartNodes...
579         if ( aStartPaM.GetIndex() == 0 )
580             pImpEditEngine->SetParaAttribs( nStartNode, rSet.GetAttrSet() );
581         else
582             pImpEditEngine->SetAttribs( EditSelection( aStartPaM, EditPaM( aStartPaM.GetNode(), aStartPaM.GetNode()->Len() ) ), rSet.GetAttrSet() );
583 
584         // Den Anfang des EndNodes....
585         if ( aEndPaM.GetIndex() == aEndPaM.GetNode()->Len() )
586             pImpEditEngine->SetParaAttribs( nEndNode, rSet.GetAttrSet() );
587         else
588             pImpEditEngine->SetAttribs( EditSelection( EditPaM( aEndPaM.GetNode(), 0 ), aEndPaM ), rSet.GetAttrSet() );
589     }
590     else
591 */
592     {
593         if ( ( aStartPaM.GetIndex() == 0 ) && ( aEndPaM.GetIndex() == aEndPaM.GetNode()->Len() ) )
594         {
595             // Muesse gemergt werden:
596             SfxItemSet aItems( pImpEditEngine->GetParaAttribs( nStartNode ) );
597             aItems.Put( rItems );
598             pImpEditEngine->SetParaAttribs( nStartNode, aItems );
599         }
600         else
601             pImpEditEngine->SetAttribs( EditSelection( aStartPaM, aEndPaM ), rItems );
602     }
603 }
604 
ImpSetStyleSheet(sal_uInt16 nHLevel)605 void EditHTMLParser::ImpSetStyleSheet( sal_uInt16 nHLevel )
606 {
607     /*
608         nHLevel:    0:          Ausschalten
609                     1-6:        Heading
610                     STYLE_PRE:  Preformatted
611     */
612 
613 //      if ( pImpEditEngine->GetStatus().DoImportRTFStyleSheets() )
614 //      {
615 //          SvxRTFStyleType* pS = GetStyleTbl().Get( rSet.StyleNo() );
616 //          DBG_ASSERT( pS, "Vorlage in RTF nicht definiert!" );
617 //          if ( pS )
618 //              pImpEditEngine->SetStyleSheet( EditSelection( aStartPaM, aEndPaM ), pS->sName, SFX_STYLE_FAMILY_ALL );
619 //      }
620 //      else
621         {
622             // Harte Attribute erzeugen...
623             // Reicht fuer Calc, bei StyleSheets muesste noch geklaert werden,
624             // dass diese auch in der App liegen sollten, damit sie beim
625             // fuettern in eine andere Engine auch noch da sind...
626 
627             sal_uInt16 nNode = pImpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
628 //          SfxItemSet aItems( pImpEditEngine->GetEmptyItemSet() );
629             SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
630 
631             aItems.ClearItem( EE_PARA_ULSPACE );
632             aItems.ClearItem( EE_CHAR_FONTHEIGHT );
633             aItems.ClearItem( EE_CHAR_FONTINFO );
634             aItems.ClearItem( EE_CHAR_WEIGHT );
635 
636             // Fett in den ersten 3 Headings
637             if ( ( nHLevel >= 1 ) && ( nHLevel <= 3 ) )
638             {
639                 SvxWeightItem aWeightItem( WEIGHT_BOLD, EE_CHAR_WEIGHT );
640                 aItems.Put( aWeightItem );
641             }
642 
643             // Fonthoehe und Abstaende, wenn LogicToLogic moeglich:
644             MapUnit eUnit = pImpEditEngine->GetRefMapMode().GetMapUnit();
645             if ( ( eUnit != MAP_PIXEL ) && ( eUnit != MAP_SYSFONT ) &&
646                  ( eUnit != MAP_APPFONT ) && ( eUnit != MAP_RELATIVE ) )
647             {
648                 long nPoints = 10;
649                 if ( nHLevel == 1 )
650                     nPoints = 22;
651                 else if ( nHLevel == 2 )
652                     nPoints = 16;
653                 else if ( nHLevel == 3 )
654                     nPoints = 12;
655                 else if ( nHLevel == 4 )
656                     nPoints = 11;
657 
658                 nPoints = OutputDevice::LogicToLogic( nPoints, MAP_POINT, eUnit );
659                 SvxFontHeightItem aHeightItem( nPoints, 100, EE_CHAR_FONTHEIGHT );
660                 aItems.Put( aHeightItem );
661 
662                 // Absatzabstaende, wenn Heading:
663                 if ( !nHLevel || ((nHLevel >= 1) && (nHLevel <= 6)) )
664                 {
665                     SvxULSpaceItem aULSpaceItem( EE_PARA_ULSPACE );
666                     aULSpaceItem.SetUpper( (sal_uInt16)OutputDevice::LogicToLogic( 42, MAP_10TH_MM, eUnit ) );
667                     aULSpaceItem.SetLower( (sal_uInt16)OutputDevice::LogicToLogic( 35, MAP_10TH_MM, eUnit ) );
668                     aItems.Put( aULSpaceItem );
669                 }
670             }
671 
672             // Bei Pre einen proportionalen Font waehlen
673             if ( nHLevel == STYLE_PRE )
674             {
675                 Font aFont = OutputDevice::GetDefaultFont( DEFAULTFONT_FIXED, LANGUAGE_SYSTEM, 0 );
676                 SvxFontItem aFontItem( aFont.GetFamily(), aFont.GetName(), XubString(), aFont.GetPitch(), aFont.GetCharSet(), EE_CHAR_FONTINFO );
677                 aItems.Put( aFontItem );
678             }
679 
680             pImpEditEngine->SetParaAttribs( nNode, aItems );
681         }
682 }
683 
ImpInsertText(const String & rText)684 void EditHTMLParser::ImpInsertText( const String& rText )
685 {
686     String aText( rText );
687     if ( pImpEditEngine->aImportHdl.IsSet() )
688     {
689         ImportInfo aImportInfo( HTMLIMP_INSERTTEXT, this, pImpEditEngine->CreateESel( aCurSel ) );
690         aImportInfo.aText = aText;
691         pImpEditEngine->aImportHdl.Call( &aImportInfo );
692     }
693 
694     aCurSel = pImpEditEngine->ImpInsertText( aCurSel, aText );
695     nLastAction = ACTION_INSERTTEXT;
696 }
697 
SkipGroup(int nEndToken)698 void EditHTMLParser::SkipGroup( int nEndToken )
699 {
700     // #69109# groups in cells are closed upon leaving the cell, because those
701     // ******* web authors don't know their job
702     // for example: <td><form></td>   lacks a closing </form>
703     sal_uInt8 nCellLevel = nInCell;
704     int nToken;
705     while( nCellLevel <= nInCell && ( (nToken = GetNextToken() ) != nEndToken ) && nToken )
706     {
707         switch ( nToken )
708         {
709             case HTML_TABLEHEADER_ON:
710             case HTML_TABLEDATA_ON:
711                 nInCell++;
712             break;
713             case HTML_TABLEHEADER_OFF:
714             case HTML_TABLEDATA_OFF:
715                 if ( nInCell )
716                     nInCell--;
717             break;
718         }
719     }
720 }
721 
StartPara(sal_Bool bReal)722 void EditHTMLParser::StartPara( sal_Bool bReal )
723 {
724     if ( bReal )
725     {
726         const HTMLOptions *_pOptions = GetOptions();
727         sal_uInt16 nArrLen = _pOptions->Count();
728         SvxAdjust eAdjust = SVX_ADJUST_LEFT;
729         for ( sal_uInt16 i = 0; i < nArrLen; i++ )
730         {
731             const HTMLOption *pOption = (*_pOptions)[i];
732             switch( pOption->GetToken() )
733             {
734                 case HTML_O_ALIGN:
735                 {
736                     if ( pOption->GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_right ) == COMPARE_EQUAL )
737                         eAdjust = SVX_ADJUST_RIGHT;
738                     else if ( pOption->GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_middle ) == COMPARE_EQUAL )
739                         eAdjust = SVX_ADJUST_CENTER;
740                     else if ( pOption->GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_center ) == COMPARE_EQUAL )
741                         eAdjust = SVX_ADJUST_CENTER;
742                     else
743                         eAdjust = SVX_ADJUST_LEFT;
744                 }
745                 break;
746             }
747         }
748         SfxItemSet aItemSet( pImpEditEngine->GetEmptyItemSet() );
749         aItemSet.Put( SvxAdjustItem( eAdjust, EE_PARA_JUST ) );
750         ImpSetAttribs( aItemSet );
751     }
752     bInPara = sal_True;
753 }
754 
EndPara(sal_Bool)755 void EditHTMLParser::EndPara( sal_Bool )
756 {
757     if ( bInPara )
758     {
759         sal_Bool bHasText = HasTextInCurrentPara();
760         if ( bHasText )
761             ImpInsertParaBreak();
762         // Nur, wenn ohne Absatzabstaende gearbeitet wird...
763 //      if ( !nInTable && bReal && (nNumberingLevel<=1) && (nBulletLevel<=1) )
764 //          ImpInsertParaBreak();
765     }
766     bInPara = sal_False;
767 }
768 
ThrowAwayBlank()769 sal_Bool EditHTMLParser::ThrowAwayBlank()
770 {
771     // Ein Blank muss weggeschmissen werden, wenn der neue Text mit einem
772     // Blank beginnt und der aktuelle Absatz leer ist oder mit einem
773     // Blank endet...
774     ContentNode* pNode = aCurSel.Max().GetNode();
775     if ( pNode->Len() && ( pNode->GetChar( pNode->Len()-1 ) != ' ' ) )
776         return sal_False;
777     return sal_True;
778 }
779 
HasTextInCurrentPara()780 sal_Bool EditHTMLParser::HasTextInCurrentPara()
781 {
782     return aCurSel.Max().GetNode()->Len() ? sal_True : sal_False;
783 }
784 
AnchorStart()785 void EditHTMLParser::AnchorStart()
786 {
787     // Anker im Anker ignoriern
788     if ( !pCurAnchor )
789     {
790         const HTMLOptions* _pOptions = GetOptions();
791         sal_uInt16 nArrLen = _pOptions->Count();
792 
793         String aRef;
794 
795         for ( sal_uInt16 i = 0; i < nArrLen; i++ )
796         {
797             const HTMLOption* pOption = (*_pOptions)[i];
798             switch( pOption->GetToken() )
799             {
800                 case HTML_O_HREF:
801                     aRef = pOption->GetString();
802                 break;
803             }
804         }
805 
806         if ( aRef.Len() )
807         {
808             String aURL = aRef;
809             if ( aURL.Len() && ( aURL.GetChar( 0 ) != '#' ) )
810             {
811                 INetURLObject aTargetURL;
812                 INetURLObject aRootURL( aBaseURL );
813                 aRootURL.GetNewAbsURL( aRef, &aTargetURL );
814                 aURL = aTargetURL.GetMainURL( INetURLObject::DECODE_TO_IURI );
815             }
816             pCurAnchor = new AnchorInfo;
817             pCurAnchor->aHRef = aURL;
818         }
819     }
820 }
821 
AnchorEnd()822 void EditHTMLParser::AnchorEnd()
823 {
824     if ( pCurAnchor )
825     {
826         // Als URL-Feld einfuegen...
827         SvxFieldItem aFld( SvxURLField( pCurAnchor->aHRef, pCurAnchor->aText, SVXURLFORMAT_REPR ), EE_FEATURE_FIELD  );
828         aCurSel = pImpEditEngine->InsertField( aCurSel, aFld );
829         bFieldsInserted = sal_True;
830         delete pCurAnchor;
831         pCurAnchor = 0;
832 
833         if ( pImpEditEngine->aImportHdl.IsSet() )
834         {
835             ImportInfo aImportInfo( HTMLIMP_INSERTFIELD, this, pImpEditEngine->CreateESel( aCurSel ) );
836             pImpEditEngine->aImportHdl.Call( &aImportInfo );
837         }
838     }
839 }
840 
HeadingStart(int nToken)841 void EditHTMLParser::HeadingStart( int nToken )
842 {
843     bWasInPara = bInPara;
844     StartPara( sal_False );
845 
846     if ( bWasInPara && HasTextInCurrentPara() )
847         ImpInsertParaBreak();
848 
849     sal_uInt16 nId = sal::static_int_cast< sal_uInt16 >(
850         1 + ( ( nToken - HTML_HEAD1_ON ) / 2 ) );
851     DBG_ASSERT( (nId >= 1) && (nId <= 9), "HeadingStart: ID kann nicht stimmen!" );
852     ImpSetStyleSheet( nId );
853 }
854 
HeadingEnd(int)855 void EditHTMLParser::HeadingEnd( int )
856 {
857     EndPara( sal_False );
858     ImpSetStyleSheet( 0 );
859 
860     if ( bWasInPara )
861     {
862         bInPara = sal_True;
863         bWasInPara = sal_False;
864     }
865 }
866