/**
 * Copyright (c) 2013-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 *
 * @providesModule convertFromHTMLToContentBlocks
 * @format
 *
 */

'use strict';

// The require must run before `_extends` is initialised; otherwise the hoisted
// `_assign` binding is still undefined and the dependency is never used.
var _assign = require('object-assign');

var _extends = _assign || function (target) {
  for (var i = 1; i < arguments.length; i++) {
    var source = arguments[i];
    for (var key in source) {
      if (Object.prototype.hasOwnProperty.call(source, key)) {
        target[key] = source[key];
      }
    }
  }
  return target;
};

var CharacterMetadata = require('./CharacterMetadata');
var ContentBlock = require('./ContentBlock');
var ContentBlockNode = require('./ContentBlockNode');
var DefaultDraftBlockRenderMap = require('./DefaultDraftBlockRenderMap');
var DraftEntity = require('./DraftEntity');
var DraftFeatureFlags = require('./DraftFeatureFlags');
var Immutable = require('immutable');

var _require = require('immutable'),
    Set = _require.Set;

var URI = require('fbjs/lib/URI');
var cx = require('fbjs/lib/cx');
var generateRandomKey = require('./generateRandomKey');
var getSafeBodyFromHTML = require('./getSafeBodyFromHTML');
var invariant = require('fbjs/lib/invariant');
var sanitizeDraftText = require('./sanitizeDraftText');

var experimentalTreeDataSupport = DraftFeatureFlags.draft_tree_data_support;

var List = Immutable.List,
    OrderedSet = Immutable.OrderedSet;

// These patterns are applied to the raw HTML *string* before it is handed to
// the DOM builder, so they are written in textual entity form.
// NOTE(review): the entity literals below were reconstructed from
// entity-decoded residue (`new RegExp('?', 'g')` is not even a valid pattern);
// confirm them against the canonical source.
var NBSP = '&nbsp;';
var SPACE = ' ';

// Arbitrary max indent
var MAX_DEPTH = 4;

// used for replacing characters in HTML
var REGEX_CR = new RegExp('\r', 'g');
var REGEX_LF = new RegExp('\n', 'g');
var REGEX_NBSP = new RegExp(NBSP, 'g');
var REGEX_CARRIAGE = new RegExp('&#13;?', 'g');
var REGEX_ZWS = new RegExp('&#8203;?', 'g');

// https://developer.mozilla.org/en-US/docs/Web/CSS/font-weight
var boldValues = ['bold', 'bolder', '500', '600', '700', '800', '900'];
var notBoldValues = ['light', 'lighter', '100', '200', '300', '400'];

// Block tag flow is different because LIs do not have
// a deterministic style ;_;
var inlineTags = {
  b: 'BOLD',
  code: 'CODE',
  del: 'STRIKETHROUGH',
  em: 'ITALIC',
  i: 'ITALIC',
  s: 'STRIKETHROUGH',
  strike: 'STRIKETHROUGH',
  strong: 'BOLD',
  u: 'UNDERLINE'
};

// Maps the list-item depth class names to their numeric depth.
var knownListItemDepthClasses = {};
knownListItemDepthClasses[cx('public/DraftStyleDefault/depth0')] = 0;
knownListItemDepthClasses[cx('public/DraftStyleDefault/depth1')] = 1;
knownListItemDepthClasses[cx('public/DraftStyleDefault/depth2')] = 2;
knownListItemDepthClasses[cx('public/DraftStyleDefault/depth3')] = 3;
knownListItemDepthClasses[cx('public/DraftStyleDefault/depth4')] = 4;

// Attributes copied from <a> / <img> elements into the entity data.
var anchorAttr = ['className', 'href', 'rel', 'target', 'title'];
var imgAttr = ['alt', 'className', 'height', 'src', 'width'];

// Module-level state: lower-cased name of the node most recently visited by
// genFragment. Reset by getChunkForHTML at the start of every conversion.
var lastBlock = void 0;

// Shape templates. Do NOT spread EMPTY_CHUNK directly when the result may be
// mutated: a shallow copy shares these arrays. Use getEmptyChunk() instead.
var EMPTY_CHUNK = {
  text: '',
  inlines: [],
  entities: [],
  blocks: []
};

var EMPTY_BLOCK = {
  children: List(),
  depth: 0,
  key: '',
  type: ''
};

/**
 * Build an empty chunk that owns its arrays, optionally overriding fields.
 *
 * A plain shallow copy of EMPTY_CHUNK shares `inlines`, `entities` and
 * `blocks` with the template, so an in-place `push`/`pop` on the copy would
 * leak into every later conversion.
 */
var getEmptyChunk = function getEmptyChunk(overrides) {
  return _extends({
    text: '',
    inlines: [],
    entities: [],
    blocks: []
  }, overrides);
};

/**
 * Resolve the list-item block type for an `li` tag based on the enclosing
 * list tag. Returns null for any other tag.
 */
var getListBlockType = function getListBlockType(tag, lastList) {
  if (tag === 'li') {
    return lastList === 'ol' ? 'ordered-list-item' : 'unordered-list-item';
  }
  return null;
};

/**
 * Collect every element / aliased element name declared in the block render
 * map, excluding falsy names and the element used for 'unstyled'. Returns a
 * sorted array of tag names.
 */
var getBlockMapSupportedTags = function getBlockMapSupportedTags(blockRenderMap) {
  var unstyledElement = blockRenderMap.get('unstyled').element;
  var tags = Set([]);

  blockRenderMap.forEach(function (draftBlock) {
    if (draftBlock.aliasedElements) {
      draftBlock.aliasedElements.forEach(function (tag) {
        tags = tags.add(tag);
      });
    }
    tags = tags.add(draftBlock.element);
  });

  return tags.filter(function (tag) {
    return tag && tag !== unstyledElement;
  }).toArray().sort();
};

/**
 * custom element conversions
 *
 * Run each extractor in order and return the first truthy block type, or null
 * when none of them matches.
 */
var getMultiMatchedType = function getMultiMatchedType(tag, lastList, multiMatchExtractor) {
  for (var ii = 0; ii < multiMatchExtractor.length; ii++) {
    var matchType = multiMatchExtractor[ii](tag, lastList);
    if (matchType) {
      return matchType;
    }
  }
  return null;
};

/**
 * Map a tag name to a block type using the block render map. A block type
 * matches when the tag equals its element, its wrapper, or one of its aliased
 * elements.
 */
var getBlockTypeForTag = function getBlockTypeForTag(tag, lastList, blockRenderMap) {
  var matchedTypes = blockRenderMap.filter(function (draftBlock) {
    return draftBlock.element === tag || draftBlock.wrapper === tag || draftBlock.aliasedElements && draftBlock.aliasedElements.some(function (alias) {
      return alias === tag;
    });
  }).keySeq().toSet().toArray().sort();

  // if we dont have any matched type, return unstyled
  // if we have one matched type return it
  // if we have multi matched types use the multi-match function to gather type
  switch (matchedTypes.length) {
    case 0:
      return 'unstyled';
    case 1:
      return matchedTypes[0];
    default:
      return getMultiMatchedType(tag, lastList, [getListBlockType]) || 'unstyled';
  }
};

/**
 * Compute the inline style set that applies inside `node`.
 *
 * Known inline tags add their style. For any other HTMLElement, the element's
 * inline `fontWeight`, `fontStyle` and `textDecoration` values add or remove
 * BOLD / ITALIC / UNDERLINE / STRIKETHROUGH. Returns the updated style set.
 */
var processInlineTag = function processInlineTag(tag, node, currentStyle) {
  var styleToCheck = inlineTags[tag];

  if (styleToCheck) {
    currentStyle = currentStyle.add(styleToCheck).toOrderedSet();
  } else if (node instanceof HTMLElement) {
    var htmlElement = node;
    currentStyle = currentStyle.withMutations(function (style) {
      var fontWeight = htmlElement.style.fontWeight;
      var fontStyle = htmlElement.style.fontStyle;
      var textDecoration = htmlElement.style.textDecoration;

      if (boldValues.indexOf(fontWeight) >= 0) {
        style.add('BOLD');
      } else if (notBoldValues.indexOf(fontWeight) >= 0) {
        style.remove('BOLD');
      }

      if (fontStyle === 'italic') {
        style.add('ITALIC');
      } else if (fontStyle === 'normal') {
        style.remove('ITALIC');
      }

      if (textDecoration === 'underline') {
        style.add('UNDERLINE');
      }
      if (textDecoration === 'line-through') {
        style.add('STRIKETHROUGH');
      }
      if (textDecoration === 'none') {
        style.remove('UNDERLINE');
        style.remove('STRIKETHROUGH');
      }
    }).toOrderedSet();
  }

  return currentStyle;
};

/**
 * Concatenate chunk B onto chunk A.
 *
 * May mutate A and B in place (trimming a delimiter or a leading whitespace
 * character) before building the combined chunk, and may return A itself.
 */
var joinChunks = function joinChunks(A, B, experimentalHasNestedBlocks) {
  // Sometimes two blocks will touch in the DOM and we need to strip the
  // extra delimiter to preserve niceness.
  var lastInA = A.text.slice(-1);
  var firstInB = B.text.slice(0, 1);

  if (lastInA === '\r' && firstInB === '\r' && !experimentalHasNestedBlocks) {
    A.text = A.text.slice(0, -1);
    A.inlines.pop();
    A.entities.pop();
    A.blocks.pop();
  }

  // Kill whitespace after blocks
  if (lastInA === '\r') {
    if (B.text === SPACE || B.text === '\n') {
      return A;
    } else if (firstInB === SPACE || firstInB === '\n') {
      B.text = B.text.slice(1);
      B.inlines.shift();
      B.entities.shift();
    }
  }

  return {
    text: A.text + B.text,
    inlines: A.inlines.concat(B.inlines),
    entities: A.entities.concat(B.entities),
    blocks: A.blocks.concat(B.blocks)
  };
};

/**
 * Check to see if we have anything like <p> <blockquote> <h1>... to create
 * block tags from. If we do, we can use those and ignore <div> tags. If we
 * don't, we can treat <div> tags as meaningful (unstyled) blocks.
 */
var containsSemanticBlockMarkup = function containsSemanticBlockMarkup(html, blockTags) {
  return blockTags.some(function (tag) {
    return html.indexOf('<' + tag) !== -1;
  });
};

/**
 * Return true when the anchor's protocol is http:, https: or mailto:.
 * Fails the invariant when `link` is not an HTMLAnchorElement.
 */
var hasValidLinkText = function hasValidLinkText(link) {
  if (!(link instanceof HTMLAnchorElement)) {
    if (process.env.NODE_ENV !== 'production') {
      invariant(false, 'Link must be an HTMLAnchorElement.');
    } else {
      invariant(false);
    }
  }
  var protocol = link.protocol;
  return protocol === 'http:' || protocol === 'https:' || protocol === 'mailto:';
};

/**
 * Build a one-character chunk holding a single space, tagged with `inEntity`
 * when one is given.
 */
var getWhitespaceChunk = function getWhitespaceChunk(inEntity) {
  var entities = new Array(1);
  if (inEntity) {
    entities[0] = inEntity;
  }
  return getEmptyChunk({
    text: SPACE,
    inlines: [OrderedSet()],
    entities: entities
  });
};

/**
 * Build a one-character chunk holding a soft newline ('\n').
 */
var getSoftNewlineChunk = function getSoftNewlineChunk() {
  return getEmptyChunk({
    text: '\n',
    inlines: [OrderedSet()],
    entities: new Array(1)
  });
};

/**
 * Build a block descriptor from EMPTY_BLOCK with `props` applied on top.
 */
var getChunkedBlock = function getChunkedBlock() {
  var props = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};
  return _extends({}, EMPTY_BLOCK, props);
};

/**
 * Build the chunk that marks the start of a new block: a single '\r'
 * delimiter character plus one block descriptor with a freshly generated key.
 * `depth` is clamped to the range [0, MAX_DEPTH].
 */
var getBlockDividerChunk = function getBlockDividerChunk(block, depth) {
  var parentKey = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : null;

  return {
    text: '\r',
    inlines: [OrderedSet()],
    entities: new Array(1),
    blocks: [getChunkedBlock({
      parent: parentKey,
      key: generateRandomKey(),
      type: block,
      depth: Math.max(0, Math.min(MAX_DEPTH, depth))
    })]
  };
};

/**
 * If we're pasting from one DraftEditor to another we can check to see if
 * existing list item depth classes are being used and preserve this style
 */
var getListItemDepth = function getListItemDepth(node) {
  var depth = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;

  // Every known class is checked; when several are present the last one in
  // key order determines the depth.
  Object.keys(knownListItemDepthClasses).forEach(function (depthClass) {
    if (node.classList.contains(depthClass)) {
      depth = knownListItemDepthClasses[depthClass];
    }
  });
  return depth;
};

/**
 * Recursively convert a DOM node (and its subtree) into a chunk.
 *
 * @param entityMap       passed through unchanged and returned alongside the chunk
 * @param node            DOM node to convert
 * @param inlineStyle     style set inherited from ancestors
 * @param lastList        tag name of the nearest enclosing list ('ul' / 'ol'), if any
 * @param inBlock         tag name of the block currently being filled, if any
 * @param blockTags       tag names that are allowed to open a block
 * @param depth           current list nesting depth
 * @param blockRenderMap  block render map used to resolve block types
 * @param inEntity        entity key applied to text produced under this node
 * @param parentKey       key of the parent block (tree data mode only)
 * @returns {{chunk: Object, entityMap: *}}
 *
 * Reads and writes the module-level `lastBlock`.
 */
var genFragment = function genFragment(entityMap, node, inlineStyle, lastList, inBlock, blockTags, depth, blockRenderMap, inEntity, parentKey) {
  var lastLastBlock = lastBlock;
  var nodeName = node.nodeName.toLowerCase();
  var newEntityMap = entityMap;
  var nextBlockType = 'unstyled';
  var newBlock = false;
  var inBlockType = inBlock && getBlockTypeForTag(inBlock, lastList, blockRenderMap);
  var chunk = getEmptyChunk();
  var newChunk = null;
  var blockKey = void 0;

  // Base Case
  if (nodeName === '#text') {
    var _text = node.textContent;
    var nodeTextContent = _text.trim();

    // We should not create blocks for leading spaces that are
    // existing around ol/ul and their children list items
    if (lastList && nodeTextContent === '' && node.parentElement) {
      var parentNodeName = node.parentElement.nodeName.toLowerCase();
      if (parentNodeName === 'ol' || parentNodeName === 'ul') {
        return { chunk: getEmptyChunk(), entityMap: entityMap };
      }
    }

    if (nodeTextContent === '' && inBlock !== 'pre') {
      return { chunk: getWhitespaceChunk(inEntity), entityMap: entityMap };
    }

    if (inBlock !== 'pre') {
      // Can't use empty string because MSWord
      _text = _text.replace(REGEX_LF, SPACE);
    }

    // save the last block so we can use it later
    lastBlock = nodeName;

    return {
      chunk: {
        text: _text,
        inlines: Array(_text.length).fill(inlineStyle),
        entities: Array(_text.length).fill(inEntity),
        blocks: []
      },
      entityMap: entityMap
    };
  }

  // save the last block so we can use it later
  lastBlock = nodeName;

  // BR tags
  if (nodeName === 'br') {
    if (lastLastBlock === 'br' && (!inBlock || inBlockType === 'unstyled')) {
      return {
        chunk: getBlockDividerChunk('unstyled', depth, parentKey),
        entityMap: entityMap
      };
    }
    return { chunk: getSoftNewlineChunk(), entityMap: entityMap };
  }

  // IMG tags
  if (nodeName === 'img' && node instanceof HTMLImageElement && node.attributes.getNamedItem('src') && node.attributes.getNamedItem('src').value) {
    var image = node;
    var imageConfig = {};

    imgAttr.forEach(function (attr) {
      var imageAttribute = image.getAttribute(attr);
      if (imageAttribute) {
        imageConfig[attr] = imageAttribute;
      }
    });

    // Forcing this node to have children because otherwise no entity will be
    // created for this node.
    // The child text node cannot just have a space or return as content -
    // we strip those out.
    // See https://github.com/facebook/draft-js/issues/231 for some context.
    node.textContent = '\uD83D\uDCF7';

    // TODO: update this when we remove DraftEntity entirely
    inEntity = DraftEntity.__create('IMAGE', 'MUTABLE', imageConfig);
  }

  // Inline tags
  inlineStyle = processInlineTag(nodeName, node, inlineStyle);

  // Handle lists
  if (nodeName === 'ul' || nodeName === 'ol') {
    if (lastList) {
      depth += 1;
    }
    lastList = nodeName;
  }

  if (!experimentalTreeDataSupport && nodeName === 'li' && node instanceof HTMLElement) {
    depth = getListItemDepth(node, depth);
  }

  var blockType = getBlockTypeForTag(nodeName, lastList, blockRenderMap);
  var inListBlock = lastList && inBlock === 'li' && nodeName === 'li';
  var inBlockOrHasNestedBlocks = (!inBlock || experimentalTreeDataSupport) && blockTags.indexOf(nodeName) !== -1;

  // Block Tags
  if (inListBlock || inBlockOrHasNestedBlocks) {
    chunk = getBlockDividerChunk(blockType, depth, parentKey);
    blockKey = chunk.blocks[0].key;
    inBlock = nodeName;
    newBlock = !experimentalTreeDataSupport;
  }

  // this is required so that we can handle 'ul' and 'ol'
  if (inListBlock) {
    nextBlockType = lastList === 'ul' ? 'unordered-list-item' : 'ordered-list-item';
  }

  // Recurse through children
  var child = node.firstChild;
  if (child != null) {
    // From here on `nodeName` tracks the child currently being processed.
    nodeName = child.nodeName.toLowerCase();
  }

  var entityId = null;

  while (child) {
    if (child instanceof HTMLAnchorElement && child.href && hasValidLinkText(child)) {
      var anchor = child;
      var linkConfig = {};

      // The callback runs synchronously, so capturing the loop-scoped vars
      // above is safe.
      anchorAttr.forEach(function (attr) {
        var anchorAttribute = anchor.getAttribute(attr);
        if (anchorAttribute) {
          linkConfig[attr] = anchorAttribute;
        }
      });

      linkConfig.url = new URI(anchor.href).toString();
      // TODO: update this when we remove DraftEntity completely
      entityId = DraftEntity.__create('LINK', 'MUTABLE', linkConfig);
    } else {
      entityId = undefined;
    }

    var childFragment = genFragment(newEntityMap, child, inlineStyle, lastList, inBlock, blockTags, depth, blockRenderMap, entityId || inEntity, experimentalTreeDataSupport ? blockKey : null);

    newChunk = childFragment.chunk;
    newEntityMap = childFragment.entityMap;

    chunk = joinChunks(chunk, newChunk, experimentalTreeDataSupport);
    var sibling = child.nextSibling;

    // Put in a newline to break up blocks inside blocks
    if (!parentKey && sibling && blockTags.indexOf(nodeName) >= 0 && inBlock) {
      chunk = joinChunks(chunk, getSoftNewlineChunk());
    }
    if (sibling) {
      nodeName = sibling.nodeName.toLowerCase();
    }
    child = sibling;
  }

  if (newBlock) {
    chunk = joinChunks(chunk, getBlockDividerChunk(nextBlockType, depth, parentKey));
  }

  return { chunk: chunk, entityMap: newEntityMap };
};

/**
 * Normalise an HTML string, build a DOM from it with `DOMBuilder`, and convert
 * that DOM into a single chunk.
 *
 * Returns null when `DOMBuilder` yields nothing; otherwise
 * `{chunk, entityMap}`. Resets the module-level `lastBlock`.
 */
var getChunkForHTML = function getChunkForHTML(html, DOMBuilder, blockRenderMap, entityMap) {
  html = html.trim().replace(REGEX_CR, '').replace(REGEX_NBSP, SPACE).replace(REGEX_CARRIAGE, '').replace(REGEX_ZWS, '');

  var supportedBlockTags = getBlockMapSupportedTags(blockRenderMap);

  var safeBody = DOMBuilder(html);
  if (!safeBody) {
    return null;
  }
  lastBlock = null;

  // Sometimes we aren't dealing with content that contains nice semantic
  // tags. In this case, use divs to separate everything out into paragraphs
  // and hope for the best.
  var workingBlocks = containsSemanticBlockMarkup(html, supportedBlockTags) ? supportedBlockTags : ['div'];

  // Start with -1 block depth to offset the fact that we are passing in a fake
  // UL block to start with.
  var fragment = genFragment(entityMap, safeBody, OrderedSet(), 'ul', null, workingBlocks, -1, blockRenderMap);

  var chunk = fragment.chunk;
  var newEntityMap = fragment.entityMap;

  // join with previous block to prevent weirdness on paste
  if (chunk.text.indexOf('\r') === 0) {
    chunk = {
      text: chunk.text.slice(1),
      inlines: chunk.inlines.slice(1),
      entities: chunk.entities.slice(1),
      blocks: chunk.blocks
    };
  }

  // Kill block delimiter at the end
  if (chunk.text.slice(-1) === '\r') {
    chunk.text = chunk.text.slice(0, -1);
    chunk.inlines = chunk.inlines.slice(0, -1);
    chunk.entities = chunk.entities.slice(0, -1);
    chunk.blocks.pop();
  }

  // If we saw no block tags, put an unstyled one in
  if (chunk.blocks.length === 0) {
    chunk.blocks.push({ type: 'unstyled', depth: 0 });
  }

  // Sometimes we start with text that isn't in a block, which is then
  // followed by blocks. Need to fix up the blocks to add in
  // an unstyled block for this content
  if (chunk.text.split('\r').length === chunk.blocks.length + 1) {
    chunk.blocks.unshift({ type: 'unstyled', depth: 0 });
  }

  return { chunk: chunk, entityMap: newEntityMap };
};

/**
 * Turn a chunk into an array of block records: ContentBlockNode when tree
 * data support is enabled, ContentBlock otherwise.
 *
 * The chunk text is split on '\r'; segment N is paired with `chunk.blocks[N]`.
 * Returns null when the chunk is missing, has empty text, or has no `blocks`
 * array.
 */
var convertChunkToContentBlocks = function convertChunkToContentBlocks(chunk) {
  if (!chunk || !chunk.text || !Array.isArray(chunk.blocks)) {
    return null;
  }

  var initialState = {
    // block key -> position of that block inside `contentBlocks`
    cacheRef: {},
    contentBlocks: []
  };

  var start = 0;

  var rawBlocks = chunk.blocks,
      rawInlines = chunk.inlines,
      rawEntities = chunk.entities;

  var BlockNodeRecord = experimentalTreeDataSupport ? ContentBlockNode : ContentBlock;

  return chunk.text.split('\r').reduce(function (acc, textBlock, index) {
    // Make absolutely certain that our text is acceptable.
    textBlock = sanitizeDraftText(textBlock);

    var block = rawBlocks[index];
    var end = start + textBlock.length;
    var inlines = rawInlines.slice(start, end);
    var entities = rawEntities.slice(start, end);
    var characterList = List(inlines.map(function (style, charIndex) {
      var data = { style: style, entity: null };
      if (entities[charIndex]) {
        data.entity = entities[charIndex];
      }
      return CharacterMetadata.create(data);
    }));
    // Skip over the '\r' delimiter.
    start = end + 1;

    var depth = block.depth,
        type = block.type,
        parent = block.parent;

    var key = block.key || generateRandomKey();
    var parentTextNodeKey = null; // will be used to store container text nodes

    // childrens add themselves to their parents since we are iterating in order
    if (parent) {
      var parentIndex = acc.cacheRef[parent];
      var parentRecord = acc.contentBlocks[parentIndex];

      // if parent has text we need to split it into a separate unstyled element
      if (parentRecord.getChildKeys().isEmpty() && parentRecord.getText()) {
        var parentCharacterList = parentRecord.getCharacterList();
        var parentText = parentRecord.getText();
        parentTextNodeKey = generateRandomKey();

        var textNode = new ContentBlockNode({
          key: parentTextNodeKey,
          text: parentText,
          characterList: parentCharacterList,
          parent: parent,
          nextSibling: key
        });

        acc.contentBlocks.push(textNode);

        parentRecord = parentRecord.withMutations(function (mutableParent) {
          mutableParent.set('characterList', List()).set('text', '').set('children', parentRecord.children.push(textNode.getKey()));
        });
      }

      acc.contentBlocks[parentIndex] = parentRecord.set('children', parentRecord.children.push(key));
    }

    var blockNode = new BlockNodeRecord({
      key: key,
      parent: parent,
      type: type,
      depth: depth,
      text: textBlock,
      characterList: characterList,
      prevSibling: parentTextNodeKey || (index === 0 || rawBlocks[index - 1].parent !== parent ? null : rawBlocks[index - 1].key),
      nextSibling: index === rawBlocks.length - 1 || rawBlocks[index + 1].parent !== parent ? null : rawBlocks[index + 1].key
    });

    // insert node
    acc.contentBlocks.push(blockNode);

    // cache ref for building links
    // Record the real position: extracted parent text nodes are also pushed
    // into `contentBlocks`, so it can differ from the raw block `index`.
    acc.cacheRef[blockNode.key] = acc.contentBlocks.length - 1;

    return acc;
  }, initialState).contentBlocks;
};

/**
 * Convert an HTML string into content blocks plus an entity map.
 *
 * @param html            HTML string to convert
 * @param DOMBuilder      optional; defaults to getSafeBodyFromHTML
 * @param blockRenderMap  optional; defaults to DefaultDraftBlockRenderMap
 * @returns null when the DOM builder produces nothing, otherwise
 *          `{contentBlocks, entityMap}` (`contentBlocks` may itself be null
 *          when the HTML yields no text)
 */
var convertFromHTMLtoContentBlocks = function convertFromHTMLtoContentBlocks(html) {
  var DOMBuilder = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : getSafeBodyFromHTML;
  var blockRenderMap = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : DefaultDraftBlockRenderMap;

  // Be ABSOLUTELY SURE that the dom builder you pass here won't execute
  // arbitrary code in whatever environment you're running this in. For an
  // example of how we try to do this in-browser, see getSafeBodyFromHTML.

  // TODO: replace DraftEntity with an OrderedMap here
  var chunkData = getChunkForHTML(html, DOMBuilder, blockRenderMap, DraftEntity);

  if (chunkData == null) {
    return null;
  }

  var chunk = chunkData.chunk,
      entityMap = chunkData.entityMap;

  var contentBlocks = convertChunkToContentBlocks(chunk);

  return {
    contentBlocks: contentBlocks,
    entityMap: entityMap
  };
};

module.exports = convertFromHTMLtoContentBlocks;