|
java.lang.Object
Node
Inline
SpecialChar
FieldChar
com.aspose.words.FieldStart
- All Implemented Interfaces:
- java.lang.Iterable, java.lang.Cloneable
public class FieldStart extends FieldChar
Represents a start of a Word field in a document.
FieldStart is an inline-level node and is represented by the
ControlChar.FIELD_START_CHAR control character in the document. FieldStart can only be a child of Paragraph. A complete field in a Microsoft Word document is a complex structure consisting of
a field start character, field code, field separator character, field result
and field end character. Some fields only have field start, field code and field end. To easily insert a new field into a document, use the DocumentBuilder.insertField(java.lang.String, java.lang.String)
method. Example: Finds all hyperlinks in a Word document and changes their URL and display name.
import com.aspose.words.*;
import java.lang.Exception;
import java.text.MessageFormat;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/// <summary>
/// Sample that rewrites the hyperlinks of a Word document.
/// </summary>
public class ExReplaceHyperlinks extends ExBase
{
    // Replacement values applied to every non-local hyperlink.
    private final String NewUrl = "http://www.aspose.com";
    private final String NewName = "Aspose - The .NET & Java Component Publisher";

    /// <summary>
    /// Loads the sample document, points every non-local hyperlink at NewUrl with
    /// NewName as its display text, and saves the result under a new file name.
    /// </summary>
    public void ReplaceHyperlinks() throws Exception
    {
        // Specify your document name here.
        Document doc = new Document(getMyDir() + "ReplaceHyperlinks.doc");

        // A hyperlink is stored as a field, so collect every field start node
        // and pick out the ones that begin a HYPERLINK field.
        NodeList fieldStarts = doc.selectNodes("//FieldStart");

        int index = 0;
        while (index < fieldStarts.getCount())
        {
            FieldStart candidate = (FieldStart)fieldStarts.get(index);
            index++;

            // Skip fields of any other type.
            if (candidate.getFieldType() != FieldType.FIELD_HYPERLINK)
                continue;

            // The "facade" class hides the individual nodes that make up the field.
            Hyperlink link = new Hyperlink(candidate);

            // Local links (bookmarks inside the document) are left untouched.
            if (!link.getIsLocal())
            {
                link.setTarget(NewUrl);
                link.setName(NewName);
            }
        }

        doc.save(getMyDir() + "ReplaceHyperlinks Out.doc");
    }
}
/// <summary>
/// This "facade" class makes it easier to work with a hyperlink field in a Word document.
///
/// A hyperlink is represented by a HYPERLINK field in a Word document. A field in Aspose.Words
/// consists of several nodes and it might be difficult to work with all those nodes directly.
/// Note this is a simple implementation: when the target or name is set, the field code or
/// field result is collapsed into its first Run, which therefore must exist.
///
/// [FieldStart][Run - field code][FieldSeparator][Run - field result][FieldEnd]
///
/// The field code contains a string in one of these formats:
/// HYPERLINK "url"
/// HYPERLINK \l "bookmark name"
/// Additional switches after the quoted target are tolerated when parsing.
///
/// The field result contains text that is displayed to the user.
/// </summary>
class Hyperlink
{
    /// <summary>
    /// Wraps the hyperlink field that begins at the given field start node and parses its field code.
    /// Throws IllegalArgumentException if fieldStart is null or is not a HYPERLINK field start,
    /// and IllegalStateException if no field separator is found among the following siblings.
    /// </summary>
    Hyperlink(FieldStart fieldStart) throws Exception
    {
        if (fieldStart == null)
            throw new IllegalArgumentException("Argument 'fieldStart' is null");
        if (fieldStart.getFieldType() != FieldType.FIELD_HYPERLINK)
            throw new IllegalArgumentException("Field start type must be FieldHyperlink.");

        mFieldStart = fieldStart;

        // Find field separator node.
        mFieldSeparator = FindNextSibling(mFieldStart, NodeType.FIELD_SEPARATOR);
        if (mFieldSeparator == null)
            throw new IllegalStateException("Cannot find field separator.");

        // Find field end node. Normally field end will always be found, but in the example document
        // there happens to be a paragraph break included in the hyperlink and this puts the field end
        // in the next paragraph. It will be much more complicated to handle fields which span several
        // paragraphs correctly, but in this case allowing field end to be null is enough for our purposes.
        mFieldEnd = FindNextSibling(mFieldSeparator, NodeType.FIELD_END);

        // Field code looks something like [ HYPERLINK "http://www.myurl.com" ], but it can consist of several runs.
        String fieldCode = GetTextSameParent(mFieldStart.getNextSibling(), mFieldSeparator);

        // lookingAt() anchors at the start but does not require the whole field code to match,
        // so switches that follow the target do not prevent the code from being parsed.
        Matcher match = gRegex.matcher(fieldCode.trim());
        if (match.lookingAt())
        {
            mIsLocal = match.group(1) != null; // The link is local if \l is present in the field code.
            mTarget = match.group(2);
        }
    }

    /// <summary>
    /// Gets the display name of the hyperlink: the text of the nodes between the field
    /// separator and the field end (or the end of the paragraph if the field end is elsewhere).
    /// </summary>
    public String getName() throws Exception
    {
        // Start after the separator so that the separator node's own text is not part of the name.
        return GetTextSameParent(mFieldSeparator.getNextSibling(), mFieldEnd);
    }

    /// <summary>
    /// Sets the display name of the hyperlink.
    /// Throws IllegalStateException if the node after the field separator is not a Run.
    /// </summary>
    public void setName(String value) throws Exception
    {
        // Hyperlink display name is stored in the field result which is a Run
        // node between field separator and field end.
        Run fieldResult = RequireRun(mFieldSeparator.getNextSibling(), "field result");
        fieldResult.setText(value);

        // But sometimes the field result can consist of more than one run, delete these runs.
        RemoveSameParent(fieldResult.getNextSibling(), mFieldEnd);
    }

    /// <summary>
    /// Gets the target url or bookmark name of the hyperlink.
    /// Null if the field code could not be parsed and no target has been set.
    /// </summary>
    public String getTarget()
    {
        return mTarget;
    }

    /// <summary>
    /// Sets the target url or bookmark name of the hyperlink, rewrites the field code
    /// and returns the new target.
    /// </summary>
    public String setTarget(String value) throws Exception
    {
        mTarget = value;
        UpdateFieldCode();
        return mTarget;
    }

    /// <summary>
    /// True if the hyperlink's target is a bookmark inside the document. False if the hyperlink is a url.
    /// </summary>
    public boolean getIsLocal()
    {
        return mIsLocal;
    }

    /// <summary>
    /// Sets whether the target is a bookmark inside the document, rewrites the field code
    /// and returns the new value.
    /// </summary>
    public boolean setIsLocal(boolean value) throws Exception
    {
        mIsLocal = value;
        UpdateFieldCode();
        return mIsLocal;
    }

    /// <summary>
    /// Rewrites the field code from the current target and local flag.
    /// Throws IllegalStateException if the node after the field start is not a Run.
    /// </summary>
    private void UpdateFieldCode() throws Exception
    {
        // Field code is stored in a Run node between field start and field separator.
        Run fieldCode = RequireRun(mFieldStart.getNextSibling(), "field code");
        fieldCode.setText(MessageFormat.format("HYPERLINK {0}\"{1}\"", ((mIsLocal) ? "\\l " : ""), mTarget));

        // But sometimes the field code can consist of more than one run, delete these runs.
        RemoveSameParent(fieldCode.getNextSibling(), mFieldSeparator);
    }

    /// <summary>
    /// Returns the node as a Run, or fails with a descriptive message instead of a bare
    /// ClassCastException/NullPointerException when the field does not have the expected shape.
    /// </summary>
    private static Run RequireRun(Node node, String role)
    {
        if (!(node instanceof Run))
            throw new IllegalStateException("Expected a Run node for the " + role + ".");
        return (Run)node;
    }

    /// <summary>
    /// Goes through siblings starting from the start node (inclusive) until it finds
    /// a node of the specified type. Returns null if there is none.
    /// </summary>
    private static Node FindNextSibling(Node startNode, int nodeType)
    {
        for (Node node = startNode; node != null; node = node.getNextSibling())
        {
            if (node.getNodeType() == nodeType)
                return node;
        }
        return null;
    }

    /// <summary>
    /// Retrieves text from start up to but not including the end node.
    /// A null end node means "up to the last sibling".
    /// </summary>
    private static String GetTextSameParent(Node startNode, Node endNode) throws Exception
    {
        if ((endNode != null) && (startNode.getParentNode() != endNode.getParentNode()))
            throw new IllegalArgumentException("Start and end nodes are expected to have the same parent.");

        StringBuilder builder = new StringBuilder();
        for (Node child = startNode; child != endNode; child = child.getNextSibling())
            builder.append(child.getText());
        return builder.toString();
    }

    /// <summary>
    /// Removes nodes from start up to but not including the end node.
    /// Start and end are assumed to have the same parent.
    /// A null end node means "up to the last sibling".
    /// </summary>
    private static void RemoveSameParent(Node startNode, Node endNode) throws Exception
    {
        if ((endNode != null) && (startNode.getParentNode() != endNode.getParentNode()))
            throw new IllegalArgumentException("Start and end nodes are expected to have the same parent.");

        Node curChild = startNode;
        while (curChild != endNode)
        {
            // Remember the successor before detaching the current node.
            Node nextChild = curChild.getNextSibling();
            curChild.remove();
            curChild = nextChild;
        }
    }

    private final Node mFieldStart;
    private final Node mFieldSeparator;
    private final Node mFieldEnd; // May be null when the field end is not a sibling of the separator.
    private String mTarget;
    private boolean mIsLocal;

    /// <summary>
    /// Matches the start of a hyperlink field code. Group 1 is the optional \l switch,
    /// group 2 is the quoted target (url or bookmark name) without its quotes.
    /// </summary>
    private static final Pattern gRegex = Pattern.compile(
        "\\S+" +             // one or more non spaces: HYPERLINK or other word in other languages
        "\\s+" +             // one or more spaces
        "(?:\"\"\\s+)?" +    // non capturing optional "" and one or more spaces, found in one of the customers files.
        "(\\\\l\\s+)?" +     // optional \l flag followed by one or more spaces
        "\"" +               // opening double quote
        "([^\"]+)" +         // one or more chars except double quote (hyperlink target)
        "\""                 // closing double quote
    );
}
|