Why Clydesdale?
Expertise
Strategic Partners
Blog
Contact
Home
Clydesdale Software Logo

Clydesdale Software, Inc.

355 South Teller St.

Suite 200

Lakewood, CO 80226

info@ClydesdaleSoftware.com


Follow Us: Twitter RSS



Go Back

Simple Movable Type Parser

Recently I moved my blog from one provider to Sitefinity.  My old provider offered an export to the MovableType format; once I had that, I needed to parse the file and get it into some objects that I could work with.

 

First off I defined the classes to hold the MovableType data.

  122 public class OldPost

  123 {

  124     private List<OldPostComment> comments = new List<OldPostComment>();

  125 

  126     #region Properties

  127 

  128     public bool AllowComments { get; set; }

  129     public string Author { get; set; }

  130 

  131     public string Body { get; set; }

  132     public DateTime Date { get; set; }

  133 

  134     public string Status { get; set; }

  135     public string Title { get; set; }

  136 

  137     public List<OldPostComment> Comments

  138     {

  139         get { return comments; }

  140     }

  141 

  142     #endregion

  143 }

  144 

  145 public class OldPostComment

  146 {

  147     public string Comment { get; set; }

  148     public string Author { get; set; }

  149     public string Email { get; set; }

  150     public DateTime Date { get; set; }

  151 }

 

Next I needed to parse the file and fill up these objects.  The parser leverages regular expressions to get through the data fast.

 

  55 public static class MovableTypeParser

   56 {

   57     #region Fields

   58 

   59     private const string Movabeltype_BlogPost_Regex = @"AUTHOR:(.|\s)*?(?=(--------))";

   60 

   61     private const string Movabletype_AllowComments_Regex = "(?<=ALLOW COMMENTS: ).*";

   62     private const string Movabletype_Author_Regex = "(?<=AUTHOR: ).*";

   63     private const string Movabletype_Email_Regex = "(?<=EMAIL: ).*";

   64     private const string Movabletype_Body_Regex = @"(?<=BODY:)(.|\s)*?(?=(-----))";

   65     private const string Movabletype_Comment_Regex = @"(?<=COMMENT:)(.|\s)*?(?=(-----))";

   66     private const string Movabletype_Date_Regex = "(?<=DATE: ).*";

   67     private const string Movabletype_Status_Regex = "(?<=STATUS: ).*";

   68     private const string Movabletype_Title_Regex = "(?<=TITLE: ).*";

   69 

   70     #endregion

   71 

   72     #region Static Methods

   73 

   74     public static List<OldPost> ParseMovableTypeContent(string fileContents)

   75     {

   76         var oldPosts = new List<OldPost>();

   77 

   78         try

   79         {

   80             //match each blog post

   81             var postMatches = Regex.Matches(fileContents, Movabeltype_BlogPost_Regex);

   82             foreach (Match postMatch in postMatches)

   83             {

   84                 var post = new OldPost();

   85                 post.Author = Regex.Match(postMatch.Value, Movabletype_Author_Regex).Value;

   86                 post.Title = Regex.Match(postMatch.Value, Movabletype_Title_Regex).Value;

   87                 post.Status = Regex.Match(postMatch.Value, Movabletype_Status_Regex).Value;

   88                 var stringAllowComments = Regex.Match(postMatch.Value, Movabletype_AllowComments_Regex).Value;

   89                 post.AllowComments = stringAllowComments == "1" ? true : false;

   90                 var stringDate = Regex.Match(postMatch.Value, Movabletype_Date_Regex).Value;

   91                 post.Date = DateTime.Parse(stringDate);

   92                 post.Body = Regex.Match(postMatch.Value, Movabletype_Body_Regex).Value;

   93 

   94                 var commentMatches = Regex.Matches(postMatch.Value, Movabletype_Comment_Regex);

   95                 foreach (Match commentMatch in commentMatches)

   96                 {

   97                     var comment = new OldPostComment();

   98                     comment.Author = Regex.Match(commentMatch.Value, Movabletype_Author_Regex).Value;

   99                     var commentStringDate = Regex.Match(commentMatch.Value, Movabletype_Date_Regex).Value;

  100                     comment.Date = DateTime.Parse(commentStringDate);

  101                     var emailMatch = Regex.Match(commentMatch.Value, Movabletype_Email_Regex);

  102                     comment.Email = emailMatch.Success == true ? emailMatch.Value : string.Empty;

  103                     comment.Comment = Regex.Match(commentMatch.Value, Movabletype_Comment_Regex).Value;

  104 

  105                     post.Comments.Add(comment);

  106                 }

  107 

  108                 oldPosts.Add(post);

  109             }

  110         }

  111         catch (Exception ex)

  112         {

  113             HttpContext.Current.Response.Write(ex.ToString());

  114         }

  115 

  116         return oldPosts;

  117     }

  118 

  119     #endregion

  120 }

 

It is that simple: just read the contents of your MovableType file, then call MovableTypeParser.ParseMovableTypeContent, passing in the file contents.  One thing to note is that I had to change the encoding of the file to UTF-8 for everything to be parsed correctly.

 

In my next post I will show how I used this to import the blog posts into Sitefinity.

Facebook Twitter DZone It! Digg It! StumbleUpon Technorati Del.icio.us NewsVine Reddit Blinklist Add diigo bookmark