I recently needed to replace the non-standard extended characters that MS Word uses for quotes in over 80 pages. The octal notation available in PHP's double-quoted strings, and therefore in regular-expression patterns written as such strings, proved to be very helpful. To use octal, just escape the three-digit code of the byte with a backslash, like \200.
[code lang="php"]
#!/usr/bin/php -q
<?php
/**
 * Replaces the UTF-8 byte sequences of MS Word "smart" punctuation in the
 * "content" column with plain ASCII, and normalises CRLF line endings to LF.
 *
 * $result is expected to be a mysql result resource set up outside this
 * snippet; the query that produces it and the statement that stores
 * $escaped_text are not shown here.
 *
 * NOTE(review): mysql_result() and mysql_escape_string() belong to the legacy
 * mysql extension, which was removed in PHP 7. Port them to mysqli/PDO with
 * prepared statements together with the connection code.
 */

// Byte sequence => replacement. Every byte is written as an octal escape so
// the match does not depend on the encoding this script file is saved in.
// Order matters: str_replace() applies the pairs one after another, so the
// specific three-byte sequences must come before the shorter catch-alls.
$replacements = array(
    "\342\200\234" => "\"", // U+201C left double quotation mark
    "\342\200\235" => "\"", // U+201D right double quotation mark
    "\342\200\223" => "-",  // U+2013 en dash
    "\342\200\231" => "'",  // U+2019 right single quotation mark
    "\342\200\230" => "'",  // U+2018 left single quotation mark
    // Catch-alls kept from the original script.
    // NOTE(review): these also hit any other character whose UTF-8 encoding
    // starts with 0xE2 and leave its trailing byte(s) behind - confirm that
    // the data contains no such characters.
    "\342\200"     => "",
    "\342"         => "\"",
    // Normalise Windows line endings.
    "\r\n"         => "\n",
);
$search  = array_keys($replacements);
$replace = array_values($replacements);

// NOTE(review): $id is not used by the visible statements; presumably the
// per-page SELECT/UPDATE that use it were omitted from this snippet.
for ($id = 1; $id < 82; $id++) {
    $text = mysql_result($result, 0, "content");

    // All patterns are literal byte strings, so a plain string replacement
    // does the job without a regular-expression engine.
    $text = str_replace($search, $replace, $text);

    // Escape the fully cleaned text, including the CRLF normalisation.
    $escaped_text = mysql_escape_string($text);
}
?>
[/code]